/* data.c: Routines for C-- data manipulation. */

#define _POSIX_SOURCE

#include <stdlib.h>
#include <ctype.h>
#include "x.tab.h"
#include "data.h"
#include "object.h"
#include "ident.h"
#include "util.h"
#include "cache.h"
#include "cmstring.h"
#include "memory.h"
#include "token.h"
#include "log.h"
#include "lookup.h"

static int sublist_cmp(Sublist *s1, Sublist *s2);
static String *data_add_list_literal_to_str(String *str, Data *data, int len);

/* Effects: Returns 0 if and only if d1 and d2 are equal according to C--
 *	    conventions.  If d1 and d2 are of the same type and are integers or
 *	    strings, returns greater than 0 if d1 is greater than d2 according
 *	    to C-- conventions, and less than 0 if d1 is less than d2.  For
 *	    every other case (including values of different types) only the
 *	    zero/nonzero distinction of the result is meaningful. */
int data_cmp(Data *d1, Data *d2)
{
    int l1, l2, val;

    /* Values of different types are never equal. */
    if (d1->type != d2->type)
        return 1;

    switch (d1->type) {

      case INTEGER:
        /* Compare instead of subtracting: the difference can overflow, and
         * if val is wider than int a nonzero difference could be narrowed
         * to zero, making unequal integers look equal. */
        return (d1->u.val > d2->u.val) - (d1->u.val < d2->u.val);

      case STRING:
        l1 = d1->u.substr.span;
        l2 = d2->u.substr.span;
        val = strnccmp(data_sptr(d1), data_sptr(d2), (l1 < l2) ? l1 : l2);
        if (val)
            return val;
        /* The common prefix matches, so the longer string is the greater
         * one.  Decide on the lengths themselves rather than on the next
         * character of the longer string, which could be zero or (where
         * char is signed) negative. */
        return (l1 > l2) - (l1 < l2);

      case DBREF:
        return (d1->u.dbref != d2->u.dbref);

      case LIST:
        return sublist_cmp(&d1->u.sublist, &d2->u.sublist);

      case SYMBOL:
        return (d1->u.symbol != d2->u.symbol);

      case ERROR:
        return (d1->u.error != d2->u.error);

      case FROB:
        /* Frobs are equal only if the class, the representation type and
         * the representation itself all match. */
        if (d1->u.frob.class != d2->u.frob.class)
            return 1;
        else if (d1->u.frob.rep_type != d2->u.frob.rep_type)
            return 1;
        else if (d1->u.frob.rep_type == LIST)
            return list_cmp(d1->u.frob.rep.list, d2->u.frob.rep.list);
        else
            return dict_cmp(d1->u.frob.rep.dict, d2->u.frob.rep.dict);

      case DICT:
        return dict_cmp(d1->u.dict, d2->u.dict);

      case BUFFER:
        /* The same buffer object is trivially equal to itself; buffers of
         * different lengths can never be equal. */
        if (d1->u.buffer == d2->u.buffer)
            return 0;
        if (d1->u.buffer->len != d2->u.buffer->len)
            return 1;
        return MEMCMP(d1->u.buffer->s, d2->u.buffer->s, d1->u.buffer->len);

      default:
        return 1;
    }
}

/* Effects: Returns 1 if data counts as true under C-- conventions and 0
 *	    otherwise.  Integers are true when nonzero; strings, lists,
 *	    dictionaries and buffers are true when non-empty; dbrefs, symbols
 *	    and frobs are always true; errors and unrecognized types are
 *	    always false. */
int data_true(Data *data)
{
    int truth;

    switch (data->type) {

      case INTEGER:
        truth = (data->u.val != 0);
        break;

      case STRING:
        truth = (data->u.substr.span != 0);
        break;

      case LIST:
        truth = (data->u.sublist.span != 0);
        break;

      case DICT:
        truth = (data->u.dict->keys->len != 0);
        break;

      case BUFFER:
        truth = (data->u.buffer->len != 0);
        break;

      /* These types have no "empty" value; they are unconditionally true. */
      case DBREF:
      case SYMBOL:
      case FROB:
        truth = 1;
        break;

      /* Errors, and anything we do not recognize, are false. */
      case ERROR:
      default:
        truth = 0;
        break;
    }

    return truth;
}

/* Effects: Returns a hash value for d.  Integers and dbrefs hash to their
 *	    own value, strings are hashed with hash_case() over the substring,
 *	    and symbols and errors with hash() over the identifier name.
 *	    Lists, dictionaries and frobs hash only their last element or
 *	    value (frobs add the class number); empty lists, dictionaries and
 *	    buffers hash to the fixed values 100, 200 and 300.  Non-empty
 *	    buffers hash to the sum of their first and last bytes. */
unsigned long data_hash(Data *d)
{
    unsigned long h;
    List *vals;
    Sublist *sl;

    switch (d->type) {

      case INTEGER:
        h = d->u.val;
        break;

      case STRING:
        h = hash_case(d->u.substr.str->s + d->u.substr.start,
                      d->u.substr.span);
        break;

      case DBREF:
        h = d->u.dbref;
        break;

      case LIST:
        sl = &d->u.sublist;
        if (!sl->span)
            h = 100;
        else
            h = data_hash(&sl->list->el[sl->start + sl->span - 1]);
        break;

      case SYMBOL:
        h = hash(ident_name(d->u.symbol));
        break;

      case ERROR:
        h = hash(ident_name(d->u.error));
        break;

      case FROB:
        /* Pick the element array of whichever representation is in use. */
        vals = (d->u.frob.rep_type == LIST) ? d->u.frob.rep.list
                                            : d->u.frob.rep.dict->values;
        h = d->u.frob.class;
        if (vals->len)
            h += data_hash(&vals->el[vals->len - 1]);
        break;

      case DICT:
        vals = d->u.dict->values;
        if (!vals->len)
            h = 200;
        else
            h = data_hash(&vals->el[vals->len - 1]);
        break;

      case BUFFER:
        if (!d->u.buffer->len)
            h = 300;
        else
            h = d->u.buffer->s[0] + d->u.buffer->s[d->u.buffer->len - 1];
        break;

      default:
        h = -1;
        break;
    }

    return h;
}

/* Modifies: dest.
 * Effects: Makes dest a copy of src.  Reference-counted values (strings,
 *	    lists, identifiers, dictionaries and buffers) are shared through
 *	    the matching *_dup() call rather than copied by hand.  For an
 *	    unrecognized type only the type field is copied. */
void data_dup(Data *dest, Data *src)
{
    int kind = src->type;

    dest->type = kind;

    switch (kind) {

      /* Plain values: a straight copy is enough. */
      case INTEGER:
        dest->u.val = src->u.val;
        break;

      case DBREF:
        dest->u.dbref = src->u.dbref;
        break;

      /* Identifiers. */
      case SYMBOL:
        dest->u.symbol = ident_dup(src->u.symbol);
        break;

      case ERROR:
        dest->u.error = ident_dup(src->u.error);
        break;

      /* Windows onto a shared string or list: copy the window bounds and
       * take a reference on the underlying object. */
      case STRING:
        dest->u.substr.str = string_dup(src->u.substr.str);
        dest->u.substr.start = src->u.substr.start;
        dest->u.substr.span = src->u.substr.span;
        break;

      case LIST:
        dest->u.sublist.list = list_dup(src->u.sublist.list);
        dest->u.sublist.start = src->u.sublist.start;
        dest->u.sublist.span = src->u.sublist.span;
        break;

      case FROB:
        dest->u.frob.class = src->u.frob.class;
        dest->u.frob.rep_type = src->u.frob.rep_type;
        if (src->u.frob.rep_type == LIST)
            dest->u.frob.rep.list = list_dup(src->u.frob.rep.list);
        else
            dest->u.frob.rep.dict = dict_dup(src->u.frob.rep.dict);
        break;

      case DICT:
        dest->u.dict = dict_dup(src->u.dict);
        break;

      case BUFFER:
        dest->u.buffer = buffer_dup(src->u.buffer);
        break;
    }
}

/* Modifies: The value referred to by data.
 * Effects: Gives up our use of the value held in data by calling the
 *	    matching *_discard() routine.  Integers, dbrefs and unrecognized
 *	    types have no case here, so nothing is done for them. */
void data_discard(Data *data)
{
    switch (data->type) {

      case SYMBOL:
        ident_discard(data->u.symbol);
        break;

      case ERROR:
        ident_discard(data->u.error);
        break;

      case STRING:
        string_discard(data->u.substr.str);
        break;

      case LIST:
        list_discard(data->u.sublist.list);
        break;

      case DICT:
        dict_discard(data->u.dict);
        break;

      case BUFFER:
        buffer_discard(data->u.buffer);
        break;

      case FROB:
        /* A frob is represented by either a list or a dictionary. */
        if (data->u.frob.rep_type != LIST)
            dict_discard(data->u.frob.rep.dict);
        else
            list_discard(data->u.frob.rep.list);
        break;

      default:
        break;
    }
}

/* Effects: Returns a string giving a plain (non-literal) rendering of data.
 *	    Integers become their decimal digits, strings are returned as
 *	    they are, dbrefs become "#" followed by the number, and symbols
 *	    and errors become their identifier name.  Lists, frobs,
 *	    dictionaries and buffers are rendered as the placeholders
 *	    "<list>", "<frob>", "<dict>" and "<buffer>".  Panics on an
 *	    unrecognized type. */
String *data_tostr(Data *data)
{
    Number_buf nbuf;
    char *text;
    String *prefix;

    switch (data->type) {

      case STRING:
        /* Reuse the underlying string when the substring covers all of
         * it; otherwise build a new string from just the substring. */
        if (data->u.substr.span != data->u.substr.str->len)
            return string_from_chars(data_sptr(data), data->u.substr.span);
        return string_dup(data->u.substr.str);

      case DBREF:
        text = long_to_ascii(data->u.dbref, nbuf);
        prefix = string_from_chars("#", 1);
        return string_add(prefix, text, strlen(text));

      /* Every remaining type is rendered from a single C string. */
      case INTEGER:
        text = long_to_ascii(data->u.val, nbuf);
        break;

      case SYMBOL:
        text = ident_name(data->u.symbol);
        break;

      case ERROR:
        text = ident_name(data->u.error);
        break;

      case LIST:
        text = "<list>";
        break;

      case FROB:
        text = "<frob>";
        break;

      case DICT:
        text = "<dict>";
        break;

      case BUFFER:
        text = "<buffer>";
        break;

      default:
        panic("Unrecognized data type.");
        return NULL;
    }

    return string_from_chars(text, strlen(text));
}

/* Effects: Returns a new string holding the literal (printed) form of data,
 *	    built by appending that form to an empty string. */
String *data_to_literal(Data *data)
{
    return data_add_literal_to_str(string_empty(0), data);
}

/* Modifies: str (mutator, claims reference count).
 * Effects: Appends the literal (printed) representation of data to str and
 *	    returns the resulting string.  Integers are printed in decimal,
 *	    dbrefs as #n, symbols as 'name, errors as ~name, frobs as
 *	    <#class, rep>, and buffers as `[n, n, ...].  Strings, and
 *	    identifier names that fail is_valid_ident(), are added with
 *	    string_add_unparsed().  An unrecognized type leaves str
 *	    unchanged. */
String *data_add_literal_to_str(String *str, Data *data)
{
    Number_buf nbuf;
    char *text;
    int idx;

    switch (data->type) {

      case INTEGER:
        text = long_to_ascii(data->u.val, nbuf);
        str = string_add(str, text, strlen(text));
        break;

      case STRING:
        str = string_add_unparsed(str, data_sptr(data),
                                  data->u.substr.span);
        break;

      case DBREF:
        text = long_to_ascii(data->u.dbref, nbuf);
        str = string_addc(str, '#');
        str = string_add(str, text, strlen(text));
        break;

      case LIST:
        str = data_add_list_literal_to_str(str, data_dptr(data),
                                           data->u.sublist.span);
        break;

      case SYMBOL:
        str = string_addc(str, '\'');
        text = ident_name(data->u.symbol);
        str = is_valid_ident(text)
            ? string_add(str, text, strlen(text))
            : string_add_unparsed(str, text, strlen(text));
        break;

      case ERROR:
        str = string_addc(str, '~');
        text = ident_name(data->u.error);
        str = is_valid_ident(text)
            ? string_add(str, text, strlen(text))
            : string_add_unparsed(str, text, strlen(text));
        break;

      case FROB:
        str = string_add(str, "<#", 2);
        text = long_to_ascii(data->u.frob.class, nbuf);
        str = string_add(str, text, strlen(text));
        str = string_add(str, ", ", 2);
        if (data->u.frob.rep_type != LIST) {
            str = dict_add_literal_to_str(str, data->u.frob.rep.dict);
        } else {
            str = data_add_list_literal_to_str(str,
                                               data->u.frob.rep.list->el,
                                               data->u.frob.rep.list->len);
        }
        str = string_addc(str, '>');
        break;

      case DICT:
        str = dict_add_literal_to_str(str, data->u.dict);
        break;

      case BUFFER:
        str = string_add(str, "`[", 2);
        for (idx = 0; idx < data->u.buffer->len; idx++) {
            /* Separate each byte from the one printed before it. */
            if (idx > 0)
                str = string_add(str, ", ", 2);
            text = long_to_ascii(data->u.buffer->s[idx], nbuf);
            str = string_add(str, text, strlen(text));
        }
        str = string_addc(str, ']');
        break;

      default:
        break;
    }

    return str;
}

/* Modifies: str.
 * Effects: Appends "[e1, e2, ...]" to str, where the e's are the literal
 *	    forms of the len data values starting at data.  Returns the
 *	    resulting string. */
static String *data_add_list_literal_to_str(String *str, Data *data, int len)
{
    int pos = 0;

    str = string_addc(str, '[');
    while (pos < len) {
        /* Every element but the first is preceded by a separator. */
        if (pos > 0)
            str = string_add(str, ", ", 2);
        str = data_add_literal_to_str(str, data + pos);
        pos++;
    }
    str = string_addc(str, ']');

    return str;
}

/* Effects: Skips whitespace, at most one comma, and any whitespace after
 *	    it, starting at s.  Returns a pointer to the first character not
 *	    skipped. */
static char *literal_skip_separator(char *s)
{
    /* The <ctype.h> functions need a value representable as unsigned char,
     * so plain char must be converted before the call. */
    while (isspace((unsigned char) *s))
        s++;
    if (*s == ',')
        s++;
    while (isspace((unsigned char) *s))
        s++;
    return s;
}

/* Modifies: d.
 * Effects: Parses one literal value from the text at s into d, and returns
 *	    a pointer just past the text that was examined.  Leading
 *	    whitespace is skipped.  The forms recognized are:
 *		digits		integer
 *		"..."		string (via string_parse())
 *		#n		dbref
 *		$name		dbref looked up by name, or -1 if not found
 *		[a, b, ...]	list
 *		#[...]		dictionary, built from a list of associations
 *		`[n, n, ...]	buffer, built from a list of integers
 *		'name		symbol
 *		~name		error
 *		<class, rep>	frob; class must be a dbref and rep a list
 *				or a dictionary
 *	    If the text cannot be parsed, d->type is set to -1 and d holds
 *	    nothing that needs discarding.  Otherwise d holds a value which
 *	    the caller must eventually hand to data_discard(). */
char *data_from_literal(Data *d, char *s)
{
    while (isspace((unsigned char) *s))
        s++;

    d->type = -1;

    if (isdigit((unsigned char) *s)) {
        d->type = INTEGER;
        /* strtol() has defined behavior when the digits do not fit in a
         * long, which atol() does not. */
        d->u.val = strtol(s, NULL, 10);
        while (isdigit((unsigned char) *++s))
            ;
        return s;
    } else if (*s == '"') {
        d->type = STRING;
        substr_set_to_full_string(&d->u.substr, string_parse(&s));
        return s;
    } else if (*s == '#' && (isdigit((unsigned char) s[1]) || s[1] == '-')) {
        d->type = DBREF;
        d->u.dbref = strtol(++s, NULL, 10);
        /* s is on the sign or first digit; step past it and then past
         * the remaining digits. */
        while (isdigit((unsigned char) *++s))
            ;
        return s;
    } else if (*s == '$') {
        long name, dbref;

        s++;
        name = parse_ident(&s);
        if (!lookup_retrieve_name(name, &dbref))
            dbref = -1;
        ident_discard(name);
        d->type = DBREF;
        d->u.dbref = dbref;
        return s;
    } else if (*s == '[') {
        List *list;

        list = list_new(0);
        s++;
        while (*s && *s != ']') {
            s = data_from_literal(d, s);
            if (d->type == -1) {
                /* An element failed to parse.  Fail the whole list rather
                 * than storing a value of invalid type in it. */
                list_discard(list);
                d->type = -1;
                return s;
            }
            list = list_add(list, d);
            data_discard(d);
            s = literal_skip_separator(s);
        }
        d->type = LIST;
        sublist_set_to_full_list(&d->u.sublist, list);
        /* Step over the closing bracket unless we ran out of text. */
        return (*s) ? s + 1 : s;
    } else if (*s == '#' && s[1] == '[') {
        Data assocs;

        /* Get associations. */
        s = data_from_literal(&assocs, s + 1);
        if (assocs.type != LIST) {
            if (assocs.type != -1)
                data_discard(&assocs);
            d->type = -1;
            return s;
        }

        /* Make a dict from the associations. */
        d->type = DICT;
        d->u.dict = dict_from_slices(assocs.u.sublist.list);
        data_discard(&assocs);
        if (!d->u.dict)
            d->type = -1;
        return s;
    } else if (*s == '`' && s[1] == '[') {
        Data nums;
        List *l;
        int i;
        Buffer *buf;

        /* Get the contents of the buffer. */
        s = data_from_literal(&nums, s + 1);
        if (nums.type != LIST) {
            if (nums.type != -1)
                data_discard(&nums);
            d->type = -1;
            return s;
        }

        /* Verify that the numbers are numbers. */
        l = nums.u.sublist.list;
        for (i = 0; i < l->len; i++) {
            if (l->el[i].type != INTEGER) {
                data_discard(&nums);
                d->type = -1;
                return s;
            }
        }

        /* Make a buffer from the numbers. */
        buf = buffer_new(l->len);
        for (i = 0; i < l->len; i++)
            buf->s[i] = l->el[i].u.val;

        data_discard(&nums);
        d->type = BUFFER;
        d->u.buffer = buf;
        return s;
    } else if (*s == '\'') {
        s++;
        d->type = SYMBOL;
        d->u.symbol = parse_ident(&s);
        return s;
    } else if (*s == '~') {
        s++;
        d->type = ERROR;
        /* Store through the error member, as the rest of the file reads
         * it, rather than through the symbol member. */
        d->u.error = parse_ident(&s);
        return s;
    } else if (*s == '<') {
        Data class, rep;

        s = data_from_literal(&class, s + 1);
        if (class.type == DBREF) {
            s = literal_skip_separator(s);
            s = data_from_literal(&rep, s);
            d->type = FROB;
            d->u.frob.class = class.u.dbref;
            d->u.frob.rep_type = rep.type;
            /* The frob takes over the representation parsed into rep, so
             * rep is not discarded on success. */
            if (rep.type == LIST) {
                d->u.frob.rep.list = rep.u.sublist.list;
            } else if (rep.type == DICT) {
                d->u.frob.rep.dict = rep.u.dict;
            } else {
                if (rep.type != -1)
                    data_discard(&rep);
                d->type = -1;
            }
        } else if (class.type != -1) {
            data_discard(&class);
        }
        /* Step over what should be the closing '>'. */
        return (*s) ? s + 1 : s;
    } else {
        /* Unrecognized text: skip one character so callers make progress. */
        return (*s) ? s + 1 : s;
    }
}

/* Effects: Returns an id (without updating reference count) for the name of
 *	    the type given by type. */
long data_type_id(int type)
{
    switch (type) {
      case INTEGER:	return integer_id;
      case STRING:	return string_id;
      case DBREF:	return dbref_id;
      case LIST:	return list_id;
      case SYMBOL:	return symbol_id;
      case ERROR:	return error_id;
      case FROB:	return frob_id;
      case DICT:	return dictionary_id;
      case BUFFER:	return buffer_id;
      default:		panic("Unrecognized data type."); return 0;
    }
}

/* Effects: Returns the position, counted from the start of sublist, of the
 *	    first element that data_cmp() reports equal to data, or -1 if
 *	    there is no such element. */
int sublist_search(Sublist *sublist, Data *data)
{
    Data *first = sublist->list->el + sublist->start;
    int pos = 0;

    while (pos < sublist->span) {
        if (!data_cmp(first + pos, data))
            return pos;
        pos++;
    }

    return -1;
}

/* Effects: Returns 0 if the sublists s1 and s2 hold equal elements in the
 *	    same order, or 1 if they do not. */
static int sublist_cmp(Sublist *s1, Sublist *s2)
{
    List *l1, *l2;
    int i1, i2, remaining;

    /* Sublists of different lengths cannot be equal. */
    if (s1->span != s2->span)
        return 1;

    l1 = s1->list;
    l2 = s2->list;

    /* The same window onto the same list needs no element comparison. */
    if (l1 == l2 && s1->start == s2->start)
        return 0;

    /* Walk both sublists in step, stopping at the first difference. */
    i1 = s1->start;
    i2 = s2->start;
    for (remaining = s1->span; remaining > 0; remaining--) {
        if (data_cmp(&l1->el[i1], &l2->el[i2]) != 0)
            return 1;
        i1++;
        i2++;
    }

    return 0;
}

/* Modifies: sublist and sublist->list.
 * Effects: Leaves sublist pointing at a list that ends where the sublist
 *	    ends, so that it can be added to or otherwise modified.  A list
 *	    with a single reference is trimmed in place; otherwise the
 *	    sublist's elements are copied into a new list and our reference
 *	    to the old one is dropped. */
void sublist_truncate(Sublist *sublist)
{
    List *old;

    if (sublist->list->refs != 1) {
        /* Someone else also refers to the list, so build a new list holding
         * only the elements inside the sublist. */
        old = sublist->list;
        sublist->list = list_from_data(old->el + sublist->start,
                                       sublist->span);
        list_discard(old);
        sublist->start = 0;
        return;
    }

    /* We hold the only reference: discard trailing elements one at a time,
     * from the end of the list, until it stops at the end of the sublist. */
    while (sublist->list->len > sublist->start + sublist->span) {
        sublist->list->len--;
        data_discard(&sublist->list->el[sublist->list->len]);
    }
}

/* Modifies: substr and substr->str.
 * Effects: Leaves substr pointing at a string that ends where the substring
 *	    ends, so that it can be added to.  A string with a single
 *	    reference is cut short in place (freeing its regexp, if any);
 *	    otherwise the substring's characters are copied into a new
 *	    string and our reference to the old one is dropped. */
void substring_truncate(Substring *substr)
{
    String *str = substr->str;

    if (str->refs != 1) {
        /* The string is shared, so copy out just the substring. */
        substr->str = string_from_chars(str->s + substr->start, substr->span);
        string_discard(str);
        substr->start = 0;
        return;
    }

    /* We hold the only reference.  Nothing to do if the substring already
     * runs to the end of the string. */
    if (substr->start + substr->span == str->len)
        return;

    /* Cutting the string short invalidates the regexp. */
    if (str->reg) {
        free(str->reg);
        str->reg = NULL;
    }
    str->len = substr->start + substr->span;
    str->s[str->len] = 0;
}

/* Requires: data contains a string value.
 * Effects: Returns a pointer to the first character of the substring held
 *	    in data.  The text is not necessarily null-terminated at the end
 *	    of the substring unless substring_truncate() has been used
 *	    first. */
char *data_sptr(Data *data)
{
    return &data->u.substr.str->s[data->u.substr.start];
}

/* Effects: Returns a pointer to the first data element of the sublist held
 *	    in data. */
Data *data_dptr(Data *data)
{
    return &data->u.sublist.list->el[data->u.sublist.start];
}

/* Modifies: target.
 * Effects: Makes target cover all of str, from position 0 for str->len
 *	    characters.  The reference count on str is not updated. */
void substr_set_to_full_string(Substring *target, String *str)
{
    target->start = 0;
    target->span = str->len;
    target->str = str;
}

/* Modifies: target.
 * Effects: Makes target cover all of list, from position 0 for list->len
 *	    elements.  The reference count on list is not updated. */
void sublist_set_to_full_list(Sublist *target, List *list)
{
    target->start = 0;
    target->span = list->len;
    target->list = list;
}