/* @@@HEAD@@@
// Convert text into tokens for yyparse().
*/

#include <ctype.h>
#include "config.h"
#include "defs.h"
#include "y.tab.h"
#include "token.h"
#include "memory.h"
#include "data.h"

/* Number of entries in the reserved_words table. */
#define NUM_RESERVED_WORDS (sizeof(reserved_words) / sizeof(*reserved_words))
/* Reduce a character to the range 0..127 so it can index starting[]. */
#define SUBSCRIPT(c) ((c) & 0x7f)

internal char *string_token(char *s, int len, int *token_len);
internal char *identifier_token(char *s, int len, int *token_len);

/* List of source lines being tokenized; installed by lex_start(). */
static list_t *code;
/* Index of the current line in code, and character offset within that line.
 * Declared with an explicit type: implicit int is not valid since C99. */
static int cur_line, cur_pos;

/*
 * Reserved words and multi-character operators, paired with the token that
 * yylex() returns for each.
 *
 * Words with same first letters must be together: init_token() records a
 * single (start, num) run per first character, so a second, separate run for
 * the same character would replace the first one.
 */
static struct {
    char *word;
    int token;
} reserved_words[] = {
    { "any",			ANY },
    { "arg",			ARG },
    { "atomic",			ATOMIC },
    { "break",			BREAK },
    { "case",			CASE },
    { "catch",			CATCH },
    { "continue",		CONTINUE },
    { "default",		DEFAULT },
    { "disallow_overrides",	DISALLOW_OVERRIDES },
    { "else",			ELSE },
    { "for",			FOR },
    { "fork",			FORK },
    { "handler",		HANDLER },
    { "if",			IF },
    { "in",			IN },
    { "non_atomic",		NON_ATOMIC },
    { "pass",			PASS },
    { "return",			RETURN },
    { "switch",			SWITCH },
    { "to",			TO },
    { "var",			VAR },
    { "while",			WHILE },
    { "with",			WITH },
    { "(|",			CRITLEFT },
    { "(>",			PROPLEFT },
    { "<)",			PROPRIGHT },
    { "<=",			LE },
    { "..",			UPTO },
    { "|)",			CRITRIGHT },
    { "||",			OR },
    { "#[",			START_DICT },
    { "`[",			START_BUFFER },

    { "&&",			AND },
    { "==",			EQ },
    { "!=",			NE },
    { ">=",			GE }
/* Disabled shift operators.  NOTE(review): as listed, "<<" is not adjacent
 * to the other entries starting with '<', so it would have to be moved next
 * to them before this section could be enabled. */
#if 0
    ,
    { ">>",			SR },
    { "<<",			SL }
#endif
};

/*
 * Lookup table into reserved_words, indexed by SUBSCRIPT() of a word's first
 * character.  start is the index of the first entry beginning with that
 * character, or -1 if there is none; num is the number of consecutive
 * entries in that run.  Filled in by init_token().
 */
static struct {
    int start;
    int num;
} starting[128];

extern Pile *compiler_pile;		/* For allocating strings. */

/*
 * Build the starting[] lookup table from reserved_words.
 *
 * Every slot is first marked empty (start == -1).  The table is then walked
 * one run at a time, where a run is a maximal stretch of adjacent entries
 * whose first character equals that of the run's first entry; the run's
 * position and length are stored under that character.
 */
void init_token(void)
{
    int idx, first, run;

    /* Mark every slot as having no reserved words. */
    for (idx = 0; idx < 128; idx++)
	starting[idx].start = -1;

    /* Record each run of same-initial words, then jump past it. */
    for (idx = 0; idx < NUM_RESERVED_WORDS; idx += run) {
	first = SUBSCRIPT(*reserved_words[idx].word);

	run = 1;
	while (idx + run < NUM_RESERVED_WORDS
	       && *reserved_words[idx + run].word == first)
	    run++;

	starting[first].start = idx;
	starting[first].num = run;
    }
}

/*
 * Begin tokenizing code_list: later yylex() calls read from this list,
 * starting at the first character of its first line.
 */
void lex_start(list_t * code_list) {
    cur_pos = 0;
    cur_line = 0;
    code = code_list;
}

/*
 * Return 1 if every character of s is a letter, digit, or underscore, and 0
 * otherwise.
 *
 * The empty string is accepted, and no extra restriction is placed on the
 * first character (a leading digit passes).
 */
int is_valid_ident(char *s)
{
    for (; *s; s++) {
	/* <ctype.h> functions need a value representable as unsigned char;
	 * passing a negative plain char is undefined behavior. */
	if (!isalnum((unsigned char)*s) && *s != '_')
	    return 0;
    }
    return 1;
}

/*
 * Return the next token from the line list installed by lex_start().
 *
 * Whitespace and exhausted lines are skipped first.  The token is then
 * classified, in this order: reserved word or multi-character operator,
 * identifier, number, string, object/symbol/error literal, comment, dbref,
 * and finally a single character returned as its own value.  cur_line and
 * cur_pos are advanced past the token, and yylval carries its value where
 * there is one (s for text, num for INTEGER and DBREF, fnum for FLOAT).
 *
 * Returns 0 when no text remains.
 */
int yylex(void)
{
    data_t *d = (data_t *)0;
    string_t *line;
    char *s = NULL, *word;
    int len = 0, i, j, start, type;

    /* Find the beginning of the next token. */
    while (cur_line < list_length(code)) {
	/* Fetch text and length of current line. */
	d = list_elem(code, cur_line);
	line = d->u.str;
	s = string_chars(line);
	len = string_length(line);

	/* Scan over line for a non-space character. */
	while (cur_pos < len && isspace((unsigned char)s[cur_pos]))
	    cur_pos++;

	/* If we didn't hit the end, return the character we stopped at. */
	if (cur_pos < len)
	    break;

	/* Go on to the next line. */
	cur_line++;
	cur_pos = 0;
	d = (data_t *)0;
    }

    /* d is only left non-null when the scan stopped inside a line. */
    if (!d)
	return 0;

    /* From here on, s and len describe the unread remainder of the line. */
    s += cur_pos;
    len -= cur_pos;

    /* Check if it's a reserved word. */
    start = starting[SUBSCRIPT(*s)].start;
    if (start != -1) {
	for (i = start; i < start + starting[SUBSCRIPT(*s)].num; i++) {
	    /* Compare remaining letters of word against s. */
	    word = reserved_words[i].word;
	    for (j = 1; j < len && word[j]; j++) {
		if (s[j] != word[j])
		    break;
	    }

	    /* Comparison fails if we didn't match all the characters in word,
	     * or if word is an identifier and the next character in s isn't
	     * punctuation. */
	    if (word[j])
		continue;
	    if (isalpha((unsigned char)*s) && j < len
		&& (isalnum((unsigned char)s[j]) || s[j] == '_'))
		continue;

	    cur_pos += j;
	    return reserved_words[i].token;
	}
    }

    /* Check if it's an identifier. */
    if (isalpha((unsigned char)*s) || *s == '_') {
	yylval.s = identifier_token(s, len, &i);
	cur_pos += i;
	return IDENT;
    }

    /* Check if it's a number. */
    if (isdigit((unsigned char)*s)) {
	/* Convert the string to a number. */
	yylval.num = 0;
	while (len && isdigit((unsigned char)*s)) {
	    yylval.num = yylval.num * 10 + (*s - '0');
	    s++, cur_pos++, len--;
	}

	/* It is an integer unless a fraction or exponent follows.  Digits
	 * that run to the end of the line are an integer too; without the
	 * !len test they would fall through and come back as a FLOAT. */
	if (!len || (*s != '.' && *s != 'e'))
	    return INTEGER;

	{
	    float f = yylval.num;

	    /* NOTE(review): any '.' after the digits is consumed as a decimal
	     * point, including the first '.' of a ".." written directly
	     * after an integer -- confirm that is intended. */
	    if (*s == '.') {
		float muly = 1;

		s++, cur_pos++, len--;
		while (len && isdigit((unsigned char)*s)) {
		    muly /= 10;
		    f += (*s - '0') * muly;
		    s++, cur_pos++, len--;
		}
	    }

	    if (len && *s == 'e') {
		int esign = 0, evalue = 0;

		s++, cur_pos++, len--;
		if (len && *s == '-') {
		    esign = 1;
		    s++, cur_pos++, len--;
		} else if (len && *s == '+') {
		    esign = 0;
		    s++, cur_pos++, len--;
		}
		while (len && isdigit((unsigned char)*s)) {
		    evalue = evalue * 10 + (*s - '0');
		    s++, cur_pos++, len--;
		}
		if (esign)
		    evalue = -evalue;

		/* Scale by 10^evalue: multiply for a positive exponent,
		 * divide for a negative one. */
		if (evalue > 0) {
		    while (evalue--)
			f *= 10;
		} else {
		    while (evalue++)
			f /= 10;
		}
	    }
	    yylval.fnum = f;
	    return FLOAT;
	}
    }

    /* Check if it's a string. */
    if (*s == '"') {
	yylval.s = string_token(s, len, &i);
	cur_pos += i;
	return STRING;
    }

    /* Check if it's an object literal, symbol, or error code. */
    if ((*s == '$' || *s == '\'' || *s == '~')) {
	type = ((*s == '$') ? NAME : ((*s == '\'') ? SYMBOL : ERROR));
	if (len > 1 && s[1] == '"') {
	    yylval.s = string_token(s + 1, len - 1, &i);
	    cur_pos += i + 1;
	    return type;
	} else if (len > 1
		   && (isalnum((unsigned char)s[1]) || s[1] == '_')) {
	    /* len > 1 keeps the look-ahead inside the line. */
	    yylval.s = identifier_token(s + 1, len - 1, &i);
	    cur_pos += i + 1;
	    return type;
	}
	/* A lone prefix character falls through to the single-character
	 * case at the bottom. */
    }

    /* Check if it's a comment. */
    if (len >= 2 && *s == '/' && s[1] == '/') {
	/* Copy in text after //, and move to next line. */
	yylval.s = PMALLOC(compiler_pile, char, len - 1);
	MEMCPY(yylval.s, s + 2, len - 2);
	yylval.s[len - 2] = 0;
	cur_line++;
	cur_pos = 0;
	return COMMENT;
    }

    /* Check if it's a dbref. */
    if (len >= 2 && *s == '#' && isdigit((unsigned char)s[1])) {
	/* Convert the string to a number. */
	s++, cur_pos++, len--;
	yylval.num = 0;
	while (len && isdigit((unsigned char)*s)) {
	    yylval.num = yylval.num * 10 + (*s - '0');
	    s++, cur_pos++, len--;
	}
	return DBREF;
    }

    /* None of the above. */
    cur_pos++;
    return *s;
}

/* Report the lexer's current line, counting from one (cur_line + 1). */
int cur_lineno(void)
{
    int lineno = cur_line;

    return ++lineno;
}

/* Return nonzero if s[i] is a backslash that escapes a following '"' or
 * '\\' lying within the first len characters of s. */
static int is_string_escape(char *s, int len, int i)
{
    return s[i] == '\\' && i < len - 1 && (s[i + 1] == '"' || s[i + 1] == '\\');
}

/*
 * Extract the contents of the string literal that starts at s.
 *
 * s[0] is taken to be the opening quote and is skipped without being
 * examined.  Text is copied up to the next unescaped '"' or the end of the
 * len available characters, whichever comes first.  A backslash is dropped
 * only when it precedes '"' or another backslash; any other backslash is
 * copied as is.
 *
 * The result is a NUL-terminated copy obtained with PMALLOC on
 * compiler_pile.  *token_len receives the number of source characters
 * consumed: both quotes are counted when the literal is terminated, and it
 * equals len when the line ends first.
 */
internal char *string_token(char * s, int len, int *token_len)
{
    int count = 0, i;
    char *p, *q;

    /* First pass: count the characters the result will hold. */
    for (i = 1; i < len && s[i] != '"'; i++) {
	if (is_string_escape(s, len, i))
	    i++;
	count++;
    }

    /* Second pass: allocate space and copy, using the same escape rule so
     * the two passes cannot disagree. */
    q = p = PMALLOC(compiler_pile, char, count + 1);
    for (i = 1; i < len && s[i] != '"'; i++) {
	if (is_string_escape(s, len, i))
	    i++;
	*q++ = s[i];
    }
    *q = 0;

    /* i stopped either on the closing quote (consume it too) or at len. */
    *token_len = (i == len) ? i : i + 1;
    return p;
}

/*
 * Copy the identifier-like token at the start of s into a new string.
 *
 * s[0] is always taken as part of the token without being examined, so the
 * caller must already have checked it (yylex() accepts a letter or '_' for
 * plain identifiers, and also a digit after '$', '\'' or '~').  The token
 * then extends over following letters, digits, and underscores, up to at
 * most len characters.
 *
 * The result is a NUL-terminated copy obtained with PMALLOC on
 * compiler_pile.  *token_len receives the token's length in characters.
 */
internal char *identifier_token(char *s, int len, int *token_len)
{
    int count = 1;
    char *p;

    /* Count characters in identifier.  The cast keeps the <ctype.h> call
     * defined for characters with the high bit set. */
    while (count < len
	   && (isalnum((unsigned char)s[count]) || s[count] == '_'))
	count++;

    /* Allocate space and copy. */
    p = PMALLOC(compiler_pile, char, count + 1);
    MEMCPY(p, s, count);
    p[count] = 0;

    *token_len = count;
    return p;
}