/* Copyright 1989, 1990 by James Aspnes, David Applegate, and Bennet Yee */ /* See the file COPYING for distribution information */ %{ #include "os.h" #include <setjmp.h> #include "config.h" #include "bytecode.h" #include "db.h" #define BREAK_TMP 0 /* ridiculous jump offset */ /* types */ #define T_VOID 0 #define T_STRING 1 #define T_NUM 2 #define T_OBJ 3 #define T_BOOL 4 #define T_ACTION 5 /* action names */ #define T_SET 6 #define T_TIME 7 typedef struct code { byte *code; int len; int type; struct code *next; /* next in list */ } *CodeP; #define YYSTYPE CodeP /* Note: if you make YYSTYPE = Code * */ /* some versions of yacc will put in an */ /* incorrect Code * yylval, yyval; def */ #define COERCE(arg, type) \ if((arg = coerce(arg, type)) == 0) { \ yyerror("type mismatch"); \ YYABORT; \ } \ static void yyerror(const char *); static int yylex(void); static byte *result; /* holds final result of parse */ static CodeP empty_code(void); /* creates new code blob of VOID type */ static CodeP add_after(CodeP, byte); /* adds op to end of code */ /* doesn't change type */ static CodeP merge(CodeP, CodeP); /* merges two code blobs */ /* type of result is type of second argument */ static CodeP make_if(CodeP test, CodeP thencase, CodeP elsecase); static CodeP make_op(byte op); static CodeP make_immediate(byte op, datum value); static CodeP make_binop(byte op, CodeP arg1, CodeP arg2, int t1, int t2, int tresult); static CodeP make_compare(byte op, CodeP arg1, CodeP arg2); static CodeP make_set_do(CodeP expr, CodeP block); static CodeP make_matching_set_do(CodeP expr, CodeP string_expr, CodeP block); static CodeP coerce(CodeP, int type); static jmp_buf parse_error; %} %token IF %token THEN %token ELSE %token ELSEIF %token FI %token IN %token DO %token END %token MOVE %token TELL %token TO %token BREAK %token TEXT %token MTEXT %token RANDOM %token SET %token ME %token YOU %token NEXT %token LOCATION %token BAD_TOKEN %token AND %token OR %token TRUE %token FALSE %token MATCHING %token ADD %token TAKE %token FROM %token COUNT %token ID %token TIME %token DELAY %token NULL_OBJECT %token NULL_STRING %token EXIT /* these have actual values attached to them */ %token VAR /* normal typed variable */ %token SETVAR /* set variable */ %token VAL /* literal value */ %left OR %left AND %nonassoc '=' NEQ %nonassoc CONTAINS MATCHES '<' '>' LEQ GEQ %left '|' %left '+' '-' %left '*' '/' MOD %left UMINUS %left CREATE DESTROY SYSCALL CLEAR %left '!' %left '.' %% program: block { $1 = add_after($1, RETURN_OP); result = $1->code; $1->code = 0; } block: /* empty */ { $$ = empty_code(); } | block statement { $$ = merge($1, $2); } statement: if_statement | do_statement | break_statement | action_expr { $$ = add_after($1, DROP_OP); } /* ( -- ) */ if_statement: IF if_tail { $$ = $2; } ; /* ( -- ) */ if_tail: expr THEN block FI { COERCE($1, T_BOOL); $$ = make_if($1, $3, empty_code()); } | expr THEN block ELSE block FI { COERCE($1, T_BOOL); $$ = make_if($1, $3, $5); } | expr THEN block ELSEIF if_tail { COERCE($1, T_BOOL); $$ = make_if($1, $3, $5); } ; /* ( -- ) */ do_statement: /* Note that make_do can fail, if block contains another loop */ IN coerced_setvar DO block END { if(($$ = make_set_do($2, $4)) == 0) YYABORT; } | IN coerced_setvar MATCHING expr DO block END { COERCE($4, T_STRING); if(($$ = make_matching_set_do($2, $4, $6)) == 0) YYABORT; } ; /* ( -- ) */ break_statement: BREAK { $$ = make_immediate(JUMP_OP, BREAK_TMP); } | EXIT { $$ = make_op(EXIT_OP); } ; /* ( -- ) */ assign_statement: SET var TO expr { switch($2->type) { case T_SET: yyerror("set variable in lhs of assignment"); YYABORT; case T_ACTION: COERCE($4, T_STRING); $$ = add_after(merge($2, $4), SET_ACTION_OP); break; case T_STRING: COERCE($4, T_STRING); $$ = add_after(merge($2, $4), SET_STRING_OP); break; default: COERCE($4, $2->type); $$ = add_after(merge($2, $4), SET_OP); break; } $$->type = T_BOOL; } | CLEAR var { switch($2->type) { case T_ACTION: $$ = add_after($2, UNSET_ACTION_OP); break; default: $$ = add_after($2, UNSET_OP); break; } $$->type = T_BOOL; } | CLEAR setvar { $$ = add_after($2, CLEAR_SET_OP); break; } | ADD expr TO setvar { COERCE($2, T_OBJ); $$ = add_after(merge($4, $2), ADD_SET_OP); $$->type = T_BOOL; } | TAKE expr FROM setvar { COERCE($2, T_OBJ); if($4->type != T_SET) yyerror("can't take from a non-set"); $$ = add_after(merge($4, $2), DEL_SET_OP); $$->type = T_BOOL; } ; /* ( -- result ) */ expr: simple_expr | action_expr | expr '=' expr { COERCE($3, $1->type); $$ = add_after(merge($1, $3), EQ_OP); $$->type = T_BOOL; } | expr NEQ expr { COERCE($3, $1->type); $$ = add_after(merge($1, $3), NEQ_OP); $$->type = T_BOOL; } | '!' expr { COERCE($2, T_BOOL); $$ = add_after($2, NOT_OP); } | expr OR expr { COERCE($1, T_BOOL); COERCE($3, T_BOOL); $$ = make_if($1, make_op(TRUE_OP), $3); $$->type = T_BOOL; } | expr AND expr { COERCE($1, T_BOOL); COERCE($3, T_BOOL); $$ = make_if($1, $3, make_op(FALSE_OP)); $$->type = T_BOOL; } | expr '+' expr { if($1->type == T_TIME) { /* time + num = time */ $$ = make_binop(PLUS_OP, $1, $3, T_TIME, T_NUM, T_TIME); } else if($3->type == T_TIME) { /* num + time = time */ $$ = make_binop(PLUS_OP, $1, $3, T_NUM, T_TIME, T_TIME); } else { /* num + num = num */ $$ = make_binop(PLUS_OP, $1, $3, T_NUM, T_NUM, T_NUM); } } | expr '-' expr { if($3->type == T_TIME) { /* time - time = num */ $$ = make_binop(MINUS_OP, $1, $3, T_TIME, T_TIME, T_NUM); } else if($1->type == T_TIME) { /* time - num = time */ $$ = make_binop(MINUS_OP, $1, $3, T_TIME, T_NUM, T_TIME); } else { /* num - num = num */ $$ = make_binop(MINUS_OP, $1, $3, T_NUM, T_NUM, T_NUM); } } | expr MOD expr { /* sleazy instant coercion */ if($1->type == T_TIME) $1->type = T_NUM; $$ = make_binop(MOD_OP, $1, $3, T_NUM, T_NUM, T_NUM); } | expr '*' expr { $$ = make_binop(TIMES_OP, $1, $3, T_NUM, T_NUM, T_NUM); } | expr '/' expr { $$ = make_binop(DIV_OP, $1, $3, T_NUM, T_NUM, T_NUM); } | expr '<' expr { $$ = make_compare(LT_OP, $1, $3); } | expr '>' expr { $$ = make_compare(GT_OP, $1, $3); } | expr LEQ expr { $$ = make_compare(LEQ_OP, $1, $3); } | expr GEQ expr { $$ = make_compare(GEQ_OP, $1, $3); } | '-' expr %prec UMINUS { COERCE($2, T_NUM); $$ = add_after($2, NEGATE_OP); } | '(' expr ')' { $$ = $2; } | expr MATCHES expr { $$ = make_binop(MATCHES_OP, $1, $3, T_STRING, T_OBJ, T_BOOL); } | expr CONTAINS expr { /* location test */ COERCE($1, T_OBJ); COERCE($3, T_OBJ); $$ = add_after(merge(add_after($3, LOC_OP), $1), EQ_OP); $$->type = T_BOOL; } | setvar CONTAINS expr { COERCE($3, T_OBJ); $$ = add_after(merge($1, $3), CONTAINS_OP); $$->type = T_BOOL; } | expr '|' expr { COERCE($1, T_STRING); COERCE($3, T_STRING); $$ = add_after(merge($1, $3), CONCAT_OP); $$->type = T_STRING; } | CREATE { $$ = make_op(CREATE_OP); $$->type = T_OBJ; } ; /* ( -- result ) */ /* these are things that can be on the lhs of a '.' */ simple_expr: VAL | var { switch($1->type) { case T_ACTION: $$ = add_after($1, LOOKUP_ACTION_OP); $$->type = T_STRING; break; default: $$ = add_after($1, LOOKUP_OP); break; } } | ME { $$ = make_op(ME_OP); $$->type = T_OBJ; } | YOU { $$ = make_op(YOU_OP); $$->type = T_OBJ; } | NEXT { $$ = make_op(NEXT_OP); $$->type = T_OBJ; } | RANDOM { $$ = make_op(RANDOM_OP); $$->type = T_NUM; } | TEXT { $$ = make_op(TEXT_OP); $$->type = T_STRING; } | MTEXT { $$ = make_op(MTEXT_OP); $$->type = T_STRING; } | TRUE { $$ = make_op(TRUE_OP); $$->type = T_BOOL; } | FALSE { $$ = make_op(FALSE_OP); $$->type = T_BOOL; } | NULL_OBJECT { $$ = make_op(FALSE_OP); $$->type = T_OBJ; } | NULL_STRING { $$ = make_op(FALSE_OP); $$->type = T_STRING; } | TIME { $$ = make_op(TIME_OP); $$->type = T_TIME; } | LOCATION { $$ = add_after(make_op(ME_OP), LOC_OP); $$->type = T_OBJ; } | simple_expr '.' LOCATION { COERCE($1, T_OBJ); $$ = add_after($1, LOC_OP); } | ID { $$ = make_op(ME_OP); $$->type = T_NUM; } | simple_expr '.' ID { /* COERCE($1, T_OBJ); */ $$->type = T_NUM; } | COUNT { $$ = add_after(merge(make_op(ME_OP), make_immediate(LITERAL_OP, CONTENTS_NAME)), COUNT_OP); $$->type = T_NUM; } | simple_expr '.' COUNT { COERCE($1, T_OBJ); $$ = add_after(merge($1, make_immediate(LITERAL_OP, CONTENTS_NAME)), COUNT_OP); $$->type = T_NUM; } | setvar '.' COUNT { $$ = add_after($1, COUNT_OP); $$->type = T_NUM; } ; /* ( -- bool ) */ action_expr: MOVE expr TO expr { $$ = make_binop(MOVE_OP, $2, $4, T_OBJ, T_OBJ, T_BOOL); } | assign_statement | TELL string_list TO expr { COERCE($4, T_OBJ); $$ = add_after(merge(merge($4, make_op(TELL_INIT_OP)), $2), TELL_OP); $$->type = T_BOOL; } | DESTROY expr { COERCE($2, T_OBJ); $$ = add_after($2, DESTROY_OP); $$->type = T_BOOL; } | SYSCALL expr { COERCE($2, T_NUM); $$ = add_after($2, SYSCALL_OP); $$->type = T_BOOL; } | DELAY expr { switch($2->type) { case T_TIME: /* absolute delay */ $$ = add_after($2, DELAY_OP); $$->type = T_BOOL; break; case T_NUM: /* relative delay */ $$ = add_after(add_after(add_after($2, TIME_OP), PLUS_OP), DELAY_OP); $$->type = T_BOOL; break; default: yyerror("bad argument to delay"); YYABORT; } } ; /* ( -- ) */ string_list: string_list_elt | string_list string_list_elt { $$ = merge($1, $2); } ; /* ( -- ) */ string_list_elt: expr { switch($1->type) { case T_NUM: $$ = add_after($1, ADD_NUM_BUF_OP); break; case T_STRING: $$ = add_after($1, ADD_BUF_OP); break; case T_TIME: $$ = add_after($1, ADD_TIME_BUF_OP); break; default: /* you lose */ yyerror("illegal type in string list"); YYABORT; } } ; /* ( -- object varname ) */ var: VAR { $$ = merge(make_op(ME_OP), $1); $$->type = $1->type; } | simple_expr '.' VAR { COERCE($1, T_OBJ); $$ = merge($1, $3); $$->type = $3->type; } ; coerced_setvar: setvar | simple_expr { COERCE($1, T_OBJ); $$ = merge($1, make_immediate(LITERAL_OP, CONTENTS_NAME)); $$->type = T_SET; } ; setvar: SETVAR { $$ = merge(make_op(ME_OP), $1); $$->type = T_SET; } | simple_expr '.' SETVAR { COERCE($1, T_OBJ); $$ = merge($1, $3); $$->type = T_SET; } ; %% static CodeP code_list; /* list of all code objects */ static CodeP new_code(int len) { CodeP c; c = (CodeP) malloc(sizeof(struct code)); c->len = len; c->type = T_VOID; c->next = code_list; code_list = c; if(len < 1) len = 1; c->code = (byte *) malloc(len * sizeof(byte)); return c; } static void free_codes(void) { CodeP next; while(code_list) { next = code_list->next; if(code_list->code) free((void *) code_list->code); free((void *) code_list); code_list = next; } } static CodeP empty_code(void) { return new_code(0); } static CodeP add_after(CodeP old, byte op) { old->code = (byte *) realloc(old->code, sizeof(byte) * ++old->len); old->code[old->len - 1] = op; return old; } static CodeP make_op(byte op) { CodeP c; c = new_code(1); c->code[0] = op; return c; } static CodeP make_immediate(byte op, datum value) { CodeP c; c = new_code(1 + ARG_WIDTH); c->code[0] = op; ARG_STORE(c->code + 1, value); return c; } static CodeP merge(CodeP c1, CodeP c2) { byte *to; int i; c1->code = (byte *) realloc((void *) c1->code, sizeof(byte) * (c1->len + c2->len)); for(to = c1->code + c1->len, i = 0; i < c2->len; i++) { to[i] = c2->code[i]; } c1->len += c2->len; c1->type = c2->type; return c1; } static CodeP make_compare(byte op, CodeP arg1, CodeP arg2) { if(arg1->type == T_TIME) { return make_binop(op, arg1, arg2, T_TIME, T_TIME, T_BOOL); } else { return make_binop(op, arg1, arg2, T_NUM, T_NUM, T_BOOL); } } static CodeP make_binop(byte op, CodeP arg1, CodeP arg2, int t1, int t2, int tresult) { CodeP c; if((arg1 = coerce(arg1, t1)) == 0 || (arg2 = coerce(arg2, t2)) == 0) { yyerror("bad argument type to binary operation"); longjmp(parse_error, 1); return 0; /* not really reached */ } else { c = add_after(merge(arg1, arg2), op); c->type = tresult; return c; } } /* expr JUMP_IF(then) f JUMP(end) [then] t [end] */ static CodeP make_if(CodeP test, CodeP t, CodeP f) { CodeP c; c = merge(test, make_immediate(JUMP_IF_OP, CALCULATE_OFFSET(f->len + 1 + ARG_WIDTH))); c = merge(c, f); c = merge(c, make_immediate(JUMP_OP, CALCULATE_OFFSET(t->len))); c = merge(c, t); return c; } /* [loop] NEXT NOT JUMP_IF(end) block DO_NEXT JUMP(loop) [end] */ /* JUMP(BREAK_TMP) => JUMP(end) inside block */ /* returns error if block contains DO_INIT */ static CodeP make_do_body(CodeP block, byte next_op) { CodeP c; int scan; /* build the loop first */ c = add_after(make_op(NEXT_OP), NOT_OP); c = merge(c, make_immediate(JUMP_IF_OP, CALCULATE_OFFSET(block->len + 2 + ARG_WIDTH))); c = merge(c, block); c = add_after(c, next_op); c = merge(c, make_immediate(JUMP_BACK_OP, CALCULATE_REVERSE_OFFSET(c->len))); /* scan for breaks */ /* and erroneous DO_INIT's! */ for(scan = 0; scan < c->len; scan++) { switch(c->code[scan]) { case LITERAL_OP: case JUMP_BACK_OP: case JUMP_IF_OP: scan += ARG_WIDTH; break; case JUMP_OP: if(ARG_VALUE(c->code + scan + 1) == BREAK_TMP) { /* replace it with offset to end */ ARG_STORE(c->code + scan + 1, CALCULATE_OFFSET(c->len - (scan + 1 + ARG_WIDTH))); } /* skip arg in any case */ scan += ARG_WIDTH; break; case DO_SET_INIT_OP: case MATCHING_DO_SET_INIT_OP: /* disaster */ yyerror("nested loop"); return 0; default: break; } } return c; } /* expr SET_DO_INIT loop_body */ static CodeP make_set_do(CodeP expr, CodeP block) { /* just add initialization */ return merge(add_after(expr, DO_SET_INIT_OP), make_do_body(block, DO_NEXT_OP)); } /* expr string MATCHING_SET_DO_INIT loop_body */ static CodeP make_matching_set_do(CodeP expr, CodeP string_expr, CodeP block) { /* just add initialization */ return merge(add_after(merge(expr, string_expr), MATCHING_DO_SET_INIT_OP), make_do_body(block, MATCHING_DO_NEXT_OP)); } static CodeP coerce(CodeP c, int type) { if(c->type == type || type == T_BOOL) { c->type = type; return c; } else if(c->type == T_NUM && type == T_STRING) { return add_after(c, NUM_TO_STRING_OP); } else { return 0; /* can't do it */ } } static const char *input_start; static const char *input; char compile_error[MAX_STRLEN]; byte *compile(const char *s) { input_start = input = s; code_list = 0; if(setjmp(parse_error)) { return 0; /* error */ } else if(yyparse()) { return 0; /* error signaled by yyparse() */ } else { return result; } } static void yyerror(const char *s) { free_codes(); sprintf(compile_error, "%s at position %d", s, input - input_start); } static int token_list[RESERVED_TOKENS] = { BAD_TOKEN, IF, THEN, ELSE, FI, IN, DO, END, MOVE, TELL, TO, BREAK, TEXT, RANDOM, SET, ME, YOU, NEXT, LOCATION, MOD, MATCHES, AND, OR, CREATE, TRUE, FALSE, CLEAR, DESTROY, SYSCALL, MATCHING, ADD, TAKE, FROM, COUNT, ELSEIF, CONTAINS, ID, TIME, DELAY, NULL_OBJECT, NULL_STRING, EXIT, MTEXT, BAD_TOKEN, BAD_TOKEN, BAD_TOKEN, BAD_TOKEN, BAD_TOKEN, BAD_TOKEN, BAD_TOKEN, BAD_TOKEN, BAD_TOKEN, BAD_TOKEN, BAD_TOKEN, BAD_TOKEN, BAD_TOKEN, BAD_TOKEN, BAD_TOKEN, BAD_TOKEN, BAD_TOKEN, BAD_TOKEN, BAD_TOKEN, BAD_TOKEN, BAD_TOKEN }; static void get_symbol(char *buf, int size) { int i; /* got a symbol of some sort */ for(i = 0; isalnum(*input) || *input == '_'; i++) { if(i >= size) { yyerror("symbol too long"); longjmp(parse_error, 1); } buf[i] = *input++; } /* check for empty symbol */ if(i == 0) { yyerror("empty symbol name"); longjmp(parse_error, 1); } /* we're at the end of it */ buf[i] = '\0'; } static int intern_token(const char *buf, int type) { datum s; /* it's either a reserved word or an object var */ s = intern(buf); if(s < RESERVED_TOKENS) { /* do the token lookup */ return token_list[s]; } else { /* treat it as a VAR or a SETVAR */ yylval = make_immediate(LITERAL_OP, s); yylval->type = type; return ((type == T_SET) ? SETVAR : VAR); } } static int parse_token(int type) { char buf[MAX_NAMELEN+2]; /* get that first character into the name */ *buf = *input++; /* get the rest */ get_symbol(buf+1, MAX_NAMELEN); return intern_token(buf, type); } /* allows 1-char object variable names */ /* this is indeed a crock, but it could be much worse */ static int parse_obj_token(void) { char buf[MAX_NAMELEN+2]; get_symbol(buf, MAX_NAMELEN); return intern_token(buf, T_OBJ); } static int parse_action_name(void) { char buf[MAX_NAMELEN+1]; char *p; /* drop the ACTION_MARKER */ input++; /* get the verb */ get_symbol(buf, MAX_NAMELEN); /* maybe get a preposition */ if(*input == LEFT_ACTION || *input == RIGHT_ACTION || *input == DOUBLE_ACTION) { p = buf+strlen(buf); *p++ = *input++; get_symbol(p, MAX_NAMELEN - (p - buf)); } yylval = make_immediate(LITERAL_OP, intern(buf)); yylval->type = T_ACTION; return VAR; } static int parse_giant_string(void) { char buf[MAX_STRLEN+1]; int i; int nest_count; input++; /* eat the '[' */ nest_count = 1; for(i = 0; i < MAX_STRLEN; i++) { switch(*input) { case ']': buf[i] = *input++; if(--nest_count == 0) { /* we're done */ buf[i] = '\0'; yylval = make_immediate(LITERAL_OP, intern(buf)); yylval->type = T_STRING; return VAL; } break; case '[': buf[i] = *input++; nest_count++; break; case '\\': if(nest_count > 1) buf[i++] = *input; input++; buf[i] = *input++; break; case '\"': /* nested normal string */ /* This should be in a separate routine that also handles */ /* the normal string case in yylex(). Someday it will be */ buf[i++] = *input++; for(; i < MAX_STRLEN; i++) { switch(*input) { case '\\': buf[i++] = *input++; buf[i] = *input++; break; case '\"': buf[i] = *input++; goto normal_exit; case '\0': goto panic; default: buf[i] = *input++; } } normal_exit: break; case '\0': goto panic; default: buf[i] = *input++; break; } } panic: yyerror("Runaway string constant"); longjmp(parse_error, 1); return 0; } static int yylex(void) { char buf[MAX_STRLEN+1]; int i; datum s; start_over: /* eat whitespace */ while(*input && isspace(*input)) input++; if(*input == '\0') { return 0; /* end of input */ } else if(isdigit(*input)) { /* it's a numeric constant */ for(s = 0; *input && isdigit(*input); input++) { /* possibly non-portable character hack here */ s = s * 10 + *input - '0'; } yylval = make_immediate(LITERAL_OP, s); yylval->type = T_NUM; return VAL; } else switch(*input) { case '.': case '+': case '-': case '*': case '/': case '=': case '(': case ')': case '|': return *input++; case '<': if(input[1] == '=') { input += 2; return LEQ; } else { return *input++; } case '>': if(input[1] == '=') { input += 2; return GEQ; } else { return *input++; } case '!': if(input[1] == '=') { input += 2; return NEQ; } else { return *input++; } case '{': /* comment */ /* skip to '}' */ input++; while(*input && *input++ != '}'); goto start_over; case '[': /* giant string constant */ return parse_giant_string(); case '\"': /* string constant */ /* skip the first quote */ input++; for(i = 0; i < MAX_STRLEN; i++) { switch(*input) { case '\\': switch(*++input) { case '\\': buf[i] = '\\'; break; case '\"': buf[i] = '\"'; break; case '\0': /* disaster */ goto panic; default: buf[i] = *input; break; } input++; break; case '\"': /* hit the end */ buf[i] = '\0'; input++; yylval = make_immediate(LITERAL_OP, intern(buf)); yylval->type = T_STRING; return VAL; case '\0': goto panic; default: buf[i] = *input++; } } /* overlong string */ panic: yyerror("Runaway string constant"); longjmp(parse_error, 1); break; case STRING_MARKER: return parse_token(T_STRING); case NUM_MARKER: return parse_token(T_NUM); case BOOL_MARKER: return parse_token(T_BOOL); case SET_MARKER: return parse_token(T_SET); case TIME_MARKER: return parse_token(T_TIME); case ACTION_MARKER: return parse_action_name(); default: if(isalpha(*input) || *input == '_') { return parse_obj_token(); } else { yyerror("bad input character"); longjmp(parse_error, 1); } break; } return 0; /* not really reached */ }