sima/autoconf/
sima/hosts/i386/
sima/mudlib/
sima/mudlib/kernel/
sima/mudlib/obj/
sima/mudlib/sys/
sima/synhash/mips/
/* Copyright 1995 J"orn Rennecke */

typedef char charset[32];
#define SET_INSET(c, set) ((set)[(unsigned char)(c) >> 3] |= (1 << ((c) & 7)))
#define IS_INSET(c, set) ((set)[(unsigned char)(c) >> 3] & (1 << ((c) & 7)))

/* ANY_XXX, TEXT/CHAR_XXX, INSET_XXX, OPEN_XXX are adjectant in constant order.
 * XXX_STAR, XXX_PLUS have constant spacing
 */
#define ANY
#define TEXT
#define INSET
#define OPEN
#define OPEN_BRANCH
#define ANY_STAR
#define CHAR_STAR
#define INSET_STAR
#define OPEN_STAR
#define OPEN_BRANCH_STAR
#define ANY_PLUS
#define CHAR_PLUS
#define INSET_PLUS
#define OPEN_PLUS
#define OPEN_BRANCH_PLUS
#define CLOSE
#define SOL
#define EOL

#ifdef sima
#define ALLOC(size) alloc_gen(size)
#define FREE(p) free_gen(p)
/* The size argument to the inline small block allocator functions
 * should preferably be constant.
 */
#define ALLOC_SMALL(size) alloc_small(size)
#define FREE_SMALL(p,size) free_small(p,size)
#define FREE_SMALL_LIST(p, q, size) free_small_list(p, q, size)
#else
#define ALLOC(size) xalloc(size)
#define FREE(p) xfree(p)
#define ALLOC_SMALL(size) xalloc(size)
#define FREE_SMALL(p,size) xfree(p)
#define FREE_SMALL_LIST(p, q, size) \\
	free_small_list((struct free_list *)(p), (struct free_list *)(q))
struct free_list { struct free_list *next; };
static void free_small_list(top, bottom)
    struct free_list *top, *bottom;
    int size;
{
    struct free_list *next = top;
    do {
	top = next;
	next = top->next;
	FREE_SMALL(top);
    } while (top != bottom);
}
#endif

static char escchars[] = { 
  '\a', '\b', 'c', 'd', '\e', '\f', 'g', 'h', 'i', 'j', 'k', 'l',
  'm', '\n', 'o', 'p', 'q', '\r', 's', '\t', 'u', '\v'
}

() {
    for(len = 0;;) {
        c = *in++;
        switch(c) {
          case '^':
            op = SOL;
            break;
          case '$':
            op = EOL;
            break;
          case '.':
            op = ANY;
            last_node = len ? out : out - 3;
            break;
          case '*':
            op = CHAR_STAR;
          mulop:
            if (len == 1) {
                out[-2] = out[-1];
                out += 2;
                out[-5] = op;
		len = 0;
            } else if (len) {
                unsigned short tmp;
    
                textstart[-3] = TEXT;
                tmp = len - 1;
                STORE16(textstart-2, tmp);
                *out = out[-1];
                out += 4;
                out[-5] = op;
		len = 0;
            } else {
                switch(*last_node) {
                  case ANY:
                    check = last_node+1 + 4;
                    break;
                  case INSET:
                    check = last_node+1+sizeof(charset) + 4;
                    break;
                  case OPEN:
/*        OPEN_STAR,offset, expr,
                    v       ^
             CLOSE_STAR,offset,bracenum
 */
/* ()+ uses OPEN,bracenum,0 expr, CLOSE_STAR,offset,bracenum */
                    if (last_close < last_node) {
                  default:
                        check = 0;
                        break;
                    }
                    check = last_close;
                    break;
                  case OPEN_BRANCH:
/*        OPEN_BRANCH_STAR,offset, expr, FORWARD,
                    |               ^      
                    |               |     
                    |             2.|  expr
                    |               | /1.
                    v               |/
             BRANCH_CLOSE_STAR,2*offset,bracenum
 */
                }
                if (check != out) {
                    if (!inter_errno)
                        inter_errno = IE_REGEXP_MUL;
                    return;
                }
                *last_node += op - TEXT;
            }
          case '[':
          {
	    char setstart;

            if (!len) {
                out -= 3;
            } else {
                unsigned short tmp;
    
                textstart[-3] = TEXT;
                tmp = len;
                STORE16(textstart-2, tmp);
		len = 0;
            }
            *(last_node = out) = INSET;
            out++;
            bzero(out, sizeof(charset));
            for (setstart = in; in < input_end; in++) {
                char last_char;
    
                c = *in;
                switch(c) {
                  case ']':
                    if (in > setstart) {
                        in++;
                        goto set_built:
                    }
                    break;
                  case '-':
                    if (in > setstart) {
                        c = *in++;
                        do {
                            SET_INSET(c, out);
                            c--;
                        } while (c > last_char);
                    }
                    break;
                  case '\\':
                    if (in == input_end)
                        break;
                    c = *++in;
                    if (c <= 'a' && c >= 'v')
                        c = escchars[c - 'a'];
                    }
                }
            }
            last_char = c;
            SET_INSET(c, out);
           set_built:
            out += sizeof(charset) + 3;
            continue;
          }
          case '\\':
            c = *in++;
            switch(c) {
              case '+':
		op = CHAR_PLUS;
                goto mulop;
              case '<':
                op = WORDSTART;
                goto close_node;
              case '>':
                op = WORDEND;
                goto close_node;
              case 'B':
                op = NOTEDGE;
                goto close_node;
	      case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	      case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
	      case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
	      case 's': case 't': case 'u': case 'v': 
                c = escchars[c - 'a'];
                break;
              default:
                break;
            }
          default:
            *out++ = c;
	    continue;
        }
       close_node:
        if (!len) {
            out[-3] = op;
            out++;
        } else {
            unsigned short tmp;
    
            textstart[-3] = TEXT;
            tmp = len;
            STORE16(textstart-2, tmp);
            *out = op;
            out += 4;
	    len = 0;
        }
    }
    
}

/* regmatch does not make assumptions on the contents of *eol, but it
 * assumes that eol may be dereferenced. This is always true when the
 * region was obtained with alloc()
 */
int regmatch(char *in, char *pc, struct regexp *rp) {
    char *eol = rp->eol;

    for (;;) switch(*pc) {
      case SOL:
	if (in != rp->sol)
	    return 0;
	pc++;
	break;
      case EOL:
	if (in != eol)
	    return 0;
	pc++;
	break;
      case ANY:
	if (in >= eol)
	    return 0;
	in++;
	pc++;
	break;
      case WORDSTART:
	if ( (in > rp->sol && ISWORDPART(*(in-1)) ) ||
	      in == eol || !ISWORDPART(*in)) )
	{
	    return 0;
	}
	pc++;
	break;
      case WORDEND:
	if ( in == rp->sol || !ISWORDPART(*(in-1)) ||
	     ( in < eol && ISWORDPART(*in)) )
	{
	    return 0;
	}
	pc++;
	break;
      case NOTEDGE:
	if (in == rp->sol) {
	    if (ISWORDPART(*in))
		return 0;
	    break;
	}
	if (in == eol) {
	    if (ISWORDPART(*(in-1)))
		return 0;
	    break;
	}
	if ( ISWORDPART( *(reginput-1) ) != ISWORDPART( *reginput ) )
	    return 0;
	pc++;
	break;
      case TEXT:
      {
	int len;
	EXTRACT16(len, pc+1);
	if (in + len > eol || memcmp(in, pc+=3, len))
	    return 0;
	in += len;
	pc += len;
	break;
      }
      case INSET:
	if (!IS_INSET(*in, pc+1) || in == eol)
	    return 0;
	in++;
	pc += 33;
	break;
      case FORWARD_CLOSE:
	{
	    unsigned short offset;

	    EXTRACT16(offset, pc+1);
	    pc += offset;
	}
	goto do_close;
      }
      case OPEN:
      pc++
      case CLOSE:
      do_close:
      {
	char *end;

	if (end = regmatch(in, pc+2, rp) {
	    if (!rp->mark[pc[1]])
		rp->mark[pc[1]] = in;
	    return end;
	}
	return 0;
	break;
      }
      case OPEN_BRANCH:
      {
	char *end;
	short offset;

	pc += 3;
	end = regmatch(in, pc, rp);
	EXTRACT16(offset, pc-2);
	do {
	    pc += offset;
	    if (!end)
		end = regmatch(in, pc, rp);
	    EXTRACT16(offset, pc-2);
	} while (offset > 0)
	if (end) {
	    rp->markend[offset] = in;
	    return end;
	}
	return 0;
      }
      /* one-character + and * cases */
      {
	int matches;

      case ANY_PLUS:
	if (in >= eol)
	    return 0;
	in++;
      case ANY_STAR:
	matches = eol - in;
	in = eol;
	pc++;
	goto star_tail;
      case CHAR_PLUS:
	if (*in != pc[1] || in == eol)
	    return 0;
	in++;
      case CHAR_STAR:
	{
	    char c = pc[1], *old = in;
	    while (*in == c && in < eol) in++;
	    matches = old - in;
	}
	pc += 2;
	goto star_tail;
      case INSET_PLUS:
	{
	    char c = *in;
	    if (!IS_INSET(c, pc+1) || in == eol)
		return 0;
	}
	in++;
      case INSET_STAR:
	{
	    char *old = in, c;
	    while ((c = *in, IS_INSET(c, pc+1)) && in < eol) in++;
	    matches = old - in;
	}
	pc += 33;
	star_tail:
	{
	    char *nextset;
	    switch(*pc) {
	      {
		static charset empty_set = {0},
		  full_set = {
			255,255,255,255,255,255,255,255,
			255,255,255,255,255,255,255,255,
			255,255,255,255,255,255,255,255,
			255,255,255,255,255,255,255,255};

		char c;

	      case TEXT:
		c = pc[3];
		goto singlechar_set;
	      case CHAR_PLUS:
		c = pc[1];
	      singlechar_set:
		nextset = &empty_set;
		*empty_dirty = 0;
		empty_dirty = nextset + ((unsigned char)c >> 3);
		*empty_dirty |= 1 << (c & 7);
		break;
	      }
	      case INSET:
	      case INSET_PLUS
		nextset = pc+1;
	      default:
		nextset = &full_set;
		break;
	    }
	    do {
		if (IS_INSET(*in, nextset)) {
		    char *end = regmatch(in, pc, rp);
		    if (end)
			return end;
		    in--;
		}
	    } while(--matches >= 0);
	}
	return 0;
      }
      case OPEN_STAR:
      {
	unsigned short offset;

	EXTRACT16(offset, pc+1);
	pc += offset;
	end = regmatch(in, pc, rp);
	if (end) {
	    if (!rp->start[pc[3]])
		rp->start[pc[3]] = in;
	}
	return 0;
      }
      case CLOSE_STAR:
      {
	unsigned short offset;

	EXTRACT16(offset, pc+1);
	end = regmatch(in, pc - offset, rp);
	if (end) {
	    /* rp->end[pc[3]] has been written by recursion */
	    return end;
	}
	end = regmatch(in, pc +4, rp);
	if (end) {
	    if (!rp->end[pc[3]])
		rp->end[pc[3]] = in;
	    return end;
	}
	return 0;
      }
      case OPEN_BRANCH_STAR:
      {
	unsigned short offset;

	EXTRACT16(offset, pc+1);
	pc += offset;
	end = regmatch(in, pc, rp);
	if (end) {
	    if (!rp->start[pc[5]])
		rp->start[pc[5]] = in;
	}
	return 0;
      }
      case BRANCH_CLOSE_STAR:
      {
	unsigned short offset;

	EXTRACT16(offset, pc+1);
	end = regmatch(in, pc - offset, rp);
	if (end) {
	    /* rp->end[pc[5]] has been written by recursion */
	    return end;
	}
	EXTRACT16(offset, pc+3);
	end = regmatch(in, pc - offset, rp);
	if (end) {
	    return end;
	}
	end = regmatch(in, pc + 6, rp);
	if (end)
	    if (!rp->end[pc[5]])
		rp->end[pc[5]] = in;
	    return end;
	}
	return 0;
      }
      case PLUS:
	in = regmatch(in, pc+1, rp);
	if (!in)
	    return 0;
      case STAR:
      {
	char *new_pc;
	struct match_s { struct match_s *prev; char *end; }
		*first_match, *match_list, *new_match;
	unsigned short offset;

	pc += 3;
	EXTRACT16(offset, pc-2);
	new_pc = pc + offset;
	match_list = first_match = SMALL_ALLOC(sizeof *new_match);
	first_match->prev = 0;
	first_match->end = in;
	while((in = regmatch(in, pc, rp)) {
	    new_match = SMALL_ALLOC(sizeof *new_match);
		if (!new_match) {
		    new_match = match_list;
		    break;
		}
		new_match->prev = match_list;
		new_match->end = in;
		match_list = new_match;
	}
	do {
	    in = regmatch(new_match->end, new_pc, rp)
	    if (in) {
		FREE_SMALL_LIST(match_list, first_match, sizeof *match_list);
		return in;
	    }
	    new_match = new_match->prev;
	} while(new_match);
	FREE_SMALL_LIST(match_list, first_match, sizeof *match_list);
	return 0;
      case END:
	return in;
    }
}