/* Primitives Package */

#include "copyright.h"
#include "config.h"

#include <sys/types.h>
#include <stdio.h>
#include <time.h>
#include <ctype.h>

#ifdef WIN32
# define __STDC__ 1
# include "./regex.h"
#else
# include <regex.h>
#endif
#include "db.h"
#include "tune.h"
#include "inst.h"
#include "externs.h"
#include "match.h"
#include "interface.h"
#include "params.h"
#include "fbstrings.h"
#include "interp.h"

/* Number of slots in the compiled-regex cache below. */
#define MUF_RE_CACHE_ITEMS 64

/* Operands popped by the primitives; file-scope so abort_interp can see them. */
static struct inst *oper1, *oper2, *oper3, *oper4;

/* Shared scratch buffer used to build result strings. */
static char buf[BUFFER_LEN];

/* One cache slot: the pattern string, the regcomp flags, and the compiled regex. */
typedef struct {
    struct shared_string *pattern;
    int flags;
    regex_t re;
} muf_re;

static muf_re muf_re_cache[MUF_RE_CACHE_ITEMS];


/*
 * Return a compiled regex for (pattern, flags), compiling it on a cache miss.
 *
 * The slot is chosen from the pattern hash plus the flags.  If the slot is
 * occupied by a different pattern or different flags, the old entry is
 * released (regfree, and the pattern's link count is dropped, freeing it when
 * it reaches zero) before the new pattern is compiled in its place.
 *
 * pattern - pattern string; the cache takes an additional link on it when the
 *           compile succeeds.
 * flags   - regcomp() flags; REG_EXTENDED is always added.
 * err     - receives the regcomp() return code on a compile attempt.
 *
 * Returns the cache entry, or NULL (slot left empty, *err set) when regcomp()
 * fails.
 */
muf_re *
muf_re_get(struct shared_string *pattern, int flags, int *err)
{
    int idx = (hash(DoNullInd(pattern), MUF_RE_CACHE_ITEMS) + flags) % MUF_RE_CACHE_ITEMS;
    muf_re *re = &muf_re_cache[idx];

    if (re->pattern) {
        if ((flags != re->flags) || strcmp(DoNullInd(pattern), DoNullInd(re->pattern))) {
            /* Slot holds something else: evict it. */
            regfree(&re->re);

            if (re->pattern && (--re->pattern->links == 0))
                free((void *) re->pattern);
        } else {
            /* Cache hit. */
            return re;
        }
    }

    if ((*err = regcomp(&re->re, DoNullInd(pattern), flags | REG_EXTENDED)) != 0) {
        /* Mark the slot empty so the failed regex_t is never regfree'd. */
        re->pattern = NULL;
        return NULL;
    }

    re->pattern = pattern;
    re->pattern->links++;
    re->flags = flags;

    return re;
}


/*
 * Map a regcomp()/regexec() error code to a human-readable message.
 * Unrecognised codes yield "Unknown error".
 */
const char *
muf_re_error(int err)
{
    switch (err) {
    case REG_NOMATCH:
        return "No matches";
    case REG_BADPAT:
        return "Invalid regular expression";
    case REG_ECOLLATE:
        return "Invalid collating element referenced";
    case REG_ECTYPE:
        return "Invalid character class type referenced";
    case REG_EESCAPE:
        return "Trailing \\ in pattern";
    case REG_ESUBREG:
        return "Number in \\digit invalid or in error";
    case REG_EBRACK:
        return "[ ] imbalance";
    case REG_EPAREN:
        return "\\( \\) or ( ) imbalance";
    case REG_EBRACE:
        return "{ } imbalance";
    case REG_BADBR:
        return "Content of \\{ \\} invalid";
    case REG_ERANGE:
        return "Invalid endpoint in range expression";
    case REG_ESPACE:
        return "Out of memory";
    case REG_BADRPT:
        return "?, * or + not preceded by valid regular expression";
    default:
        return "Unknown error";
    }
}


/*
 * REGEXP primitive: ( str:Text str:Pattern int:Flags -- arr:Values arr:Indexes )
 *
 * Matches Pattern against Text once.  On a match, pushes two arrays with one
 * element per subexpression (element 0 is the whole match):
 *   Values  - the matched substring (empty string for an unset group),
 *   Indexes - a two-element array { rm_so + 1, rm_eo - rm_so }, i.e. a
 *             1-based start offset and a length.
 * When nothing matches, two empty arrays are pushed.
 *
 * Only the MUF_RE_ICASE bit of Flags is examined here (mapped to REG_ICASE).
 */
void
prim_regexp(PRIM_PROTOTYPE)
{
    stk_array *nu_val = 0;
    stk_array *nu_idx = 0;
    regmatch_t *matches = 0;
    muf_re *re;
    char *text;
    int flags = 0;
    int nosubs, err, len, i;

    CHECKOP(3);

    oper3 = POP();              /* int:Flags */
    oper2 = POP();              /* str:Pattern */
    oper1 = POP();              /* str:Text */

    if (oper1->type != PROG_STRING) {
        abort_interp("Non-string argument (1)");
    }
    if (oper2->type != PROG_STRING) {
        abort_interp("Non-string argument (2)");
    }
    if (oper3->type != PROG_INTEGER) {
        abort_interp("Non-integer argument (3)");
    }
    if (!oper2->data.string) {
        abort_interp("Empty string argument (2)");
    }

    if (oper3->data.number & MUF_RE_ICASE)
        flags |= REG_ICASE;

    if ((re = muf_re_get(oper2->data.string, flags, &err)) == NULL) {
        abort_interp(muf_re_error(err));
    }

    text = DoNullInd(oper1->data.string);
    len = strlen(text);
    /* Slot 0 is the whole match, followed by one slot per subexpression. */
    nosubs = re->re.re_nsub + 1;

    if ((matches = (regmatch_t *) malloc(sizeof(regmatch_t) * nosubs)) == NULL) {
        abort_interp("Out of memory");
    }

    if ((err = regexec(&re->re, text, nosubs, matches, 0)) != 0) {
        if (err != REG_NOMATCH) {
            free(matches);
            abort_interp(muf_re_error(err));
        }

        /* No match: result is a pair of empty arrays. */
        if (((nu_val = new_array_packed(0)) == NULL) ||
            ((nu_idx = new_array_packed(0)) == NULL)) {
            free(matches);

            if (nu_val != NULL)
                array_free(nu_val);

            if (nu_idx != NULL)
                array_free(nu_idx);

            abort_interp("Out of memory");
        }
    } else {
        if (((nu_val = new_array_packed(nosubs)) == NULL) ||
            ((nu_idx = new_array_packed(nosubs)) == NULL)) {
            free(matches);

            if (nu_val != NULL)
                array_free(nu_val);

            if (nu_idx != NULL)
                array_free(nu_idx);

            abort_interp("Out of memory");
        }

        for (i = 0; i < nosubs; i++) {
            regmatch_t *cm = &matches[i];
            struct inst idx, val;
            stk_array *nu;

            /* Extract the substring; unset groups (-1 offsets) become "". */
            if ((cm->rm_so >= 0) && (cm->rm_eo >= 0) && (cm->rm_so < len))
                snprintf(buf, BUFFER_LEN, "%.*s", (int) (cm->rm_eo - cm->rm_so),
                         &text[cm->rm_so]);
            else
                buf[0] = '\0';

            idx.type = PROG_INTEGER;
            idx.data.number = i;
            val.type = PROG_STRING;
            val.data.string = alloc_prog_string(buf);

            array_setitem(&nu_val, &idx, &val);

            CLEAR(&idx);
            CLEAR(&val);

            /* Build the { start, length } pair for this group. */
            if ((nu = new_array_packed(2)) == NULL) {
                free(matches);

                array_free(nu_val);
                array_free(nu_idx);

                abort_interp("Out of memory");
            }

            idx.type = PROG_INTEGER;
            idx.data.number = 0;
            val.type = PROG_INTEGER;
            val.data.number = cm->rm_so + 1;

            array_setitem(&nu, &idx, &val);

            CLEAR(&idx);
            CLEAR(&val);

            idx.type = PROG_INTEGER;
            idx.data.number = 1;
            val.type = PROG_INTEGER;
            val.data.number = cm->rm_eo - cm->rm_so;

            array_setitem(&nu, &idx, &val);

            CLEAR(&idx);
            CLEAR(&val);

            idx.type = PROG_INTEGER;
            idx.data.number = i;
            val.type = PROG_ARRAY;
            val.data.array = nu;

            array_setitem(&nu_idx, &idx, &val);

            CLEAR(&idx);
            CLEAR(&val);
        }
    }

    free(matches);

    CLEAR(oper3);
    CLEAR(oper2);
    CLEAR(oper1);

    PushArrayRaw(nu_val);
    PushArrayRaw(nu_idx);
}


/*
 * REGSUB primitive: ( str:Text str:Pattern str:Replace int:Flags -- str )
 *
 * Replaces the first match of Pattern in Text with Replace, or every match
 * when the MUF_RE_ALL bit of Flags is set.  MUF_RE_ICASE maps to REG_ICASE.
 * In Replace, "\N" (N a single digit) inserts subexpression N ("\0" is the
 * whole match); a backslash before any other character inserts that character
 * literally; a lone trailing backslash is emitted as a backslash.
 *
 * The result is built in the shared buffer; if it does not fit, the primitive
 * aborts with "Operation would result in overflow".
 */
void
prim_regsub(PRIM_PROTOTYPE)
{
    regmatch_t *matches = 0;
    int flags = 0;
    int eflags = 0;             /* regexec() flags; gains REG_NOTBOL after the first pass */
    char *write_ptr = buf;
    int write_left = BUFFER_LEN - 1;
    muf_re *re;
    char *text;
    int nosubs, err, len;

    CHECKOP(4);

    oper4 = POP();              /* int:Flags */
    oper3 = POP();              /* str:Replace */
    oper2 = POP();              /* str:Pattern */
    oper1 = POP();              /* str:Text */

    if (oper1->type != PROG_STRING) {
        abort_interp("Non-string argument (1)");
    }
    if (oper2->type != PROG_STRING) {
        abort_interp("Non-string argument (2)");
    }
    if (oper3->type != PROG_STRING) {
        abort_interp("Non-string argument (3)");
    }
    if (oper4->type != PROG_INTEGER) {
        abort_interp("Non-integer argument (4)");
    }
    if (!oper2->data.string) {
        abort_interp("Empty string argument (2)");
    }

    if (oper4->data.number & MUF_RE_ICASE)
        flags |= REG_ICASE;

    if ((re = muf_re_get(oper2->data.string, flags, &err)) == NULL) {
        abort_interp(muf_re_error(err));
    }

    text = DoNullInd(oper1->data.string);
    /* Slot 0 is the whole match, followed by one slot per subexpression. */
    nosubs = re->re.re_nsub + 1;

    if ((matches = (regmatch_t *) malloc(sizeof(regmatch_t) * nosubs)) == NULL) {
        abort_interp("Out of memory");
    }

    while ((*text != '\0') && (write_left > 0)) {
        regmatch_t *cm;
        char *read_ptr;
        int soff;
        int count;

        len = strlen(text);

        err = regexec(&re->re, text, nosubs, matches, eflags);
        if (err != 0) {
            if (err != REG_NOMATCH) {
                free(matches);
                abort_interp(muf_re_error(err));
            }

            /* No (further) match: copy the remaining text through. */
            while ((write_left > 0) && (*text != '\0')) {
                *write_ptr++ = *text++;
                write_left--;
            }

            break;
        }

        cm = &matches[0];
        read_ptr = DoNullInd(oper3->data.string);
        /* Offsets in matches[] are relative to text as it was at regexec();
         * soff records how far text advances while copying the prefix. */
        soff = cm->rm_so;

        /* Copy the unmatched text that precedes the match. */
        for (count = cm->rm_so;
             (write_left > 0) && (*text != '\0') && (count > 0); count--) {
            *write_ptr++ = *text++;
            write_left--;
        }

        /* Expand the replacement string. */
        while ((write_left > 0) && (*read_ptr != '\0')) {
            if (*read_ptr != '\\') {
                *write_ptr++ = *read_ptr++;
                write_left--;
                continue;
            }

            read_ptr++;

            if (*read_ptr == '\0') {
                /* Lone trailing backslash: emit it literally and stop, rather
                 * than copying the terminator and scanning past the end. */
                *write_ptr++ = '\\';
                write_left--;
                break;
            }

            /* Cast: isdigit() is undefined for negative char values. */
            if (!isdigit((unsigned char) *read_ptr)) {
                *write_ptr++ = *read_ptr++;
                write_left--;
            } else {
                int idx = (*read_ptr++) - '0';

                if ((idx < 0) || (idx >= nosubs)) {
                    free(matches);
                    abort_interp("Invalid \\subexp (3)");
                }

                cm = &matches[idx];

                if ((cm->rm_so >= 0) && (cm->rm_eo >= 0) && (cm->rm_so < len)) {
                    char *ptr = &text[cm->rm_so - soff];

                    count = cm->rm_eo - cm->rm_so;

                    if (count > write_left) {
                        free(matches);
                        abort_interp("Operation would result in overflow");
                    }

                    for (; (write_left > 0) && (count > 0) && (*ptr != '\0'); count--) {
                        *write_ptr++ = *ptr++;
                        write_left--;
                    }
                }
            }
        }

        /* Replacement did not fit: report it instead of truncating silently. */
        if (*read_ptr != '\0') {
            free(matches);
            abort_interp("Operation would result in overflow");
        }

        /* Skip over the matched text. */
        cm = &matches[0];

        for (count = cm->rm_eo - cm->rm_so; (*text != '\0') && (count > 0); count--)
            text++;

        /* A zero-length match consumes nothing; pass one character through so
         * the loop always makes progress (otherwise an empty replacement with
         * MUF_RE_ALL would spin forever). */
        if ((cm->rm_eo == cm->rm_so) && (write_left > 0) && (*text != '\0')) {
            *write_ptr++ = *text++;
            write_left--;
        }

        /* Later searches start mid-string, so '^' must not match there. */
        eflags = REG_NOTBOL;

        if ((oper4->data.number & MUF_RE_ALL) == 0) {
            /* Single replacement requested: copy the rest and finish. */
            while ((write_left > 0) && (*text != '\0')) {
                *write_ptr++ = *text++;
                write_left--;
            }

            break;
        }
    }

    free(matches);

    /* Unconsumed input means the output buffer filled up first. */
    if (*text != '\0') {
        abort_interp("Operation would result in overflow");
    }

    *write_ptr = '\0';

    CLEAR(oper4);
    CLEAR(oper3);
    CLEAR(oper2);
    CLEAR(oper1);

    PushString(buf);
}