/* match.c: Routine for matching against a template. */ #define _POSIX_SOURCE #include <stdio.h> #include <string.h> #include "x.tab.h" #include "match.h" #include "memory.h" #include "data.h" #include "cmstring.h" #include "util.h" /* We use MALLOC_DELTA to keep the memory allocated in fields thirty-two bytes * less than a power of two, assuming fields are twelve bytes. */ #define MALLOC_DELTA 3 #define FIELD_STARTING_SIZE 8 typedef struct { char *start; char *end; int strip; /* Strip backslashes? */ } Field; static char *match_coupled_wildcard(char *template, char *s); static char *match_wildcard(char *template, char *s); static char *match_word_pattern(char *template, char *s); static void add_field(char *start, char *end, int strip); static Field *fields; static int field_pos, field_size; void init_match(void) { fields = EMALLOC(Field, FIELD_STARTING_SIZE); field_size = FIELD_STARTING_SIZE; } List *match_template(char *template, char *s) { char *p; int i, j, coupled; List *l; String *str; field_pos = 0; /* Strip leading spaces in template. */ while (*template == ' ') template++; while (*template) { /* Skip over spaces in s. */ while (*s == ' ') s++; /* Is the next template entry a wildcard? */ if (*template == '*') { /* Check for coupled wildcard ("*=*"). */ if (template[1] == '=' && template[2] == '*') { template += 2; coupled = 1; } else { coupled = 0; } /* Template is invalid if wildcard is not alone. */ if (template[1] && template[1] != ' ') return NULL; /* Skip over spaces to find next token. */ while (*++template == ' '); /* Two wildcards in a row is illegal. */ if (*template == '*') return NULL; /* Match the wildcard. This also matches the next token, if there * is one, and adds the appropriate fields. */ if (coupled) s = match_coupled_wildcard(template, s); else s = match_wildcard(template, s); if (!s) return NULL; } else { /* Match the word pattern. This does not add any fields, so we * do it ourselves.*/ p = match_word_pattern(template, s); if (!p) return NULL; add_field(s, p, 0); s = p; } /* Get to next token in template, if there is one. */ while (*template && *template != ' ') template++; while (*template == ' ') template++; } /* Ignore any trailing spaces in s. */ while (*s == ' ') s++; /* If there's anything left over in s, the match failed. */ if (*s) return NULL; /* The match succeeded. Construct a list of the fields. */ l = list_new(field_pos); for (i = 0; i < field_pos; i++) { s = fields[i].start; p = fields[i].end; if (fields[i].strip) { str = string_new(p - s); j = 0; while (s < p) { if (*s == '\\' && s + 1 < p) s++; str->s[j++] = *s; s++; } str->s[j] = 0; str->len = j; } else { str = string_from_chars(s, p - s); } l->el[i].type = STRING; substr_set_to_full_string(&l->el[i].u.substr, str); } return l; } /* Match a coupled wildcard as well as the next token, if there is one. This * adds the fields that it matches. */ static char *match_coupled_wildcard(char *template, char *s) { char *p, *q; /* Check for quoted text. */ if (*s == '"') { /* Find the end of the quoted text. */ p = s + 1; while (*p && *p != '"') { if (*p == '\\' && p[1]) p++; p++; } /* Skip whitespace after quoted text. */ for (q = p + 1; *q && *q == ' '; q++); /* Move on if next character is an equals sign. */ if (*q == '=') { for (q++; *q && *q == ' '; q++); add_field(s + 1, p, 1); return match_wildcard(template, q); } else { return NULL; } } /* Find first occurrance of an equal sign. */ p = strchr(s, '='); if (!p) return NULL; /* Add field up to first nonspace character before equalsign, and move on * starting from the first nonspace character after it. */ for (q = p - 1; *q == ' '; q--); for (p++; *p == ' '; p++); add_field(s, q + 1, 0); return match_wildcard(template, p); } /* Match a wildcard. Also match the next token, if there is one. This adds * the fields that it matches. */ static char *match_wildcard(char *template, char *s) { char *p, *q, *r; /* If no token follows the wildcard, then the match succeeds. */ if (!*template) { p = s + strlen(s); add_field(s, p, 0); return p; } /* There's a word pattern to match. Check if wildcard match is quoted. */ if (*s == '"') { /* Find the end of the quoted text. */ p = s + 1; while (*p && *p != '"') { if (*p == '\\' && p[1]) p++; p++; } /* Skip whitespace after quoted wildcard match. */ for (q = p + 1; *q == ' '; q++); /* Next token must match here. */ r = match_word_pattern(template, q); if (r) { add_field(s + 1, p, 1); add_field(q, r, 0); return r; } else { return NULL; } } else if (*s == '\\') { /* Skip an initial backslash. This is so a wildcard match can start * with a double quote without being a quoted match. */ s++; } /* There is an unquoted wildcard match. Start by looking here. */ p = match_word_pattern(template, s); if (p) { add_field(s, s, 0); add_field(s, p, 0); return p; } /* Look for more words to match. */ p = s; while (*p) { if (*p == ' ') { /* Skip to end of word separator and try next word. */ q = p + 1; while (*q == ' ') q++; r = match_word_pattern(template, q); if (r) { /* It matches; add wildcard field and word field. */ add_field(s, p, 0); add_field(q, r, 0); return r; } /* No match; continue looking at q. */ p = q; } else { p++; } } /* No words matched, so the match fails. */ return NULL; } /* Match a word pattern. Do not add any fields. */ static char *match_word_pattern(char *template, char *s) { char *p = s; int abbrev = 0; while (*template && *template != ' ' && *template != '|') { if (*template == '"' || *template == '*') { /* Invalid characters in a word pattern; match fails. */ return NULL; } else if (*template == '?') { /* A question mark tells us that the matching string can be * abbreviated down to this point. */ abbrev = 1; } else if (LCASE(*p) != LCASE(*template)) { /* The match succeeds if we're at the end of the word in p and * abbrev is set. */ if (abbrev && (!*p || *p == ' ')) return p; /* Otherwise the match against this word fails. Try the next word * if there is one, or fail if there isn't. Also catch illegal * characters (* and ") in the word and return NULL if we find * them. */ while (*template && !strchr(" |*\"", *template)) template++; if (*template == '|') return match_word_pattern(template + 1, s); else return NULL; } else { p++; } template++; } /* We came to the end of the word in the template. If we're also at the * end of the word in p, then the match succeeds. Otherwise, try the next * word if there is one, and fail if there isn't. */ if (!*p || *p == ' ') return p; else if (*template == '|') return match_word_pattern(template + 1, s); else return NULL; } /* Add a field. strip should be true if this is a field for a wildcard not at * the end of the template. */ static void add_field(char *start, char *end, int strip) { if (field_pos >= field_size) { field_size = field_size * 2 + MALLOC_DELTA; fields = EREALLOC(fields, Field, field_size); } fields[field_pos].start = start; fields[field_pos].end = end; fields[field_pos].strip = strip; field_pos++; } /* Returns a backwards list of fields if <s> matches the pattern <pattern>, or * NULL if it doesn't. */ List *match_pattern(char *pattern, char *s) { char *p, *q; List *list; String *str; Data d; /* Locate wildcard in pattern, if any. If there isn't any, return an empty * list if pattern and s are equivalent, or fail if they aren't. */ p = strchr(pattern, '*'); if (!p) return (strccmp(pattern, s) == 0) ? list_new(0) : NULL; /* Fail if s and pattern don't match up to wildcard. */ if (strnccmp(pattern, s, p - pattern) != 0) return NULL; /* Consider s starting after the part that matches the pattern up to the * wildcard. */ s += (p - pattern); /* Match always succeeds if wildcard is at end of line. */ if (!p[1]) { list = list_new(1); list->el[0].type = STRING; str = string_from_chars(s, strlen(s)); substr_set_to_full_string(&list->el[0].u.substr, str); return list; } /* Find first potential match of rest of pattern. */ for (q = s; *q && *q != p[1]; q++); /* As long as we have a potential match... */ while (*q) { /* Check to see if this is actually a match. */ list = match_pattern(p + 1, q); if (list) { /* It matched. Append a field and return the list. */ d.type = STRING; str = string_from_chars(s, q - s); substr_set_to_full_string(&d.u.substr, str); list = list_add(list, &d); string_discard(str); return list; } /* Find next potential match. */ while (*++q && *q != p[1]); } /* Return failure. */ return NULL; }