/* stringop.c: Function operators acting on strings. */ #define _POSIX_SOURCE #include <string.h> #include <stdlib.h> #include "x.tab.h" #include "operator.h" #include "execute.h" #include "cmstring.h" #include "data.h" #include "util.h" #include "match.h" #include "ident.h" static int do_match(char *s, int slen, char *m, int mlen, char *t, int tlen); static char *regexp_error; void op_strlen(void) { Data *args; int len; /* Accept a string to take the length of. */ if (!func_init_1(&args, STRING)) return; /* Replace the argument with its length. */ len = args[0].u.substr.span; pop(1); push_int(len); } void op_substr(void) { int num_args, start, span; Data *args; /* Accept a string for the initial string, an integer specifying the start * of the substring, and an optional integer specifying the length of the * substring. */ if (!func_init_2_or_3(&args, &num_args, STRING, INTEGER, INTEGER)) return; start = args[1].u.val - 1; span = (num_args == 3) ? args[2].u.val : args[0].u.substr.span - start; if (start < 0) { throw(range_id, "Start (%d) is less than one.", start + 1); } else if (span < 0) { throw(range_id, "Span (%d) is less than zero.", span); } else if (start + span > args[0].u.substr.span) { throw(range_id, "The substring extends to %d, past the end of the string (%d).", start + span, args[0].u.substr.span); } else { /* Replace first argument with substring, and pop other arguments. */ args[0].u.substr.start += start; args[0].u.substr.span = span; pop(num_args - 1); } } void op_explode(void) { int num_args, sep_len, len, want_blanks; Data *args, d; List *exploded; char *sep, *s, *p, *word_start; String *word; /* Accept a string to explode and an optional string for the word * separator. */ if (!func_init_1_to_3(&args, &num_args, STRING, STRING, 0)) return; if (num_args >= 2 && args[1].u.substr.span == 0) { throw(range_id, "The separator string is empty."); return; } want_blanks = (num_args == 3) ? data_true(&args[2]) : 0; if (num_args >= 2) { sep = data_sptr(&args[1]); sep_len = args[1].u.substr.span; } else { sep = " "; sep_len = 1; } s = data_sptr(&args[0]); len = args[0].u.substr.span; exploded = list_new(0); word_start = p = s; while (p + sep_len <= s + len) { /* Look for first character of sep starting from p. */ p = memchr(p, *sep, (s + len) - (p + sep_len - 1)); if (!p) break; /* Keep going if we don't match all of the separator. */ if (strnccmp(p + 1, sep + 1, sep_len - 1) != 0) { p++; continue; } /* We found a word separator. */ if (want_blanks || p > word_start) { /* Add the word. */ word = string_from_chars(word_start, p - word_start); d.type = STRING; substr_set_to_full_string(&d.u.substr, word); exploded = list_add(exploded, &d); string_discard(word); } word_start = p = p + sep_len; } if (want_blanks || word_start < s + len) { /* Add the last word. */ word = string_from_chars(word_start, s + len - word_start); d.type = STRING; substr_set_to_full_string(&d.u.substr, word); exploded = list_add(exploded, &d); string_discard(word); } /* Pop the arguments and push the list onto the stack. */ pop(num_args); push_list(exploded); list_discard(exploded); } void op_strsub(void) { int slen, rlen, len, i, start; Data *args; char *sstr, *rstr, *s; String *subbed; /* Accept a base string, a search string, and a replacement string. */ if (!func_init_3(&args, STRING, STRING, STRING)) return; s = data_sptr(&args[0]); len = args[0].u.substr.span; sstr = data_sptr(&args[1]); slen = args[1].u.substr.span; rstr = data_sptr(&args[2]); rlen = args[2].u.substr.span; subbed = string_empty(slen); start = 0; while (1) { /* Look for first character of sstr in s. */ for (i = start; i + slen <= len && s[i] != *sstr; i++); /* Stop if we hit the end of the string. */ if (i + slen > len) break; if (strnccmp(&s[i], sstr, slen) == 0) { /* We found the search string. */ subbed = string_add(subbed, &s[start], i - start); subbed = string_add(subbed, rstr, rlen); start = i + slen; } else { subbed = string_add(subbed, &s[start], i + 1 - start); start = i + 1; } } subbed = string_add(subbed, &s[start], len - start); /* Pop the arguments and push the new string onto the stack. */ pop(3); push_string(subbed); string_discard(subbed); } /* Pad a string on the left (positive length) or on the right (negative * length). The optional third argument gives the fill character. */ void op_pad(void) { int num_args, len, padding; Data *args; char fill; Substring *substr; String *padded; if (!func_init_2_or_3(&args, &num_args, STRING, INTEGER, STRING)) return; if (num_args == 3 && args[2].u.substr.span != 1) { throw(type_id, "The third argument (%D) is not one character.", &args[2]); return; } /* Construct the padded string. */ anticipate_assignment(); substr = &args[0].u.substr; len = (args[1].u.val > 0) ? args[1].u.val : -args[1].u.val; padding = len - args[0].u.substr.span; fill = (num_args == 3) ? *data_sptr(&args[2]) : ' '; if (padding <= 0) { substr->span = len; } else if (args[1].u.val > 0) { substring_truncate(substr); substr->str = string_extend(substr->str, substr->start + len); memset(&substr->str->s[substr->start + substr->span], fill, padding); substr->span = len; substr->str->len = substr->start + len; substr->str->s[substr->start + len] = 0; } else { padded = string_of_char(fill, padding); padded = string_add(padded, &substr->str->s[substr->start], substr->span); string_discard(substr->str); substr_set_to_full_string(substr, padded); } /* Discard all but the first argument. */ pop(num_args - 1); } static int do_match(char *s, int slen, char *m, int mlen, char *t, int tlen) { int pos; /* Obviously, no match if slen is less than mlen. */ if (slen < mlen) return 0; /* Check for a match at the beginning of the string. */ if (strnccmp(s, m, mlen) == 0) return 1; /* Start checking after one token's length. */ pos = tlen; while (1) { /* Look for the first character in m. */ while (pos + mlen <= slen && s[pos] != *m) pos++; /* No match if we couldn't find *m for pos <= slen - mlen. */ if (pos + mlen > slen) return 0; /* Only check against m if we're just after a word-separator. */ if (strnccmp(&s[pos - slen], t, tlen) == 0) { /* Match against m. */ if (strnccmp(&s[pos], m, mlen) == 0) { /* We have a match. Return 1. */ return 1; } } /* It wasn't a match. Continue at pos + 1. */ pos++; } /* We never found a match; return 0. */ return 0; } void op_match_begin(void) { int num_args, tlen, result; Data *args; char *token; /* Accept a string to search in, a string to search for, and an optional * string giving the word separator. */ if (!func_init_2_or_3(&args, &num_args, STRING, STRING, STRING)) return; token = (num_args == 3) ? data_sptr(&args[2]) : " "; tlen = (num_args == 3) ? args[2].u.substr.span : 1; result = do_match(data_sptr(&args[0]), args[0].u.substr.span, data_sptr(&args[1]), args[1].u.substr.span, token, tlen); pop(num_args); push_int(result); } /* Match against a command template. */ void op_match_template(void) { Data *args; List *fields; /* Accept a string for the template and a string to match against. */ if (!func_init_2(&args, STRING, STRING)) return; /* Make sure strings we pass to match_template() are null-terminated. */ substring_truncate(&args[0].u.substr); substring_truncate(&args[1].u.substr); fields = match_template(data_sptr(&args[0]), data_sptr(&args[1])); pop(2); if (fields) { stack[stack_pos].type = LIST; sublist_set_to_full_list(&stack[stack_pos].u.sublist, fields); stack_pos++; } else { push_int(0); } } /* Match against a command template. */ void op_match_pattern(void) { Data *args; List *fields; Substring tmp; int i; /* Accept a string for the pattern and a string to match against. */ if (!func_init_2(&args, STRING, STRING)) return; /* Make sure strings we pass to match_pattern() are null-terminated. */ substring_truncate(&args[0].u.substr); substring_truncate(&args[1].u.substr); fields = match_pattern(data_sptr(&args[0]), data_sptr(&args[1])); pop(2); if (fields) { /* fields is backwards. Reverse it. */ for (i = 0; i * 2 < fields->len; i++) { tmp = fields->el[i].u.substr; fields->el[i].u.substr = fields->el[fields->len - 1 - i].u.substr; fields->el[fields->len - 1 - i].u.substr = tmp; } stack[stack_pos].type = LIST; sublist_set_to_full_list(&stack[stack_pos].u.sublist, fields); stack_pos++; } else { push_int(0); } } void op_match_regexp(void) { Data *args; regexp *reg; List *fields = NULL, *elemlist; int num_args, case_flag, i; char *s; if (!func_init_2_or_3(&args, &num_args, STRING, STRING, 0)) return; case_flag = (num_args == 3) ? data_true(&args[2]) : 0; /* Get the cached regexp, if there is one, or compile it. */ substring_truncate(&args[0].u.substr); substring_truncate(&args[1].u.substr); if (args[0].u.substr.start == 0 && args[0].u.substr.str->reg) reg = args[0].u.substr.str->reg; else reg = regcomp(data_sptr(&args[0])); if (!reg) { throw(regexp_id, "%s", regexp_error); return; } /* Execute the regexp. */ s = data_sptr(&args[1]); if (regexec(reg, s, case_flag)) { /* Build the list of fields. */ fields = list_new(NSUBEXP); for (i = 0; i < NSUBEXP; i++) { elemlist = list_new(2); elemlist->el[0].type = elemlist->el[1].type = INTEGER; if (reg->startp[i]) { elemlist->el[0].u.val = reg->startp[i] - s + 1; elemlist->el[1].u.val = reg->endp[i] - reg->startp[i]; } else { elemlist->el[0].u.val = elemlist->el[1].u.val = 0; } fields->el[i].type = LIST; sublist_set_to_full_list(&fields->el[i].u.sublist, elemlist); } } /* Store the regexp if possible. */ if (args[0].u.substr.start == 0) args[0].u.substr.str->reg = reg; else free(reg); pop(num_args); if (fields) { push_list(fields); list_discard(fields); } else { push_int(0); } } void regerror(char *msg) { regexp_error = msg; } /* Encrypt a string. */ void op_crypt(void) { int num_args, len; Data *args; char *s, save, *encrypted, salt[3]; String *str; /* Accept a string to encrypt and an optional salt. */ if (!func_init_1_or_2(&args, &num_args, STRING, STRING)) return; /* Temporarily convert args[0] to a null-terminated string. */ s = data_sptr(&args[0]); len = args[0].u.substr.span; save = s[len]; s[len] = 0; if (num_args == 2) { salt[0] = *data_sptr(&args[1]); salt[1] = *(data_sptr(&args[1]) + 1); salt[2] = 0; encrypted = crypt_string(s, salt); } else { encrypted = crypt_string(s, NULL); } /* Restore the character we clobbered. */ s[len] = save; pop(num_args); stack[stack_pos].type = STRING; str = string_from_chars(encrypted, strlen(encrypted)); substr_set_to_full_string(&stack[stack_pos].u.substr, str); stack_pos++; } void op_uppercase(void) { Data *args; Substring *substr; char *s; /* Accept a string to uppercase. */ if (!func_init_1(&args, STRING)) return; /* Uppercase all the characters in the argument. */ substr = &args[0].u.substr; substring_truncate(substr); for (s = data_sptr(&args[0]); *s; s++) *s = UCASE(*s); } void op_lowercase(void) { Data *args; Substring *substr; char *s; /* Accept a string to uppercase. */ if (!func_init_1(&args, STRING)) return; /* Uppercase all the characters in the argument. */ substr = &args[0].u.substr; substring_truncate(substr); for (s = data_sptr(&args[0]); *s; s++) *s = LCASE(*s); } void op_strcmp(void) { Data *args; int l1, l2, l, val; /* Accept two strings to compare. */ if (!func_init_2(&args, STRING, STRING)) return; /* Compare the strings case-sensitively. */ l1 = args[0].u.substr.span; l2 = args[1].u.substr.span; l = (l1 < l2) ? l1 : l2; val = strncmp(data_sptr(&args[0]), data_sptr(&args[1]), l); if (!val && l1 > l2) val = data_sptr(&args[0])[l2]; else if (!val && l1 < l2) val = data_sptr(&args[1])[l1]; pop(2); push_int(val); }