// a.cpp // // $Id: a.cpp,v 1.3 2000/09/18 08:03:42 sdennis Exp $ // // Analyse a MUX database, we chew it in on stdin, extract // attributes, and stuff them into a radix tree. We accept a single // parameter telling us what the maximum length word to store is, and // then we dump the whole tree out. This gives a complete list of all // substrings of length N or less found in a MUX database, complete // with counts, for post-processing. // #include "copyright.h" #include "autoconf.h" #include "config.h" #include <stdio.h> #include <stdlib.h> #include <string.h> #include "radix.h" #include "_build.h" /* * forward */ void eatline(void); void copyattr(char *buff); char attr[4002]; int DCL_CDECL main(int ac, char *av[]) { struct r_node *root; int ch, i, len, sslen, max_len; unsigned char substr[128]; int objcnt = 0; if (ac != 2) { #ifdef WIN32 #ifdef BETA #if PATCHLEVEL > 0 fprintf(stderr, "%s from MUX %s.p%d for Win32 #%s [BETA]\n", av[0], MUX_VERSION, PATCHLEVEL, MUX_BUILD_NUM); #else // PATCHLEVEL fprintf(stderr, "%s from MUX %s for Win32 #%s [BETA]\n", av[0], MUX_VERSION, MUX_BUILD_NUM); #endif // PATCHLEVEL #else // BETA #if PATCHLEVEL > 0 fprintf(stderr, "%s from MUX %s.p%d for Win32 #%s [%s]\n", av[0], MUX_VERSION, PATCHLEVEL, MUX_BUILD_NUM, MUX_RELEASE_DATE); #else // PATCHLEVEL fprintf(stderr, "%s from MUX %s for Win32 #%s [%s]\n", av[0], MUX_VERSION, MUX_BUILD_NUM, MUX_RELEASE_DATE); #endif // PATCHLEVEL #endif // BETA #else // WIN32 #ifdef BETA #if PATCHLEVEL > 0 fprintf(stderr, "%s from MUX %s.p%d #%s [BETA]\n", av[0], MUX_VERSION, PATCHLEVEL, MUX_BUILD_NUM); #else // PATCHLEVEL fprintf(stderr, "%s from MUX %s #%s [BETA]\n", av[0], MUX_VERSION, MUX_BUILD_NUM); #endif // PATCHLEVEL #else // BETA #if PATCHLEVEL > 0 fprintf(stderr, "%s from MUX %s.p%d #%s [%s]\n", av[0], MUX_VERSION, PATCHLEVEL, MUX_BUILD_NUM, MUX_RELEASE_DATE); #else // PATCHLEVEL fprintf(stderr, "%s from MUX %s #%s [%s]\n", av[0], MUX_VERSION, MUX_BUILD_NUM, MUX_RELEASE_DATE); #endif // PATCHLEVEL #endif // BETA #endif // WIN32 fprintf(stderr, "usage: %s <max string length>\n", av[0]); exit(1); } max_len = atoi(av[1]); if (max_len <= 0) { fprintf(stderr, "usage: %s <positive max string length>\n", av[0]); exit(1); } if (max_len >= 128) { fprintf(stderr, "A max string length os over 128 is insane.\n"); exit(1); } switch (max_len) { case 1: case 2: case 3: case 4: case 5: break; case 6: case 7: case 8: fprintf(stderr, "counting all strings up to %s long is going to be expensive\n", av[1]); break; default: fprintf(stderr, "All strings of length %s? I hope you have some real iron\n", av[1]); break; } root = (struct r_node *)MEMALLOC(sizeof(struct r_node)); root->count = 0; while ((ch = getchar()) != EOF) { switch (ch) { case '>': eatline(); copyattr(attr); len = strlen(attr); for (i = 0; i < len; i++) { sslen = ((len - i) < max_len ? (len - i) : max_len); memmove(substr, attr + i, sslen); substr[sslen] = '\0'; r_insert(&root, substr); } break; case '!': objcnt++; if ((objcnt & 0x1ff) == 0) fprintf(stderr, "."); fflush(stderr); default: eatline(); } } r_dump(root); return 0; } /* * Consume a line up to and including the newline */ void eatline(void) { int ch; while ((ch = getchar()) != '\n') { if (ch == EOF) break; } } /* * Read in a string on stdin and stuff it into the passed down array, observing * * the strange rules for attribute escaping. */ void copyattr(char *buff) { char last; int i; char ch = '\0'; /* * anything other than a \r */ i = 0; do { last = ch; ch = getchar(); buff[i++] = ch; /* * Internal newlines are escaped as \r\n */ if (ch == '\n' && last == '\r') { buff[i - 2] = ' '; i--; } } while ((ch != '\n' || last == '\r') && ch != EOF && i < 4000); buff[i - 1] = '\0'; /* * Whack trailing newline */ }