/* Compression library. Uses a radix tree to compress substrings down to 12 bit codes. */ #include <stdio.h> #include "radix.h" #include "compresstab.h" static struct r_node *root; static unsigned char *decompresstab[4096]; static unsigned char singletons[260]; unsigned int strings_compressed = 0; unsigned int strings_decompressed = 0; unsigned int chars_in = 0; unsigned int symbols_out = 0; void init_string_compress() { int code, i; unsigned char *p; root = (struct r_node *)malloc(sizeof(struct r_node)); root->count = 0; /* Get all the printables */ p = singletons; *p = '\0'; decompresstab[0] = p; p++; for(i = 1; i < 128; i++) { p[0] = (unsigned char) i; p[1] = '\0'; code = r_insert(&root, p); if(code <= 0 || code > 4095) { printf("Invalid code 1\n"); abort(); } decompresstab[code] = p; p += 2; } /* Now insert all the strings in our compression table */ for(i = 0; cmptab[i] != NULL; i++) { code = r_insert(&root, (unsigned char *)(cmptab[i])); if(code <= 0 || code > 4095) { printf("Invalid code 2\n"); abort(); } p = (unsigned char *)(cmptab[i]); decompresstab[code] = p; } #ifdef DEBUG printf("Done inserting.\n"); #endif } /* Compress the input string to the output array as 12 bit codes. Return the number of bytes to worry about in the output. */ int string_compress(src, dst) unsigned char *src; unsigned char *dst; { int code; int bitnum = 0; int bytenum; #ifdef DEBUG int i; #endif strings_compressed++; chars_in += strlen(src) + 1; do { code = r_compress(root, &src); #ifdef DEBUG printf("emit code %x\n", code); #endif bytenum = (bitnum >> 3); if(bitnum & 7) { dst[bytenum] = dst[bytenum] | ((code >> 8) & 0x0f); dst[bytenum + 1] = (code & 0xff); } else { dst[bytenum] = (code >> 4) & 0xff; dst[bytenum + 1] = ((code << 4) & 0xf0); } bitnum += 12; symbols_out++; } while(code != 0); #ifdef DEBUG bytenum = ((bitnum & 7) ? (bitnum >> 3) + 1 : (bitnum >> 3)); for(i = 0; i < bytenum; i++) { printf("%x ", dst[i]); } printf("\n"); #endif return ((bitnum & 7) ? (bitnum >> 3) + 1 : (bitnum >> 3)); } /* Decompress an array of 12 bit codes as produced by string_compress(). Return the number of bytes in the string, including zero terminator. */ int string_decompress(src, dst) unsigned char *src; unsigned char *dst; { unsigned int code, bitnum, bytenum; register unsigned char *p, tmp; int count = 0; strings_decompressed++; bitnum = 0; while(1) { bytenum = bitnum >> 3; if(bitnum & 7) { code = (tmp & 0x0f) << 8; code |= src[bytenum + 1]; } else { code = src[bytenum] << 4; tmp = src[bytenum + 1]; code |= tmp >> 4; } #ifdef DEBUG printf("Extract code %x = '%s'\n", code, decompresstab[code]); #endif if(code == 0) break; p = decompresstab[code]; while(*p) { count++; *dst++ = *p++; } bitnum += 12; } *dst = '\0'; return count + 1; /* Include zero terminator */ }