/* compress.c */

#include "copyright.h"
#include "config.h"

#ifdef COMPRESS
#include <stdio.h>
#include "teeny.h"

/* Compression routines */

/* -*-C-*-

Copyright (c) 1989, 1990 by David Applegate, James Aspnes, Timothy Freeman,
and Bennet Yee.

This material was developed by the above-mentioned authors.
Permission to copy this software, to redistribute it, and to use it
for any purpose is granted, subject to the following restrictions and
understandings.

1. Any copy made of this software must include this copyright notice
in full.

2. Users of this software agree to make their best efforts (a) to
return to the above-mentioned authors any improvements or extensions
that they make, so that these may be included in future releases; and
(b) to inform the authors of noteworthy uses of this software.

3. All materials developed as a consequence of the use of this
software shall duly acknowledge such use, in accordance with the usual
standards of acknowledging credit in academic research.

4. The authors have made no warrantee or representation that the
operation of this software will be error-free, and the authors are
under no obligation to provide any services, by way of maintenance,
update, or otherwise.

5. In conjunction with products arising from the use of this material,
there shall be no use of the names of the authors, of Carnegie-Mellon
University, nor of any adaptation thereof in any advertising,
promotional, or sales literature without prior written consent from
the authors, and Carnegie-Mellon University in each case. */

/* These use a pathetically simple encoding that takes advantage of the */
/* eighth bit on a char; if you are using an international character set, */
/* they may need substantial patching. */

#define BUFFER_LEN 16384	/* nice big buffer */

#define TOKEN_BIT 0x80		/* if on, it's a token */
#define TOKEN_MASK 0x7f		/* for stripping out token value */
#define NUM_TOKENS (128)
#define MAX_CHAR (128)

/* Top 128 bigrams in the CMU TinyMUD database as of 2/13/90 */
/* The position of a bigram in this table is its 7-bit token value. */
static const char *tokens[NUM_TOKENS] = {
    "e ", " t", "th", "he", "s ", " a", "ou", "in",
    "t ", " s", "er", "d ", "re", "an", "n ", " i",
    " o", "es", "st", "to", "or", "nd", "o ", "ar",
    "r ", ", ", "on", " b", "ea", "it", "u ", " w",
    "ng", "le", "is", "te", "en", "at", " c", "y ",
    "ro", " f", "oo", "al", ". ", "a ", " d", "ut",
    " h", "se", "nt", "ll", "g ", "yo", " l", " y",
    " p", "ve", "f ", "as", "om", "of", "ha", "ed",
    "h ", "hi", " r", "lo", "Yo", " m", "ne", "l ",
    "li", "de", "el", "ta", "wa", "ri", "ee", "ti",
    "no", "do", "Th", " e", "ck", "ur", "ow", "la",
    "ac", "et", "me", "il", " g", "ra", "co", "ch",
    "ma", "un", "so", "rt", "ai", "ce", "ic", "be",
    " n", "k ", "ge", "ot", "si", "pe", "tr", "wi",
    "e.", "ca", "rs", "ly", "ad", "we", "bo", "ho",
    "ir", "fo", "ke", "us", "m ", " T", "di", ".."
};

/*
 * Reverse lookup: token_table[a][b] is the token byte (index | TOKEN_BIT)
 * for the bigram "ab", or 0 when "ab" is not in tokens[].  Stored as
 * unsigned char so the values 0x80..0xff are representable without relying
 * on the signedness of plain char.
 */
static unsigned char token_table[MAX_CHAR][MAX_CHAR];
static int table_initialized = 0;

char *compress(char *s);
char *uncompress(char *s);

/*
 * Fill in token_table from tokens[].  The table has static storage and is
 * therefore already all zeroes, so only the entries for known bigrams need
 * to be written.  Sets table_initialized so compress() does this only once.
 */
static void
init_compress(void)
{
    int i;

    for (i = 0; i < NUM_TOKENS; i++) {
	unsigned char first = (unsigned char) tokens[i][0];
	unsigned char second = (unsigned char) tokens[i][1];

	token_table[first][second] = (unsigned char) (i | TOKEN_BIT);
    }

    table_initialized = 1;
}

/*
 * Compress the NUL-terminated string s by replacing each known bigram with
 * a single byte that has TOKEN_BIT set.
 *
 * Returns NULL when s is NULL; otherwise returns a pointer to a static
 * buffer local to this function, which is overwritten by the next call.
 * Output that would not fit in BUFFER_LEN bytes (terminator included) is
 * truncated instead of being written past the end of the buffer.
 *
 * Input bytes that already have the eighth bit set are never used as table
 * indices; they are copied through unchanged.  Note that uncompress() will
 * still interpret such a byte as a token, which is the limitation of this
 * encoding mentioned at the top of the file.
 */
char *
compress(char *s)
{
    static char buf[BUFFER_LEN];
    char *const limit = buf + BUFFER_LEN - 1;	/* keep room for the NUL */
    char *to = buf;

    if (!table_initialized)
	init_compress();

    if (s == NULL)
	return (char *) NULL;	/* don't compress NULL */

    while (*s != '\0' && to < limit) {
	unsigned char first = (unsigned char) s[0];
	unsigned char second = (unsigned char) s[1];
	unsigned char token = 0;

	/* Only 7-bit pairs are valid indices into token_table; a NUL */
	/* second byte means s[0] is the last character of the string. */
	if (second != '\0' && first < MAX_CHAR && second < MAX_CHAR)
	    token = token_table[first][second];

	if (token != 0) {
	    *to++ = (char) token;
	    s += 2;
	} else {
	    *to++ = s[0];
	    s++;
	}
    }

    *to = '\0';
    return buf;
}

/*
 * Expand a string produced by compress(): every byte with TOKEN_BIT set is
 * replaced by the two characters of the bigram it indexes in tokens[], and
 * every other byte is copied as-is.
 *
 * Returns NULL when s is NULL; otherwise returns a pointer to a static
 * buffer local to this function, which is overwritten by the next call.
 * Output that would not fit in BUFFER_LEN bytes (terminator included) is
 * truncated instead of being written past the end of the buffer; a bigram
 * is never split by the truncation.
 */
char *
uncompress(char *s)
{
    static char buf[BUFFER_LEN];
    char *const limit = buf + BUFFER_LEN - 1;	/* keep room for the NUL */
    char *to = buf;

    if (s == NULL)
	return (char *) NULL;	/* don't uncompress NULL */

    for (; *s != '\0'; s++) {
	unsigned char c = (unsigned char) *s;

	if (c & TOKEN_BIT) {
	    const char *token = tokens[c & TOKEN_MASK];

	    if (limit - to < 2)
		break;		/* no room left for both characters */
	    *to++ = token[0];
	    *to++ = token[1];
	} else {
	    if (to >= limit)
		break;		/* no room left */
	    *to++ = *s;
	}
    }

    *to = '\0';
    return buf;
}

#endif /* COMPRESS */