/* compress.c */

#include "copyright.h"
#include "config.h"

#ifdef COMPRESS
#include <stdio.h>

#include "teeny.h"

/* Compression routines */

/*
 * -*-C-*-
 *
 * Copyright (c) 1989, 1990 by David Applegate, James Aspnes, Timothy Freeman,
 * and Bennet Yee.
 *
 * This material was developed by the above-mentioned authors. Permission to
 * copy this software, to redistribute it, and to use it for any purpose is
 * granted, subject to the following restrictions and understandings.
 *
 * 1. Any copy made of this software must include this copyright notice in full.
 *
 * 2. Users of this software agree to make their best efforts (a) to return to
 * the above-mentioned authors any improvements or extensions that they make,
 * so that these may be included in future releases; and (b) to inform the
 * authors of noteworthy uses of this software.
 *
 * 3. All materials developed as a consequence of the use of this software shall
 * duly acknowledge such use, in accordance with the usual standards of
 * acknowledging credit in academic research.
 *
 * 4. The authors have made no warrantee or representation that the operation of
 * this software will be error-free, and the authors are under no obligation
 * to provide any services, by way of maintenance, update, or otherwise.
 *
 * 5. In conjunction with products arising from the use of this material, there
 * shall be no use of the names of the authors, of Carnegie-Mellon
 * University, nor of any adaptation thereof in any advertising, promotional,
 * or sales literature without prior written consent from the authors, and
 * Carnegie-Mellon University in each case.
 */

/* These use a pathetically simple encoding that takes advantage of the */
/* eighth bit on a char; if you are using an international character set, */
/* they may need substantial patching. */

#define BUFFER_LEN 16384        /* nice big buffer */
#define TOKEN_BIT 0x80          /* if on, it's a token */
#define TOKEN_MASK 0x7f         /* for stripping out token value */
#define NUM_TOKENS (128)
#define MAX_CHAR (128)

/* Top 128 bigrams in the CMU TinyMUD database as of 2/13/90 */
static const char *const tokens[NUM_TOKENS] = {
    "e ", " t", "th", "he", "s ", " a", "ou", "in",
    "t ", " s", "er", "d ", "re", "an", "n ", " i",
    " o", "es", "st", "to", "or", "nd", "o ", "ar",
    "r ", ", ", "on", " b", "ea", "it", "u ", " w",
    "ng", "le", "is", "te", "en", "at", " c", "y ",
    "ro", " f", "oo", "al", ". ", "a ", " d", "ut",
    " h", "se", "nt", "ll", "g ", "yo", " l", " y",
    " p", "ve", "f ", "as", "om", "of", "ha", "ed",
    "h ", "hi", " r", "lo", "Yo", " m", "ne", "l ",
    "li", "de", "el", "ta", "wa", "ri", "ee", "ti",
    "no", "do", "Th", " e", "ck", "ur", "ow", "la",
    "ac", "et", "me", "il", " g", "ra", "co", "ch",
    "ma", "un", "so", "rt", "ai", "ce", "ic", "be",
    " n", "k ", "ge", "ot", "si", "pe", "tr", "wi",
    "e.", "ca", "rs", "ly", "ad", "we", "bo", "ho",
    "ir", "fo", "ke", "us", "m ", " T", "di", ".."
};

/*
 * token_table[a][b] holds the token byte (index into tokens[] with
 * TOKEN_BIT set) for the character pair (a, b), or 0 when the pair has
 * no token.  Filled in lazily by init_compress().
 */
static char token_table[MAX_CHAR][MAX_CHAR];
static int table_initialized = 0;

char *compress(char *s);
char *uncompress(char *s);

/*
 * Build the bigram lookup table from tokens[] and mark it as initialized.
 */
static void
init_compress(void)
{
    int i;
    int j;

    for (i = 0; i < MAX_CHAR; i++) {
        for (j = 0; j < MAX_CHAR; j++) {
            token_table[i][j] = 0;
        }
    }

    for (i = 0; i < NUM_TOKENS; i++) {
        /* Index through unsigned char so the subscript can never be negative. */
        token_table[(unsigned char) tokens[i][0]]
                   [(unsigned char) tokens[i][1]] = (char) (i | TOKEN_BIT);
    }

    table_initialized = 1;
}

/*
 * Compress the NUL-terminated string s by replacing each character pair
 * found in tokens[] with a single byte that has TOKEN_BIT set.
 *
 * Returns NULL when s is NULL.  Otherwise returns a pointer to a static
 * buffer that is overwritten by the next call to compress().  Output that
 * would not fit in BUFFER_LEN bytes (including the terminating NUL) is
 * truncated.
 *
 * Input bytes outside the range 0..MAX_CHAR-1 are never tokenized; they are
 * copied through unchanged.  Because such bytes already have TOKEN_BIT set,
 * uncompress() will expand them as tokens, so only 7-bit input survives a
 * round trip.
 */
char *
compress(char *s)
{
    static char buf[BUFFER_LEN];
    char *const end = buf + BUFFER_LEN - 1;     /* keep one byte for the NUL */
    char *to = buf;
    unsigned char first;
    unsigned char second;
    char token;

    if (!table_initialized)
        init_compress();

    if (s == NULL)
        return NULL;            /* don't compress NULL */

    /* tokenize while at least two input characters remain */
    while (s[0] && s[1] && to < end) {
        first = (unsigned char) s[0];
        second = (unsigned char) s[1];

        /* Only pairs inside the table's range may be looked up. */
        if (first < MAX_CHAR && second < MAX_CHAR)
            token = token_table[first][second];
        else
            token = 0;

        if (token != 0) {
            *to++ = token;
            s += 2;
        } else {
            *to++ = s[0];
            s++;
        }
    }

    /* copy the last character (if any and if it fits) and the NUL */
    if (s[0] && !s[1] && to < end)
        *to++ = s[0];
    *to = '\0';

    return buf;
}

/*
 * Expand a string produced by compress(): every byte with TOKEN_BIT set is
 * replaced by the two characters of the corresponding tokens[] entry, and
 * every other byte is copied unchanged.
 *
 * Returns NULL when s is NULL.  Otherwise returns a pointer to a static
 * buffer that is overwritten by the next call to uncompress().  Output that
 * would not fit in BUFFER_LEN bytes (including the terminating NUL) is
 * truncated; a token is never split in half.
 */
char *
uncompress(char *s)
{
    static char buf[BUFFER_LEN];
    char *const end = buf + BUFFER_LEN - 1;     /* keep one byte for the NUL */
    char *to = buf;
    const char *token;

    if (s == NULL)
        return NULL;            /* don't uncompress NULL */

    for (; *s; s++) {
        if ((unsigned char) *s & TOKEN_BIT) {
            if (end - to < 2)
                break;          /* no room left for both characters */
            token = tokens[(unsigned char) *s & TOKEN_MASK];
            *to++ = token[0];
            *to++ = token[1];
        } else {
            if (to >= end)
                break;          /* buffer full */
            *to++ = *s;
        }
    }
    *to = '\0';

    return buf;
}

#endif /* COMPRESS */