dbm/
misc/
old-docs/
/* compress.c */

#include "copyright.h"
#include "config.h"

#ifdef COMPRESS
#include <stdio.h>
#include "teeny.h"

/* Compression routines */
/*
 * -*-C-*-
 * 
 * Copyright (c) 1989, 1990 by David Applegate, James Aspnes, Timothy Freeman,
 * and Bennet Yee.
 * 
 * This material was developed by the above-mentioned authors. Permission to
 * copy this software, to redistribute it, and to use it for any purpose is
 * granted, subject to the following restrictions and understandings.
 * 
 * 1. Any copy made of this software must include this copyright notice in full.
 * 
 * 2. Users of this software agree to make their best efforts (a) to return to
 * the above-mentioned authors any improvements or extensions that they make,
 * so that these may be included in future releases; and (b) to inform the
 * authors of noteworthy uses of this software.
 * 
 * 3. All materials developed as a consequence of the use of this software shall
 * duly acknowledge such use, in accordance with the usual standards of
 * acknowledging credit in academic research.
 * 
 * 4. The authors have made no warrantee or representation that the operation of
 * this software will be error-free, and the authors are under no obligation
 * to provide any services, by way of maintenance, update, or otherwise.
 * 
 * 5. In conjunction with products arising from the use of this material, there
 * shall be no use of the names of the authors, of Carnegie-Mellon
 * University, nor of any adaptation thereof in any advertising, promotional,
 * or sales literature without prior written consent from the authors, and
 * Carnegie-Mellon University in each case.
 */


/* These use a pathetically simple encoding that takes advantage of the */
/* eighth bit on a char; if you are using an international character set, */
/* they may need substantial patching. */

/* Size in bytes of the static result buffers in compress() and uncompress(). */
#define BUFFER_LEN 16384	/* nice big buffer */

/*
 * A compressed byte with TOKEN_BIT set stands for a two-character token;
 * its low seven bits (TOKEN_MASK) are the index into tokens[].
 */
#define TOKEN_BIT 0x80		/* if on, it's a token */
#define TOKEN_MASK 0x7f		/* for stripping out token value */
/* Number of entries in tokens[]. */
#define NUM_TOKENS (128)
/* Extent of each dimension of token_table[][]. */
#define MAX_CHAR (128)

/* Top 128 bigrams in the CMU TinyMUD database as of 2/13/90 */
/*
 * Each entry is a two-character string.  The position of an entry is its
 * token value: init_compress() stores (index | TOKEN_BIT) for the pair and
 * uncompress() maps (byte & TOKEN_MASK) back to the entry, so reordering or
 * replacing entries changes the meaning of already-compressed strings.
 */
static char    *tokens[NUM_TOKENS] = {
  "e ", " t", "th", "he", "s ", " a", "ou", "in",
  "t ", " s", "er", "d ", "re", "an", "n ", " i",
  " o", "es", "st", "to", "or", "nd", "o ", "ar",
  "r ", ", ", "on", " b", "ea", "it", "u ", " w",
  "ng", "le", "is", "te", "en", "at", " c", "y ",
  "ro", " f", "oo", "al", ". ", "a ", " d", "ut",
  " h", "se", "nt", "ll", "g ", "yo", " l", " y",
  " p", "ve", "f ", "as", "om", "of", "ha", "ed",
  "h ", "hi", " r", "lo", "Yo", " m", "ne", "l ",
  "li", "de", "el", "ta", "wa", "ri", "ee", "ti",
  "no", "do", "Th", " e", "ck", "ur", "ow", "la",
  "ac", "et", "me", "il", " g", "ra", "co", "ch",
  "ma", "un", "so", "rt", "ai", "ce", "ic", "be",
  " n", "k ", "ge", "ot", "si", "pe", "tr", "wi",
  "e.", "ca", "rs", "ly", "ad", "we", "bo", "ho",
  "ir", "fo", "ke", "us", "m ", " T", "di", ".."
};

/*
 * token_table[a][b] is the token byte for the character pair (a, b), or 0
 * when the pair has no token.  Filled in by init_compress().
 */
static char     token_table[MAX_CHAR][MAX_CHAR];
/* Set to 1 by init_compress() once token_table has been filled in. */
static int      table_initialized = 0;
/* Old-style (unprototyped) declarations of the two entry points below. */
char           *compress();
char           *uncompress();

/*
 * init_compress: build the bigram lookup table used by compress().
 *
 * Clears every cell of token_table, then for each entry of tokens[] stores
 * (entry index | TOKEN_BIT) in the cell addressed by the entry's two
 * characters, and finally marks the table as initialized.
 */
static void     init_compress()
{
  int             row;
  int             col;
  int             idx;
  char           *pair;

  /* start with "no token" everywhere */
  for (row = 0; row < MAX_CHAR; row++) {
    char           *line = token_table[row];

    for (col = 0; col < MAX_CHAR; col++)
      line[col] = 0;
  }

  /* record the token byte for every known bigram */
  for (idx = 0; idx < NUM_TOKENS; idx++) {
    pair = tokens[idx];
    token_table[pair[0]][pair[1]] = idx | TOKEN_BIT;
  }

  table_initialized = 1;
}

char           *
                compress(s)
  char           *s;
{
  static char     buf[BUFFER_LEN];
  char           *to;
  char            token;

  if (!table_initialized)
    init_compress();

  if (s == NULL)
    return (char *) NULL;	/* don't compress NULL */

  /* tokenize the first characters */
  for (to = buf; s[0] && s[1]; to++) {
    if (token = token_table[s[0]][s[1]]) {
      *to = token;
      s += 2;
    } else {
      *to = s[0];
      s++;
    }
  }

  /* copy the last character (if any) and null */
  while (*to++ = *s++);

  return buf;
}

/*
 * uncompress: expand a string produced by compress().
 *
 * Every byte of s with TOKEN_BIT set is replaced by the two characters of
 * tokens[byte & TOKEN_MASK]; every other byte is copied through unchanged.
 *
 * Returns NULL if s is NULL; otherwise a pointer to a static buffer that is
 * overwritten by the next call.  Because each token byte expands to two
 * characters, the output can be up to twice as long as the input; output
 * that would not fit in the buffer is truncated (the result is always
 * NUL-terminated, and a token is never split in half).
 */
char           *
                uncompress(s)
  char           *s;
{
  static char     buf[BUFFER_LEN];
  char           *to;
  char           *end;
  char           *token;

  if (s == NULL)
    return (char *) NULL;	/* don't uncompress NULL */

  /* keep the final slot free for the terminating NUL */
  end = buf + BUFFER_LEN - 1;

  for (to = buf; *s; s++) {
    if (*s & TOKEN_BIT) {
      /* a token needs two output slots; stop if they are not available */
      if (end - to < 2)
	break;
      token = tokens[*s & TOKEN_MASK];
      *to++ = token[0];
      *to++ = token[1];
    } else {
      if (to >= end)
	break;
      *to++ = *s;
    }
  }

  *to = '\0';

  return buf;
}

#endif				/* COMPRESS */