/
teeny/db/
teeny/dbm/
teeny/docs/
teeny/includes/
teeny/misc/
teeny/news/
teeny/text/
/* compress.c */

#include "copyright.h"
#include "config.h"

#ifdef COMPRESS
#include <stdio.h>
#include "teeny.h"

/* Compression routines */
/* -*-C-*-

Copyright (c) 1989, 1990 by David Applegate, James Aspnes, Timothy Freeman,
                          and Bennet Yee.

This material was developed by the above-mentioned authors.
Permission to copy this software, to redistribute it, and to use it
for any purpose is granted, subject to the following restrictions and
understandings.

1. Any copy made of this software must include this copyright notice
in full.

2. Users of this software agree to make their best efforts (a) to
return to the above-mentioned authors any improvements or extensions
that they make, so that these may be included in future releases; and
(b) to inform the authors of noteworthy uses of this software.

3. All materials developed as a consequence of the use of this
software shall duly acknowledge such use, in accordance with the usual
standards of acknowledging credit in academic research.

4. The authors have made no warrantee or representation that the
operation of this software will be error-free, and the authors are
under no obligation to provide any services, by way of maintenance,
update, or otherwise.

5. In conjunction with products arising from the use of this material,
there shall be no use of the names of the authors, of Carnegie-Mellon
University, nor of any adaptation thereof in any advertising,
promotional, or sales literature without prior written consent from
the authors, and Carnegie-Mellon University in each case. */


/* These use a pathetically simple encoding that takes advantage of the */
/* eighth bit on a char; if you are using an international character set, */
/* they may need substantial patching. */

/* Size of the static result buffers used by compress() and uncompress(). */
#define BUFFER_LEN 16384	/* nice big buffer */

#define TOKEN_BIT 0x80		/* if on, it's a token */
#define TOKEN_MASK 0x7f		/* for stripping out token value */
/* Number of entries in tokens[]; one token per value of the low 7 bits. */
#define NUM_TOKENS (128)
/* Size of each dimension of token_table[][]. */
#define MAX_CHAR (128)

/* Top 128 bigrams in the CMU TinyMUD database as of 2/13/90 */
/* The position of a pair in this array is its token value: */
/* init_compress() stores (index | TOKEN_BIT) in token_table for the pair, */
/* and uncompress() finds the pair again with (byte & TOKEN_MASK). */
static char    *tokens[NUM_TOKENS] = {
			     "e ", " t", "th", "he", "s ", " a", "ou", "in",
			     "t ", " s", "er", "d ", "re", "an", "n ", " i",
			     " o", "es", "st", "to", "or", "nd", "o ", "ar",
			     "r ", ", ", "on", " b", "ea", "it", "u ", " w",
			     "ng", "le", "is", "te", "en", "at", " c", "y ",
			     "ro", " f", "oo", "al", ". ", "a ", " d", "ut",
			     " h", "se", "nt", "ll", "g ", "yo", " l", " y",
			     " p", "ve", "f ", "as", "om", "of", "ha", "ed",
			     "h ", "hi", " r", "lo", "Yo", " m", "ne", "l ",
			     "li", "de", "el", "ta", "wa", "ri", "ee", "ti",
			     "no", "do", "Th", " e", "ck", "ur", "ow", "la",
			     "ac", "et", "me", "il", " g", "ra", "co", "ch",
			     "ma", "un", "so", "rt", "ai", "ce", "ic", "be",
			     " n", "k ", "ge", "ot", "si", "pe", "tr", "wi",
			     "e.", "ca", "rs", "ly", "ad", "we", "bo", "ho",
			      "ir", "fo", "ke", "us", "m ", " T", "di", ".."
};

/* token_table[a][b] is the token byte for the character pair (a, b), */
/* or 0 when the pair has no token.  Filled in by init_compress(). */
static char     token_table[MAX_CHAR][MAX_CHAR];
/* Set to 1 once init_compress() has run; compress() checks it on entry. */
static int      table_initialized = 0;
/* Both functions return a pointer to their own static buffer, or NULL */
/* when handed NULL. */
char           *compress();
char           *uncompress();

/*
 * Build the pair -> token lookup table used by compress().
 *
 * Every cell is first cleared to 0 ("no token for this pair"); then each
 * entry of tokens[] gets its cell set to the entry's index with TOKEN_BIT
 * turned on.  Finally the table is flagged as ready.
 */
static void 
init_compress()
{
  int             row;
  int             col;
  int             idx;
  char           *pair;

  /* wipe the whole table */
  row = 0;
  while (row < MAX_CHAR)
  {
    col = 0;
    while (col < MAX_CHAR)
    {
      token_table[row][col] = 0;
      col++;
    }
    row++;
  }

  /* register each bigram under its two characters */
  for (idx = 0; idx < NUM_TOKENS; idx++)
  {
    pair = tokens[idx];
    token_table[pair[0]][pair[1]] = idx | TOKEN_BIT;
  }

  table_initialized = 1;
}

char           *
compress(s)
  char           *s;
{
  static char     buf[BUFFER_LEN];
  char           *to;
  char            token;

  if (!table_initialized)
    init_compress();

  if (s == NULL)
    return (char *) NULL;	/* don't compress NULL */

  /* tokenize the first characters */
  for (to = buf; s[0] && s[1]; to++)
  {
    if (token = token_table[s[0]][s[1]])
    {
      *to = token;
      s += 2;
    } else
    {
      *to = s[0];
      s++;
    }
  }

  /* copy the last character (if any) and null */
  while (*to++ = *s++);

  return buf;
}

/*
 * uncompress - expand a string produced by compress().
 *
 * s:       null-terminated encoded string, or NULL.
 * returns: NULL if s is NULL; otherwise a pointer to a static buffer
 *          holding the expanded, null-terminated string.  The buffer is
 *          overwritten by the next call.
 *
 * Each byte with TOKEN_BIT set is replaced by the two characters of
 * tokens[byte & TOKEN_MASK]; every other byte is copied as is.
 *
 * Because a token expands to two characters, the result can be up to
 * twice as long as the input.  Expansion stops (truncating the result)
 * as soon as the next piece would not fit in BUFFER_LEN bytes including
 * the null; a token's pair is never split.
 */
char           *
uncompress(s)
  char           *s;
{
  static char     buf[BUFFER_LEN];
  char           *to;
  char           *end;
  char           *token;

  if (s == NULL)
    return (char *) NULL;	/* don't uncompress NULL */

  /* the last slot of buf is reserved for the terminating null */
  end = buf + BUFFER_LEN - 1;

  for (to = buf; *s; s++)
  {
    if (*s & TOKEN_BIT)
    {
      if (end - to < 2)
	break;			/* no room for both characters */

      token = tokens[*s & TOKEN_MASK];
      *to++ = token[0];
      *to++ = token[1];
    } else
    {
      if (to >= end)
	break;			/* buffer full */

      *to++ = *s;
    }
  }

  *to = '\0';

  return buf;
}

#endif				/* COMPRESS */