/
umud/DOC/
umud/DOC/U/
umud/DOC/U/U-examples/
umud/DOC/internals/
umud/DOC/wizard/
umud/MISC/
umud/MISC/dbchk/
umud/RWHO/rwhod/
/*
	Chops a flat OIF from stdin up into shorter lines on stdout, or vice
versa. Probably not at all wise to apply it to bigram-compressed OIF.

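	Takes the following args:
		-l <linesize> tells it to break up into lines of some
			size other than the default of 80.
		-d tells it to dechop -- i.e. paste a chopped file
			back together.
		-i <infile> tells it to read from a file instead of stdin.
		-o <outfile> tells it to write to a file instead of stdout.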

	NOTE: The dechop side assumes that every line that starts with a space
is to be pasted on to the end of the previous line. Since we are assuming
OIF input, where no line starts with a space, this is fine as long as
you don't bugger up a chopped up file, OK?


*/

#include	<stdio.h>
#include	<ctype.h>
#include	<string.h>

/* My libraries want this. Your mileage may vary. */
/* Every index(a,b) below this point is rewritten to strchr(a,b). */

#define	index(a,b)	strchr((a),(b))

/* Declared by hand, K&R style, instead of through a header. */

char	*malloc();
char	*getsep();

/* Line size used when chopping; 80 unless -l changes it. */
static	int	len = 80;
/* Working buffer for the line being chopped; allocated in main. */
static	char	*line;

/*
	Table mapping an attribute type name (typ) to the set of characters
(lst) at which a line of that type may be broken. The entry whose typ is a
null pointer ends the table and supplies the separators for every type that
is not listed before it.
*/
struct {
	char	*typ;
	char	*lst;
} seplst[] = {
	{"lst",";"},		/* Break lists ONLY at ;s */
	{(char *)0, " \t;"}	/* Everything else at these */
};

/* Format strings for the diagnostics; each takes an input line number. */
static	char	*badfile = "Bad OIF file at line %d\n";
static	char	*badbreak = "Unable to do good line break at line %d\n";

/*
	Parses the command line, then either chops infile onto outfile in
lines of about len characters or, with -d, pastes a chopped file back
together.

	Arguments are scanned from the last one backwards. A word that does
not start with '-' is only counted in onearg; when a flag that needs a
value (-l, -i, -o) is reached, a non-zero onearg says a value word followed
it and the value is taken from av[ac+1].

	Returns 0 when the input has been processed. Exits with 1 on a bad
command line, a file that cannot be opened, a failed buffer allocation or
a first word that does not fit on a line.
*/

int
main(ac,av)
int	ac;
char	*av[];
{
	/* Not declared by the headers this file includes */
	int	usage();
	int	atoi();
	void	exit();

	int	dechop = 0;
	int	onearg = 0;
	int	currline = 0;
	int	pending = 0;	/* dechop: newline read but not yet written */
	int	i;		/* chop: characters counted on the current */
				/* line; the separator that follows the */
				/* first word is not counted */
	int	tail;		/* chop: characters carried over a break */
	int	ch = 0;		/* anything but EOF, so the chop loop starts */
	int	sepch;
	char	*p,*lastsep,*seps;
	FILE	*infile = stdin;
	FILE	*outfile = stdout;

	while(--ac > 0){
		if(av[ac][0] != '-'){
			onearg++;
			continue;
		}
		switch(av[ac][1]){
		case 'l':
			if(!onearg)
				exit(usage());
			onearg = 0;
			len = atoi(av[ac+1]);
			if(len < 1){
				fputs("Line size must be at least 1.\n",stderr);
				exit(usage());
			}
			break;
		case 'd':
			dechop = 1;
			break;
		case 'i':
			if(!onearg)
				exit(usage());
			onearg = 0;
			infile = fopen(av[ac+1],"r");
			if(infile == (FILE *)0){
				fputs("Could not open input file.\n",stderr);
				exit(usage());
			}
			break;
		case 'o':
			if(!onearg)
				exit(usage());
			onearg = 0;
			outfile = fopen(av[ac+1],"w");
			if(outfile == (FILE *)0){
				fputs("Could not open output file.\n",stderr);
				exit(usage());
			}
			break;
		default:
			exit(usage());
			break;
		}
	}

	if(dechop){
		/* Dechop infile to outfile */

		/*
		 * A newline is held back until the first character of the
		 * next line is known. If that character is a space the line
		 * is a continuation, and both the newline and the space are
		 * dropped. This is, after all, OIF. Holding the newline in
		 * a flag (rather than peeking once) also gets a continuation
		 * that follows an empty line right.
		 */

		while((ch = fgetc(infile)) != EOF){
			if(pending){
				pending = 0;
				if(ch == ' ')
					continue;
				fputc('\n',outfile);
			}
			if(ch == '\n')
				pending = 1;
			else
				fputc(ch,outfile);
		}
		if(pending)
			fputc('\n',outfile);
	} else {
		/* Chop infile to outfile */

		/*
		 * Sizing: up to len+1 characters are buffered before a break
		 * is attempted. A break at offset 0 carries all of them over
		 * behind the ' ' marker (len+2), the character that forced
		 * the break is appended (len+3), and "\n\0" may still follow.
		 */

		if((line = malloc((size_t)len+5)) == (char *)0){
			fputs("Could not allocate line buffer. Too long?\n",
				stderr);
			exit(1);
		}

		while(ch != EOF){
			p = line;
			lastsep = (char *)0;
			i = 0;

			/* Read first word of OIF line */

			currline++;
			ch = fgetc(infile);
			while(!isspace(ch) && ch != EOF && i < len){
				*p++ = ch;
				i++;
				ch = fgetc(infile);
			}
			if(ch == '\n' || ch == EOF){
				/* The whole line was one word (or empty) */

				if(i){
					*p++ = '\n';
					*p = '\0';
					fputs(line,outfile);
				}
				i = 0;
				continue;
			}
			if(i >= len){
				/* First word alone does not fit on a line */

				fprintf(stderr,badfile,currline);
				exit(1);
			}

			/* Look up separators */

			*p = '\0';
			seps = getsep(line);
			if(strchr(seps,ch))
				lastsep = p;
			*p++ = ch;

			/* Do the rest of the line */

			ch = fgetc(infile);
			while(ch != '\n' && ch != EOF){

				if(i < len){
					/* We have space on the line */

					if(strchr(seps,ch))
						lastsep = p;
					*p++ = ch;
					i++;
				} else if(lastsep){
					/*
					 * No more room. Put out everything
					 * before the last separator, then
					 * start a continuation line made of
					 * ' ', the separator and whatever
					 * followed it.
					 */

					sepch = *lastsep;
					tail = (int)(p - lastsep);
					*lastsep = '\0';
					fputs(line,outfile);
					fputc('\n',outfile);
					*lastsep = sepch;

					/* Regions overlap, so not strcpy */
					memmove(line+1,lastsep,(size_t)tail);
					line[0] = ' ';
					i = tail+1;
					p = line+i;

					/*
					 * The character that forced the break
					 * may itself be a place to break next.
					 */
					lastsep = strchr(seps,ch) ? p : (char *)0;
					*p++ = ch;
				} else {
					/* No separators! Panic! */

					fprintf(stderr,badbreak,currline);
					*p++ = '\n';
					*p = '\0';
					fputs(line,outfile);
					fflush(outfile);
					p = line;
					*p++ = ' ';
					*p++ = ch;
					i = 1;
				}
				ch = fgetc(infile);
			}

			/* Put out remaining in the buffer */

			if(i){
				*p++ = '\n';
				*p++ = '\0';
				fputs(line,outfile);
			}
		}
	}

	return(0);
}

/*
	Looks typ up in seplst and hands back the separator string stored
with it. A type that is not in the table gets the separators of the
closing entry (the one whose typ is a null pointer).
*/

char	*
getsep(typ)
char	*typ;
{
	int	n = 0;

	/* Walk the table until typ matches or the closing entry is hit */
	while(seplst[n].typ != (char *)0 && strcmp(typ,seplst[n].typ) != 0)
		n++;

	return(seplst[n].lst);
}
/*
	Writes the one-line usage summary to stderr. Always returns 1, so
that callers can hand the result straight to exit().
*/

int
usage()
{
	fputs("usage: bonk [-i infile] [-o outfile] [-l linesize] [-d]\n",
		stderr);
	return(1);
}