/*
 * Chops a flat OIF from an input stream up into shorter lines on an
 * output stream, or vice versa.  Probably not at all wise to apply it
 * to bigram-compressed OIF.
 *
 * Options:
 *   -l <linesize>  break lines at some size other than the default of 80.
 *   -d             dechop, i.e. paste a chopped file back together.
 *   -i <infile>    read from <infile> instead of stdin.
 *   -o <outfile>   write to <outfile> instead of stdout.
 *
 * NOTE: The dechop side assumes that every line that starts with a space
 * is to be pasted on to the end of the previous line.  Since we are
 * assuming OIF input, where no line starts with a space, this is fine as
 * long as a chopped file is not edited by hand before being dechopped.
 *
 * NOTE: The whitespace that follows the type word of a line, and the
 * character that triggers a break, are not counted against the line
 * size, so an emitted line can be a character or two longer than
 * <linesize>.  This matches the historical output and is kept on purpose.
 */

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char *getsep(char *typ);
int usage(void);

/* Separator sets, keyed by the first word (attribute type) of a line. */
struct {
    char *typ;
    char *lst;
} seplst[] = {
    {"lst", ";"},           /* Break lists ONLY at ;s */
    {(char *)0, " \t;"}     /* Everything else at these */
};

static const char badfile[] = "Bad OIF file at line %d\n";
static const char badbreak[] = "Unable to do good line break at line %d\n";

/*
 * Parses a strictly positive decimal line size from text into *out.
 * Returns 1 on success, 0 if text is not a number, has trailing junk,
 * or is outside the range the line buffer arithmetic can handle.
 */
static int parse_linesize(const char *text, int *out)
{
    char *end;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE)
        return 0;
    if (value < 1 || value > INT_MAX - 4)
        return 0;
    *out = (int)value;
    return 1;
}

/*
 * Returns non-zero if ch is one of the characters in seps.  A NUL byte is
 * never a separator (strchr would otherwise match the string terminator).
 */
static int is_sep(const char *seps, int ch)
{
    return ch != '\0' && strchr(seps, ch) != NULL;
}

/* Writes the first n bytes of buf to out, followed by a newline. */
static void emit_line(const char *buf, size_t n, FILE *out)
{
    if (n > 0)
        fwrite(buf, 1, n, out);
    fputc('\n', out);
}

/*
 * Dechop: copies infile to outfile, deleting every newline that is
 * immediately followed by a space, together with that space.  A newline
 * is held back until the next character is known, so a continuation line
 * is recognised even when it follows an empty line.
 */
static void dechop_stream(FILE *infile, FILE *outfile)
{
    int ch;
    int pending_newline = 0;

    while ((ch = fgetc(infile)) != EOF) {
        if (pending_newline) {
            pending_newline = 0;
            if (ch == ' ')
                continue;       /* continuation: drop newline and space */
            fputc('\n', outfile);
        }
        if (ch == '\n')
            pending_newline = 1;
        else
            fputc(ch, outfile);
    }
    if (pending_newline)
        fputc('\n', outfile);
}

/*
 * Chop: copies infile to outfile, breaking long lines.  The first word of
 * each input line selects the separator set via getsep(); a line is broken
 * just before the last separator seen, and the continuation line starts
 * with a single space followed by that separator.  If no separator is
 * available the line is broken where it stands and a warning is printed.
 *
 * Returns 0 on success, 1 on allocation failure or malformed input.
 */
static int chop_stream(FILE *infile, FILE *outfile, int len)
{
    char *line;
    char *p;
    char *lastsep;
    const char *seps;
    int currline = 0;
    int i;
    int ch = 0;                 /* any value other than EOF */

    /* At most len+2 characters are ever held; see the break code below. */
    line = malloc((size_t)len + 4);
    if (line == NULL) {
        fputs("Could not allocate line buffer. Too long?\n", stderr);
        return 1;
    }

    while (ch != EOF) {
        p = line;
        lastsep = NULL;
        i = 0;
        currline++;

        /* Read first word of OIF line */
        ch = fgetc(infile);
        while (ch != EOF && !isspace(ch) && i < len) {
            *p++ = (char)ch;
            i++;
            ch = fgetc(infile);
        }

        /* A line consisting of one word only (or nothing at all). */
        if (ch == '\n' || ch == EOF) {
            if (i)
                emit_line(line, (size_t)(p - line), outfile);
            continue;
        }

        /* The first word alone fills the line: nothing sensible to do. */
        if (i >= len) {
            fprintf(stderr, badfile, currline);
            free(line);
            return 1;
        }

        /* Look up separators for this attribute type */
        *p = '\0';
        seps = getsep(line);
        /*
         * A separator in column 0 is not a usable break point: breaking
         * there would emit an empty line and shorten nothing.
         */
        if (p > line && is_sep(seps, ch))
            lastsep = p;
        *p++ = (char)ch;        /* not counted in i, as before */

        /* Do the rest of the line */
        for (ch = fgetc(infile); ch != '\n' && ch != EOF; ch = fgetc(infile)) {
            if (i < len) {
                /* Still room on the line. */
                if (is_sep(seps, ch))
                    lastsep = p;
                *p++ = (char)ch;
                i++;
                continue;
            }

            /* No more room.  Attempt linebreak */
            if (lastsep != NULL) {
                size_t head = (size_t)(lastsep - line);
                size_t tail = (size_t)(p - lastsep);

                emit_line(line, head, outfile);
                /*
                 * Slide the separator and everything after it down to
                 * column 1.  The regions overlap, hence memmove.
                 */
                memmove(line + 1, lastsep, tail);
                line[0] = ' ';
                i = (int)(tail + 1);
                p = line + i;
                *p++ = (char)ch;
            } else {
                /* No separators!  Break right here and complain. */
                fprintf(stderr, badbreak, currline);
                emit_line(line, (size_t)(p - line), outfile);
                fflush(outfile);
                p = line;
                *p++ = ' ';
                *p++ = (char)ch;
                i = 1;
            }
            lastsep = NULL;
        }

        /* Put out whatever remains in the buffer */
        if (i)
            emit_line(line, (size_t)(p - line), outfile);
    }

    free(line);
    return 0;
}

/*
 * Entry point.  Arguments are scanned from last to first, so that when an
 * option needing a value is reached, its value (the following argument)
 * has already been seen and counted in onearg.
 *
 * Exit status is 0 on success and 1 on usage errors, unopenable files,
 * malformed input, allocation failure or output errors.
 */
int main(int ac, char *av[])
{
    int dechop = 0;
    int onearg = 0;
    int len = 80;
    int status = 0;
    int k;
    FILE *infile = stdin;
    FILE *outfile = stdout;

    for (k = ac - 1; k > 0; k--) {
        if (av[k][0] != '-') {
            onearg++;
            continue;
        }
        switch (av[k][1]) {
        case 'l':
            if (!onearg)
                return usage();
            onearg = 0;
            if (!parse_linesize(av[k + 1], &len)) {
                fputs("Bad line size.\n", stderr);
                return usage();
            }
            break;
        case 'd':
            dechop = 1;
            break;
        case 'i':
            if (!onearg)
                return usage();
            onearg = 0;
            infile = fopen(av[k + 1], "r");
            if (infile == NULL) {
                fputs("Could not open input file.\n", stderr);
                return usage();
            }
            break;
        case 'o':
            if (!onearg)
                return usage();
            onearg = 0;
            outfile = fopen(av[k + 1], "w");
            if (outfile == NULL) {
                fputs("Could not open output file.\n", stderr);
                return usage();
            }
            break;
        default:
            return usage();
        }
    }

    if (dechop)
        dechop_stream(infile, outfile);
    else
        status = chop_stream(infile, outfile, len);

    /* Report write failures instead of silently producing a short file. */
    if (fflush(outfile) == EOF || ferror(outfile)) {
        fputs("Error writing output.\n", stderr);
        status = 1;
    }
    if (outfile != stdout)
        fclose(outfile);
    if (infile != stdin)
        fclose(infile);

    return status;
}

/*
 * Returns the string of separators for the given attribute type.  Types
 * not listed in seplst get the separators of the terminating entry.
 */
char *getsep(char *typ)
{
    int i;

    for (i = 0; seplst[i].typ != (char *)0; i++) {
        if (strcmp(typ, seplst[i].typ) == 0)
            break;
    }
    return seplst[i].lst;
}

/* Prints the usage message to stderr and returns the exit status 1. */
int usage(void)
{
    fputs("usage: bonk [-i infile] [-o outfile] [-l linesize] [-d]\n",
          stderr);
    return 1;
}