#include <stdio.h> #include <string.h> #include "config.h" #include "lint.h" /* * stralloc.c - string management. * * All strings are stored in an extensible hash table, with reference counts. * free_string decreases the reference count; if it gets to zero, the string * will be deallocated. add_string increases the ref count if it finds a * matching string, or allocates it if it cant. There is no way to allocate * a string of a particular size to fill later (hash wont work!), so you'll * have to copy things freom a static (or malloced and later freed) buffer - * that is, if you want to avoid space leaks... * * Current overhead: * 8 bytes per string (next pointer, and 2 shorts for length and refs) * Strings are nearly all fairly short, so this is a significant overhead- * there is also the 4 byte malloc overhead and the fact that malloc * generally allocates blocks which are a power of 2 (should write my * own best-fit malloc specialised to strings); then again, GNU malloc * is bug free... */ /* * there is also generic hash table management code, but strings can be shared * (that was the point of this code), will be unique in the table, * require a reference count, and are malloced, copied and freed at * will by the string manager. Besides, I wrote this code first :-). * Look at htable.c for the other code. It uses the Hash() function * defined here, and requires hashed objects to have a pointer to the * next element in the chain (which you specify when you call the functions). */ #define MAXSHORT (1 << (sizeof(short)*8 - 2)) #define WORD_ALIGN_BIT 0x3 /* these are 0 for aligned ptrs */ char * xalloc(); #ifndef _AIX char * strcpy(); #endif static int num_distinct_strings = 0; int bytes_distinct_strings = 0; static int allocd_strings = 0; static int allocd_bytes = 0; int overhead_bytes = 0; static int search_len = 0; static int num_str_searches = 0; /* * strings are stored: * (char * next) (short numrefs) string * ^ * pointer points here */ #define NEXT(str) (*(char **)((char *) (str) - sizeof(short) \ - sizeof(int))) #define REFS(str) (*(short *)((char *) (str) - sizeof(short))) /* * hash table - list of pointers to heads of string chains. * Each string in chain has a pointer to the next string and a * reference count (char *, int) stored just before the start of the string. * HTABLE_SIZE is in config.h, and should be a prime, probably between * 1000 and 5000. */ static char ** base_table = 0; static void init_strings() { int x; base_table = (char **) xalloc(sizeof(char *) * HTABLE_SIZE); overhead_bytes += (sizeof(char *) * HTABLE_SIZE); for (x=0; x<HTABLE_SIZE; x++) base_table[x] = 0; } /* * generic hash function. This is probably overkill; I haven't checked the * stats for different prime numbers, etc. */ static int StrHash(s) char * s; { if (!base_table) init_strings(); return hashstr(s, 20, HTABLE_SIZE); } /* * Looks for a string in the table. If it finds it, returns a pointer to * the start of the string part, and moves the entry for the string to * the head of the pointer chain. One thing (blech!) - puts the previous * pointer on the hash chain into fs_prev. */ char * findstring(s) char * s; { char * curr, *prev; int h = StrHash(s); curr = base_table[h]; prev = 0; num_str_searches++; while (curr) { search_len++; if (*curr == *s && !strcmp(curr, s)) { /* found it */ if (prev) { /* not at head of list */ NEXT(prev) = NEXT(curr); NEXT(curr) = base_table[h]; base_table[h] = curr; } return(curr); /* pointer to string */ } prev = curr; curr = NEXT(curr); } return(0); /* not found */ } /* * Make a space for a string. This is rather nasty, as we want to use * alloc/free, but malloc overhead is generally severe. Later, we * will have to compact strings... */ static char * alloc_new_string(string) char * string; { char * s = xalloc(1 + strlen(string) + sizeof(char *) + sizeof(short)); int h = StrHash(string); s += sizeof(char *) + sizeof(short); strcpy(s, string); REFS(s) = 0; NEXT(s) = base_table[h]; base_table[h] = s; num_distinct_strings++; bytes_distinct_strings += 4 + (strlen(s) +3) & ~3; overhead_bytes += sizeof(char *) + sizeof(short); return(s); } char * make_shared_string(str) char * str; { char * s; s = findstring(str); if (!s) s = alloc_new_string(str); REFS(s)++; allocd_strings++; allocd_bytes += 4 + (strlen(str) + 3) & ~3; return(s); } /* * free_string - reduce the ref count on a string. Various sanity * checks applied, the best of them being that a add_string allocated * string will point to a word boundary + sizeof(int)+sizeof(short), * since malloc always allocates on a word boundary. * On systems where a short is 1/2 a word, this gives an easy check * to see whather we did in fact allocate it. * * Don't worry about the overhead for all those checks! */ /* * function called on free_string detected errors; things return checked(s). */ static void checked(s, str) char * s, *str; { fprintf(stderr, "%s (\"%s\")\n", s, str); fatal(s); /* brutal - debugging */ } void free_string(str) char * str; { char * s; allocd_strings--; allocd_bytes -= 4 + (strlen(str) + 3) & ~3; #ifndef BUG_FREE #ifdef dcheck /* GNU malloc range check flag */ { int align; align = (((int)str) - sizeof(int) - sizeof(short)) & WORD_B_MASK; if (align) checked("Free string: improperly aligned string!", str); } #endif /* dcheck */ #endif s = findstring(str); /* moves it to head of table if found */ #ifndef BUG_FREE if (!s) { checked("Free string: not found in string table!", str); return; } if (s != str) { checked("Free string: string didnt hash to the same spot!", str); return; } if (REFS(s) <= 0) { checked("Free String: String refs zero or -ve!", str); return; } #endif /* BUG_FREE */ if (REFS(s) > MAXSHORT) return; REFS(s)--; if (REFS(s) > 0) return; /* It will be at the head of the hash chain */ base_table[StrHash(str)] = NEXT(s); num_distinct_strings--; /* We know how much overhead malloc has */ bytes_distinct_strings-= 4 + (strlen(s) + 3) & ~3; overhead_bytes -= sizeof(short) + sizeof(char *); free(s-sizeof(short)-sizeof(char *)); return; } /* * you think this looks bad! and we didn't even tell them about the * GNU malloc overhead! tee hee! */ int add_string_status(verbose) int verbose; { if (verbose) { add_message("\nShared string hash table:\n"); add_message("-------------------------\t Strings Bytes\n"); } add_message("Strings malloced\t\t%8d %8d + %d overhead\n", num_distinct_strings, bytes_distinct_strings, overhead_bytes); if (verbose) { add_message("Total asked for\t\t\t%8d %8d\n", allocd_strings, allocd_bytes); add_message("Space actually required/total string bytes %d%%\n", (bytes_distinct_strings + overhead_bytes)*100 / allocd_bytes); add_message("Searches: %d Average search length: %6.3f\n", num_str_searches, (double)search_len / num_str_searches); } return(bytes_distinct_strings + overhead_bytes); }