/home66/gary/public_html/cloudy/c08_branch/source/hash.cpp

Go to the documentation of this file.
00001 /* This file is part of Cloudy and is copyright (C)1978-2008 by Gary J. Ferland and
00002  * others.  For conditions of distribution and use see copyright notice in license.txt */
00003 #include "cddefines.h"
00004 #include "hash.h"
00005 
00006 /* Implement sorted linear hash table -- automatically expands to
00007  * get uniform bin filling, requires good hash function as 2^n masking is used.
00008  *
00009  * References
00010  *
00011  * W. Litwin, Linear hashing: A new tool for file and table addressing, 
00012  * Proc. 6th Conference on Very Large Databases, pages 212-223, 1980.
00013  *
00014  * http://www.concentric.net/~Ttwang/tech/sorthash.htm
00015  *
00016  */
00017 
00018 /* Function to calculate 4 byte hash index -- must be good for 2^n table */
00019 STATIC unsigned long hashfunction(const void *t, const size_t len);
00020 
00021 /* Internal utility functions for hash table */
00022 STATIC entry *lookup_key(const void *key, size_t *lkey, const hashtab *table, 
00023                                                                                                  unsigned long *hk);
00024 STATIC void extend(hashtab *table);
00025 STATIC unsigned long getbin(unsigned long hk, const hashtab *table);
00026 
00027 
00028 hashtab *newhash(void (*freedata)(void *))
00029 {
00030         hashtab *self;
00031 
00032         DEBUG_ENTRY("newhash()");
00033 
00034         self = (hashtab *) MALLOC(sizeof(hashtab));
00035         self->freedata = freedata;
00036         self->nelem              = 0;
00037         self->size               = 0x1;
00038         self->fullmask = 0x1;
00039         self->frontmask = 0x0;
00040         self->space = 128;
00041         self->hashfunction = hashfunction;
00042         self->tab = (entry **) MALLOC(self->space*sizeof(entry *));
00043         self->tab[0] = NULL;
00044 
00045         return self;
00046 }
00047 
00048 void freehash(hashtab *table)
00049 {
00050         entry *s, *t;
00051         unsigned long i;
00052 
00053         DEBUG_ENTRY("freehash()");
00054 
00055         for(i=0;i<table->size;i++) 
00056         {
00057                 s = table->tab[i];
00058                 while (s != NULL) 
00059                 {
00060                         t = s->next;
00061                         if(table->freedata != NULL) 
00062                         {
00063                                 table->freedata(s->data.p);
00064                         }
00065                         free(s);
00066                         s = t;
00067                 }
00068         }
00069 
00070         free(table->tab);
00071         free(table);
00072 }
00073 
00074 data_u *addentry(const void *key, size_t lkey, hashtab *table, int *exists)
00075 {
00076         unsigned long hk, i;
00077         entry *s;
00078         void *p;
00079 
00080         DEBUG_ENTRY("addentry()");
00081 
00082         s = lookup_key(key,&lkey,table,&hk);
00083         if(s) 
00084         { 
00085                 *exists = 1;
00086                 return &(s->data);
00087         }
00088 
00089         *exists = 0;
00090         p = MALLOC(sizeof(entry)+lkey);
00091         s = (entry *) p;
00092         s->hashval = hk;
00093         s->data.lkey = lkey;
00094         s->data.key = (void *)(((char *)p)+sizeof(entry));
00095         memcpy(s->data.key,key,lkey);
00096 
00097         i = getbin(hk,table);
00098         s->next = table->tab[i];
00099         table->tab[i] = s;
00100 
00101         table->nelem++;
00102 
00103         extend(table);
00104 
00105         return &(s->data);
00106 }
00107 
00108 data_u *lookup(const void *key, size_t lkey, const hashtab *table)
00109 {
00110         unsigned long hk;
00111         entry *s;
00112 
00113         if(table->nelem == 0)
00114                 return NULL;
00115         s = lookup_key(key,&lkey,table,&hk);
00116         if(s == NULL)
00117                 return NULL;
00118         return &(s->data);
00119 }
00120 
00121 int maxchain(const hashtab *table)
00122 {
00123         unsigned long i, l, max=0;
00124         entry *s;
00125 
00126         DEBUG_ENTRY("maxchain()");
00127 
00128         for(i=0;i<table->size;i++) 
00129         {
00130                 l = 0;
00131                 for(s = table->tab[i];s != NULL;s=s->next) 
00132                 {
00133                         l++;
00134                 }
00135                 if(l > max)
00136                         max = l;
00137         }
00138 
00139         return max;
00140 }
00141 
00142 /* Internal utility functions */
00143 /* Bernstein hash, see:
00144          http://www.cse.yorku.ca/~oz/hash.html
00145          http://www.burtleburtle.net/bob/hash/doobs.html 
00146 */
00147 enum {HASHINIT=5381,HASHMUL=33}; /* Bernstein */
00148 /* enum {HASHINIT=0,HASHMUL=131}; */
00149 STATIC unsigned long hashfunction(const void *t, const size_t len)
00150 { 
00151         size_t i;
00152         unsigned long h = HASHINIT;
00153         unsigned char *s = (unsigned char *)t;
00154 
00155         DEBUG_ENTRY("hashfunction()");
00156 
00157         for( i=0; i<len; i++ ) 
00158                 h = HASHMUL*h + s[i]; 
00159 
00160         return( h );
00161 } 
00162 
00163 STATIC entry *lookup_key(const void *key, size_t *lkey, const hashtab *table, 
00164                          unsigned long *hk)
00165 {
00166         unsigned long i;
00167         entry *s;
00168 
00169         DEBUG_ENTRY("lookup_key()");
00170 
00171         if(*lkey == 0)
00172                 *lkey = strlen((char *) key)+1;
00173 
00174         *hk = table->hashfunction(key,*lkey);
00175         i = getbin(*hk,table);
00176 
00177         /* Look through list within bin */
00178         for(s = table->tab[i];s!=NULL;s=s->next) 
00179         {
00180                 /* Check for match -- full hash value will likely distinguish */
00181                 if(*hk == s->hashval &&
00182                                 *lkey == s->data.lkey    &&
00183                                 !memcmp(key,s->data.key,*lkey)) 
00184                 {
00185                         return s;
00186                 }
00187         }
00188         return NULL;
00189 }
00190 
00191 STATIC void extend(hashtab *table)
00192 {
00193         unsigned long move, last, i, j;
00194         entry *t, *s;
00195 
00196         DEBUG_ENTRY("extend()");
00197         last = table->size;
00198         table->size++;
00199         if(last > table->fullmask) 
00200         {        /* Extend table when full */
00201                 table->frontmask = table->fullmask;
00202                 table->fullmask <<= 1;
00203                 table->fullmask |= 1;
00204                 if(table->fullmask+1 > table->space) 
00205                 {
00206                         table->space = table->fullmask+1;
00207                         table->tab = (entry **) 
00208                                 REALLOC(table->tab,(table->space)*sizeof(entry *));
00209                 }
00210         }
00211 
00212         /* Split next bin in front part */
00213         i = last & table->frontmask;    
00214         t = table->tab[i];
00215         table->tab[last] = table->tab[i] = NULL;
00216         move = table->frontmask ^ table->fullmask;
00217         while (t) 
00218         { /* Go through list and sort between [last] and [i] */
00219                 if(t->hashval & move) 
00220                 {
00221                         j = last;
00222                 } 
00223                 else 
00224                 {
00225                         j = i;
00226                 }
00227                 s = t->next;
00228                 t->next = table->tab[j];
00229                 table->tab[j] = t;
00230                 t = s;
00231         }
00232 }
00233 
00234 STATIC unsigned long getbin(unsigned long hk, const hashtab *table)
00235 {
00236         unsigned long i;
00237 
00238         DEBUG_ENTRY("getbin()");
00239         i = hk & table->fullmask; 
00240         if(i >= table->size)
00241                 i &= table->frontmask;
00242         assert (i < table->size && i < table->space);
00243         return i;
00244 }
00245 
00246 /* Copy data from hash into list, 
00247          optionally selected according to a masking function */
00248 unsigned long makelist(const hashtab *table, data_u **list, 
00249                                                                                          const unsigned long nlist, int (*maskfun)(data_u *dat))
00250 {
00251         unsigned long i, n;
00252         entry *s;
00253 
00254         DEBUG_ENTRY("makelist()");
00255 
00256         n = 0;
00257         for(i=0;i<table->size;i++) 
00258         {
00259                 for(s = table->tab[i];s != NULL;s = s->next) 
00260                 {
00261                         if(maskfun == NULL || maskfun(&(s->data))) 
00262                                 list[n++] = &(s->data);
00263                         if(n > nlist)
00264                         {
00265                                 fprintf(ioQQQ,"Too many list items for array provided in makelist\n");
00266                                 cdEXIT(EXIT_FAILURE);
00267                         }
00268                 }
00269         }
00270         return n;
00271 }
00272 unsigned long makeplist(const hashtab *table, void **list, 
00273                                                                                          const unsigned long nlist, int (*maskfun)(data_u *dat))
00274 {
00275         unsigned long i, n;
00276         entry *s;
00277 
00278         DEBUG_ENTRY("makeplist()");
00279         n = 0;
00280         for(i=0;i<table->size;i++) 
00281         {
00282                 for(s = table->tab[i];s != NULL;s = s->next) 
00283                 {
00284                         if(maskfun == NULL || maskfun(&(s->data))) 
00285                                 list[n++] = s->data.p;
00286                         if(n > nlist)
00287                         {
00288                                 fprintf(ioQQQ,"Too many list items for array provided in makeplist\n");
00289                                 cdEXIT(EXIT_FAILURE);
00290                         }
00291                 }
00292         }
00293         return n;
00294 }
00295 
00296 #ifdef TEST
00297 main()
00298 {
00299         hashtab *table;
00300         data_u *data;
00301         int i, exists;
00302 
00303         char strings[][15] = {"Hello","Goodbye","Thirteen","One",
00304                         "Two","Three","Four","Five","Six",
00305                         "Seven","Eight"};
00306 
00307         table = newhash(NULL);
00308         for(i=0;i<sizeof(strings)/15;i++) 
00309         {
00310                 data = addentry(strings[i],0,table,&exists);
00311                 data->i = i;
00312         }
00313 
00314         for(i=0;i<sizeof(strings)/15;i++) 
00315         { 
00316                 data = lookup(strings[i],0,table);
00317                 if(data) 
00318                 {
00319                         printf("Found data %d\n",data->i);
00320                 }
00321                 else 
00322                 {
00323                         printf("Couldn't find data\n");
00324                 }
00325         }
00326         data = lookup("Banana",0,table);
00327         if(data) 
00328         {
00329                 printf("Found data %d\n",data->i);
00330         } 
00331         else 
00332         {
00333                 printf("Couldn't find data (as expected)\n");
00334         }
00335         printf("Longest chain is %d\n",maxchain(table));
00336 }
00337 #endif

Generated on Mon Feb 16 12:01:15 2009 for cloudy by  doxygen 1.4.7