Main Page | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Class Members | File Members

shashtbl.cpp

Go to the documentation of this file.
00001 // +-------------------------------------------------------------------------+
00002 // |               I__n__t__e__L__i__b           0.6.10 development          |
00003 // | Copyright (c) Andrey Vikt. Stolyarov <crocodil_AT_croco.net> 2000-2007. |
00004 // |                                                                         |
00005 // | This is free software. The library part is available under              |
00006 // |                               GNU LESSER GENERAL PUBLIC LICENSE v.2.1.  |
00007 // | GNU LGPL v2.1 is found in docs/gnu_gpl2.txt,  or at  http://www.gnu.org |
00008 // |     Please see also docs/readme.txt and visit http://www.intelib.org    |
00009 // |                                                                         |
00010 // | !!! THERE IS NO WARRANTY OF ANY KIND, NEITHER EXPRESSED NOR IMPLIED !!! |
00011 // +-------------------------------------------------------------------------+
00012 
00013 
00014 
00015 
00016 #include "iexcept.hpp"
00017 #include "shashtbl.hpp"
00018 #if INTELIB_TEXT_REPRESENTATIONS == 1
00019 #  include "sstring.hpp"
00020 #endif
00021 
00022 
00023 static const intelib_hash_t hash_multiplier = 0x9c406bb5;
00024 // Recommended by Knuth
00025 
00026 static const intelib_hash_t hash_xor_op = 0x12fade34;
00027 // arbitrary
00028 
00029 static const unsigned long hash_sizes[] = {
00030             11,     /* > 8 */
00031             17,     /* > 16 */
00032             37,     /* > 32 */
00033             67,     /* > 64 */
00034             131,    /* > 128 */
00035             257,    /* > 256 */
00036             521,    /* > 512 */
00037             1031,   /* > 1024 */
00038             2053,   /* > 2048 */
00039             4099,   /* > 4096 */
00040             8209,   /* > 8192 */
00041             16411,  /* > 16384 */
00042             32771,  /* > 32768 */
00043             65537,  /* > 65536 */
00044             131101, /* > 131072 */
00045             262147, /* > 262144 */
00046             524309, /* > 524288 */
00047             1048583, /* > 1048576 */
00048             2097169, /* > 2097152 */
00049             4194319, /* > 4194304 */
00050             8388617, /* > 8388608 */
00051             16777259, /* > 16777216 */
00052             33554467, /* > 33554432 */
00053             67108879, /* > 67108864 */
00054             134217757, /* > 134217728 */
00055             /* we don't need sizes larger that 2**27; */
00056             /* allocating such an array would crash the program anyway */
00057             0       /* end-of-table mark */
00058         };
00059 
00060 
00061 intelib_hash_t UniversalHash(intelib_hash_t l)
00062 {
00063     return hash_multiplier * (l ^ hash_xor_op);
00064 }
00065 
00066 static
00067 intelib_hash_t CharStringSum(const char *s)
00068 {
00069     register intelib_hash_t res = 7;
00070     // Arbitrary; to enforce Hash(0) != Hash("").
00071 
00072     register int i = 0;
00073     while(*s) {
00074         res ^= (((intelib_hash_t) *s) << (8*(i%4)));
00075         // Every byte is XOR'ed with 0th, 1st, 2nd, 3rd, 0th... byte of res.
00076         s++; i++;
00077     }
00078     return res;
00079 }
00080 
00081 #if 0
00082 static
00083 intelib_hash_t MemAreaSum(const unsigned char *p, int l)
00084 {
00085     intelib_hash_t res = 13; // Arbitrary; to enforce Hash(0) != Hash("").
00086     register int i;
00087     for(i=0; i<l; i++) {
00088         res ^= (((intelib_hash_t) p[i]) << (8*(i%4)));
00089         // Every byte is XOR'ed with 0th, 1st, 2nd, 3rd, 0th... byte of res.
00090     }
00091     return res;
00092 }
00093 #endif
00094 
00095 intelib_hash_t LispHash(const SReference &ref) {
00096     // Keep It Simple, Stupid...
00097     return UniversalHash(CharStringSum(ref->TextRepresentation().c_str()));
00098     // This might be too slow for particular purposes so... so what? ;-)
00099 }
00100 
00101 
00102 bool SExprsAreEq(SReference l1, SReference l2)
00103 { return l1 == l2; }
00104 bool SExprsAreEql(SReference l1, SReference l2)
00105 { return l1.IsEql(l2); }
00106 bool SExprsAreEqual(SReference l1, SReference l2)
00107 { return l1.IsEqual(l2); }
00108 
00110 // class SExpressionHashTable
00111 
00112 IntelibTypeId SExpressionHashTable::TypeId(&SExpression::TypeId, true);
00113 
00114 SLabel SExpressionHashTable::EmptySlotMark("#<HASH_EMPTY_SLOT>");
00115 
00116 SExpressionHashTable::SExpressionHashTable(SEqualityPredicate eqp)
00117         : SExpression(TypeId)
00118 {
00119     dim = hash_sizes[0];
00120     comp = eqp ? eqp : SExprsAreEqual;
00121     table = new SReference[dim];
00122     itemcount = 0;
00123     lastfoundpos = -1;
00124 }
00125 
00126 SExpressionHashTable::SExpressionHashTable(const IntelibTypeId &id,
00127         SEqualityPredicate eqp)
00128         : SExpression(id)
00129 {
00130     dim = hash_sizes[0];
00131     comp = eqp ? eqp : SExprsAreEqual;
00132     table = new SReference[dim];
00133     itemcount = 0;
00134     lastfoundpos = -1;
00135 }
00136 
00137 SExpressionHashTable::~SExpressionHashTable()
00138 {
00139     delete[] table;
00140 }
00141 
00142 SExpression* SExpressionHashTable::Clone() const
00143 {
00144     SExpressionHashTable *ret = new SExpressionHashTable(comp);
00145     delete[] ret->table;
00146     ret->dim = dim;
00147     ret->table = new SReference[dim];
00148     ret->itemcount = itemcount;
00149     ret->lastfoundpos = lastfoundpos;
00150     for(unsigned int i=0; i<dim; i++)
00151         ret->table[i] = table[i].Clone();
00152     return ret;
00153 }
00154 
00155 void SExpressionHashTable::AddItem(SReference key, SReference val)
00156 {
00157     GetItemPosition(key) = val;
00158 }
00159 
00160 bool SExpressionHashTable::SafeAddItem(SReference key, SReference val)
00161 {
00162     SReference &pos = GetItemPosition(key);
00163     if(pos.GetPtr() != EmptySlotMark.GetPtr())
00164         return false;
00165     else {
00166         pos = val;
00167         return true;
00168     }
00169 }
00170 
00171 SReference SExpressionHashTable::
00172 FindItem(SReference key, const SReference &defval) const
00173 {
00174     unsigned long pos = LispHash(key) % dim;
00175     while(table[pos].GetPtr()) {
00176         SExpressionCons *dp =
00177             static_cast<SExpressionCons*>(table[pos].GetPtr());
00178         if((comp)(dp->Car(), key)) {
00179             // found !
00180             return dp->Cdr();
00181         }
00182         pos = pos ? pos-1 : dim-1;
00183     }
00184     // Not found
00185     return SReference(defval);
00186 }
00187 
00188 SReference& SExpressionHashTable::GetItemPosition(SReference key)
00189 {
00190     HandleLastFoundPos();
00191     intelib_hash_t pos = LispHash(key) % dim;
00192     while(table[pos].GetPtr()) {
00193         SExpressionCons *dp =
00194             static_cast<SExpressionCons*>(table[pos].GetPtr());
00195         if((comp)(dp->Car(), key)) {
00196             // found !
00197             return dp->Cdr();
00198         }
00199         pos = pos ? pos-1 : dim-1;
00200     }
00201     itemcount++;
00202     if(itemcount * 3 > dim * 2) {
00203         // This is rare event so we can leave it unoptimized
00204         ResizeTable();
00205         // table changed; recompute the pos assuming the item isn't found
00206         pos = LispHash(key) % dim;
00207         while(table[pos].GetPtr()) pos = pos ? pos-1 : dim-1;
00208         // ResizeTable destroyed our itemcount
00209         itemcount++;
00210     }
00211     SExpressionCons *cdp;
00212     table[pos] = SReference(cdp = new SExpressionCons(key, EmptySlotMark));
00213     lastfoundpos = pos;
00214     return cdp->Cdr();
00215 }
00216 
00217 bool SExpressionHashTable::RemoveItem(SReference key)
00218 {
00219     intelib_hash_t pos = LispHash(key) % dim;
00220     while(table[pos].GetPtr()) {
00221         SExpressionCons *dp =
00222             static_cast<SExpressionCons*>(table[pos].GetPtr());
00223         if((comp)(dp->Car(), key)) {
00224             DoRemoveItem(pos);
00225         }
00226         pos = pos ? pos-1 : dim-1;
00227     }
00228     // Not found
00229     return false;
00230 }
00231 
00232 void SExpressionHashTable::Clear()
00233 {
00234     for(unsigned int i = 0; i< dim; i++)
00235         table[i] = SReference();
00236     itemcount = 0;
00237     lastfoundpos = -1;
00238 }
00239 
00240 SString SExpressionHashTable::TextRepresentation() const
00241 {
00242     SString res("#S(HASH-TABLE ");
00243     if(comp == SExprsAreEq)
00244         res += "EQ";
00245     else
00246     if(comp == SExprsAreEql)
00247         res += "EQL";
00248     else
00249     if(comp == SExprsAreEqual)
00250         res += "EQUAL";
00251     else
00252         res += "???";
00253     HandleLastFoundPos();
00254     for(unsigned int i = 0; i< dim; i++) {
00255         if(table[i].GetPtr()) {
00256             res += " ";
00257             res += table[i]->TextRepresentation();
00258         }
00259     }
00260     res += ")";
00261     return res;
00262 }
00263 
00264 void SExpressionHashTable::DoRemoveItem(long pos)
00265 {
00266     /* We implement the Don Knuth' algorythm of removing items */
00267     /* see vol. 3, section 6.4 */
00268     itemcount--;
00269     table[pos] = SReference();
00270     for(long i = pos?pos-1:dim-1; table[i].GetPtr(); i =i?i-1:dim-1) {
00271         SExpressionCons *dp = static_cast<SExpressionCons*>(table[i].GetPtr());
00272         long r = LispHash(dp->Car()) % dim;
00273         if( (i <= r && r < pos) ||
00274                 ((pos < i) && (r < pos || i <= r))
00275           ) continue;
00276         table[pos] = table[i];
00277         table[i] = SReference();
00278         pos = i;
00279     }
00280 }
00281 
00282 void SExpressionHashTable::ResizeTable()
00283 {
00284     HandleLastFoundPos();
00285     unsigned long newdim = 0;
00286     for(int i=1; hash_sizes[i]; i++) {
00287         if(hash_sizes[i] > dim) {
00288             newdim = hash_sizes[i];
00289             break;
00290         }
00291     }
00292     if(!newdim) throw IntelibX_hash_table_too_big(SReference(this));
00293     long olddim = dim;
00294     SReference *oldtable = table;
00295     dim = newdim;
00296     table = new SReference[newdim];
00297     itemcount = 0;
00298     for(long i = 0; i<olddim; i++) {
00299         if(oldtable[i].GetPtr()) {
00300             SExpressionCons *dp =
00301                 static_cast<SExpressionCons*>(oldtable[i].GetPtr());
00302             GetItemPosition(dp->Car()) = dp->Cdr();
00303             lastfoundpos = -1; // for efficiency
00304         }
00305     }
00306     delete [] oldtable;
00307 }
00308 
00309 void SExpressionHashTable::HandleLastFoundPos() const
00310 {
00311 
00312     SExpressionHashTable *p = const_cast<SExpressionHashTable*>(this);
00313     if(lastfoundpos != -1) {
00314         SExpressionCons *dp =
00315             static_cast<SExpressionCons*>(table[lastfoundpos].GetPtr());
00316         if(dp && dp->Cdr().GetPtr() == EmptySlotMark.GetPtr()) {
00317             table[lastfoundpos] = SReference();
00318             p->itemcount--;
00319         }
00320         p->lastfoundpos = -1;
00321     }
00322 }
00323 
00324 SExpressionHashTable::Iterator::Iterator(const SExpressionHashTable &ref)
00325 {
00326     ref.HandleLastFoundPos();
00327     tbl = ref.table;
00328     idx = 0;
00329     lim = ref.dim;
00330 }
00331 
00332 
00333 SReference& SExpressionHashTable::Iterator::GetNext()
00334 {
00335 #if 1
00336     while(idx<lim &&
00337             (!tbl[idx].GetPtr() ||
00338              (static_cast<SExpressionCons*>(tbl[idx].GetPtr())->Cdr().GetPtr()
00339               == EmptySlotMark.GetPtr())
00340             )
00341          )
00342 #else 
00343     while(idx<lim && !tbl[idx].GetPtr())
00344 #endif
00345     {
00346         idx++;
00347     }
00348     if(idx>=lim) {
00349         static SReference unbound;
00350         return unbound;
00351     }
00352     return tbl[idx++];
00353 }
00354 
00355 
00357 
00358 IntelibX_not_a_hash_table::
00359 IntelibX_not_a_hash_table(SReference a_param)
00360         : IntelibX("Not a hash table", a_param) {}
00361 
00362 IntelibX_hash_table_too_big::
00363 IntelibX_hash_table_too_big(SReference a_param)
00364         : IntelibX("Hash table too big", a_param) {}
00365 

Generated on Tue Dec 18 00:39:45 2007 for InteLib by  doxygen 1.4.1