Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members  

LabelDict.hpp

Go to the documentation of this file.
00001 /*
00002 This product contains certain software code or other information
00003 ("AT&T Software") proprietary to AT&T Corp. ("AT&T").  The AT&T
00004 Software is provided to you "AS IS".  YOU ASSUME TOTAL RESPONSIBILITY
00005 AND RISK FOR USE OF THE AT&T SOFTWARE.  AT&T DOES NOT MAKE, AND
00006 EXPRESSLY DISCLAIMS, ANY EXPRESS OR IMPLIED WARRANTIES OF ANY KIND
00007 WHATSOEVER, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
00008 MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, WARRANTIES OF
00009 TITLE OR NON-INFRINGEMENT OF ANY INTELLECTUAL PROPERTY RIGHTS, ANY
00010 WARRANTIES ARISING BY USAGE OF TRADE, COURSE OF DEALING OR COURSE OF
00011 PERFORMANCE, OR ANY WARRANTY THAT THE AT&T SOFTWARE IS "ERROR FREE" OR
00012 WILL MEET YOUR REQUIREMENTS.
00013 
00014 Unless you accept a license to use the AT&T Software, you shall not
00015 reverse compile, disassemble or otherwise reverse engineer this
00016 product to ascertain the source code for any AT&T Software.
00017 
00018 (c) AT&T Corp. All rights reserved.  AT&T is a registered trademark of AT&T Corp.
00019 
00020 ***********************************************************************
00021 
00022 History:
00023 
00024       24/11/99  - initial release by Hartmut Liefke, liefke@seas.upenn.edu
00025                                      Dan Suciu,      suciu@research.att.com
00026 */
00027 
00028 //**************************************************************************
00029 //**************************************************************************
00030 
00031 // This module contains the label dictionary manager
00032 // The label dictionary stores the associations between label IDs and
00033 // the actual label name.
00034 
00035 
00036 #ifndef LABELDICT_HPP
00037 #define LABELDICT_HPP
00038 
00039 // Maximum number of labels
00040 #define MAXLABEL_NUM       65535L
00041 
00042 #include "Types.hpp"
00043 #include "MemStreamer.hpp"
00044 #include "Load.hpp"
00045 
00046 #ifdef XMILL
00047 #include "Compress.hpp"
00048 #endif
00049 
00050 #ifdef XDEMILL
00051 #include "SmallUncompress.hpp"
00052 #endif
00053 
00054 #define ISATTRIB(labelid)  (((labelid)&ATTRIBLABEL_STARTIDX)?1:0)
00055 
00056 extern MemStreamer mainmem;
00057 
00058 #ifdef XMILL
00059 
00060 // For the compressor, the dictionary is implemented as a hash table,
00061 // since we need to look up the ID for a given name
00062 
00063 #define HASHTABLE_SIZE    256
00064 #define HASHTABLE_MASK    255
00065 #define HASHTABLE_SHIFT   8
00066 #define HASHTABLE_SHIFT2  16
00067 #define HASHTABLE_SHIFT3  24
00068 
00069 // For the compressor, we store each label dictionary item
00070 // in the following data structure.
00071 // The label data is stored *directly* after the CompressLabelDictItem object.
00072 
00073 struct CompressLabelDictItem
00074 
00075 {
00076    unsigned short          len;           // The length of the label
00077    TLabelID                labelid;       // The label ID
00078    CompressLabelDictItem   *nextsamehash; // The next label with the same hash value
00079    CompressLabelDictItem   *next;         // The label with the next higher ID
00080 
00081    char IsAttrib()   {  return ISATTRIB(labelid);  }
00082 
00083    char *GetLabelPtr()  {  return (char *)(this+1); }
00084       // Returns the pointer to the label name
00085 
00086    unsigned short GetLabelLen()  {  return len; }
00087       // Returns the label length
00088 };
00089 #endif
00090 
00091 //******************************************************************************
00092 
00093 #ifdef XDEMILL
00094 
00095 // For the decompressor, the label dictionary is implemented through a
00096 // lookup table. New entries can be added through the decompressor runs
00097 
00098 struct UncompressLabelDictItem
00099    // Each label is represented in this structure
00100 {
00101    unsigned short    len;        // The length of the label
00102    unsigned char     isattrib;   // Is it an attribute?
00103    char              *strptr;    // The pointer to the actual string
00104 };
00105 
00106 struct UncompressLabelDict
00107    // Represents a sequence of labels.
00108    // The labels are stored in 'UncompressLabelDictItem'-objects directly
00109    // after the 'UncompressLabelDict'-object.
00110 {
00111    TLabelID                labelnum;   // The number of labels in this sequence 
00112    UncompressLabelDict     *next;      // The next label sequence
00113 
00114    UncompressLabelDictItem* GetArrayPtr()
00115       // Returns the pointer to the UncompressLabelDictItem
00116    {
00117       return (UncompressLabelDictItem *)(this+1);
00118    }
00119 };
00120 #endif
00121 
00122 //******************************************************************************
00123 
00124 class LabelDict
00125    // The actual label dictionary class
00126 {
00127    TLabelID                   labelnum;            // The number of labels
00128 
00129 #ifdef XMILL
00130    TLabelID                   predefinedlabelnum; 
00131       // The number of labels predefined through the FSMs
00132       // Those labels will *not* be deleted in a Reset() invocation
00133 
00134    CompressLabelDictItem      **hashtable;         // The hash table
00135    CompressLabelDictItem      *labels,**labelref;  // The list of labels
00136 
00137    // This describes how many of the labels already have been saved
00138    // in the compressed file - i.e. in a previous run
00139    TLabelID                   savedlabelnum;    
00140    CompressLabelDictItem      **savedlabelref;
00141 #endif
00142 
00143 #ifdef XDEMILL
00144    UncompressLabelDict        *labeldictlist;      // The list of label groups
00145    UncompressLabelDict        *lastlabeldict;      // The last label group
00146 #endif
00147 
00148 public:
00149 #ifdef PROFILE
00150    unsigned long              lookupcount,hashitercount;
00151 #endif
00152 
00153    void Init()
00154    {
00155 #ifdef PROFILE
00156       lookupcount=
00157       hashitercount=0;
00158 #endif
00159 
00160       labelnum=0;
00161 
00162 #ifdef XMILL
00163       // No labels
00164       labels=NULL;
00165       labelref=&labels;
00166 
00167       // no saved labels until now
00168       savedlabelnum=0;
00169       savedlabelref=&labels;
00170 
00171       // let's get some memory for the hash table
00172 //      mainmem.WordAlign();
00173 //      hashtable=(CompressLabelDictItem **)mainmem.GetByteBlock(sizeof(CompressLabelDictItem *)*HASHTABLE_SIZE);
00174       hashtable=(CompressLabelDictItem **)new char[sizeof(CompressLabelDictItem *)*HASHTABLE_SIZE];
00175       if(hashtable==NULL)
00176          ExitNoMem();
00177 
00178       for(int i=0;i<HASHTABLE_SIZE;i++)
00179          hashtable[i]=NULL;
00180 #endif
00181 #ifdef XDEMILL
00182       // No labels until now
00183       labeldictlist=NULL;
00184       lastlabeldict=NULL;
00185 #endif
00186    }
00187 
00188    void Reset()
00189    {  
00190 #ifdef XMILL
00191       int      i;
00192       TLabelID labelid;
00193       CompressLabelDictItem **hashtableref;
00194 
00195       // First, let's scan over the hash table to remove all
00196       // entris of not predefined labels
00197       for(i=0;i<HASHTABLE_SIZE;i++)
00198       {
00199          hashtableref=hashtable+i;
00200          while(*hashtableref!=NULL)
00201          {
00202             if((*hashtableref)->labelid>=predefinedlabelnum)
00203                // Not a predefined label ID?
00204                // ==> Delete
00205                *hashtableref=(*hashtableref)->nextsamehash;
00206             else
00207                // Otherwise, we go to the next label
00208                hashtableref=&((*hashtableref)->nextsamehash);
00209          }
00210       }
00211 
00212       // Now we need to cut the global list of labels.
00213       // We keep the first 'predefinedlabelnum' predefined labels.
00214       CompressLabelDictItem **curlabelref=&labels;
00215 
00216       for(labelid=0;labelid<predefinedlabelnum;labelid++)
00217          curlabelref=&((*curlabelref)->next);
00218 
00219       *curlabelref=NULL;
00220 
00221       labelref=curlabelref;
00222       labelnum=predefinedlabelnum;
00223 
00224       // no saved labels until now
00225       savedlabelnum=0;
00226       savedlabelref=&labels;
00227 #endif
00228 #ifdef XDEMILL
00229       // No labels until now
00230       labelnum=0;
00231       labeldictlist=NULL;
00232       lastlabeldict=NULL;
00233 #endif
00234    }
00235 #ifdef XMILL
00236    void FinishedPredefinedLabels()
00237       // Finished the creation of predefined labels
00238       // ==> We keep the number of predefined labels
00239    {
00240       predefinedlabelnum=labelnum;
00241    }
00242 #endif
00243 
00244 // ************** These are functions for the compressor ****************************
00245 
00246 #ifdef XMILL
00247 
00248    int CalcHashIdx(char *label,int len)
00249       // Computes the hash value for a given label name
00250    {
00251       int val=0;
00252 
00253       while(len>0)
00254       {
00255          val+=*label;
00256          val=(val<<(*label&7))+(val>>(32-(*label&7)));
00257          label++;
00258          len--;
00259       }
00260       return ((val>>HASHTABLE_SHIFT)+(val>>HASHTABLE_SHIFT2)+(val>>HASHTABLE_SHIFT3)+val)&HASHTABLE_MASK;
00261    }
00262 
00263    TLabelID FindLabelOrAttrib(char *label,unsigned len,unsigned char isattrib)
00264       // Finds a given label (element tag or attribute)
00265    {
00266       CompressLabelDictItem **hashref=hashtable+CalcHashIdx(label,len);
00267 
00268 #ifdef PROFILE
00269       lookupcount++;
00270 #endif
00271 
00272       // We look through the linked list of hashtable entries
00273       while(*hashref!=NULL)
00274       {
00275 #ifdef PROFILE
00276          hashitercount++;
00277 #endif
00278          if(((*hashref)->len==len)&&
00279             ((*hashref)->IsAttrib()==isattrib)&&
00280             (mymemcmp((*hashref)->GetLabelPtr(),label,len)==0))
00281 
00282             return (*hashref)->labelid;
00283 
00284          hashref=&((*hashref)->nextsamehash);
00285       }
00286       return LABEL_UNDEFINED;
00287    }
00288 
00289    TLabelID CreateLabelOrAttrib(char *label,unsigned len,unsigned char isattrib)
00290       // Creates a new label in the hash table
00291    {
00292       // Let's get some memory first
00293       mainmem.WordAlign();
00294       CompressLabelDictItem *item=(CompressLabelDictItem *)mainmem.GetByteBlock(sizeof(CompressLabelDictItem)+len);
00295       mainmem.WordAlign();
00296 
00297       // We copy the label name
00298       item->len=(unsigned short)len;
00299       mymemcpy(item->GetLabelPtr(),label,len);
00300 
00301       // We insert it into the hashtable
00302       CompressLabelDictItem **hashref=hashtable+CalcHashIdx(label,len);
00303       item->nextsamehash=*hashref;
00304       *hashref=item;
00305 
00306       // Let's add the label to the list of labels
00307       item->next=NULL;
00308 
00309       *labelref=item;
00310       labelref=&(item->next);
00311 
00312       item->labelid=labelnum;
00313       labelnum++;
00314 
00315       if(isattrib)
00316          item->labelid|=ATTRIBLABEL_STARTIDX;
00317 
00318       return item->labelid;
00319    }
00320 
00321    TLabelID GetLabelOrAttrib(char *label,unsigned len,unsigned char isattrib)
00322       // Finds or creates a label/attribute
00323    {
00324       TLabelID labelid=FindLabelOrAttrib(label,len,isattrib);
00325 
00326       if(labelid==LABEL_UNDEFINED)
00327          return CreateLabelOrAttrib(label,len,isattrib);
00328       else
00329          return labelid;
00330    }
00331 
00332    void Store(Compressor *compressor)
00333       // Stores the current content of the label dictionary in the output
00334       // compressor. Only the labels since the last storing are copied.
00335    {
00336       MemStreamer mem;
00337 
00338       // Let's store the number of labels that were inserted
00339       // since the previous storing
00340       mem.StoreUInt32(labelnum-savedlabelnum);
00341 
00342       // We go through all new labels and store them.
00343       while(*savedlabelref!=NULL)
00344       {
00345          mem.StoreSInt32(((*savedlabelref)->labelid&ATTRIBLABEL_STARTIDX)?1:0,(*savedlabelref)->GetLabelLen());
00346          mem.StoreData((*savedlabelref)->GetLabelPtr(),(*savedlabelref)->GetLabelLen());
00347          savedlabelref=&((*savedlabelref)->next);
00348       }
00349 
00350       compressor->CompressMemStream(&mem);
00351 
00352       savedlabelnum=labelnum;
00353    }
00354 #endif
00355 
00356 //**********************************************************************************
00357 //**********************************************************************************
00358 //**********************************************************************************
00359 
00360 #ifdef XDEMILL
00361 
00362 #define LABELDICT_MINLABELNUM 32
00363 
00364 // ************** These are functions for the uncompressor ****************************
00365 
00366    void Load(SmallBlockUncompressor *uncompress)
00367       // Loads the next block of labels and appends the block to the
00368       // already existing blocks in the dictionary
00369    {
00370       UncompressLabelDictItem *dictitemptr;
00371       char                    isattrib;
00372       unsigned                dictlabelnum;
00373 
00374       // Let's get the number of labels first
00375       unsigned mylabelnum=uncompress->LoadUInt32();
00376       if(mylabelnum>MAXLABEL_NUM)
00377          ExitCorruptFile();
00378 
00379       // No new labels?
00380       if(mylabelnum==0)
00381          return;
00382 
00383       if(lastlabeldict!=NULL)
00384          // We already have some labels in the dictionary?
00385       {
00386          if(lastlabeldict->labelnum<LABELDICT_MINLABELNUM)
00387             // Is there some space for more labels in the current block?
00388             // We copy as many labels as we can
00389          {
00390             dictlabelnum=LABELDICT_MINLABELNUM-lastlabeldict->labelnum;
00391             if(dictlabelnum>mylabelnum)
00392                dictlabelnum=mylabelnum;
00393 
00394             mylabelnum-=dictlabelnum;
00395 
00396             dictitemptr=lastlabeldict->GetArrayPtr()+lastlabeldict->labelnum;
00397 
00398             lastlabeldict->labelnum+=dictlabelnum;
00399             labelnum+=dictlabelnum;
00400 
00401             // We copy the actual labels now
00402             // Each label is represented by the length and the attribute-flag
00403             // Then, the actual name follows
00404             while(dictlabelnum--)
00405             {
00406                dictitemptr->len=(unsigned short)uncompress->LoadSInt32(&isattrib);
00407                dictitemptr->isattrib=isattrib;
00408 
00409                dictitemptr->strptr=mainmem.GetByteBlock(dictitemptr->len);
00410 
00411                mymemcpy(dictitemptr->strptr,
00412                         uncompress->LoadData(dictitemptr->len),
00413                         dictitemptr->len);
00414 
00415                dictitemptr++;
00416             }
00417             if(mylabelnum==0)
00418                return;
00419          }
00420          // We create a new label block for the new labels
00421          lastlabeldict->next=(UncompressLabelDict *)mainmem.GetByteBlock(
00422                                  sizeof(UncompressLabelDict)+
00423                                  ((mylabelnum>LABELDICT_MINLABELNUM)? mylabelnum : LABELDICT_MINLABELNUM)*sizeof(UncompressLabelDictItem));
00424          lastlabeldict=lastlabeldict->next;
00425       }
00426       else
00427          labeldictlist=lastlabeldict=(UncompressLabelDict *)mainmem.GetByteBlock(
00428                                        sizeof(UncompressLabelDict)+
00429                                        ((mylabelnum>LABELDICT_MINLABELNUM)? mylabelnum : LABELDICT_MINLABELNUM)*sizeof(UncompressLabelDictItem));
00430 
00431       lastlabeldict->next=NULL;
00432 
00433 //******
00434 
00435       lastlabeldict->labelnum=mylabelnum;
00436       labelnum+=mylabelnum;
00437 
00438       dictlabelnum=mylabelnum;
00439 
00440       dictitemptr=lastlabeldict->GetArrayPtr();
00441 
00442       // We copy the actual labels now
00443       // Each label is represented by the length and the attribute-flag
00444       // Then, the actual name follows
00445       while(dictlabelnum--)
00446       {
00447          dictitemptr->len=(unsigned short)uncompress->LoadSInt32(&isattrib);
00448          dictitemptr->isattrib=isattrib;
00449 
00450          dictitemptr->strptr=mainmem.GetByteBlock(dictitemptr->len);
00451          mainmem.WordAlign();
00452 
00453          mymemcpy(dictitemptr->strptr,
00454                   uncompress->LoadData(dictitemptr->len),
00455                   dictitemptr->len);
00456 
00457          dictitemptr++;
00458       }
00459    }
00460 
00461    unsigned long LookupLabel(TLabelID labelid,char **ptr,unsigned char *isattrib)
00462       // Find the name of the label with a given ID
00463    {
00464       UncompressLabelDict *labeldict=labeldictlist;
00465 
00466       // We look through all the blocks
00467       while(labelid>=labeldict->labelnum)
00468       {
00469          labelid-=labeldict->labelnum;
00470          labeldict=labeldict->next;
00471       }
00472       UncompressLabelDictItem *item=labeldict->GetArrayPtr()+labelid;
00473 
00474       *isattrib=item->isattrib;
00475       *ptr=item->strptr;
00476       return item->len;
00477    }
00478 #endif
00479 
00480 //**********************************************************************************
00481 //**********************************************************************************
00482 
00483 #ifdef XMILL
00484    unsigned long LookupCompressLabel(TLabelID labelid,char **ptr)
00485       // Finds the name of a label with a given ID in the compressor
00486       // In the compressor, we need to traverse the entire list
00487       // of labels ==> Relatively inefficient - but we only
00488       // need that for printing
00489    {
00490       CompressLabelDictItem *item=labels;
00491 
00492       labelid=(labelid&(ATTRIBLABEL_STARTIDX-1));
00493 
00494       while(labelid--)
00495          item=item->next;
00496 
00497       *ptr=item->GetLabelPtr();
00498       return item->GetLabelLen();
00499    }
00500 #endif
00501 
00502    void PrintLabel(TLabelID labelid)
00503       // Prints the label with labelid 'labelid'
00504    {
00505       char           *ptr;
00506       unsigned long  len;
00507       unsigned char  isattrib=ISATTRIB(labelid);
00508 
00509       labelid=(labelid&(ATTRIBLABEL_STARTIDX-1));
00510 
00511 #ifdef XMILL
00512       len=LookupCompressLabel(labelid,&ptr);
00513 #endif
00514 #ifdef XDEMILL
00515       len=LookupLabel(labelid,&ptr,&isattrib);
00516 #endif
00517 
00518       if(isattrib)   // Attribute names start with '@'
00519       {
00520          printf("@");
00521          fwrite(ptr,len,1,stdout);
00522       }
00523       else
00524          fwrite(ptr,len,1,stdout);
00525    }
00526 
00527    void Print()
00528       // Prints all labels in the dictionary
00529    {
00530       for(unsigned long i=0;i<labelnum;i++)
00531       {
00532          printf("%lu : ",i);
00533          PrintLabel((TLabelID)i);
00534          printf("\n");
00535       }
00536    }
00537 
00538 #ifdef PROFILE
00539    void PrintProfile()
00540    {
00541       printf("Labeldict: count=%lu lookupcount=%lu   hashitercount=%lu\n",labelnum,lookupcount,hashitercount);
00542    }
00543 #endif
00544 };
00545 
00546 
00547 //***************************************************************
00548 
00549 // There is one global dictionary
00550 extern LabelDict globallabeldict;
00551 
00552 #endif

Generated on Sat Oct 13 16:08:38 2001 for XMILL by doxygen1.2.11.1 written by Dimitri van Heesch, © 1997-2001