00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044 #include "CompressMan.hpp"
00045
00046 #ifdef XDEMILL
00047 #include "UnCompCont.hpp"
00048 #include "SmallUncompress.hpp"
00049 #endif
00050
00051 extern MemStreamer blockmem;
00052
00053 #define SMALLCOMPRESS_THRESHOLD 1024
00054
00055
00056
00057
00058
00059 #define enumcompressmem (&blockmem)
00060
00061
00062
00063
00064
00065 #ifdef XMILL
00066
00067
// Geometry of the global enumeration hash table. The number of buckets is a
// power of two so that a hash value can be reduced to a bucket index with a
// bit mask. Size and mask are derived from the shift so that the three
// constants cannot drift apart.
#define ENUMHASHTABLE_SHIFT 15
#define ENUMHASHTABLE_SIZE  (1<<ENUMHASHTABLE_SHIFT)
#define ENUMHASHTABLE_MASK  (ENUMHASHTABLE_SIZE-1)
00071
00072 struct EnumCompressState;
00073
00074 struct EnumHashEntry
00075
00076 {
00077 EnumHashEntry *nextsamehash;
00078 unsigned short datalen;
00079 char *dataptr;
00080
00081 EnumCompressState *enumstate;
00082 unsigned localidx;
00083
00084
00085 void *operator new(size_t size)
00086 {
00087 return enumcompressmem->GetByteBlock(size);
00088 }
00089
00090 char *GetStrPtr() { return dataptr; }
00091 };
00092
00093
00094
00095 class EnumHashTable
00096
00097 {
00098 static EnumHashEntry *hashtable[ENUMHASHTABLE_SIZE];
00099 static char isinitialized;
00100
00101 static inline unsigned CalcHashIdx(char *str,int len)
00102
00103 {
00104 unsigned idx=len;
00105 while(len--)
00106 {
00107 idx=(idx<<3)+(idx>>29)+(idx>>(idx&1));
00108 idx+=(unsigned char)*str;
00109 str++;
00110 }
00111 return (idx+(idx>>ENUMHASHTABLE_SHIFT))&ENUMHASHTABLE_MASK;
00112 }
00113
00114 public:
00115
00116 EnumHashTable()
00117 {
00118 isinitialized=0;
00119 }
00120
00121 static void Initialize()
00122
00123 {
00124 if(isinitialized==0)
00125 {
00126 for(int i=0;i<ENUMHASHTABLE_SIZE;i++)
00127 hashtable[i]=NULL;
00128
00129 isinitialized=1;
00130 }
00131 }
00132
00133 static void Reset()
00134
00135
00136 {
00137 isinitialized=0;
00138 }
00139
00140 static EnumHashEntry *FindOrCreateEntry(char *str,int len,EnumCompressState *enumstate,char *isnew,MemStreamer *strmem)
00141
00142
00143
00144 {
00145
00146 unsigned hashidx=CalcHashIdx(str,len);
00147 EnumHashEntry **hashentryref=hashtable+hashidx;
00148 char *ptr1,*ptr2;
00149
00150
00151
00152 while(*hashentryref!=NULL)
00153 {
00154 if(((*hashentryref)->datalen==len)&&
00155 ((*hashentryref)->enumstate==enumstate))
00156 {
00157 ptr1=str;
00158 ptr2=(*hashentryref)->GetStrPtr();
00159
00160
00161 if(mymemcmp(ptr1,ptr2,len)==0)
00162
00163 {
00164 *isnew=0;
00165 return *hashentryref;
00166 }
00167 }
00168 hashentryref=&((*hashentryref)->nextsamehash);
00169 }
00170
00171
00172
00173
00174 *hashentryref=new EnumHashEntry();
00175
00176 (*hashentryref)->nextsamehash =NULL;
00177 (*hashentryref)->enumstate =enumstate;
00178
00179
00180 strmem->StoreUInt32(len);
00181
00182
00183 (*hashentryref)->datalen =len;
00184 (*hashentryref)->dataptr =strmem->GetByteBlock(len);
00185
00186
00187
00188 memcpy((*hashentryref)->dataptr,str,len);
00189
00190
00191 *isnew=1;
00192
00193 return *hashentryref;
00194 }
00195 };
00196
00197 EnumHashTable enumhashtable;
00198 EnumHashEntry *EnumHashTable::hashtable[ENUMHASHTABLE_SIZE];
00199 char EnumHashTable::isinitialized;
00200
00201
00202
00203
00204
00205
00206
00207 struct EnumCompressState
00208
00209 {
00210 unsigned long curidx;
00211
00212 MemStreamer stringmem;
00213
00214 unsigned long compressed_size;
00215 unsigned long uncompressed_size;
00216 EnumCompressState *next;
00217
00218 };
00219
00220
00221
00222 void AddEnumCompressState(EnumCompressState *state);
00223
00224
00225
00226
00227
00228 class EnumerationCompressor : public UserCompressor
00229
00230 {
00231 public:
00232 EnumerationCompressor()
00233 {
00234 datasize=sizeof(EnumCompressState);
00235 contnum=1;
00236 isrejecting=0;
00237 canoverlap=1;
00238 isfixedlen=0;
00239 }
00240
00241 void InitCompress(CompressContainer *cont,char *dataptr)
00242
00243 {
00244 ((EnumCompressState *)dataptr)->curidx=0;
00245
00246
00247
00248 ((EnumCompressState *)dataptr)->stringmem.Initialize(0);
00249
00250
00251
00252
00253 AddEnumCompressState((EnumCompressState *)dataptr);
00254
00255
00256 EnumHashTable::Initialize();
00257 }
00258
00259 void CompressString(char *str,unsigned len,CompressContainer *cont,char *dataptr)
00260
00261 {
00262 char isnew;
00263
00264
00265 EnumHashEntry *entry=EnumHashTable::FindOrCreateEntry(
00266 str,len,(EnumCompressState *)dataptr,
00267 &isnew,&(((EnumCompressState *)dataptr)->stringmem));
00268
00269 if(isnew)
00270 {
00271 entry->localidx=((EnumCompressState *)dataptr)->curidx;
00272 ((EnumCompressState *)dataptr)->curidx++;
00273 }
00274
00275
00276
00277
00278 cont->StoreCompressedUInt(entry->localidx);
00279 }
00280
00281
00282
00283
00284 void PrintCompressInfo(char *dataptr,unsigned long *overalluncomprsize,unsigned long *overallcomprsize)
00285
00286
00287 {
00288 unsigned long uncompsize=((EnumCompressState *)dataptr)->uncompressed_size,
00289 compsize=((EnumCompressState *)dataptr)->compressed_size;
00290
00291 *overalluncomprsize+=uncompsize;
00292 *overallcomprsize+=compsize;
00293
00294 if(compsize!=0)
00295 printf(" Enum: %8lu ==> %8lu (%f%%)\n",
00296 uncompsize,compsize,
00297 100.0f*(float)compsize/(float)uncompsize);
00298 else
00299 printf(" Enum: %8lu ==> Small...\n",uncompsize);
00300 }
00301 };
00302
00303 #endif // XMILL
00304
00305
00306
00307
00308
00309
00310
// One string of an uncompressed dictionary. The decompressor builds an
// array of these per enumeration instance; the compressor only uses the
// size of this structure when it reports the memory needed for the
// global data.
struct EnumDictItem
{
   unsigned long len;        // Length of the string in bytes
   unsigned char *dataptr;   // Points at the string bytes inside the dictionary buffer
};
00318
00319 #ifdef XDEMILL
00320
00321 struct EnumUncompressState
00322
00323 {
00324 unsigned long itemnum;
00325 unsigned long size;
00326 EnumDictItem *itemarray;
00327 unsigned char *strbuf;
00328 };
00329
00330
00331
00332
00333 class EnumerationUncompressor : public UserUncompressor
00334 {
00335 EnumUncompressState *GetNextPossibleEnumUnCompressState();
00336
00337
00338
00339
00340 public:
00341
00342 EnumerationUncompressor()
00343 {
00344 datasize=sizeof(EnumUncompressState);
00345 contnum=1;
00346 }
00347
00348 void InitUncompress(UncompressContainer *cont,char *dataptr)
00349
00350
00351 {
00352 *(EnumUncompressState *)dataptr=*GetNextPossibleEnumUnCompressState();
00353 }
00354
00355 void UncompressItem(UncompressContainer *cont,char *dataptr,XMLOutput *output)
00356
00357 {
00358 unsigned idx=cont->LoadUInt32();
00359 EnumDictItem *item=((EnumUncompressState *)dataptr)->itemarray+idx;
00360
00361 output->characters((char *)item->dataptr,item->len);
00362 }
00363 };
00364
00365
00366 #endif
00367
00368
00369
00370 class EnumerationCompressorFactory : public UserCompressorFactory
00371
00372 {
00373 unsigned enuminstancecount;
00374
00375
00376 #ifdef XMILL
00377 EnumerationCompressor enumcompress;
00378
00379 EnumCompressState *enumstatelist,
00380 **lastenumstateref;
00381 #endif
00382
00383
00384 #ifdef XDEMILL
00385 EnumerationUncompressor enumuncompress;
00386
00387
00388
00389
00390
00391 EnumUncompressState *enumuncompressstates;
00392 unsigned long activeenumuncompressstates;
00393 #endif
00394
00395 public:
00396 EnumerationCompressorFactory()
00397 {
00398 enuminstancecount=0;
00399
00400 #ifdef XMILL
00401 enumstatelist=NULL;
00402 lastenumstateref=&enumstatelist;
00403 #endif
00404
00405 #ifdef XDEMILL
00406 activeenumuncompressstates=0;
00407 #endif
00408 }
00409
00410 char *GetName() { return "e"; }
00411 char *GetDescription() { return "Compressor for small number of distinct data values"; }
00412 char IsRejecting() { return 0; }
00413 char CanOverlap() { return 1; }
00414
00415
00416 #ifdef XMILL
00417
00418 void AddEnumCompressState(EnumCompressState *state)
00419
00420 {
00421 *lastenumstateref=state;
00422 lastenumstateref=&(state->next);
00423 state->next=NULL;
00424 enuminstancecount++;
00425 }
00426
00427 UserCompressor *InstantiateCompressor(char *paramstr,int len)
00428
00429 {
00430 if(paramstr!=NULL)
00431 {
00432 Error("Enumeration compressor 'e' should not have any arguments ('");
00433 ErrorCont(paramstr,len);
00434 Error("')");
00435 return NULL;
00436 }
00437 return &enumcompress;
00438 }
00439 #endif
00440
00441
00442
00443 #ifdef XDEMILL
00444 UserUncompressor *InstantiateUncompressor(char *paramstr,int len)
00445
00446 {
00447 return &enumuncompress;
00448 }
00449 #endif
00450
00451
00452
00453
00454
00455
00456
00457
00458 #ifdef XMILL
00459 void CompressSmallGlobalData(Compressor *compressor)
00460
00461 {
00462 MemStreamer headmem;
00463 EnumCompressState *state=enumstatelist;
00464
00465
00466 headmem.StoreUInt32(enuminstancecount);
00467
00468
00469
00470 while(state!=NULL)
00471 {
00472 headmem.StoreUInt32(state->curidx);
00473 headmem.StoreUInt32(state->stringmem.GetSize());
00474
00475 state=state->next;
00476 }
00477
00478 compressor->CompressMemStream(&headmem);
00479
00480
00481 state=enumstatelist;
00482
00483 while(state!=NULL)
00484 {
00485 if(state->stringmem.GetSize()<SMALLCOMPRESS_THRESHOLD)
00486 compressor->CompressMemStream(&(state->stringmem));
00487 state=state->next;
00488 }
00489 }
00490
00491 void CompressLargeGlobalData(Output *output)
00492
00493
00494 {
00495 EnumCompressState *state=enumstatelist;
00496 Compressor compressor(output);
00497 unsigned long idx=0,uncompressedsize;
00498
00499 state=enumstatelist;
00500
00501 while(state!=NULL)
00502 {
00503
00504
00505 state->uncompressed_size=state->stringmem.GetSize();
00506
00507 if(state->stringmem.GetSize()>=SMALLCOMPRESS_THRESHOLD)
00508 {
00509 compressor.CompressMemStream(&state->stringmem);
00510 compressor.FinishCompress(&uncompressedsize,&(state->compressed_size));
00511
00512 }
00513 else
00514 state->compressed_size=0;
00515
00516
00517 state->stringmem.ReleaseMemory(0);
00518
00519 state=state->next;
00520 idx++;
00521 }
00522
00523
00524
00525
00526 EnumHashTable::Reset();
00527 enumstatelist=NULL;
00528 lastenumstateref=&enumstatelist;
00529 enuminstancecount=0;
00530 }
00531
00532 unsigned long GetGlobalDataSize()
00533
00534
00535
00536 {
00537 EnumCompressState *state=enumstatelist;
00538 unsigned long size=0;
00539
00540 while(state!=NULL)
00541 {
00542 size+=sizeof(EnumDictItem)*state->curidx+
00543 WordAlignUInt(state->stringmem.GetSize());
00544 state=state->next;
00545 }
00546 return size;
00547 }
00548 #endif
00549
00550
00551
00552
00553 #ifdef XDEMILL
00554 void UncompressSmallGlobalData(SmallBlockUncompressor *uncompressor)
00555 {
00556 MemStreamer headmem;
00557 unsigned long idx=0,i,j;
00558 unsigned char *srcptr,*ptr;
00559 EnumDictItem *curitem;
00560
00561
00562 enuminstancecount=uncompressor->LoadUInt32();
00563
00564
00565 enumuncompressstates=(EnumUncompressState *)enumcompressmem->GetByteBlock(sizeof(EnumUncompressState)*enuminstancecount);
00566
00567
00568
00569 for(i=0;i<enuminstancecount;i++)
00570 {
00571 enumuncompressstates[i].itemnum=uncompressor->LoadUInt32();
00572 enumuncompressstates[i].size=uncompressor->LoadUInt32();
00573 }
00574
00575
00576 WordAlignMemBlock();
00577
00578 for(i=0;i<enuminstancecount;i++)
00579 {
00580
00581 if(enumuncompressstates[i].size<SMALLCOMPRESS_THRESHOLD)
00582 {
00583
00584 srcptr=uncompressor->LoadData(enumuncompressstates[i].size);
00585
00586
00587 enumuncompressstates[i].strbuf=AllocateMemBlock(enumuncompressstates[i].size);
00588 WordAlignMemBlock();
00589 memcpy(enumuncompressstates[i].strbuf,srcptr,enumuncompressstates[i].size);
00590
00591 ptr=enumuncompressstates[i].strbuf;
00592
00593
00594 enumuncompressstates[i].itemarray=(EnumDictItem *)AllocateMemBlock(sizeof(EnumDictItem)*enumuncompressstates[i].itemnum);
00595
00596 curitem=enumuncompressstates[i].itemarray;
00597
00598
00599 for(j=0;j<enumuncompressstates[i].itemnum;j++)
00600 {
00601
00602 curitem->len=LoadUInt32(ptr);
00603
00604 curitem->dataptr=LoadData(ptr,curitem->len);
00605
00606
00607 curitem++;
00608 }
00609
00610
00611 if(ptr!=enumuncompressstates[i].strbuf+enumuncompressstates[i].size)
00612 ExitCorruptFile();
00613 }
00614 }
00615
00616
00617 activeenumuncompressstates=0;
00618 }
00619
00620 void UncompressLargeGlobalData(Input *input)
00621
00622 {
00623 unsigned long i,j,tmpsize;
00624 unsigned char *ptr;
00625 EnumDictItem *curitem;
00626
00627 Uncompressor uncompressor;
00628
00629 WordAlignMemBlock();
00630
00631 for(i=0;i<enuminstancecount;i++)
00632 {
00633 if(enumuncompressstates[i].size>=SMALLCOMPRESS_THRESHOLD)
00634 {
00635
00636 enumuncompressstates[i].strbuf=AllocateMemBlock(enumuncompressstates[i].size);
00637 WordAlignMemBlock();
00638
00639 tmpsize=enumuncompressstates[i].size;
00640
00641
00642 if(uncompressor.Uncompress(input,enumuncompressstates[i].strbuf,&tmpsize))
00643 ExitCorruptFile();
00644
00645
00646 if(tmpsize!=enumuncompressstates[i].size)
00647 ExitCorruptFile();
00648
00649 ptr=enumuncompressstates[i].strbuf;
00650
00651
00652 enumuncompressstates[i].itemarray=(EnumDictItem *)AllocateMemBlock(sizeof(EnumDictItem)*enumuncompressstates[i].itemnum);
00653
00654 curitem=enumuncompressstates[i].itemarray;
00655
00656
00657 for(j=0;j<enumuncompressstates[i].itemnum;j++)
00658 {
00659
00660 curitem->len=LoadUInt32(ptr);
00661
00662 curitem->dataptr=LoadData(ptr,curitem->len);
00663
00664
00665 curitem++;
00666 }
00667
00668
00669 if(ptr!=enumuncompressstates[i].strbuf+enumuncompressstates[i].size)
00670 ExitCorruptFile();
00671 }
00672 }
00673 }
00674
00675 EnumUncompressState *GetNextPossibleEnumUnCompressState()
00676
00677
00678 {
00679 activeenumuncompressstates++;
00680 return enumuncompressstates+activeenumuncompressstates-1;
00681 }
00682
00683 void FinishUncompress()
00684
00685 {
00686 for(unsigned long i=0;i<enuminstancecount;i++)
00687 {
00688 FreeMemBlock(enumuncompressstates[i].strbuf,enumuncompressstates[i].size);
00689 FreeMemBlock(enumuncompressstates[i].itemarray,sizeof(EnumDictItem)*enumuncompressstates[i].itemnum);
00690 }
00691 }
00692 #endif
00693 };
00694
00695 EnumerationCompressorFactory enumcompressfactory;
00696
00697 #ifdef XMILL
00698 void AddEnumCompressState(EnumCompressState *state)
00699 {
00700 enumcompressfactory.AddEnumCompressState(state);
00701 }
00702 #endif
00703
00704 #ifdef XDEMILL
00705 EnumUncompressState *EnumerationUncompressor::GetNextPossibleEnumUnCompressState()
00706 {
00707 return enumcompressfactory.GetNextPossibleEnumUnCompressState();
00708 }
00709 #endif