00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include <stdio.h>
00034 #include <time.h>
00035 #include <stdlib.h>
00036 #include <fcntl.h>
00037
00038 #include <ctype.h>
00039
00040 #ifdef WIN32
00041 #include <conio.h>
00042 #else
00043 #endif
00044
00045 #include "CompressMan.hpp"
00046 #include "VPathExprMan.hpp"
00047 #include "Input.hpp"
00048 #include "CurPath.hpp"
00049
00050 #ifdef XMILL
00051 #include "Output.hpp"
00052 #include "XMLParse.hpp"
00053 #include "SAXClient.hpp"
00054 #include "Compress.hpp"
00055 #include "ContMan.hpp"
00056
00057 #include "xmltk.h"
00058 #include "xmltkobj.h"
00059
00060 #endif
00061
00062 #ifdef XDEMILL
00063 #include "LabelDict.hpp"
00064 #include "ISAXClient.hpp"
00065 #include "XMLOutput.hpp"
00066 #include "BXMLOutput.hpp"
00067 #include "SmallUncompress.hpp"
00068 #include "UnCompCont.hpp"
00069
00070 #endif
00071
// Magic value written together with the file header by StoreFileHeader() and
// compared against the loaded value in UncompressFileHeader(); a mismatch is
// reported as "not a compressed XMill file".
00072 #define MAGIC_KEY 0x5e3d29e
00073
00074
00075
// Global CurPath instance. Within this file it is only touched for profile
// output at the end of Compress() (PROFILE builds).
00076 CurPath curpath;
// Global label dictionary. It is initialised in main() and Uncompress(),
// stored/loaded with every block header and reset after each processed file.
00077 LabelDict globallabeldict;
00078
// Global manager of the path expressions. main() registers the default
// expressions "//#" and "/" here; the expressions are stored in / loaded from
// the file header.
00079 VPathExprMan pathexprman;
00080
// Global state that only exists in the compressor build (XMILL).
00081 #ifdef XMILL
// Path tree; Compress() creates its root node and releases its memory per
// block (unless USE_FORWARD_DATAGUIDE is defined).
00082 PathTree pathtree;
00083
// Manager of the compress containers of the current block.
00084 CompressContainerMan compresscontman;
00085
00086
00087
00088
// Container block created by Compress() at the start of every block; it holds
// the three global containers declared below.
00089 CompressContainerBlock *globalcontblock;
00090
00091
00092
// Containers 1, 2 and 0 of 'globalcontblock' respectively (see Compress()).
00093 CompressContainer *globalwhitespacecont;
00094 CompressContainer *globalspecialcont;
00095 CompressContainer *globaltreecont;
00096
00097
00098
// Running totals of the uncompressed / compressed header sizes; defined
// elsewhere and accumulated in CompressCurrentBlock().
00099 extern unsigned long fileheadersize_orig;
00100 extern unsigned long fileheadersize_compressed;
00101 #endif
00102
00103
00104
// Global state that only exists in the decompressor build (XDEMILL).
00105 #ifdef XDEMILL
00106
// Output sinks handed to DecodeTreeBlock() by Uncompress(): 'output' is used
// when g_bBinary is zero, 'boutput' when it is non-zero.
00107 XMLOutput output;
00108 BXMLOutput boutput;
00109
00110
// Manager of the uncompress containers; defined elsewhere and driven by
// UncompressBlockHeader() and Uncompress().
00111 extern UncompressContainerMan uncomprcont;
00112
00113
00114
00115
00116
00117
00118
00119
// Buffer bookkeeping for a memory allocator. These variables are not used in
// this part of the file - presumably they back SetMemoryAllocationSize();
// TODO confirm against the allocator implementation.
00120 unsigned char *memoryalloc_buf=NULL;
00121 unsigned char *memoryalloc_curptr=NULL;
00122 unsigned long memoryalloc_bufsize=0;
00123
// Non-zero selects the binary output sink 'boutput' in Uncompress(); in that
// mode Uncompress() neither asks about overwriting nor creates the text
// output file.
00124 extern char g_bBinary;
00125
00126 #endif
00127
00128
// Option flags defined elsewhere (presumably set by HandleAllOptions() -
// TODO confirm).
// usestdout: when set, NULL is passed as destination file name and main()
//            accepts a command line without input files.
// no_output: when set, an empty string is passed to CreateFile() instead of
//            the destination name.
00129 extern char usestdout;
00130 extern char no_output;
00131
00132 #ifdef TIMING
// When set, Compress() measures and prints parse and compression times.
00133 extern char timing;
00134 #endif
00135
// globalfullwhitespacescompress: whitespace mode; stored as a flag in the file
//                                header and restored by UncompressFileHeader().
// verbose:            enables the additional statistics output.
// output_initialized: when zero, UncompressFileHeader() initialises 'output'.
// delete_inputfiles:  when set, the input file is removed after processing.
// memory_cutoff:      loaded from every block header by UncompressBlockHeader().
00136 extern char globalfullwhitespacescompress;
00137 extern char verbose;
00138 extern char output_initialized;
00139 extern char delete_inputfiles;
00140 extern unsigned long memory_cutoff;
00141
00142
00143
00144
// Global memory streamers. 'mainmem' is bracketed per file with
// StartNewMemBlock()/RemoveLastMemBlock(); 'blockmem' is released after every
// processed block. 'tmpmem' is not used in this part of the file.
00145 MemStreamer tmpmem(5);
00146 MemStreamer mainmem(1000);
00147
00148 MemStreamer blockmem(1000);
00149
00150
00151
00152
00153
// Forward declarations of functions used before their definition or defined
// in other source files.
00154 #ifdef XMILL
// Called once from main() before the options are processed.
00155 void FSMInit();
00156
00157
// Called at the start of Compress() and, in verbose mode, at its end.
00158 void InitSpecialContainerSizeSum();
00159 void PrintSpecialContainerSizeSum();
00160
// Compresses 'srcfile' into 'destfile'; defined further below.
00161 void Compress(char *srcfile,char *destfile);
00162 #endif
00163
00164 #ifdef XDEMILL
// Decompresses 'sourcefile' into 'destfile'; defined further below.
00165 void Uncompress(char *sourcefile,char *destfile);
00166 #endif
00167
00168
// PrintUsage: main() passes 0 when no argument was given and 1 for "-h".
// HandleAllOptions: its return value plus one is used by main() as the index
// of the first file argument in argv.
00169 void PrintUsage(char showmoreoptions);
00170 int HandleAllOptions(char **argv,int argc);
00171
00172
00173
00174
// overwrite_files: set by AskOverwriteFile() when the user answers 'A'.
// skip_all_files:  set by AskOverwriteFile() when the user answers 'Q'; makes
//                  the file loops in HandleFileArg() and main() stop.
00175 extern char overwrite_files;
00176 extern char skip_all_files;
00177
00178 char AskOverwriteFile(char *file)
00179
00180
00181
00182
00183 {
00184 int c;
00185
00186 if((!FileExists(file))||(overwrite_files))
00187
00188
00189 return 1;
00190
00191 printf("Overwrite file %s ? (Y(es) | N(o) | A(ll) | Q(uit)) ",file);
00192
00193
00194 do
00195 {
00196 #ifdef WIN32
00197 c=_getch();
00198 printf("\n");
00199 #else
00200 c=getchar();
00201 #endif
00202 switch(toupper(c))
00203 {
00204 case 'Y': return 1;
00205 case 'N': return 0;
00206 case 'Q': skip_all_files=1;return 0;
00207 case 'A': overwrite_files=1;return 1;
00208 }
00209 }
00210 while(1);
00211 }
00212
00213 void HandleSingleFile(char *file)
00214
00215
00216
00217 {
00218 int len=strlen(file);
00219 char *outfilename=file+len+5;
00220
00221
00222
00223 strcpy(outfilename,file);
00224
00225 try{
00226
00227 #ifdef XMILL
00228
00229
00230
00231 if((len>=4)&&(strcmp(file+len-4,".xml")==0))
00232 strcpy(outfilename+len-4,".xmi");
00233 else
00234 strcat(outfilename,".xm");
00235
00236 Compress(file,usestdout ? NULL : outfilename);
00237
00238 #ifdef PROFILE
00239 if(verbose)
00240 globallabeldict.PrintProfile();
00241 #endif
00242
00243 #endif
00244
00245 #ifdef XDEMILL
00246
00247
00248 if((len>=3)&&(strcmp(file+len-3,".xm")==0))
00249
00250 {
00251 outfilename[len-3]=0;
00252 Uncompress(file,usestdout ? NULL : outfilename);
00253 }
00254 else
00255 {
00256
00257 if((len>=4)&&(strcmp(file+len-4,".xmi")==0))
00258 {
00259 strcpy(outfilename+len-4,".xml");
00260 Uncompress(file,usestdout ? NULL : outfilename);
00261 }
00262 else
00263 {
00264
00265
00266
00267
00268 if((len>=4)&&(strcmp(file+len-4,".xml")==0))
00269 {
00270 strcpy(file+len-4,".xmi");
00271 if(FileExists(file))
00272 {
00273 Uncompress(file,usestdout ? NULL : outfilename);
00274 return;
00275 }
00276 strcpy(file+len-4,".xml");
00277 }
00278
00279
00280 strcpy(file+len,".xm");
00281
00282 if(FileExists(file)==0)
00283 {
00284 strcpy(file+len,"");
00285 Error("Could not find file '");
00286 ErrorCont(file);
00287 ErrorCont("' with extension '.xm'!");
00288 PrintErrorMsg();
00289 return;
00290 }
00291 Uncompress(file,usestdout ? NULL : outfilename);
00292 return;
00293 }
00294 }
00295 #endif
00296 }
00297 catch(XMillException *)
00298
00299 {
00300 Error("Error in file '");
00301 ErrorCont(file);
00302 ErrorCont("':");
00303 PrintErrorMsg();
00304 }
00305 }
00306
00307 void HandleFileArg(char *filepattern)
00308
00309
00310
00311
00312 {
00313 char fullpath[400];
00314 #ifdef WIN32
00315 _finddata_t finddata;
00316 long handle;
00317 char *ptr;
00318 int fullpathlen;
00319
00320
00321
00322
00323 ptr=filepattern;
00324 while(*ptr!=0)
00325 {
00326 if((*ptr=='*')||(*ptr=='?'))
00327 break;
00328 ptr++;
00329 }
00330
00331 if(*ptr==0)
00332
00333 {
00334 strcpy(fullpath,filepattern);
00335 HandleSingleFile(fullpath);
00336 return;
00337 }
00338
00339
00340
00341
00342 ptr=filepattern+strlen(filepattern)-1;
00343
00344 while(ptr>=filepattern)
00345 {
00346 if((*ptr=='\\')||(*ptr=='/'))
00347 break;
00348 ptr--;
00349 }
00350
00351 if(ptr<filepattern)
00352 {
00353
00354 *fullpath=0;
00355 fullpathlen=0;
00356 }
00357 else
00358 {
00359
00360
00361 memcpy(fullpath,filepattern,ptr-filepattern+1);
00362 fullpath[ptr-filepattern+1]=0;
00363 fullpathlen=ptr-filepattern+1;
00364 }
00365
00366
00367 handle=_findfirst(filepattern,&finddata);
00368 if(handle==-1)
00369 {
00370 printf("Could not find %s!\n",filepattern);
00371 return;
00372 }
00373
00374 do
00375 {
00376
00377 strcpy(fullpath+fullpathlen,finddata.name);
00378
00379 HandleSingleFile(fullpath);
00380 if(skip_all_files)
00381 break;
00382
00383 if(_findnext(handle,&finddata)!=0)
00384 break;
00385 }
00386 while(1);
00387
00388 _findclose(handle);
00389 #else
00390
00391
00392
00393 strcpy(fullpath,filepattern);
00394 HandleSingleFile(fullpath);
00395 #endif
00396 }
00397
00398
00399
00400
00401 #ifdef WIN32
00402
00403 int _cdecl main(int argc,char **argv)
00404
00405 #else
00406
00407 int main(int argc,char **argv)
00408
00409 #endif
00410 {
00411 int fileidx;
00412
00413
00414 #ifdef WIN32
00415 _fmode=_O_BINARY;
00416 #endif
00417
00418
00419
00420
00421
00422 InitGlobalTokenTable();
00423
00424 // Jason Kim added for Binary format as input
00425
00426
00427
00428 if(argc==1)
00429 {
00430 PrintUsage(0);
00431 return 0;
00432 }
00433
00434 #ifdef XMILL
00435 else
00436 {
00437 if((argc==2)&&(strcmp(argv[1],"-h")==0))
00438
00439 {
00440 PrintUsage(1);
00441 return 0;
00442 }
00443 }
00444 #endif
00445
00446
00447
00448 try{
00449
00450 globallabeldict.Init();
00451
00452 #ifdef XMILL
00453
00454
00455 FSMInit();
00456
00457 #ifdef USE_FORWARD_DATAGUIDE
00458 extern void InitForwardDataGuide();
00459
00460 InitForwardDataGuide();
00461 #endif
00462
00463 #endif
00464
00465
00466 fileidx=HandleAllOptions(argv+1,argc-1)+1;
00467
00468 #ifdef XMILL
00469
00470
00471 char *pathptr="//#";
00472 pathexprman.AddNewVPathExpr(pathptr,pathptr+strlen(pathptr));
00473 pathptr="/";
00474 pathexprman.AddNewVPathExpr(pathptr,pathptr+strlen(pathptr));
00475
00476 globallabeldict.FinishedPredefinedLabels();
00477
00478
00479
00480
00481 pathexprman.InitWhitespaceHandling();
00482
00483
00484
00485
00486
00487
00488 #endif
00489
00490
00491
00492
00493 if(fileidx>=argc)
00494 {
00495 if(usestdout)
00496 {
00497
00498 #ifdef XMILL
00499 Compress(NULL,NULL);
00500 #endif
00501
00502 #ifdef XDEMILL
00503 Uncompress(NULL,NULL);
00504 #endif
00505
00506 return 0;
00507 }
00508 else
00509 {
00510 Error("No input file specified! Specify '-c' to use stdin/stdout");
00511 Exit();
00512 }
00513 }
00514
00515 }
00516 catch(XMillException *)
00517
00518 {
00519 return -1;
00520 }
00521
00522
00523 do
00524 {
00525 HandleFileArg(argv[fileidx]);
00526 if(skip_all_files)
00527 break;
00528 fileidx++;
00529 }
00530 while(fileidx<argc);
00531
00532
00533
00534
00535
00536 CleanupGlobalTokenTable();
00537
00538 // Jason Kim added for Binary format as input
00539
00540
00541
00542 return 0;
00543 }
00544
00545
00546
00547
00548
00549
00550
00551
00552 #ifdef XMILL
00553
00554 inline void StoreFileHeader(Compressor *compressor)
00555 {
00556 MemStreamer tmpoutputstream(1);
00557
00558 tmpoutputstream.StoreSInt32(
00559 (globalfullwhitespacescompress==WHITESPACE_IGNORE) ? 1 : 0,
00560 MAGIC_KEY);
00561
00562 pathexprman.Store(&tmpoutputstream);
00563
00564 compressor->CompressMemStream(&tmpoutputstream);
00565 }
00566
00567 inline void CompressBlockHeader(Compressor *compressor,unsigned long totaldatasize)
00568 {
00569 MemStreamer memstream;
00570
00571 memstream.StoreUInt32(totaldatasize);
00572
00573
00574
00575 compresscontman.StoreMainInfo(&memstream);
00576
00577 compressor->CompressMemStream(&memstream);
00578
00579
00580 globallabeldict.Store(compressor);
00581
00582 compressman.CompressSmallGlobalData(compressor);
00583
00584 compresscontman.CompressSmallContainers(compressor);
00585 }
00586
00587 char fileheader_iswritten=0;
00588
00589 inline void CompressCurrentBlock(Output *output,unsigned long totaldatasize)
00590 {
00591 {
00592 Compressor compressor(output);
00593 unsigned long headersize,headersize_compressed;
00594
00595 if(fileheader_iswritten==0)
00596 {
00597 StoreFileHeader(&compressor);
00598 fileheader_iswritten=1;
00599 }
00600 CompressBlockHeader(&compressor,totaldatasize);
00601 compressor.FinishCompress(&headersize,&headersize_compressed);
00602
00603 fileheadersize_orig +=headersize;
00604 fileheadersize_compressed +=headersize_compressed;
00605 }
00606
00607 compressman.CompressLargeGlobalData(output);
00608
00609
00610 compresscontman.CompressLargeContainers(output);
00611 }
00612
00613 void Compress(char *srcfile,char *destfile)
00614 {
00615 SAXClient saxclient;
00616 XMLParse xmlparse;
00617 Output output;
00618 #ifdef TIMING
00619 clock_t c1,c2,c3;
00620 #endif
00621 int parsetime=0,compresstime=0;
00622 char isend;
00623
00624 bool b_isend;
00625
00626 unsigned long totaldatasize;
00627
00628
00629
00630
00631 InitSpecialContainerSizeSum();
00632
00633 fileheader_iswritten=0;
00634
00635 if(AskOverwriteFile(destfile)==0)
00636 return;
00637
00638 if(xmlparse.OpenFile(srcfile)==0)
00639 {
00640 Error("Could not find file '");
00641 ErrorCont(srcfile);
00642 ErrorCont("'!");
00643 PrintErrorMsg();
00644 return;
00645 }
00646
00647 if(output.CreateFile((no_output==0) ? destfile : (char*)"")==0)
00648 {
00649 Error("Could not create output file '");
00650 ErrorCont(destfile);
00651 PrintErrorMsg();
00652 xmlparse.CloseFile();
00653 return;
00654 }
00655
00656 mainmem.StartNewMemBlock();
00657
00658 #ifdef USE_FORWARD_DATAGUIDE
00659 pathdict.Init();
00660 pathtree.CreateRootNode();
00661 #endif
00662
00663
00664 try{
00665 do
00666 {
00667 #ifndef USE_FORWARD_DATAGUIDE
00668 pathdict.Init();
00669 pathtree.CreateRootNode();
00670 #else
00671 pathdict.ResetContBlockPtrs();
00672 #endif
00673
00674 globalcontblock =compresscontman.CreateNewContainerBlock(3,0,NULL,NULL);
00675 globaltreecont =globalcontblock->GetContainer(0);
00676 globalwhitespacecont =globalcontblock->GetContainer(1);
00677 globalspecialcont =globalcontblock->GetContainer(2);
00678 #ifdef TIMING
00679 if(timing)
00680 c1=clock();
00681 #endif
00682
00683
00684
00685
00686
00687
00688 ITSAXContentHandler *pch = NULL;
00689 IParse2TSAX *pparseBin;
00690 CreateBin2TSAX(&IID_IParse2TSAX, (void**)&pparseBin);
00691 CreateTSAX2SAX(&IID_ITSAXContentHandler, (void**)&pch, &saxclient);
00692
00693
00694
00695 IFileStream *pfs = _CreateFileStream(srcfile);
00696
00697 b_isend = pparseBin->Parse(pfs, pch);
00698
00699 pfs->Release();
00700 pparseBin->Release();
00701 pch->Release();
00702
00703 if(b_isend)
00704 {
00705 isend=1;
00706 }
00707
00708 if (!b_isend)
00709 {
00710 isend=xmlparse.DoParsing(&saxclient);
00711
00712 if(isend)
00713 {
00714 isend=1;
00715 }
00716 }
00717
00718
00719
00720
00721
00722 #ifdef TIMING
00723 if(timing)
00724 c2=clock();
00725 #endif
00726
00727 compresscontman.FinishCompress();
00728
00729 totaldatasize= compresscontman.GetDataSize()+
00730 compressman.GetDataSize();
00731
00732 CompressCurrentBlock(&output,totaldatasize);
00733 #ifdef TIMING
00734 if(timing)
00735 {
00736 c3=clock();
00737 parsetime+=(c2-c1);
00738 compresstime+=(c3-c2);
00739 }
00740 #endif
00741 #ifdef PROFILE
00742 if(verbose)
00743 printf("Pathtree size: %lu\n",pathtreemem.GetSize());
00744 #endif
00745
00746 if(verbose)
00747 {
00748
00749 #ifdef PROFILE
00750 pathtree.PrintProfile();
00751 pathdict.PrintProfile();
00752 #endif
00753 }
00754
00755 #ifndef USE_FORWARD_DATAGUIDE
00756 pathtree.ReleaseMemory();
00757 #endif
00758 compresscontman.ReleaseMemory();
00759 blockmem.ReleaseMemory(1000);
00760 }
00761 while(isend==0);
00762 }
00763 catch(XMillException *)
00764 {
00765 output.CloseAndDeleteFile();
00766 xmlparse.CloseFile();
00767 Exit();
00768 }
00769 #ifdef TIMING
00770 if(timing)
00771 printf("%fs + %fs = %fs\n",(float)parsetime/(float)CLOCKS_PER_SEC,
00772 (float)compresstime/(float)CLOCKS_PER_SEC,
00773 (float)(parsetime+compresstime)/(float)CLOCKS_PER_SEC);
00774 #endif
00775 if(verbose)
00776 PrintSpecialContainerSizeSum();
00777
00778 #ifdef PROFILE
00779 if(verbose)
00780 curpath.PrintProfile();
00781 #endif
00782
00783 xmlparse.CloseFile();
00784 output.CloseFile();
00785
00786 globallabeldict.Reset();
00787
00788 mainmem.RemoveLastMemBlock();
00789
00790 if(delete_inputfiles)
00791 RemoveFile(srcfile);
00792 }
00793
00794 #endif //XMILL
00795
00796
00797
00798
00799
00800
00801
00802 #ifdef XDEMILL
00803
00804 void DecodeTreeBlock(char g_bBinary, UncompressContainer *treecont, UncompressContainer *whitespacecont, UncompressContainer *specialcont, ISAXClient *output);
00805
00806
00807
00808
00809 void UncompressFileHeader(SmallBlockUncompressor *uncompressor)
00810 {
00811 char iswhitespaceignore;
00812
00813 if(uncompressor->LoadSInt32(&iswhitespaceignore)!=MAGIC_KEY)
00814 {
00815 Error("The file is not a compressed XMill file!");
00816 Exit();
00817 }
00818
00819 if(iswhitespaceignore)
00820 {
00821 globalfullwhitespacescompress=WHITESPACE_IGNORE;
00822 if(output_initialized==0)
00823 output.Init(XMLINTENT_SPACES,0,1);
00824 }
00825 else
00826 {
00827 globalfullwhitespacescompress=WHITESPACE_STOREGLOBAL;
00828 if(output_initialized==0)
00829 output.Init(XMLINTENT_NONE,0,1);
00830 }
00831 pathexprman.Load(uncompressor);
00832 }
00833
00834 static char fileheader_isread=0;
00835
00836 char UncompressBlockHeader(Input *input)
00837 {
00838 SmallBlockUncompressor uncompressor(input);
00839
00840 if(fileheader_isread==0)
00841 {
00842 UncompressFileHeader(&uncompressor);
00843 fileheader_isread=1;
00844 }
00845 else
00846 {
00847 if(input->IsEndOfFile())
00848 return 1;
00849 }
00850
00851 memory_cutoff=uncompressor.LoadUInt32();
00852
00853 SetMemoryAllocationSize(memory_cutoff);
00854
00855 uncomprcont.Load(&uncompressor);
00856
00857 globallabeldict.Load(&uncompressor);
00858
00859 compressman.UncompressSmallGlobalData(&uncompressor);
00860
00861 uncomprcont.AllocateContMem();
00862
00863 uncomprcont.UncompressSmallContainers(&uncompressor);
00864
00865 return 0;
00866 }
00867
00868 #undef CreateFile
00869
00870 void Uncompress(char *sourcefile,char *destfile)
00871
00872 {
00873 Input input;
00874 #ifdef TIMING
00875 clock_t c1,c2,c3,ct1=0,ct2=0;
00876 #endif
00877
00878 UncompressContainer *uncomprtreecont;
00879 UncompressContainer *uncomprwhitespacecont;
00880 UncompressContainer *uncomprspecialcont;
00881
00882
00883 if(!g_bBinary)
00884 {
00885 if(AskOverwriteFile(destfile)==0)
00886 return;
00887 }
00888
00889 if(input.OpenFile(sourcefile)==0)
00890 {
00891 Error("Could not find file '");
00892 ErrorCont(sourcefile);
00893 PrintErrorMsg();
00894 return;
00895 }
00896
00897 globallabeldict.Init();
00898
00899
00900 if(!g_bBinary)
00901 {
00902 if(output.CreateFile((no_output==0) ? destfile : (char*)"")==0)
00903 {
00904 Error("Could not create output file '");
00905 ErrorCont(destfile);
00906 PrintErrorMsg();
00907 input.CloseFile();
00908 return;
00909 }
00910 }
00911
00912 #ifdef TIMING
00913 c1=clock();
00914 #endif
00915
00916 unsigned long blockidx=0;
00917
00918 mainmem.StartNewMemBlock();
00919
00920 while(UncompressBlockHeader(&input)==0)
00921 {
00922 compressman.UncompressLargeGlobalData(&input);
00923 uncomprcont.UncompressLargeContainers(&input);
00924
00925 uncomprcont.Init();
00926
00927 uncomprtreecont =uncomprcont.GetContBlock(0)->GetContainer(0);
00928 uncomprwhitespacecont=uncomprcont.GetContBlock(0)->GetContainer(1);
00929 uncomprspecialcont =uncomprcont.GetContBlock(0)->GetContainer(2);
00930 #ifdef TIMING
00931 c2=clock();
00932 #endif
00933
00934
00935
00936
00937
00938
00939 if (g_bBinary)
00940 {
00941
00942 DecodeTreeBlock(g_bBinary, uncomprtreecont,uncomprwhitespacecont,uncomprspecialcont, &boutput);
00943
00944 }
00945 else
00946 {
00947
00948 DecodeTreeBlock(g_bBinary, uncomprtreecont,uncomprwhitespacecont,uncomprspecialcont,&output);
00949 }
00950
00951
00952
00953
00954
00955
00956
00957 #ifdef TIMING
00958 c3=clock();
00959 #endif
00960
00961 #ifdef TIMING
00962 ct1+=c2-c1;
00963 ct2+=c3-c2;
00964 #endif
00965
00966 uncomprcont.FinishUncompress();
00967 uncomprcont.ReleaseContMem();
00968 compressman.FinishUncompress();
00969 blockmem.ReleaseMemory(1000);
00970 #ifdef TIMING
00971 c1=clock();
00972 #endif
00973 blockidx++;
00974 }
00975 #ifdef TIMING
00976 if(verbose)
00977 printf("%fs + %fs = %fs\n",(float)ct1/(float)CLOCKS_PER_SEC,
00978 (float)ct2/(float)CLOCKS_PER_SEC,
00979 (float)(ct1+ct2)/(float)CLOCKS_PER_SEC);
00980 #endif
00981 input.CloseFile();
00982 output.CloseFile();
00983
00984 if(delete_inputfiles)
00985 RemoveFile(sourcefile);
00986
00987 globallabeldict.Reset();
00988
00989 fileheader_isread=0;
00990
00991 mainmem.RemoveLastMemBlock();
00992 }
00993
00994 #endif // XDEMILL