Main Page   Namespace List   Class Hierarchy   Compound List   File List   Compound Members   File Members  

Main.cpp

Go to the documentation of this file.
00001 /*
00002 This product contains certain software code or other information
00003 ("AT&T Software") proprietary to AT&T Corp. ("AT&T").  The AT&T
00004 Software is provided to you "AS IS".  YOU ASSUME TOTAL RESPONSIBILITY
00005 AND RISK FOR USE OF THE AT&T SOFTWARE.  AT&T DOES NOT MAKE, AND
00006 EXPRESSLY DISCLAIMS, ANY EXPRESS OR IMPLIED WARRANTIES OF ANY KIND
00007 WHATSOEVER, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
00008 MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, WARRANTIES OF
00009 TITLE OR NON-INFRINGEMENT OF ANY INTELLECTUAL PROPERTY RIGHTS, ANY
00010 WARRANTIES ARISING BY USAGE OF TRADE, COURSE OF DEALING OR COURSE OF
00011 PERFORMANCE, OR ANY WARRANTY THAT THE AT&T SOFTWARE IS "ERROR FREE" OR
00012 WILL MEET YOUR REQUIREMENTS.
00013 
00014 Unless you accept a license to use the AT&T Software, you shall not
00015 reverse compile, disassemble or otherwise reverse engineer this
00016 product to ascertain the source code for any AT&T Software.
00017 
00018 (c) AT&T Corp. All rights reserved.  AT&T is a registered trademark of AT&T Corp.
00019 
00020 ***********************************************************************
00021 
00022 History:
00023 
00024       11/30/99  - initial release by Hartmut Liefke, liefke@seas.upenn.edu
00025                                      Dan Suciu,      suciu@research.att.com
00026 
00027       10/10/01  - Added Binary Format input and output by Jason Kim, jasonkim@cs.washington.edu
00028 */
00029 
00030 //**************************************************************************
00031 //**************************************************************************
00032 
00033 #include <stdio.h>
00034 #include <time.h>
00035 #include <stdlib.h>
00036 #include <fcntl.h>
00037 
00038 #include <ctype.h>
00039 
00040 #ifdef WIN32
00041 #include <conio.h>
00042 #else
00043 #endif
00044 
00045 #include "CompressMan.hpp"
00046 #include "VPathExprMan.hpp"
00047 #include "Input.hpp"
00048 #include "CurPath.hpp"
00049 
00050 #ifdef XMILL
00051 #include "Output.hpp"
00052 #include "XMLParse.hpp"
00053 #include "SAXClient.hpp"
00054 #include "Compress.hpp"
00055 #include "ContMan.hpp"
00056 
00057 #include "xmltk.h"
00058 #include "xmltkobj.h"
00059 
00060 #endif
00061 
00062 #ifdef XDEMILL
00063 #include "LabelDict.hpp"
00064 #include "ISAXClient.hpp"
00065 #include "XMLOutput.hpp"
00066 #include "BXMLOutput.hpp"
00067 #include "SmallUncompress.hpp"
00068 #include "UnCompCont.hpp"
00069 
00070 #endif
00071 
00072 #define MAGIC_KEY 0x5e3d29e
00073    // The uncompressed first block of an XMill file
00074    // must start with these bytes
00075 
00076 CurPath           curpath;          // The current path in the XML document
00077 LabelDict         globallabeldict;  // The label dictionary
00078 
00079 VPathExprMan         pathexprman;   // The path manager
00080 
00081 #ifdef XMILL
00082 PathTree             pathtree;      // The path tree
00083 
00084 CompressContainerMan    compresscontman;  // The user compressor manager
00085 
00086 
00087 // We keep the pointers to the special containers
00088 
00089 CompressContainerBlock  *globalcontblock;
00090    // The first container block in the system contains
00091    // those three containers
00092 
00093 CompressContainer       *globalwhitespacecont;  // The special container for white spaces
00094 CompressContainer       *globalspecialcont;     // The special container for special sequences (DTDs, PIs...)
00095 CompressContainer       *globaltreecont;        // The structure container
00096 
00097 // We keep the accumulated (un)compressed size of the
00098 // header while compressing the blocks
00099 extern unsigned long fileheadersize_orig;
00100 extern unsigned long fileheadersize_compressed;
00101 #endif
00102 
00103 //**********************************
00104 
00105 #ifdef XDEMILL
00106 
00107 XMLOutput output;
00108 BXMLOutput boutput;
00109    // The output file
00110 
00111 extern UncompressContainerMan  uncomprcont;
00112    // The container manager for the decompressor
00113 
00114 // To read the structural information at the beginning
00115 // of each file and each run in the file, an input buffer
00116 // is kept that can change (increase) in size.
00117 // The input buffer here can be larger than the input buffer
00118 // in 'Input'. The buffer here can contain an entire block
00119 // that is decompressed
00120 unsigned char *memoryalloc_buf=NULL;
00121 unsigned char *memoryalloc_curptr=NULL;
00122 unsigned long memoryalloc_bufsize=0;
00123 
00124 extern char g_bBinary; // For binary format output
00125 
00126 #endif
00127 
00128 // Several flags (defined in Options.hpp)
00129 extern char usestdout;
00130 extern char no_output;
00131 
00132 #ifdef TIMING
00133 extern char timing;
00134 #endif
00135 
00136 extern char globalfullwhitespacescompress;
00137 extern char verbose;
00138 extern char output_initialized;
00139 extern char delete_inputfiles;
00140 extern unsigned long memory_cutoff;
00141 
00142 //**********************************
00143 
00144 // There are three separate memory spaces used:
00145 MemStreamer tmpmem(5);        // The temporary memory space
00146 MemStreamer mainmem(1000);    // The main memory space:
00147                               // Takes information about path expressions
00148 MemStreamer blockmem(1000);   // The block memory space:
00149                               // This space is deleted after each run
00150                               // It contains all structural information from the
00151                               // containers, the path dictionary, etc.
00152 
00153 // Several function prototypes
00154 #ifdef XMILL
00155 void FSMInit();   // Initializes the FSM machinery
00156                   // It creates a '#' and '@#' label
00157 
00158 void InitSpecialContainerSizeSum(); // Resets the accumulate size for special containers
00159 void PrintSpecialContainerSizeSum();// Prints the accumulate size for special containers
00160 
00161 void Compress(char *srcfile,char *destfile);
00162 #endif
00163 
00164 #ifdef XDEMILL
00165 void Uncompress(char *sourcefile,char *destfile);
00166 #endif
00167 
00168 // Defined in Options.cpp
00169 void PrintUsage(char showmoreoptions);
00170 int HandleAllOptions(char **argv,int argc);
00171 
00172 //************************************************************************
00173 //************************************************************************
00174 
00175 extern char overwrite_files;  // Is 1, if the user wants to overwrite all files
00176 extern char skip_all_files;   // Is 1, if the user want to skip all remaining files
00177 
00178 char AskOverwriteFile(char *file)
00179    // Asks the user whether to overwrite
00180    // Returns 1, if yes,
00181    // Returns 0, if no,
00182    // Returns -1, if quit
00183 {
00184    int c;
00185 
00186    if((!FileExists(file))||(overwrite_files))
00187       // If the file doesn't exist or we automatically overwrite it,
00188       // then we can simply return true
00189       return 1;
00190 
00191    printf("Overwrite file %s ? (Y(es) | N(o) | A(ll) | Q(uit)) ",file);
00192 
00193    // We wait until the user presses 'y', 'n', 'q', or 'a'
00194    do
00195    {
00196 #ifdef WIN32
00197       c=_getch();
00198       printf("\n");
00199 #else
00200       c=getchar();
00201 #endif
00202       switch(toupper(c))
00203       {
00204       case 'Y':   return 1;
00205       case 'N':   return 0;
00206       case 'Q':   skip_all_files=1;return 0;
00207       case 'A':   overwrite_files=1;return 1;
00208       }
00209    }
00210    while(1);
00211 }
00212 
00213 void HandleSingleFile(char *file)
00214    // Considers a single file 'file' and (de)compresses it
00215    // Most importantly, the name of the destination file is
00216    // determines by modifying/adding/removing extensions '.xml', '.xmi', '.xm'
00217 {
00218    int len=strlen(file);
00219    char  *outfilename=file+len+5;
00220       // We use the space after the input file
00221       // and leave a little bit of space for possible extension '.xmi' or '.xm'
00222 
00223    strcpy(outfilename,file);
00224 
00225    try{
00226 
00227 #ifdef XMILL
00228    // For the compressor, we replace ending '.xml' with '.xmi'
00229    // Or, if there is no ending '.xml', we replace by '.xm'
00230 
00231    if((len>=4)&&(strcmp(file+len-4,".xml")==0))
00232       strcpy(outfilename+len-4,".xmi");
00233    else
00234       strcat(outfilename,".xm");
00235 
00236    Compress(file,usestdout ? NULL : outfilename);
00237 
00238 #ifdef PROFILE
00239    if(verbose)
00240       globallabeldict.PrintProfile();
00241 #endif
00242 
00243 #endif
00244 
00245 #ifdef XDEMILL
00246    // For decompression, we omit ending '.xm' or replace
00247    // ending '.xmi' with '.xml'
00248    if((len>=3)&&(strcmp(file+len-3,".xm")==0))
00249       // Do we have ending '.xm' ?
00250    {
00251       outfilename[len-3]=0;   // We eliminate the ending in the out file name
00252       Uncompress(file,usestdout ? NULL : outfilename);
00253    }
00254    else
00255    {
00256       // We replace '.xmi' by '.xml'
00257       if((len>=4)&&(strcmp(file+len-4,".xmi")==0))
00258       {
00259          strcpy(outfilename+len-4,".xml");
00260          Uncompress(file,usestdout ? NULL : outfilename);
00261       }
00262       else
00263       {
00264          // Otherwise, we assume the user specified the *uncompressed*
00265          // file and we try to either replace '.xml' by '.xmi'
00266          // or append '.xm'.
00267 
00268          if((len>=4)&&(strcmp(file+len-4,".xml")==0))
00269          {
00270             strcpy(file+len-4,".xmi");
00271             if(FileExists(file))
00272             {
00273                Uncompress(file,usestdout ? NULL : outfilename);
00274                return;
00275             }
00276             strcpy(file+len-4,".xml");
00277          }
00278 
00279          // Let's try to append '.xm'
00280          strcpy(file+len,".xm");
00281 
00282          if(FileExists(file)==0)
00283          {
00284             strcpy(file+len,"");
00285             Error("Could not find file '");
00286             ErrorCont(file);
00287             ErrorCont("' with extension '.xm'!");
00288             PrintErrorMsg();
00289             return;
00290          }
00291          Uncompress(file,usestdout ? NULL : outfilename);
00292          return;
00293       }
00294    }
00295 #endif
00296    }
00297    catch(XMillException *)
00298       // An error occurred
00299    {
00300       Error("Error in file '");
00301       ErrorCont(file);
00302       ErrorCont("':");
00303       PrintErrorMsg();
00304    }
00305 }
00306 
00307 void HandleFileArg(char *filepattern)
00308    // Takes a file name argument from the command line
00309    // and forward the file names to 'HandleSingleFile'
00310    // In Windows, file patterns with '*' and '?' must be explicitly
00311    // resolved.
00312 {
00313    char        fullpath[400];
00314 #ifdef WIN32
00315    _finddata_t finddata;
00316    long        handle;
00317    char        *ptr;
00318    int         fullpathlen;
00319 
00320    // Let's check if we have any meta characters '*' or '?' ?
00321    // We don't have them, we go directly to 'HandleSingleFile'
00322 
00323    ptr=filepattern;
00324    while(*ptr!=0)
00325    {
00326       if((*ptr=='*')||(*ptr=='?'))
00327          break;
00328       ptr++;
00329    }
00330 
00331    if(*ptr==0) // We didn't find any metacharacter?
00332                // The file name gets directly forwarded to HandleSingleFile
00333    {
00334       strcpy(fullpath,filepattern);
00335       HandleSingleFile(fullpath);
00336       return;
00337    }
00338    // Otherwise, we apply functions '_findfirst' and '_findnext'
00339 
00340    // We scan from the back of the file name and look
00341    // for a separator
00342    ptr=filepattern+strlen(filepattern)-1;
00343 
00344    while(ptr>=filepattern)
00345    {
00346       if((*ptr=='\\')||(*ptr=='/'))
00347          break;
00348       ptr--;
00349    }
00350 
00351    if(ptr<filepattern)   // We didn't find a separator ?
00352    {
00353       // The file path is empty
00354       *fullpath=0;
00355       fullpathlen=0;
00356    }
00357    else
00358    {
00359       // We the path part from the file pattern including
00360       // the separator that we found
00361       memcpy(fullpath,filepattern,ptr-filepattern+1);
00362       fullpath[ptr-filepattern+1]=0;
00363       fullpathlen=ptr-filepattern+1;
00364    }
00365 
00366    // Let's now look for the file
00367    handle=_findfirst(filepattern,&finddata);
00368    if(handle==-1)
00369    {
00370       printf("Could not find %s!\n",filepattern);
00371       return;
00372    }
00373 
00374    do
00375    {
00376       // We concatenate the file name to the path
00377       strcpy(fullpath+fullpathlen,finddata.name);
00378 
00379       HandleSingleFile(fullpath);
00380       if(skip_all_files)
00381          break;
00382 
00383       if(_findnext(handle,&finddata)!=0)
00384          break;
00385    }
00386    while(1);
00387 
00388    _findclose(handle);
00389 #else
00390 
00391    // In UNIX, the file name expansion is done by the shell
00392    // ==> We only need to look at the specific file
00393    strcpy(fullpath,filepattern);
00394    HandleSingleFile(fullpath);
00395 #endif
00396 }
00397 
00398 //************************************************************************
00399 //************************************************************************
00400 
00401 #ifdef WIN32
00402 
00403 int _cdecl main(int argc,char **argv)
00404 
00405 #else
00406 
00407 int main(int argc,char **argv)
00408 
00409 #endif
00410 {
00411    int fileidx;
00412 
00413    // We set the default file mode to 'binary'
00414 #ifdef WIN32
00415    _fmode=_O_BINARY;
00416 #endif
00417 
00418 
00419 ////////////////////////////////////////////////
00420 // Jason Kim added for Binary format as input
00421 ////////////////////////////////////////////////
00422    InitGlobalTokenTable();
00423 ////////////////////////////////////////////////
00424 // Jason Kim added for Binary format as input
00425 ////////////////////////////////////////////////
00426 
00427 
00428    if(argc==1) // No arguments for the program?
00429    {
00430       PrintUsage(0);
00431       return 0;
00432    }
00433 
00434 #ifdef XMILL
00435    else
00436    {
00437       if((argc==2)&&(strcmp(argv[1],"-h")==0))
00438          // Is there is exactly on argument '-h' ?
00439       {
00440          PrintUsage(1);
00441          return 0;
00442       }
00443    }
00444 #endif
00445 
00446    // Now we start the heavy work!
00447 
00448    try{
00449 
00450    globallabeldict.Init(); // Initialized the label dictionary
00451 
00452 #ifdef XMILL
00453    // Initializes the FSM structures.
00454    // It creates two labels '#' and '@#'
00455    FSMInit();
00456 
00457 #ifdef USE_FORWARD_DATAGUIDE
00458 extern void InitForwardDataGuide();
00459 
00460    InitForwardDataGuide();
00461 #endif
00462 
00463 #endif
00464 
00465    // Parse options
00466    fileidx=HandleAllOptions(argv+1,argc-1)+1;
00467 
00468 #ifdef XMILL
00469    // In the compressor, we append two default paths: '//#' and '/'
00470    // to take care of all paths
00471    char *pathptr="//#";
00472    pathexprman.AddNewVPathExpr(pathptr,pathptr+strlen(pathptr));
00473    pathptr="/";
00474    pathexprman.AddNewVPathExpr(pathptr,pathptr+strlen(pathptr));
00475 
00476    globallabeldict.FinishedPredefinedLabels();
00477       // We remember which labels are predefined (i.e. labels defined through FSMs)
00478       // All labels that are inserted later will be eliminated
00479       // between two parses of two input files.
00480 
00481    pathexprman.InitWhitespaceHandling();
00482       // If the default white space handling for the path expression
00483       // is the global setting, then we replace that reference
00484       // by the global default value.
00485       // This is done after all options are parsed,
00486       // since the global white space options could come *after*
00487       // the path expressions have been inserted.
00488 #endif
00489 
00490 
00491 
00492    // Are there no arguments except options?
00493    if(fileidx>=argc)
00494    {
00495       if(usestdout)  // Did the user specify '-c' for using 'stdout'?
00496       {
00497 
00498 #ifdef XMILL
00499          Compress(NULL,NULL);
00500 #endif
00501 
00502 #ifdef XDEMILL
00503          Uncompress(NULL,NULL);
00504 #endif
00505 
00506          return 0;
00507       }
00508       else
00509       {
00510          Error("No input file specified! Specify '-c' to use stdin/stdout");
00511          Exit();
00512       }
00513    }
00514 
00515    }
00516    catch(XMillException *)
00517       // An error occurred
00518    {
00519       return -1;
00520    }
00521 
00522    // Let's look at all files
00523    do
00524    {
00525       HandleFileArg(argv[fileidx]);
00526       if(skip_all_files)
00527          break;
00528       fileidx++;
00529    }
00530    while(fileidx<argc);
00531 
00532 
00533 ////////////////////////////////////////////////
00534 // Jason Kim added for Binary format as input
00535 ////////////////////////////////////////////////
00536    CleanupGlobalTokenTable();
00537 ////////////////////////////////////////////////
00538 // Jason Kim added for Binary format as input
00539 ////////////////////////////////////////////////
00540 
00541 
00542    return 0;
00543 }
00544 
00545 //************************************************************************
00546 //************************************************************************
00547 //************************************************************************
00548 //************************************************************************
00549 //************************************************************************
00550 //************************************************************************
00551 
00552 #ifdef XMILL
00553 
00554 inline void StoreFileHeader(Compressor *compressor)
00555 {
00556    MemStreamer tmpoutputstream(1);
00557 
00558    tmpoutputstream.StoreSInt32(
00559       (globalfullwhitespacescompress==WHITESPACE_IGNORE) ? 1 : 0,
00560       MAGIC_KEY);
00561 
00562    pathexprman.Store(&tmpoutputstream);
00563 
00564    compressor->CompressMemStream(&tmpoutputstream);
00565 }
00566 
00567 inline void CompressBlockHeader(Compressor *compressor,unsigned long totaldatasize)
00568 {
00569    MemStreamer memstream;
00570 
00571    memstream.StoreUInt32(totaldatasize);
00572 // First, we put info about path expressions, containers, and labels into
00573 // container 'tmpoutputstream'
00574 
00575    compresscontman.StoreMainInfo(&memstream);
00576 
00577    compressor->CompressMemStream(&memstream);
00578 
00579    // Let's store the new labels from the label dictionary
00580    globallabeldict.Store(compressor);
00581 
00582    compressman.CompressSmallGlobalData(compressor);
00583 
00584    compresscontman.CompressSmallContainers(compressor);
00585 }
00586 
00587 char fileheader_iswritten=0;
00588 
00589 inline void CompressCurrentBlock(Output *output,unsigned long totaldatasize)
00590 {
00591    {
00592       Compressor     compressor(output);
00593       unsigned long  headersize,headersize_compressed;
00594 
00595       if(fileheader_iswritten==0)
00596       {
00597          StoreFileHeader(&compressor);
00598          fileheader_iswritten=1;
00599       }
00600       CompressBlockHeader(&compressor,totaldatasize);
00601       compressor.FinishCompress(&headersize,&headersize_compressed);
00602 
00603       fileheadersize_orig        +=headersize;
00604       fileheadersize_compressed  +=headersize_compressed;
00605    }
00606 
00607    compressman.CompressLargeGlobalData(output);
00608 
00609    // Let's compress the actual containers
00610    compresscontman.CompressLargeContainers(output);
00611 }
00612 
00613 void Compress(char *srcfile,char *destfile)
00614 {
00615    SAXClient      saxclient;
00616    XMLParse       xmlparse;
00617    Output         output;
00618 #ifdef TIMING
00619    clock_t        c1,c2,c3;
00620 #endif
00621    int            parsetime=0,compresstime=0;
00622    char           isend;
00623 
00624    bool                   b_isend;
00625 
00626    unsigned long  totaldatasize;
00627 
00628    // We count the overal sizes of the compressed/uncompressed
00629    // structure, white space, and special (DTD...) containers
00630    // We initialize the counters here
00631    InitSpecialContainerSizeSum();
00632 
00633    fileheader_iswritten=0;
00634 
00635    if(AskOverwriteFile(destfile)==0)
00636       return;
00637 
00638    if(xmlparse.OpenFile(srcfile)==0)
00639    {
00640       Error("Could not find file '");
00641       ErrorCont(srcfile);
00642       ErrorCont("'!");
00643       PrintErrorMsg();
00644       return;
00645    }
00646 
00647    if(output.CreateFile((no_output==0) ? destfile : (char*)"")==0)
00648    {
00649       Error("Could not create output file '");
00650       ErrorCont(destfile);
00651       PrintErrorMsg();
00652       xmlparse.CloseFile();
00653       return;
00654    }
00655 
00656    mainmem.StartNewMemBlock();
00657 
00658 #ifdef USE_FORWARD_DATAGUIDE
00659    pathdict.Init();
00660    pathtree.CreateRootNode();
00661 #endif
00662 
00663 
00664    try{
00665       do
00666       {
00667 #ifndef USE_FORWARD_DATAGUIDE
00668          pathdict.Init();
00669          pathtree.CreateRootNode();
00670 #else
00671          pathdict.ResetContBlockPtrs();
00672 #endif
00673 
00674          globalcontblock      =compresscontman.CreateNewContainerBlock(3,0,NULL,NULL);
00675          globaltreecont       =globalcontblock->GetContainer(0);
00676          globalwhitespacecont =globalcontblock->GetContainer(1);
00677          globalspecialcont    =globalcontblock->GetContainer(2);
00678 #ifdef TIMING
00679          if(timing)
00680             c1=clock();
00681 #endif
00682 
00683 //////////////////////////////////////////////////////
00684 // Jason Kim added for Binary format as input : Start
00685 //////////////////////////////////////////////////////
00686 
00687                 // Use bin2tsax and tsax2sax obj
00688                 ITSAXContentHandler *pch = NULL;
00689                 IParse2TSAX *pparseBin;
00690                 CreateBin2TSAX(&IID_IParse2TSAX, (void**)&pparseBin);
00691                 CreateTSAX2SAX(&IID_ITSAXContentHandler, (void**)&pch, &saxclient);
00692 
00693                 // IFileStream *pfs = _CreateFileStream(stdin);
00694 
00695                 IFileStream *pfs = _CreateFileStream(srcfile);
00696 
00697                 b_isend = pparseBin->Parse(pfs, pch);
00698 
00699                 pfs->Release();
00700                 pparseBin->Release();
00701                 pch->Release();
00702 
00703                 if(b_isend)
00704                 {
00705                         isend=1;
00706                 }
00707 
00708                 if (!b_isend)
00709                 {
00710                         isend=xmlparse.DoParsing(&saxclient);
00711 
00712                 if(isend)
00713                 {
00714                 isend=1;
00715                         }
00716                 }
00717 
00718 ////////////////////////////////////////////////////
00719 // Jason Kim added for Binary format as input : End
00720 ////////////////////////////////////////////////////
00721 
00722 #ifdef TIMING
00723          if(timing)
00724             c2=clock();
00725 #endif
00726 
00727          compresscontman.FinishCompress();
00728 
00729          totaldatasize= compresscontman.GetDataSize()+
00730                         compressman.GetDataSize();
00731 
00732          CompressCurrentBlock(&output,totaldatasize);
00733 #ifdef TIMING
00734          if(timing)
00735          {
00736             c3=clock();
00737             parsetime+=(c2-c1);
00738             compresstime+=(c3-c2);
00739          }
00740 #endif
00741 #ifdef PROFILE
00742          if(verbose)
00743             printf("Pathtree size: %lu\n",pathtreemem.GetSize());
00744 #endif
00745 
00746          if(verbose)
00747          {
00748 
00749 #ifdef PROFILE
00750             pathtree.PrintProfile();
00751             pathdict.PrintProfile();
00752 #endif
00753          }
00754 
00755 #ifndef USE_FORWARD_DATAGUIDE
00756          pathtree.ReleaseMemory();
00757 #endif
00758          compresscontman.ReleaseMemory();
00759          blockmem.ReleaseMemory(1000);
00760       }
00761       while(isend==0);
00762    }
00763    catch(XMillException *)
00764    {
00765       output.CloseAndDeleteFile();
00766       xmlparse.CloseFile();
00767       Exit();
00768    }
00769 #ifdef TIMING
00770    if(timing)
00771       printf("%fs + %fs = %fs\n",(float)parsetime/(float)CLOCKS_PER_SEC,
00772                                  (float)compresstime/(float)CLOCKS_PER_SEC,
00773                                  (float)(parsetime+compresstime)/(float)CLOCKS_PER_SEC);
00774 #endif
00775    if(verbose)
00776       PrintSpecialContainerSizeSum();
00777 
00778 #ifdef PROFILE
00779    if(verbose)
00780       curpath.PrintProfile();
00781 #endif
00782 
00783    xmlparse.CloseFile();
00784    output.CloseFile();
00785 
00786    globallabeldict.Reset();
00787 
00788    mainmem.RemoveLastMemBlock();
00789 
00790    if(delete_inputfiles)
00791       RemoveFile(srcfile);
00792 }
00793 
00794 #endif //XMILL
00795 
00796 //********************************************************************************
00797 //********************************************************************************
00798 //********************************************************************************
00799 //********************************************************************************
00800 //********************************************************************************
00801 
00802 #ifdef XDEMILL
00803 
00804 void DecodeTreeBlock(char g_bBinary, UncompressContainer *treecont, UncompressContainer *whitespacecont, UncompressContainer *specialcont, ISAXClient *output);
00805 
00806 //****************************************************************************
00807 //****************************************************************************
00808 
00809 void UncompressFileHeader(SmallBlockUncompressor *uncompressor)
00810 {
00811    char iswhitespaceignore;
00812 
00813    if(uncompressor->LoadSInt32(&iswhitespaceignore)!=MAGIC_KEY)
00814    {
00815       Error("The file is not a compressed XMill file!");
00816       Exit();
00817    }
00818 
00819    if(iswhitespaceignore)
00820    {
00821       globalfullwhitespacescompress=WHITESPACE_IGNORE;
00822       if(output_initialized==0)
00823          output.Init(XMLINTENT_SPACES,0,1);
00824    }
00825    else
00826    {
00827       globalfullwhitespacescompress=WHITESPACE_STOREGLOBAL;
00828       if(output_initialized==0)
00829          output.Init(XMLINTENT_NONE,0,1);
00830    }
00831    pathexprman.Load(uncompressor);
00832 }
00833 
00834 static char fileheader_isread=0;
00835 
00836 char UncompressBlockHeader(Input *input)
00837 {
00838    SmallBlockUncompressor  uncompressor(input);
00839 
00840    if(fileheader_isread==0)
00841    {
00842       UncompressFileHeader(&uncompressor);
00843       fileheader_isread=1;
00844    }
00845    else
00846    {
00847       if(input->IsEndOfFile())
00848          return 1;
00849    }
00850 
00851    memory_cutoff=uncompressor.LoadUInt32();
00852 
00853    SetMemoryAllocationSize(memory_cutoff);
00854 
00855    uncomprcont.Load(&uncompressor);
00856 
00857    globallabeldict.Load(&uncompressor);
00858 
00859    compressman.UncompressSmallGlobalData(&uncompressor);
00860 
00861    uncomprcont.AllocateContMem();
00862 
00863    uncomprcont.UncompressSmallContainers(&uncompressor);
00864 
00865    return 0;
00866 }
00867 
00868 #undef CreateFile
00869 
00870 void Uncompress(char *sourcefile,char *destfile)
00871    // The main compres function
00872 {
00873    Input                input;
00874 #ifdef TIMING
00875    clock_t              c1,c2,c3,ct1=0,ct2=0;
00876 #endif
00877 
00878    UncompressContainer  *uncomprtreecont;
00879    UncompressContainer  *uncomprwhitespacecont;
00880    UncompressContainer  *uncomprspecialcont;
00881 
00882         // For Decompress with binary format, we don't ask overwritefile questions
00883         if(!g_bBinary)
00884         {
00885                 if(AskOverwriteFile(destfile)==0)
00886                         return;
00887         }
00888 
00889    if(input.OpenFile(sourcefile)==0)
00890    {
00891       Error("Could not find file '");
00892       ErrorCont(sourcefile);
00893       PrintErrorMsg();
00894       return;
00895    }
00896 
00897    globallabeldict.Init();
00898 
00899         // For Decompress with binary format, we don't create output file
00900         if(!g_bBinary)
00901         {
00902                 if(output.CreateFile((no_output==0) ? destfile : (char*)"")==0)
00903                 {
00904                         Error("Could not create output file '");
00905                         ErrorCont(destfile);
00906                         PrintErrorMsg();
00907                         input.CloseFile();
00908                         return;
00909                 }
00910         }
00911 
00912 #ifdef TIMING
00913    c1=clock();
00914 #endif
00915 
00916    unsigned long blockidx=0;
00917 
00918    mainmem.StartNewMemBlock();
00919 
00920    while(UncompressBlockHeader(&input)==0)
00921    {
00922       compressman.UncompressLargeGlobalData(&input);
00923       uncomprcont.UncompressLargeContainers(&input);
00924 
00925       uncomprcont.Init();
00926 
00927       uncomprtreecont      =uncomprcont.GetContBlock(0)->GetContainer(0);
00928       uncomprwhitespacecont=uncomprcont.GetContBlock(0)->GetContainer(1);
00929       uncomprspecialcont   =uncomprcont.GetContBlock(0)->GetContainer(2);
00930 #ifdef TIMING
00931       c2=clock();
00932 #endif
00933 
00934 
00935 /////////////////////////////////////////////////////////////////////////
00936 // Jason Kim added: Decompress file and create Binary Format file: Start
00937 /////////////////////////////////////////////////////////////////////////
00938 
00939 if (g_bBinary)
00940 {
00941         // Output is Binary Format File
00942         DecodeTreeBlock(g_bBinary, uncomprtreecont,uncomprwhitespacecont,uncomprspecialcont, &boutput);
00943 
00944 }
00945 else
00946 {
00947         // Output is Text-based XML Format File
00948         DecodeTreeBlock(g_bBinary, uncomprtreecont,uncomprwhitespacecont,uncomprspecialcont,&output);
00949 }
00950 
00951 ///////////////////////////////////////////////////////////////////////
00952 // Jason Kim added: Decompress file and create Binary Format file: End
00953 ///////////////////////////////////////////////////////////////////////
00954 
00955 
00956 
00957 #ifdef TIMING
00958       c3=clock();
00959 #endif
00960 
00961 #ifdef TIMING
00962       ct1+=c2-c1;
00963       ct2+=c3-c2;
00964 #endif
00965 
00966       uncomprcont.FinishUncompress();
00967       uncomprcont.ReleaseContMem();
00968       compressman.FinishUncompress();
00969       blockmem.ReleaseMemory(1000);
00970 #ifdef TIMING
00971       c1=clock();
00972 #endif
00973       blockidx++;
00974    }
00975 #ifdef TIMING
00976    if(verbose)
00977       printf("%fs + %fs = %fs\n",(float)ct1/(float)CLOCKS_PER_SEC,
00978                                  (float)ct2/(float)CLOCKS_PER_SEC,
00979                                  (float)(ct1+ct2)/(float)CLOCKS_PER_SEC);
00980 #endif
00981    input.CloseFile();
00982    output.CloseFile();
00983 
00984    if(delete_inputfiles)
00985       RemoveFile(sourcefile);
00986 
00987    globallabeldict.Reset();
00988 
00989    fileheader_isread=0;
00990 
00991    mainmem.RemoveLastMemBlock();
00992 }
00993 
00994 #endif // XDEMILL

Generated on Sat Dec 22 16:01:50 2001 for XMILLforBinaryFormat by doxygen1.2.11.1 written by Dimitri van Heesch, © 1997-2001