00001 /* 00002 This product contains certain software code or other information 00003 ("AT&T Software") proprietary to AT&T Corp. ("AT&T"). The AT&T 00004 Software is provided to you "AS IS". YOU ASSUME TOTAL RESPONSIBILITY 00005 AND RISK FOR USE OF THE AT&T SOFTWARE. AT&T DOES NOT MAKE, AND 00006 EXPRESSLY DISCLAIMS, ANY EXPRESS OR IMPLIED WARRANTIES OF ANY KIND 00007 WHATSOEVER, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 00008 MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, WARRANTIES OF 00009 TITLE OR NON-INFRINGEMENT OF ANY INTELLECTUAL PROPERTY RIGHTS, ANY 00010 WARRANTIES ARISING BY USAGE OF TRADE, COURSE OF DEALING OR COURSE OF 00011 PERFORMANCE, OR ANY WARRANTY THAT THE AT&T SOFTWARE IS "ERROR FREE" OR 00012 WILL MEET YOUR REQUIREMENTS. 00013 00014 Unless you accept a license to use the AT&T Software, you shall not 00015 reverse compile, disassemble or otherwise reverse engineer this 00016 product to ascertain the source code for any AT&T Software. 00017 00018 (c) AT&T Corp. All rights reserved. AT&T is a registered trademark of AT&T Corp. 00019 00020 *********************************************************************** 00021 00022 History: 00023 00024 24/11/99 - initial release by Hartmut Liefke, liefke@seas.upenn.edu 00025 Dan Suciu, suciu@research.att.com 00026 */ 00027 00028 //************************************************************************** 00029 //************************************************************************** 00030 00031 // This module contains the abstract definition of user compressors 00032 00033 #ifndef USERCOMPRESS_HPP 00034 #define USERCOMPRESS_HPP 00035 00036 class CompressMan; 00037 00038 #include "MemStreamer.hpp" 00039 00040 #ifdef XMILL 00041 #include "ContMan.hpp" 00042 00043 class UserCompressor; 00044 class CompressContainer; 00045 #endif 00046 00047 00048 #ifdef XDEMILL 00049 #include "XMLOutput.hpp" 00050 00051 class UserUncompressor; 00052 class Input; 00053 class SmallBlockUncompressor; 00054 #endif 00055 00056 extern MemStreamer mainmem; 00057 00058 class UserCompressorFactory 00059 // Each user compressor is firstly represented by a UserCompressorFactory object 00060 // Typically, there is one global UserCompressorFactory object. 00061 // This object is then registered and provides basic capabilities 00062 // such as returning its name, instantiating compressor or decompressor 00063 { 00064 friend CompressMan; 00065 00066 UserCompressorFactory *next; // The next factory in the list of compressor factories 00067 00068 virtual char *GetName()=0; 00069 // Must return the (unique!) name of the compressor 00070 00071 virtual char *GetDescription()=0; 00072 // Should return a single-line (without '\n') description of the compressor 00073 00074 #ifdef XMILL 00075 virtual UserCompressor *InstantiateCompressor(char *paramstr,int len)=0; 00076 // Instantiates the Compressor for specific parameters 00077 #endif 00078 #ifdef XDEMILL 00079 virtual UserUncompressor *InstantiateUncompressor(char *paramstr,int len)=0; 00080 // Instantiates the Decompressor for specific parameters 00081 #endif 00082 00083 // CompressFactories are also allowed to store status information 00084 // in the compressed file! The following procedure as used for 00085 // compressing/decompressing this information 00086 // Small data (<2KBytes) is stored in the header, while 00087 // lare data is stored in separate zlib-blocks in the output file 00088 #ifdef XMILL 00089 virtual void CompressSmallGlobalData(Compressor *compressor) {} 00090 virtual void CompressLargeGlobalData(Output *output) {} 00091 00092 virtual unsigned long GetGlobalDataSize() { return 0;} 00093 // Determines how much memory the uncompressed status information 00094 // needs. This number is stored in the compressed file and 00095 // later used in the decompressor to allocate the right amount 00096 // of memory 00097 #endif 00098 // Similarly, the user compressor factory on the decompressor site 00099 // is able to decompress the data from the input file 00100 #ifdef XDEMILL 00101 virtual void UncompressSmallGlobalData(SmallBlockUncompressor *uncompressor) {} 00102 virtual void UncompressLargeGlobalData(Input *input) {} 00103 virtual void FinishUncompress() {} 00104 #endif 00105 public: 00106 UserCompressorFactory(); 00107 }; 00108 00109 //************************************************************************** 00110 //************************************************************************** 00111 00112 // THe definition of the user compressor class follows 00113 00114 #ifdef XMILL 00115 00116 class UserCompressor 00117 { 00118 friend CompressMan; 00119 00120 protected: 00121 unsigned short datasize; // The memory space needed for representing the state 00122 // of this compressor 00123 unsigned short contnum:13; // The number of containers requested by this compressor 00124 unsigned short isrejecting:1; // Determines whether the compressor can potentially reject strings 00125 unsigned short canoverlap:1; // Determines whether the containers used by the compressor can also be 00126 // used by other compressor. In other words, is it possible that other 00127 // is stored in the container between the compressed data of two strings 00128 // of this compressor. For example, the run-length encode 00129 // is not 'overlapping', while most other 00130 // compressors are overlapping 00131 unsigned short isfixedlen:1; // Does the compressor accept strings of fixed length? 00132 // Most compressors don't. The constant compressor does. 00133 00134 public: 00135 00136 void *operator new(size_t size) { return mainmem.GetByteBlock(size);} 00137 void operator delete(void *ptr) {} 00138 00139 // The following functions must be overloaded by the corresponding 00140 // user compressor 00141 00142 unsigned short GetUserContNum() { return contnum; } 00143 unsigned short GetUserDataSize() { return datasize; } 00144 unsigned char IsRejecting() { return (unsigned char)isrejecting; } 00145 unsigned char CanOverlap() { return (unsigned char)canoverlap; } 00146 unsigned char IsFixedLen() { return (unsigned char)isfixedlen; } 00147 00148 // The following function are for compression 00149 00150 virtual void InitCompress(CompressContainer *cont,char *dataptr) {} 00151 // Before we start any compression, we initialize the compressor. 00152 // 'dataptr' denotes the state 00153 00154 virtual char ParseString(char *str,unsigned len,char *dataptr) { return 1; } 00155 // Parses the string and returns 1, if accepted, otherwise 0. 00156 // This function does not actually store/compreess the string 00157 // But it can keep an internal state - in the next step, 00158 // CompressString is called. 00159 00160 virtual void CompressString(char *str,unsigned len,CompressContainer *cont,char *dataptr)=0; 00161 // Compresses the given input string 00162 // 'dataptr' denotes the state 00163 // If the compressor is 'rejecting', then the function can expect 00164 // that 'ParseString' has been called before. 00165 00166 virtual void FinishCompress(CompressContainer *cont,char *dataptr) {} 00167 // Finishes the compression - the compressor should write any 00168 // remaining data to the containers 00169 00170 virtual void PrintCompressInfo(char *dataptr,unsigned long *overalluncomprsize,unsigned long *overallcomprsize) {} 00171 // Prints statistical information about how well the compressor compressed 00172 // the data 00173 }; 00174 00175 #endif 00176 00177 //*************************************************************************** 00178 00179 #ifdef XDEMILL 00180 00181 class UncompressContainer; 00182 00183 class UserUncompressor 00184 { 00185 friend CompressMan; 00186 00187 protected: 00188 unsigned short datasize; // The size of the user compressor state space 00189 unsigned short contnum; // The number of containers for this user compressor 00190 00191 public: 00192 00193 void *operator new(size_t size) { return mainmem.GetByteBlock(size);} 00194 void operator delete(void *ptr) {} 00195 00196 unsigned short GetUserContNum() { return contnum; } 00197 unsigned short GetUserDataSize() { return datasize; } 00198 00199 // The following functions must be overloaded by the corresponding 00200 // user decompressor 00201 virtual void InitUncompress(UncompressContainer *cont,char *dataptr) {} 00202 // Initializes the decompressor 00203 00204 virtual void UncompressItem(UncompressContainer *cont,char *dataptr,XMLOutput *output)=0; 00205 // Does the actual decompression of a single text item 00206 // and prints the text to 'output' 00207 00208 virtual void FinishUncompress(UncompressContainer *cont,char *dataptr) {} 00209 // Finished the decompression 00210 }; 00211 00212 #endif 00213 00214 //************************************************************************* 00215 //************************************************************************* 00216 00217 // An auxiliary function to identify the end of a string 00218 inline char *ParseString(char *from,char *to) 00219 { 00220 if(*from!='"') 00221 return NULL; 00222 00223 from++; 00224 00225 while(from<to) 00226 { 00227 if(*from=='"') 00228 return from; 00229 from++; 00230 } 00231 00232 Error("String constant \"...\" expected instead of '"); 00233 ErrorCont(from,to-from); 00234 Exit(); 00235 return NULL; 00236 } 00237 00238 inline char *SkipWhiteSpaces(char *ptr,char *endptr) 00239 // Skips white spaces starting at *ptr and returns the 00240 // the pointer to the first non-white-space character. 00241 // The iteration stops if 'endptr' is reached. 00242 { 00243 while((ptr<endptr)&& 00244 ((*ptr==' ')||(*ptr==',')||(*ptr=='\t')||(*ptr=='\r')||(*ptr=='\n'))) 00245 00246 ptr++; 00247 00248 return ptr; 00249 } 00250 00251 #endif