Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members  

RepeatCompress.cpp

Go to the documentation of this file.
00001 /*
00002 This product contains certain software code or other information
00003 ("AT&T Software") proprietary to AT&T Corp. ("AT&T").  The AT&T
00004 Software is provided to you "AS IS".  YOU ASSUME TOTAL RESPONSIBILITY
00005 AND RISK FOR USE OF THE AT&T SOFTWARE.  AT&T DOES NOT MAKE, AND
00006 EXPRESSLY DISCLAIMS, ANY EXPRESS OR IMPLIED WARRANTIES OF ANY KIND
00007 WHATSOEVER, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
00008 MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, WARRANTIES OF
00009 TITLE OR NON-INFRINGEMENT OF ANY INTELLECTUAL PROPERTY RIGHTS, ANY
00010 WARRANTIES ARISING BY USAGE OF TRADE, COURSE OF DEALING OR COURSE OF
00011 PERFORMANCE, OR ANY WARRANTY THAT THE AT&T SOFTWARE IS "ERROR FREE" OR
00012 WILL MEET YOUR REQUIREMENTS.
00013 
00014 Unless you accept a license to use the AT&T Software, you shall not
00015 reverse compile, disassemble or otherwise reverse engineer this
00016 product to ascertain the source code for any AT&T Software.
00017 
00018 (c) AT&T Corp. All rights reserved.  AT&T is a registered trademark of AT&T Corp.
00019 
00020 ***********************************************************************
00021 
00022 History:
00023 
00024       24/11/99  - initial release by Hartmut Liefke, liefke@seas.upenn.edu
00025                                      Dan Suciu,      suciu@research.att.com
00026 */
00027 
00028 //***************************************************************************
00029 //***************************************************************************
00030 
00031 // This module contains the repeat compressor 'rep'
00032 
00033 // We implemented the 'separate' repeat compressor:
00034 // The count is stored in a different container separate from
00035 // the data stored by the subcompressor
00036 
00037 // Note that the repeat subcompressor is *always* required to be non-rejecting -
00038 // i.e. the subcompressor must accept every string it is given.
00039 
00040 #include "CompressMan.hpp"
00041 
00042 #ifdef XDEMILL
00043 #include "UnCompCont.hpp"
00044 #endif
00045 
00046 class RepeatSepCompressorFactory;
00047 
00048 // The structure for representing the the subcompressor. There is exactly
00049 // one repeat subcompressor and one optional tail compressor.
00050 struct RepeatSepCompressorInfo
00051 {
00052    union{
00053 #ifdef XMILL
00054       UserCompressor       *subcompressor;
00055 #endif
00056 #ifdef XDEMILL
00057       UserUncompressor     *subuncompressor;
00058 #endif
00059    };
00060    union{
00061 #ifdef XMILL
00062       UserCompressor       *subcompressor2;
00063 #endif
00064 #ifdef XDEMILL
00065       UserUncompressor     *subuncompressor2;
00066 #endif
00067    };
00068 
00069    // The delimiter
00070    char                 *delimiter;
00071    unsigned long        delimiterlen;
00072 
00073    void ScanParamString(char *paramstr,int len)
00074       // Parses a parameter string
00075    {
00076       char  *endptr=paramstr+len,*endstringptr;
00077 
00078       // Skip all white spaces
00079       while(paramstr<endptr)
00080       {
00081          if((*paramstr!=' ')&&(*paramstr!=',')&&(*paramstr!='\t')&&
00082             (*paramstr!='\r')&&(*paramstr!='\n'))
00083             break;
00084          paramstr++;
00085       }
00086 
00087       // Parse the delimiter string
00088       endstringptr=::ParseString(paramstr,endptr);
00089       if(endstringptr==NULL)
00090       {
00091          Error("First parameter in rep(");
00092          ErrorCont(paramstr,len);
00093          ErrorCont(") should be a string!");
00094          Exit();
00095       }
00096 
00097       // Store the delimiter string
00098       delimiter=paramstr+1;
00099       delimiterlen=endstringptr-delimiter;
00100 
00101       // Skip white spaces
00102       endstringptr++;
00103 
00104       while(endstringptr<endptr)
00105       {
00106          if((*endstringptr!=' ')&&(*endstringptr!=',')&&(*endstringptr!='\t')&&
00107             (*endstringptr!='\r')&&(*endstringptr!='\n'))
00108             break;
00109          endstringptr++;
00110       }
00111 
00112       // Next, there must be the compressor as a parameter
00113 #ifdef XMILL
00114       subcompressor2=NULL;
00115       subcompressor=compressman.CreateCompressorInstance(endstringptr,endptr);
00116 
00117       if(subcompressor->IsRejecting())
00118       {
00119          Error("Compressor '");
00120          ErrorCont(endstringptr,endptr-endstringptr);
00121          ErrorCont("' must always accept the string!\n");
00122          Exit();
00123       }
00124 #endif
00125 #ifdef XDEMILL
00126       subuncompressor2=NULL;
00127       subuncompressor=compressman.CreateUncompressorInstance(endstringptr,endptr);
00128 #endif
00129       // Skip white spaces
00130       while(endstringptr<endptr)
00131       {
00132          if((*endstringptr!=' ')&&(*endstringptr!=',')&&(*endstringptr!='\t')&&
00133             (*endstringptr!='\r')&&(*endstringptr!='\n'))
00134             break;
00135          endstringptr++;
00136       }
00137 
00138       // No additional tail compressor?
00139       if(*endstringptr==')')
00140          return;
00141 
00142       // Otherwise find the tail compressor
00143 #ifdef XMILL
00144       subcompressor2=compressman.CreateCompressorInstance(endstringptr,endptr);
00145 #endif
00146 #ifdef XDEMILL
00147       subuncompressor2=compressman.CreateUncompressorInstance(endstringptr,endptr);
00148 #endif
00149 
00150       // Skip white spaces
00151       while(endstringptr<endptr)
00152       {
00153          if((*endstringptr!=' ')&&(*endstringptr!=',')&&(*endstringptr!='\t')&&
00154             (*endstringptr!='\r')&&(*endstringptr!='\n'))
00155             break;
00156          endstringptr++;
00157       }
00158 
00159       // Last character must be ')'
00160       if(*endstringptr!=')')
00161       {
00162          Error("Missing closed parenthesis in '");
00163          ErrorCont(paramstr,len);
00164          Exit();
00165       }
00166    }
00167 };
00168 
00169 //********************************************************************************
00170 
00171 // The repeat compressor
00172 
00173 #ifdef XMILL
00174 
00175 class RepeatSepCompressor : public UserCompressor
00176 {
00177    friend RepeatSepCompressorFactory;
00178 protected:
00179 
00180    RepeatSepCompressorInfo info;
00181    unsigned long           curcount;   // The number strings already parsed
00182 
00183 public:
00184    void ComputeProperties()
00185       // Determines the properties of the compressor
00186       // Also determines the number of containers and the size of the user data
00187    {
00188       isrejecting=0;canoverlap=1;
00189       isfixedlen=0;
00190       contnum=1+info.subcompressor->GetUserContNum();
00191       datasize=info.subcompressor->GetUserDataSize();
00192 
00193       // If there is a tail compressor, then the compressor can
00194       // be rejecting
00195       if(info.subcompressor2!=NULL)
00196       {
00197          isrejecting=info.subcompressor2->IsRejecting();
00198          contnum+=info.subcompressor2->GetUserContNum();
00199          datasize+=info.subcompressor2->GetUserDataSize();
00200       }
00201    }
00202 
00203    void InitCompress(CompressContainer *cont,char *dataptr)
00204       // Initializes the subcompressors
00205    {
00206       info.subcompressor->InitCompress(cont+1,dataptr);
00207 
00208       if(info.subcompressor2!=NULL)
00209          info.subcompressor2->InitCompress(
00210             cont+1+info.subcompressor->GetUserContNum(),
00211             dataptr+info.subcompressor->GetUserDataSize());
00212    }
00213 
00214    char ParseString(char *str,unsigned len,char *dataptr)
00215       // Parses the string and returns 1, if accepted, otherwise 0.
00216       // ParseString is *only* called if there is a tail compressor!
00217       // Only in this case, the compressor is rejecting.
00218 
00219       // This function does not actually store/compress the string
00220       // But it can keep an internal state - in the next step,
00221       // CompressString is called.
00222    {
00223       if(info.subcompressor2==NULL)
00224          return 1;
00225 
00226       char     *delimptr,*curptr,*endptr,*saveptr;
00227       unsigned i;
00228       endptr=str+len;
00229 
00230       do
00231       {
00232          curptr=str;
00233 
00234          // Let's look for the delimiter
00235          while(curptr<endptr-info.delimiterlen)
00236          {
00237             saveptr=curptr;
00238 
00239             // Check whether the string at 'curptr' is the delimiter string
00240             delimptr=info.delimiter;
00241             for(i=0;i<info.delimiterlen;i++)
00242             {
00243                if(*curptr!=*delimptr)
00244                   break;
00245                curptr++;
00246                delimptr++;
00247             }
00248             // We found the separator ?
00249             if(i==info.delimiterlen)
00250                break;
00251 
00252             // We didn't find the separator
00253             // Go to next position
00254             curptr=saveptr+1;
00255          }
00256 
00257          // There is not enough space for another delimiter?
00258          // ==> We use the 
00259          if(curptr>=endptr-info.delimiterlen)
00260             return info.subcompressor2->ParseString(str,endptr-str,
00261                      dataptr+info.subcompressor->GetUserDataSize());
00262 
00263          str=curptr;
00264       }
00265       while(1);
00266    }
00267 
00268    void CompressString(char *str,unsigned len,CompressContainer *cont,char *dataptr)
00269    {
00270       char  *delimptr,*curptr,*endptr;
00271       unsigned i;
00272 
00273       endptr=str+len;
00274 
00275       curcount=0;
00276 
00277       do
00278       {
00279          curptr=str;
00280 
00281          while(curptr<endptr-info.delimiterlen)
00282          {
00283             delimptr=info.delimiter;
00284             for(i=0;i<info.delimiterlen;i++)
00285             {
00286                if(*curptr!=*delimptr)
00287                   break;
00288                curptr++;
00289                delimptr++;
00290             }
00291             // We found the separator ?
00292             if(i==info.delimiterlen)
00293                break;
00294 
00295             curptr++;
00296          }
00297          curcount++;
00298 
00299          if(curptr>=endptr-info.delimiterlen)
00300          {
00301             curptr=endptr-info.delimiterlen;
00302             if(info.subcompressor2!=NULL)
00303                info.subcompressor2->CompressString(str,endptr-str,
00304                   cont+1+info.subcompressor->GetUserContNum(),
00305                   dataptr+info.subcompressor->GetUserDataSize());
00306             else
00307                info.subcompressor->CompressString(str,endptr-str,cont+1,dataptr);
00308          }
00309          else
00310             info.subcompressor->CompressString(str,curptr-str-info.delimiterlen,cont+1,dataptr);
00311 
00312          str=curptr;
00313       }
00314       while(curptr<endptr-info.delimiterlen);
00315 
00316       cont->StoreUInt32(curcount-1);   // 'curcount' is at least 1 !!
00317    }
00318 
00319    void FinishCompress(CompressContainer *cont,char *dataptr)
00320    {
00321       info.subcompressor->FinishCompress(cont+1,dataptr);
00322 
00323       if(info.subcompressor2!=NULL)
00324          info.subcompressor2->FinishCompress(
00325             cont+1+info.subcompressor->GetUserContNum(),
00326             dataptr+info.subcompressor->GetUserDataSize());
00327    }
00328 
00329    void PrintCompressInfo(char *dataptr,unsigned long *overalluncomprsize,unsigned long *overallcomprsize)
00330    {
00331       info.subcompressor->PrintCompressInfo(dataptr,overalluncomprsize,overallcomprsize);
00332 
00333       if(info.subcompressor2!=NULL)
00334          info.subcompressor2->PrintCompressInfo(
00335             dataptr+info.subcompressor->GetUserDataSize(),
00336             overalluncomprsize,overallcomprsize);
00337    }
00338 };
00339 #endif
00340 
00341 #ifdef XDEMILL
00342 
00343 class RepeatSepUncompressor : public UserUncompressor
00344 {
00345    friend RepeatSepCompressorFactory;
00346 
00347    RepeatSepCompressorInfo info;
00348 public:
00349 
00350    void ComputeProperties()
00351    {
00352       contnum=1+info.subuncompressor->GetUserContNum();
00353       datasize=info.subuncompressor->GetUserDataSize();
00354 
00355       if(info.subuncompressor2!=NULL)
00356       {
00357          contnum+=info.subuncompressor2->GetUserContNum();
00358          datasize+=info.subuncompressor2->GetUserDataSize();
00359       }
00360    }
00361 
00362    void InitUncompress(UncompressContainer *cont,char *dataptr)
00363    {
00364       info.subuncompressor->InitUncompress(cont,dataptr);
00365 
00366       if(info.subuncompressor2!=NULL)
00367          info.subuncompressor2->InitUncompress(
00368             cont+1+info.subuncompressor->GetUserContNum(),
00369             dataptr+info.subuncompressor->GetUserDataSize());
00370    }
00371 
00372    void UncompressItem(UncompressContainer *cont,char *dataptr,XMLOutput *output)
00373    {
00374       unsigned long count=cont->LoadUInt32();
00375 
00376       while(count>0)
00377       {
00378          info.subuncompressor->UncompressItem(cont+1,dataptr,output);
00379 
00380          output->characters(info.delimiter,info.delimiterlen);
00381          count--;
00382       }
00383 
00384       if(info.subuncompressor2!=NULL)
00385          info.subuncompressor2->UncompressItem(
00386             cont+1+info.subuncompressor->GetUserContNum(),
00387             dataptr+info.subuncompressor->GetUserDataSize(),output);
00388       else
00389          info.subuncompressor->UncompressItem(cont+1,dataptr,output);
00390    }
00391 };
00392 
00393 #endif
00394 
00395 class RepeatSepCompressorFactory : public UserCompressorFactory
00396 {
00397 public:
00398    char *GetName()         {  return "rep"; }
00399    char *GetDescription()  {  return "Compressor for substrings separated by some delimiter string"; }
00400 
00401 #ifdef XMILL
00402    UserCompressor *InstantiateCompressor(char *paramstr,int len)
00403    {
00404       if(paramstr==NULL)
00405       {
00406          Error("Division compressor 'seq' requires a sequence of strings and compressors as parameters");
00407          Exit();
00408       }
00409 
00410       RepeatSepCompressor  *repeatcompressor=new RepeatSepCompressor();
00411 
00412       repeatcompressor->info.ScanParamString(paramstr,len);
00413       repeatcompressor->ComputeProperties();
00414 
00415       return repeatcompressor;
00416    }
00417 #endif
00418 
00419 #ifdef XDEMILL
00420    UserUncompressor *InstantiateUncompressor(char *paramstr,int len)
00421    {
00422       RepeatSepUncompressor  *repeatuncompressor=new RepeatSepUncompressor();
00423 
00424       repeatuncompressor->info.ScanParamString(paramstr,len);
00425       repeatuncompressor->ComputeProperties();
00426 
00427       return repeatuncompressor;
00428    }
00429 #endif
00430 };
00431 
00432 RepeatSepCompressorFactory  repeatsepcompressfactory;

Generated on Sat Oct 13 16:08:40 2001 for XMILL by doxygen1.2.11.1 written by Dimitri van Heesch, © 1997-2001