Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members  

VPathExprMan.hpp

Go to the documentation of this file.
00001 /*
00002 This product contains certain software code or other information
00003 ("AT&T Software") proprietary to AT&T Corp. ("AT&T").  The AT&T
00004 Software is provided to you "AS IS".  YOU ASSUME TOTAL RESPONSIBILITY
00005 AND RISK FOR USE OF THE AT&T SOFTWARE.  AT&T DOES NOT MAKE, AND
00006 EXPRESSLY DISCLAIMS, ANY EXPRESS OR IMPLIED WARRANTIES OF ANY KIND
00007 WHATSOEVER, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
00008 MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, WARRANTIES OF
00009 TITLE OR NON-INFRINGEMENT OF ANY INTELLECTUAL PROPERTY RIGHTS, ANY
00010 WARRANTIES ARISING BY USAGE OF TRADE, COURSE OF DEALING OR COURSE OF
00011 PERFORMANCE, OR ANY WARRANTY THAT THE AT&T SOFTWARE IS "ERROR FREE" OR
00012 WILL MEET YOUR REQUIREMENTS.
00013 
00014 Unless you accept a license to use the AT&T Software, you shall not
00015 reverse compile, disassemble or otherwise reverse engineer this
00016 product to ascertain the source code for any AT&T Software.
00017 
00018 (c) AT&T Corp. All rights reserved.  AT&T is a registered trademark of AT&T Corp.
00019 
00020 ***********************************************************************
00021 
00022 History:
00023 
00024       24/11/99  - initial release by Hartmut Liefke, liefke@seas.upenn.edu
00025                                      Dan Suciu,      suciu@research.att.com
00026 */
00027 
00028 //**************************************************************************
00029 //**************************************************************************
00030 
00031 // This module implements the management of container path expressions
00032 
00033 #ifndef REGEXPRMAN_HPP
00034 #define REGEXPRMAN_HPP
00035 
00036 #include "FSM.hpp"
00037 #include "CompressMan.hpp"
00038 
00039 class VPathExprMan;             // Named in VPathExpr's friend declaration before its definition further below
00040 struct FSMManStateItem;         // Defined at the end of this header (only when XMILL is defined)
00041 class SmallBlockUncompressor;   // Only used by pointer in the Load() declarations below
00042 
00043 
00044 class VPathExpr
00045    // Represents a single container path expression: its FSM(s), the original expression string and its user (de)compressor
00046 {
00047    friend VPathExprMan;   // The manager accesses the private 'next' link directly
00048 
00049    VPathExpr      *next;   // The next container path expression in the path manager
00050 
00051 #ifdef XMILL
00052 
00053    // For forward dataguides, we also keep the forward FSM
00054 #ifdef USE_FORWARD_DATAGUIDE
00055    FSM            *forwardfsm;   // The forward FSM
00056 #endif
00057    FSM            *reversefsm;   // The reverse FSM
00058 #endif
00059 
00060    // We also keep the original path expression string
00061    char           *regexprstr,*regexprendptr,   // The entire string
00062                   *regexprusercompressptr;      // A pointer to the user compressor string
00063 
00064    unsigned long  idx:20;  // The index of this path expression
00065 
00066    // Information about the compressor
00067 
00068    unsigned long  leftwhitespacescompress:2; // Describes how left white spaces should be compressed
00069    unsigned long  rightwhitespacescompress:2;// Describes how right white spaces should be compressed
00070 
00071 //   unsigned long  compresscontnum:10;        // Number of containers that are needed by the compressor
00072 //   unsigned long  compressuserdatasize:10;   // Number of containers that are needed by the compressor
00073 
00074 #ifdef XMILL
00075    UserCompressor    *usercompressor;     // The user compressor
00076 #endif
00077 #ifdef XDEMILL
00078    UserUncompressor  *useruncompressor;   // The user decompressor
00079 #endif
00080 
00081    void HandlePathExprOption(char * &str,char *endptr);
00082       // Parses the specific path expression option and constructs the user compressor object
00083 
00084    void ParseUserCompressorString(char * &str,char *endptr);
00085       // Parses the user compressor string + options and constructs the user compressor object
00086 
00087    void PathParseError(char *errmsg,char *errptr);
00088       // Outputs a path error message and exits
00089 
00090    void CreateXPathEdge(char *from,char *to,FSM *fsm,FSMState *fromstate,FSMState *tostate,char ignore_pounds);
00091       // Reads the atomic symbol between 'from' and 'to' and generates
00092       // the corresponding edge between 'fromstate' and 'tostate' in 'fsm'.
00093       // If ignore_pounds is 1, then pound symbols are simply treated as '*' symbols.
00094       
00095    void ParseXPathItem(char * &startptr,char *endptr,FSM *fsm,FSMState *fromstate,FSMState *tostate,char ignore_pounds);
00096    FSM *ParseXPath(char * &str,char *endptr,char ignore_pounds);
00097       // NOTE(review): the two parse functions above are undocumented; presumably they build the FSM for the path text up to 'endptr' - confirm in the implementation
00098    void InitWhitespaceHandling();
00099       // If the default white space handling for the path expression
00100       // is the global setting, then we replace that reference
00101       // by the global default value
00102       // This function is called after the parsing
00103 
00104 public:
00105    // Storage is obtained from the given MemStreamer; both operator delete variants have empty bodies
00106    void *operator new(size_t size, MemStreamer *mem)  {  return mem->GetByteBlock(size); }
00107    void operator delete(void *ptr)  {}
00108 #ifdef SPECIAL_DELETE
00109    void operator delete(void *ptr,MemStreamer *mem)  {}
00110 #endif
00111 
00112    VPathExpr()
00113    {
00114 #ifdef XMILL
00115       // NOTE(review): every initialization below, including 'next', is compiled only when XMILL is defined; all other members are not set by this constructor
00116 #ifdef USE_FORWARD_DATAGUIDE
00117       forwardfsm=NULL;
00118 #endif
00119       reversefsm=NULL;
00120       next=NULL;
00121 #endif
00122    }
00123 
00124    void CreateFromString(char * &str,char *endptr);
00125       // This function initializes the object with the path expression
00126       // found between 'str' and 'endptr'.
00127       // It creates the forward and backward FSM and parses the
00128       // user compressor string
00129 
00130    void PrintRegExpr(); // Prints the container path expression
00131 
00132    unsigned long GetIdx()  {  return idx; }
00133    VPathExpr *GetNext() {  return next;   }
00134    // The FSM accessors below dereference reversefsm/forwardfsm without a NULL check (the constructor sets both to NULL)
00135 #ifdef XMILL
00136    FSMState *GetReverseFSMStartState() {  return reversefsm->GetStartState();}
00137 #ifdef USE_FORWARD_DATAGUIDE
00138    FSMState *GetForwardFSMStartState() {  return forwardfsm->GetStartState();}
00139 #endif
00140 
00141    void Store(MemStreamer *output);    // Stores the FSM
00142       // Stores the path expression in 'output'
00143 #endif
00144 
00145 #ifdef XDEMILL
00146    void Load(SmallBlockUncompressor *uncompressor);
00147       // Loads the user compressor string from 'uncompressor'
00148       // It parses the user compressor string and creates the corresponding user compressor
00149 #endif
00150    // Both accessors forward to the user compressor (XMILL) or the user decompressor (XDEMILL). NOTE(review): with both macros defined they would be declared twice
00151 #ifdef XMILL
00152    unsigned long GetUserContNum()   {  return usercompressor->GetUserContNum(); }
00153    unsigned long GetUserDataSize()  {  return usercompressor->GetUserDataSize();  }
00154 #endif
00155 #ifdef XDEMILL
00156    unsigned long GetUserContNum()   {  return useruncompressor->GetUserContNum(); }
00157    unsigned long GetUserDataSize()  {  return useruncompressor->GetUserDataSize();  }
00158 #endif
00159 
00160 #ifdef XMILL
00161    void InitCompress(CompressContainer *cont,char *dataptr)
00162    {
00163       usercompressor->InitCompress(cont,dataptr);
00164    }
00165 
00166    void FinishCompress(CompressContainer *cont,char *dataptr)
00167    {
00168       usercompressor->FinishCompress(cont,dataptr);
00169    }
00170 
00171    char CompressTextItem(char *str,int len,PathDictNode *pathdictnode,int wsleftlen,int wsrightlen);
00172       // This is the main function for compressing a text item
00173       // The function returns 1 if the user compressor accepted the string and
00174       // the compression was successful
00175       // Otherwise, the function returns 0
00176 
00177    UserCompressor *GetUserCompressor()
00178    {
00179       return usercompressor;
00180    }
00181    
00182 #endif
00183 
00184 #ifdef XDEMILL
00185    UserUncompressor *GetUserUncompressor()
00186    {
00187       return useruncompressor;
00188    }
00189 #endif
00190 };
00191 
00192 
00193 //**********************************************************
00194 //**********************************************************
00195 //**********************************************************
00196 
00197 struct PathTreeNode;   // Forward declaration; not referenced elsewhere in this header
00198 
00199 class VPathExprMan
00200    // The path expression manager: keeps the path expressions as a singly linked list (head and last pointers)
00201 {
00202    unsigned       pathexprnum;      // The number of paths
00203    VPathExpr      *pathexprs;       // The list of paths
00204    VPathExpr      *lastpathexpr;    // The pointer to the last path
00205 
00206 public:
00207    // Starts with an empty list: zero paths and NULL head/last pointers
00208    VPathExprMan()
00209    {
00210       pathexprnum=0;
00211       pathexprs=lastpathexpr=NULL;
00212    }
00213 
00214    VPathExpr *GetPathExpr(unsigned long idx)
00215       // Returns the path expression reached by following 'idx' next-links from the list head (idx 0 yields the head); no bounds or NULL check is performed
00216    {
00217       VPathExpr *curpathexpr=pathexprs;
00218       while(idx--)
00219          curpathexpr=curpathexpr->next;
00220       return curpathexpr;
00221    }
00222 #ifdef XMILL
00223    void AddNewVPathExpr(char * &str,char *endptr);
00224       // Adds a new path expression to the set of paths
00225 
00226    void Store(MemStreamer *memstream);
00227       // Stores all path expressions
00228 #endif
00229 
00230 #ifdef XDEMILL
00231    void Load(SmallBlockUncompressor *uncompressor);
00232       // Load the set of path expressions from 'uncompressor'
00233 #endif
00234 
00235    VPathExpr *GetVPathExprs() {  return pathexprs; }
00236 
00237    void InitWhitespaceHandling();
00238       // If the default white space handling for the path expression
00239       // is the global setting, then we replace that reference
00240       // by the global default value
00241       // This function is called after all path expressions
00242       // have been inserted
00243 };
00244 
00245 #ifdef XMILL
00246 
00247 class PathDictNode;   // NOTE(review): VPathExpr::CompressTextItem already names this type earlier in the header, so an included header presumably declares it as well - confirm
00248 
00249 extern MemStreamer *pathtreemem;   // Defined elsewhere; FSMManStateItem::operator new takes its storage from this streamer
00250 
00251 struct FSMManStateItem
00252    // This structure is used to represent a state within a set of states
00253    // for a specific path in the XML document
00254 {
00255    FSMManStateItem   *next;            // The state of the next FSM in the list
00256 
00257    FSMState          *curstate;        // The current state
00258    VPathExpr         *pathexpr;        // The path expression that this state belongs to
00259    PathDictNode      *pathdictnode;    // The node in the path dictionary
00260                                        // representing the pounds that already occurred
00261 #ifndef USE_FORWARD_DATAGUIDE
00262    unsigned long     overpoundedge:1;  // State has been reached over a pound-edge?
00263    unsigned long     poundcount:7;     // How many pound-edges have been passed?
00264 #endif
00265    // The 'public:' label below is redundant: struct members are public by default
00266 public:
00267    // Storage comes from the global 'pathtreemem' streamer; operator delete has an empty body
00268    void *operator new(size_t size)  {  return pathtreemem->GetByteBlock(size); }
00269    void operator delete(void *ptr)  {}
00270 
00271    PathDictNode *GetPathDictNode()
00272    {
00273       return pathdictnode;
00274    }
00275 };
00276 
00277 #endif
00278 
00279 #endif

Generated on Sat Oct 13 16:08:41 2001 for XMILL by doxygen1.2.11.1 written by Dimitri van Heesch, © 1997-2001