00001 /*
00002 This product contains certain software code or other information
00003 ("AT&T Software") proprietary to AT&T Corp. ("AT&T"). The AT&T
00004 Software is provided to you "AS IS". YOU ASSUME TOTAL RESPONSIBILITY
00005 AND RISK FOR USE OF THE AT&T SOFTWARE. AT&T DOES NOT MAKE, AND
00006 EXPRESSLY DISCLAIMS, ANY EXPRESS OR IMPLIED WARRANTIES OF ANY KIND
00007 WHATSOEVER, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
00008 MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, WARRANTIES OF
00009 TITLE OR NON-INFRINGEMENT OF ANY INTELLECTUAL PROPERTY RIGHTS, ANY
00010 WARRANTIES ARISING BY USAGE OF TRADE, COURSE OF DEALING OR COURSE OF
00011 PERFORMANCE, OR ANY WARRANTY THAT THE AT&T SOFTWARE IS "ERROR FREE" OR
00012 WILL MEET YOUR REQUIREMENTS.
00013
00014 Unless you accept a license to use the AT&T Software, you shall not
00015 reverse compile, disassemble or otherwise reverse engineer this
00016 product to ascertain the source code for any AT&T Software.
00017
00018 (c) AT&T Corp. All rights reserved. AT&T is a registered trademark of AT&T Corp.
00019
00020 ***********************************************************************
00021
00022 History:
00023
00024 24/11/99 - initial release by Hartmut Liefke, liefke@seas.upenn.edu
00025 Dan Suciu, suciu@research.att.com
00026 */
00027
00028 //**************************************************************************
00029 //**************************************************************************
00030
00031 // This module implements the management of container path expressions
00032
00033 #ifndef REGEXPRMAN_HPP
00034 #define REGEXPRMAN_HPP
00035
00036 #include "FSM.hpp"
00037 #include "CompressMan.hpp"
00038
// Forward declarations of types that this header only refers to
// through pointers or friend declarations
class VPathExprMan;
struct FSMManStateItem;
class SmallBlockUncompressor;
class PathDictNode;  // Needed by VPathExpr::CompressTextItem(), which is
                     // declared before the later 'class PathDictNode;' line
00042
00043
00044 class VPathExpr
00045 // Represents a single container path expression
00046 {
00047 friend VPathExprMan;
00048
00049 VPathExpr *next; // The next container path expression in the path manager
00050
00051 #ifdef XMILL
00052
00053 // For forward dataguides, we also keep the forward FSM
00054 #ifdef USE_FORWARD_DATAGUIDE
00055 FSM *forwardfsm; // The forward FSM
00056 #endif
00057 FSM *reversefsm; // The reverse FSM
00058 #endif
00059
00060 // We also keep the original path expression string
00061 char *regexprstr,*regexprendptr, // The entire string
00062 *regexprusercompressptr; // A pointer to the user compressor string
00063
00064 unsigned long idx:20; // The index of this path expression
00065
00066 // Information about the compressor
00067
00068 unsigned long leftwhitespacescompress:2; // Describes how left white spaces should be compressed
00069 unsigned long rightwhitespacescompress:2;// Describes how right white spaces should be compressed
00070
00071 // unsigned long compresscontnum:10; // Number of containers that are needed by the compressor
00072 // unsigned long compressuserdatasize:10; // Number of containers that are needed by the compressor
00073
00074 #ifdef XMILL
00075 UserCompressor *usercompressor; // The user compressor
00076 #endif
00077 #ifdef XDEMILL
00078 UserUncompressor *useruncompressor; // The user decompressor
00079 #endif
00080
00081 void HandlePathExprOption(char * &str,char *endptr);
00082 // Parses the specific path expression option and constructs the user compressor object
00083
00084 void ParseUserCompressorString(char * &str,char *endptr);
00085 // Parses the user compressor string + options and constructs the user compressor object
00086
00087 void PathParseError(char *errmsg,char *errptr);
00088 // Outputs an path error message and exits
00089
00090 void CreateXPathEdge(char *from,char *to,FSM *fsm,FSMState *fromstate,FSMState *tostate,char ignore_pounds);
00091 // Reads the atomic symbol betwen 'from' and 'to' and generates
00092 // the corresponding edge between 'fromstate' and 'tostate' in 'fsm.
00093 // If ignore_pound is 1, then pound symbols are simply treated as '*' symbols.
00094
00095 void ParseXPathItem(char * &startptr,char *endptr,FSM *fsm,FSMState *fromstate,FSMState *tostate,char ignore_pounds);
00096 FSM *ParseXPath(char * &str,char *endptr,char ignore_pounds);
00097
00098 void InitWhitespaceHandling();
00099 // If the default white space handling for the path expression
00100 // is the global setting, then we replace that reference
00101 // by the global default value
00102 // This function is called after the parsing
00103
00104 public:
00105
00106 void *operator new(size_t size, MemStreamer *mem) { return mem->GetByteBlock(size); }
00107 void operator delete(void *ptr) {}
00108 #ifdef SPECIAL_DELETE
00109 void operator delete(void *ptr,MemStreamer *mem) {}
00110 #endif
00111
00112 VPathExpr()
00113 {
00114 #ifdef XMILL
00115
00116 #ifdef USE_FORWARD_DATAGUIDE
00117 forwardfsm=NULL;
00118 #endif
00119 reversefsm=NULL;
00120 next=NULL;
00121 #endif
00122 }
00123
00124 void CreateFromString(char * &str,char *endptr);
00125 // This function initializes the object with the path expression
00126 // found between 'str' and 'endptr.
00127 // It creates the forward and backward FSM and parses the
00128 // user compressor string
00129
00130 void PrintRegExpr(); // Prints the container path expression
00131
00132 unsigned long GetIdx() { return idx; }
00133 VPathExpr *GetNext() { return next; }
00134
00135 #ifdef XMILL
00136 FSMState *GetReverseFSMStartState() { return reversefsm->GetStartState();}
00137 #ifdef USE_FORWARD_DATAGUIDE
00138 FSMState *GetForwardFSMStartState() { return forwardfsm->GetStartState();}
00139 #endif
00140
00141 void Store(MemStreamer *output); // Stores the FSM
00142 // Stores the path expression in 'output'
00143 #endif
00144
00145 #ifdef XDEMILL
00146 void Load(SmallBlockUncompressor *uncompressor);
00147 // Loads the user compressor string from 'uncompress'
00148 // It parses the user compressor string and creates the corresponding user compressor
00149 #endif
00150
00151 #ifdef XMILL
00152 unsigned long GetUserContNum() { return usercompressor->GetUserContNum(); }
00153 unsigned long GetUserDataSize() { return usercompressor->GetUserDataSize(); }
00154 #endif
00155 #ifdef XDEMILL
00156 unsigned long GetUserContNum() { return useruncompressor->GetUserContNum(); }
00157 unsigned long GetUserDataSize() { return useruncompressor->GetUserDataSize(); }
00158 #endif
00159
00160 #ifdef XMILL
00161 void InitCompress(CompressContainer *cont,char *dataptr)
00162 {
00163 usercompressor->InitCompress(cont,dataptr);
00164 }
00165
00166 void FinishCompress(CompressContainer *cont,char *dataptr)
00167 {
00168 usercompressor->FinishCompress(cont,dataptr);
00169 }
00170
00171 char CompressTextItem(char *str,int len,PathDictNode *pathdictnode,int wsleftlen,int wsrightlen);
00172 // This is the main function for compressing a text item
00173 // The function returns 1 if the user compressor accepted the string and
00174 // thecompression was successful
00175 // Otherwise, the function returns 0
00176
00177 UserCompressor *GetUserCompressor()
00178 {
00179 return usercompressor;
00180 }
00181
00182 #endif
00183
00184 #ifdef XDEMILL
00185 UserUncompressor *GetUserUncompressor()
00186 {
00187 return useruncompressor;
00188 }
00189 #endif
00190 };
00191
00192
00193 //**********************************************************
00194 //**********************************************************
00195 //**********************************************************
00196
00197 struct PathTreeNode;
00198
00199 class VPathExprMan
00200 // The path expression manager
00201 {
00202 unsigned pathexprnum; // The number of paths
00203 VPathExpr *pathexprs; // The list of paths
00204 VPathExpr *lastpathexpr; // The pointer to the last path
00205
00206 public:
00207
00208 VPathExprMan()
00209 {
00210 pathexprnum=0;
00211 pathexprs=lastpathexpr=NULL;
00212 }
00213
00214 VPathExpr *GetPathExpr(unsigned long idx)
00215 // Returns the path expression with index 'idx'
00216 {
00217 VPathExpr *curpathexpr=pathexprs;
00218 while(idx--)
00219 curpathexpr=curpathexpr->next;
00220 return curpathexpr;
00221 }
00222 #ifdef XMILL
00223 void AddNewVPathExpr(char * &str,char *endptr);
00224 // Adds a new path expression to the set of paths
00225
00226 void Store(MemStreamer *memstream);
00227 // Stores all path expressions
00228 #endif
00229
00230 #ifdef XDEMILL
00231 void Load(SmallBlockUncompressor *uncompressor);
00232 // Load the set of path expressions from 'uncompressor'
00233 #endif
00234
00235 VPathExpr *GetVPathExprs() { return pathexprs; }
00236
00237 void InitWhitespaceHandling();
00238 // If the default white space handling for the path expression
00239 // is the global setting, then we replace that reference
00240 // by the global default value
00241 // This function is called after all path expressions
00242 // habe been inserted
00243 };
00244
00245 #ifdef XMILL
00246
00247 class PathDictNode;
00248
00249 extern MemStreamer *pathtreemem;
00250
00251 struct FSMManStateItem
00252 // This structure is used to represent a state within a set of states
00253 // for a specific path in the XML document
00254 {
00255 FSMManStateItem *next; // The state of the next FSM in the list
00256
00257 FSMState *curstate; // The current state
00258 VPathExpr *pathexpr; // The path expression that this state belongs to
00259 PathDictNode *pathdictnode; // The node in the path dictionary
00260 // representing the pounds that already occurred
00261 #ifndef USE_FORWARD_DATAGUIDE
00262 unsigned long overpoundedge:1; // State has been reached over a pound-edge?
00263 unsigned long poundcount:7; // How many pound-edges have been passed?
00264 #endif
00265
00266 public:
00267
00268 void *operator new(size_t size) { return pathtreemem->GetByteBlock(size); }
00269 void operator delete(void *ptr) {}
00270
00271 PathDictNode *GetPathDictNode()
00272 {
00273 return pathdictnode;
00274 }
00275 };
00276
00277 #endif
00278
00279 #endif
// Source listing generated by Doxygen 1.2.11.1, written by Dimitri van Heesch, © 1997-2001