00001 /* 00002 This product contains certain software code or other information 00003 ("AT&T Software") proprietary to AT&T Corp. ("AT&T"). The AT&T 00004 Software is provided to you "AS IS". YOU ASSUME TOTAL RESPONSIBILITY 00005 AND RISK FOR USE OF THE AT&T SOFTWARE. AT&T DOES NOT MAKE, AND 00006 EXPRESSLY DISCLAIMS, ANY EXPRESS OR IMPLIED WARRANTIES OF ANY KIND 00007 WHATSOEVER, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 00008 MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, WARRANTIES OF 00009 TITLE OR NON-INFRINGEMENT OF ANY INTELLECTUAL PROPERTY RIGHTS, ANY 00010 WARRANTIES ARISING BY USAGE OF TRADE, COURSE OF DEALING OR COURSE OF 00011 PERFORMANCE, OR ANY WARRANTY THAT THE AT&T SOFTWARE IS "ERROR FREE" OR 00012 WILL MEET YOUR REQUIREMENTS. 00013 00014 Unless you accept a license to use the AT&T Software, you shall not 00015 reverse compile, disassemble or otherwise reverse engineer this 00016 product to ascertain the source code for any AT&T Software. 00017 00018 (c) AT&T Corp. All rights reserved. AT&T is a registered trademark of AT&T Corp. 00019 00020 *********************************************************************** 00021 00022 History: 00023 00024 24/11/99 - initial release by Hartmut Liefke, liefke@seas.upenn.edu 00025 Dan Suciu, suciu@research.att.com 00026 */ 00027 00028 //************************************************************************** 00029 //************************************************************************** 00030 00031 // This module contains the backward data guide 00032 // The backward data guide maps paths into path dictionary (see PathDict.hpp) 00033 // nodes. Each path in the backward data guide represents a 00034 // path in the XML document that is *backward* - i.e. from 00035 // the leaf (i.e. a leaf node with some string value) toward 00036 // the root of the XML document. 00037 00038 // The backward dataguide is implemented as a tree. 00039 00040 #ifndef PATHTREE_HPP 00041 #define PATHTREE_HPP 00042 00043 #include "LabelDict.hpp" 00044 #include "PathDict.hpp" 00045 #include "MemStreamer.hpp" 00046 #include "VPathExprMan.hpp" 00047 00048 extern MemStreamer *pathtreemem; 00049 // The memory for the dataguide nodes 00050 00051 extern VPathExprMan pathexprman; 00052 // The path expression manager 00053 00054 class UnpackContainer; 00055 00056 struct PathTreeNode 00057 // Represents a single dataguide node 00058 { 00059 PathTreeNode *parent; // The parent 00060 PathTreeNode *nextsamehash; // The next node with the same hash value 00061 00062 TLabelID labelid; // The label that leads to this node 00063 unsigned char isaccepting:1; // Describes whether each of the states in 00064 // fsmstatelist is accepting 00065 00066 FSMManStateItem *fsmstatelist; // This contains the states of the 00067 // automata 00068 #ifdef USE_FORWARD_DATAGUIDE 00069 void ComputePathDictNode(FSMManStateItem *stateitem); 00070 #endif 00071 00072 public: 00073 void *operator new(size_t size) { return pathtreemem->GetByteBlock(size); } 00074 void operator delete(void *ptr) {} 00075 00076 void PrintPath() 00077 { 00078 if(parent==NULL) 00079 { 00080 printf("<>"); 00081 return; 00082 } 00083 if(parent->parent!=NULL) 00084 { 00085 parent->PrintPath(); 00086 printf("."); 00087 } 00088 globallabeldict.PrintLabel(labelid); 00089 } 00090 00091 char IsAccepting() { return isaccepting; } 00092 FSMManStateItem *GetFSMStates() { return fsmstatelist; } 00093 00094 void ComputeInitStateList(unsigned char *isaccepting); 00095 // Computes the list of initial FSM states for this node 00096 // The state must be the root node! 00097 00098 void ComputeNextStateList(TLabelID labelid,unsigned char *isaccepting); 00099 // Computes the list of FSM states for this non-root node 00100 // The function looks up the states of the parent node and 00101 // computes the next states for each FSM that is reachable over 'labelid' 00102 }; 00103 00104 // The hash table size for the dataguide 00105 #define PATHTREE_HASHSIZE 65536 00106 #define PATHTREE_HASHMASK 65535 00107 00108 class PathTree 00109 { 00110 // The hash table 00111 PathTreeNode *pathtreehashtable[PATHTREE_HASHSIZE]; 00112 00113 PathTreeNode rootnode; // The root node 00114 00115 public: 00116 void CreateRootNode(); 00117 PathTreeNode *GetRootNode() { return &rootnode; } 00118 00119 PathTreeNode *ExtendCurPath(PathTreeNode *curnode,TLabelID labelid); 00120 // This important function extends the dataguide from the 00121 // current node 'curnode' by adding an edge with label 'labelid' 00122 // It also computes the corresponding FSM state annotations 00123 // If such an edge already exists, the function simply returns 00124 // the existing child node 00125 00126 void ReleaseMemory(); 00127 00128 #ifdef PROFILE 00129 unsigned long nodecount,lookupcount,hashitercount,fsmstatecount; 00130 00131 void PrintProfile() 00132 { 00133 printf("PathTree: nodecount=%lu FSMStatecount=%lu lookupcount=%lu hashitercount=%lu\n", 00134 nodecount,fsmstatecount,lookupcount,hashitercount); 00135 } 00136 #endif 00137 }; 00138 00139 // The global dataguide dictionary 00140 extern PathTree pathtree; 00141 00142 #endif