Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members  

VPathExprMan.cpp

Go to the documentation of this file.
00001 /*
00002 This product contains certain software code or other information
00003 ("AT&T Software") proprietary to AT&T Corp. ("AT&T").  The AT&T
00004 Software is provided to you "AS IS".  YOU ASSUME TOTAL RESPONSIBILITY
00005 AND RISK FOR USE OF THE AT&T SOFTWARE.  AT&T DOES NOT MAKE, AND
00006 EXPRESSLY DISCLAIMS, ANY EXPRESS OR IMPLIED WARRANTIES OF ANY KIND
00007 WHATSOEVER, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
00008 MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, WARRANTIES OF
00009 TITLE OR NON-INFRINGEMENT OF ANY INTELLECTUAL PROPERTY RIGHTS, ANY
00010 WARRANTIES ARISING BY USAGE OF TRADE, COURSE OF DEALING OR COURSE OF
00011 PERFORMANCE, OR ANY WARRANTY THAT THE AT&T SOFTWARE IS "ERROR FREE" OR
00012 WILL MEET YOUR REQUIREMENTS.
00013 
00014 Unless you accept a license to use the AT&T Software, you shall not
00015 reverse compile, disassemble or otherwise reverse engineer this
00016 product to ascertain the source code for any AT&T Software.
00017 
00018 (c) AT&T Corp. All rights reserved.  AT&T is a registered trademark of AT&T Corp.
00019 
00020 ***********************************************************************
00021 
00022 History:
00023 
00024       24/11/99  - initial release by Hartmut Liefke, liefke@seas.upenn.edu
00025                                      Dan Suciu,      suciu@research.att.com
00026 */
00027 
00028 //**************************************************************************
00029 //**************************************************************************
00030 
00031 // This module implements the management of container path expressions
00032 
00033 #include "VPathExprMan.hpp"
00034 
00035 #ifdef XMILL
00036 #ifdef FULL_PATHEXPR
00037 #include "VRegExpr.hpp"
00038 #endif
00039 #include "PathTree.hpp"
00040 #include "ContMan.hpp"
00041 #endif
00042 
00043 #ifdef XDEMILL
00044 #include "SmallUncompress.hpp"
00045 #endif
00046 
00047 #include "Load.hpp"
00048 
00049 // The memory used for allocating the path expression and FSM information
00050 extern MemStreamer mainmem;
00051 extern MemStreamer tmpmem;
00052 
00053 // The following flags determine how white spaces should be stored
00054 extern char globalleftwhitespacescompress;
00055 extern char globalrightwhitespacescompress;
00056 
00057 #ifdef XMILL
00058 extern UserCompressor   *plaincompressorptr; // The plain compressor: 't'
00059 
00060 extern CompressContainer *globalwhitespacecont;
00061    // The global white space container
00062 #endif
00063 
00064 inline void VPathExpr::PathParseError(char *errmsg,char *errptr)
00065    // Prints an error message, if the parsing of some path expression failed.
00066 {
00067    Error("Error while parsing path expression:\n\n   ");
00068    ErrorCont(regexprstr,regexprendptr-regexprstr);
00069    ErrorCont("\n");
00070    for(int i=0;i<errptr-regexprstr+3;i++)
00071       ErrorCont(" ",1);
00072    ErrorCont("^\n");
00073    ErrorCont(errmsg);
00074    Exit();
00075 }
00076 
00077 inline void VPathExpr::HandlePathExprOption(char * &str,char *endptr)
00078    // Parses one single option
00079    // Options are separated by ':'
00080 {
00081    // The user can specify options of the form l(i|g|t) or r(i|g|t) to
00082    // influence the handling of left/right white spaces directly for
00083    // a given path expression
00084    if(str+2<=endptr)
00085    {
00086       switch(*str)
00087       {
00088       case 'l':switch(str[1]) // Left white space option?
00089                {
00090                case 'i':      leftwhitespacescompress=WHITESPACE_IGNORE;str+=2;return;
00091                case 'g':      leftwhitespacescompress=WHITESPACE_STOREGLOBAL;str+=2;return;
00092                case 't':      leftwhitespacescompress=WHITESPACE_STORETEXT;str+=2;return;
00093                }
00094                break;
00095       case 'r':switch(str[1]) // Right white space option?
00096                {
00097                case 'i':      rightwhitespacescompress=WHITESPACE_IGNORE;str+=2;return;
00098                case 'g':      rightwhitespacescompress=WHITESPACE_STOREGLOBAL;str+=2;return;
00099                case 't':      rightwhitespacescompress=WHITESPACE_STORETEXT;str+=2;return;
00100                }
00101                break;
00102 /*
00103       case 'w':switch(str[1])
00104                {
00105                case 'i':      fullwhitespacescompress=WHITESPACE_IGNORE;str+=2;return;
00106                case 'g':      fullwhitespacescompress=WHITESPACE_STOREGLOBAL;str+=2;return;
00107                case 't':      fullwhitespacescompress=WHITESPACE_STORETEXT;str+=2;return;
00108                }
00109                break;
00110 */
00111       }
00112    }
00113 
00114    // Otherwise, it must be a compressor:
00115 
00116 #ifdef XMILL
00117    usercompressor=compressman.CreateCompressorInstance(str,endptr);
00118 #endif
00119 #ifdef XDEMILL
00120    useruncompressor=compressman.CreateUncompressorInstance(str,endptr);
00121 #endif
00122 }
00123 
00124 inline void VPathExpr::ParseUserCompressorString(char * &str,char *endptr)
00125    // Parses the user compressor string
00126    // It parses the options. Note that the actual user compressor *must*
00127    // come at the end
00128 {
00129    // We continue parsing and look for ':'
00130    while(str<endptr)
00131    {
00132       // We exit if we find a white-space
00133       if((*str==0)||(*str==' ')||(*str=='\t')||(*str=='\r')||(*str=='\n'))
00134       {
00135          regexprendptr=str;
00136          return;
00137       }
00138 
00139       // Let's handle the option or user compressor
00140       HandlePathExprOption(str,endptr);
00141 
00142       if(str==endptr)
00143       {
00144          regexprendptr=str;
00145          return;
00146       }
00147 
00148       // We exit, if we find a white-space
00149       if((*str==0)||(*str==' ')||(*str=='\t')||(*str=='\r')||(*str=='\n'))
00150       {
00151          regexprendptr=str;
00152          return;
00153       }
00154 
00155       if(*str!=':')
00156       {
00157          Error("Character ':' expected at '...'");
00158          if(endptr-str>5)
00159          {
00160             ErrorCont(str,5);
00161             ErrorCont("...'");
00162          }
00163          else
00164             ErrorCont(str,endptr-str);
00165          Exit();
00166       }
00167       str++;
00168 
00169    }
00170 }
00171 
00172 #ifdef XMILL
00173 
00174 inline void VPathExpr::CreateXPathEdge(char *from,char *to,FSM *fsm,FSMState *fromstate,FSMState *tostate,char ignore_pounds)
00175    // Reads the atomic symbol betwen 'from' and 'to' and generates
00176    // the corresponding edge between 'fromstate' and 'tostate' in 'fsm.
00177    // If ignore_pound is 1, then pound symbols are simply treated as '*' symbols.
00178 {
00179    if(from==to)   // Empty string?
00180       PathParseError("Unexpected character",from);
00181 
00182    switch(*from)
00183    {
00184    case '@':   // Do we have '@#' or '@name' ?
00185                // ==> Create a corresponding
00186       if((from+2==to)&&(from[1]=='#'))
00187          fsm->CreateLabelEdge(fromstate,tostate,attribpoundlabelid);
00188       else
00189          fsm->CreateLabelEdge(fromstate,tostate,globallabeldict.GetLabelOrAttrib(from+1,to-from-1,1));
00190       return;
00191 
00192    case '#':   // Do we have '#' or '##'
00193       if((from+2==to)&&(from[1]=='#'))
00194          // Do we have a double-pound '##' ?
00195       {
00196          FSMState *middlestate=fsm->CreateState();
00197          fsm->CreateEmptyEdge(fromstate,middlestate);
00198          fsm->CreateEmptyEdge(middlestate,tostate);
00199          if(ignore_pounds)
00200             fsm->CreateNegEdge(middlestate,middlestate);
00201          else
00202          {
00203             fsm->CreateLabelEdge(middlestate,middlestate,elementpoundlabelid);
00204             fsm->CreateLabelEdge(middlestate,middlestate,attribpoundlabelid);
00205          }
00206       }
00207       else  // we have '#'
00208       {
00209          if(from+1!=to)
00210             PathParseError("Symbol '/' or '|' expected after '#'",from+1);
00211 
00212          if(ignore_pounds)
00213             fsm->CreateNegEdge(fromstate,tostate);
00214          else
00215          {
00216             fsm->CreateLabelEdge(fromstate,tostate,elementpoundlabelid);
00217             fsm->CreateLabelEdge(fromstate,tostate,attribpoundlabelid);
00218          }
00219       }
00220       return;
00221 
00222    case '*':   // We have '*'
00223       if(from+1!=to)
00224          PathParseError("Symbol '/' or '|' expected after '*'",from+1);
00225   
00226       fsm->CreateNegEdge(fromstate,tostate);
00227       return;
00228 
00229    default:
00230          fsm->CreateLabelEdge(fromstate,tostate,globallabeldict.GetLabelOrAttrib(from,to-from,0));
00231    }
00232 }
00233 
00234 inline void VPathExpr::ParseXPathItem(char * &startptr,char *endptr,FSM *fsm,FSMState *fromstate,FSMState *tostate,char ignore_pounds)
00235    // Reads the path expression betwen 'from' and 'to' and generates
00236    // the corresponding edges and states between 'fromstate' and 'tostate' in 'fsm.
00237    // If ignore_pound is 1, then pound symbols are simply treated as '*' symbols.
00238 {
00239    FSMState *curfromstate=fromstate;
00240    FSMState *curtostate=fsm->CreateState();
00241    char     *to;
00242 
00243    do
00244    {
00245       if(*startptr=='(')
00246          // We try to consume a label or an expression enclosed in  '(...)'
00247       {
00248          startptr++;
00249          ParseXPathItem(startptr,endptr,fsm,curfromstate,curtostate,ignore_pounds);
00250 
00251          // Afterwards, there must be a symbol ')'
00252          if((startptr==endptr)||(*startptr!=')'))
00253             PathParseError("Missing closed parenthesis ')'",startptr);
00254          startptr++;
00255       }
00256       else
00257       {
00258          // First, we find the end of the label
00259          to=startptr;
00260          while((to<endptr)&&(*to!='/')&&(*to!='=')&&(*to!='|')&&(*to!=')'))
00261             to++;
00262 
00263          // We create the actual edge
00264          CreateXPathEdge(startptr,to,fsm,curfromstate,curtostate,ignore_pounds);
00265 
00266          startptr=to;
00267       }
00268 
00269       if(startptr==endptr) // The path expression is finished after the label
00270          break;
00271 
00272       // We look at the character coming after the label
00273       switch(*startptr)
00274       {
00275       case '/':   // We have a separator
00276          startptr++;
00277 
00278          if((startptr<endptr)&&(*startptr=='/'))   // Do we have another '/' following ?
00279                                                    // i.e. we have '//'
00280          {
00281             // Let's create a middle state with a self-loop
00282             // and an empty edge from 'curfromstate' to 'middlestate'
00283             // and an empty edge from 'middlestate' to 'curtostate'
00284             FSMState *middlestate=fsm->CreateState();
00285             fsm->CreateEmptyEdge(curtostate,middlestate);
00286             fsm->CreateNegEdge(middlestate,middlestate,NULL);
00287 
00288             curtostate=fsm->CreateState();
00289             fsm->CreateEmptyEdge(middlestate,curtostate);
00290             startptr++;
00291          }
00292          if((startptr==endptr)||(*startptr=='=')||(*startptr==')'))
00293             // Is '/' the last character in the expression ? ==> We are done
00294          {
00295             fsm->CreateEmptyEdge(curtostate,tostate);
00296             return;
00297          }
00298 
00299          curfromstate=curtostate;
00300          curtostate=fsm->CreateState();
00301          break;
00302 
00303       case '|':   
00304          startptr++;
00305          break;
00306 
00307       case ')':
00308       case '=':
00309          fsm->CreateEmptyEdge(curtostate,tostate);
00310          return;
00311 
00312       default:
00313          PathParseError("Invalid symbol",startptr);
00314       }
00315    }
00316    while(1);
00317 
00318    fsm->CreateEmptyEdge(curtostate,tostate);
00319 }
00320 
00321 inline FSM *VPathExpr::ParseXPath(char * &str,char *endptr,char ignore_pounds)
00322    // Generates the actual FSM for a given string
00323    // If ignore_pound is 1, then pound symbols are simply treated as '*' symbols.
00324 {
00325    FSM *fsm=new(fsmmem) FSM();
00326    FSMState *startstate=fsm->CreateState();
00327 
00328    fsm->SetStartState(startstate);
00329 
00330    str++;   // We skip the starting '/'
00331 
00332    if(str==endptr)   // We have the single XPath expression '/'
00333    {
00334       startstate->SetFinal();
00335       return fsm;
00336    }
00337 
00338    if(str[0]=='/')   // Do we have '//...'
00339    {
00340       // We loop in start state
00341       fsm->CreateNegEdge(startstate,startstate,NULL);
00342 
00343       if(str+1==endptr)
00344          // We have path expression '//'
00345       {
00346          str++;
00347          startstate->SetFinal();
00348          return fsm;
00349       }
00350       FSMState *middlestate=fsm->CreateState();
00351       fsm->CreateEmptyEdge(startstate,middlestate);
00352       startstate=middlestate;
00353       str++;
00354    }
00355 
00356    // Let's create the final state
00357    FSMState *finalstate=fsm->CreateState();
00358    finalstate->SetFinal();
00359 
00360    // We can now parse the path expression and create states/edges between
00361    // 'startstate' and 'finalstate'
00362    // If there '=', then we simply have '//=>...' or '/=>...'
00363    if(*str!='=')
00364       ParseXPathItem(str,endptr,fsm,startstate,finalstate,ignore_pounds);
00365    else
00366       fsm->CreateEmptyEdge(startstate,finalstate);
00367 
00368    // If we are not at the end, then we must have '=>' at the end
00369    if(str<endptr)
00370    {
00371       if((str+1==endptr)||(*str!='=')||(str[1]!='>'))
00372          PathParseError("Unexpected character",str);
00373 
00374       str+=2;  // We move the 'str'-pointer to the string coming
00375                // after '=>'
00376    }
00377    return fsm;
00378 }
00379 
00380 void VPathExpr::CreateFromString(char * &str,char *endptr)
00381    // This function initializes the object with the path expression
00382    // found between 'str' and 'endptr.
00383    // It creates the forward and backward FSM and parses the
00384    // user compressor string
00385 {
00386 #ifdef FULL_PATHEXPR
00387    VRegExpr       *regexpr;
00388 #endif
00389    FSM            *tmpforwardfsm;
00390    char           *savestr=str;
00391 
00392    regexprstr=str;
00393    regexprendptr=endptr;   // The end ptr will be set later
00394 
00395    // We start a new block of temporary data
00396    tmpmem.StartNewMemBlock();
00397 
00398    // The forward FSM is only generated in temporary memory
00399    fsmmem=&tmpmem;
00400    fsmtmpmem=&tmpmem;
00401 
00402    // For now, it is required that paths start with '/'
00403    if(*str=='/')
00404       tmpforwardfsm=ParseXPath(str,endptr,0);
00405    else
00406    {
00407 #ifdef FULL_PATHEXPR
00408       // Let's firstly take care of the main expression
00409 
00410       vregexprmem=&tmpmem;
00411       regexpr=VRegExpr::ParseVRegExpr(str,endptr);
00412 
00413       // Let's convert the regular expression into an automaton
00414       // Let's create an FSM
00415       tmpforwardfsm=regexpr->CreateNonDetFSM();
00416 #else
00417       PathParseError("Character '/' expected",str);
00418 #endif
00419    }
00420 
00421    // Let's make the FSM deterministic
00422    tmpforwardfsm=tmpforwardfsm->MakeDeterministic();
00423 
00424    // Let's minimize
00425    tmpforwardfsm=tmpforwardfsm->Minimize();
00426 
00427    // We compute which states are accepting
00428    //tmpforwardfsm->FindAcceptingStates();
00429 
00430    // We store the following automata in the temporary memory
00431    fsmmem=&tmpmem;
00432 
00433    // Now we reverse the FSM
00434    reversefsm=tmpforwardfsm->CreateReverseFSM();
00435 
00436    reversefsm=reversefsm->MakeDeterministic();
00437 
00438    // We store the following automaton in the main memory
00439    fsmmem=&mainmem;
00440    mainmem.WordAlign();
00441 
00442    // Only now we create the FSM in main memory
00443    reversefsm=reversefsm->Minimize();
00444 
00445    // We compute which states are accepting
00446    reversefsm->FindAcceptingStates();
00447 
00448    // For each state, we also determine whether there
00449    // are pounds coming afterwards
00450    reversefsm->ComputeStatesHasPoundsAhead();
00451 
00452 #ifdef USE_FORWARD_DATAGUIDE
00453    if(*savestr=='/')
00454       forwardfsm=ParseXPath(savestr,endptr,1);
00455    else
00456    {
00457       Error("Fatal Error in VPathExpr::CreateFromString\n");
00458       Exit();
00459    }
00460 
00461    // Let's make the FSM deterministic
00462    
00463    forwardfsm=forwardfsm->MakeDeterministic();
00464 
00465    fsmmem=&mainmem;
00466    mainmem.WordAlign();
00467 
00468    // Let's minimize
00469    forwardfsm=forwardfsm->Minimize();
00470 
00471 #endif
00472 
00473    // We remove all the temporary data
00474    tmpmem.RemoveLastMemBlock();
00475 
00476 //*************************************************************************
00477 
00478    // We use the global setting as default for the white space handling
00479    // for the specific path expression. This might be overwritten by
00480    // function 'ParseUserCompressorString' below, which parses the user compressor
00481    // string
00482    leftwhitespacescompress=WHITESPACE_DEFAULT;
00483    rightwhitespacescompress=WHITESPACE_DEFAULT;
00484 
00485    // As the default compressor, we set the plain text compressor
00486    char *textstring="t";
00487    usercompressor=compressman.CreateCompressorInstance(textstring,textstring+1);
00488 
00489    regexprusercompressptr=str;
00490 
00491    // Let's parse the compressor string now
00492    ParseUserCompressorString(str,endptr);
00493 
00494    regexprendptr=str;
00495 }
00496 
00497 void VPathExpr::InitWhitespaceHandling()
00498    // If the default white space handling for the path expression
00499    // is the global setting, then we replace that reference
00500    // by the global default value
00501 {
00502    if(leftwhitespacescompress==WHITESPACE_DEFAULT)
00503       leftwhitespacescompress=globalleftwhitespacescompress;
00504 
00505    if(rightwhitespacescompress==WHITESPACE_DEFAULT)
00506       rightwhitespacescompress=globalrightwhitespacescompress;
00507 }
00508 
00509 void VPathExprMan::InitWhitespaceHandling()
00510    // If the default white space handling for some path expressions
00511    // is the global setting, then we replace that reference
00512    // by the global default value
00513 {
00514    VPathExpr *pathexpr=pathexprs;
00515    while(pathexpr!=NULL)
00516    {
00517       pathexpr->InitWhitespaceHandling();
00518       pathexpr=pathexpr->next;
00519    }
00520 }
00521 
00522 #endif
00523 
00524 void VPathExpr::PrintRegExpr()
00525    // Outputs the path expression string
00526 {
00527    fwrite(regexprstr,regexprendptr-regexprstr,1,stdout);
00528 }
00529 
00530 //*******************************************************************
00531 //*******************************************************************
00532 //*******************************************************************
00533 //*******************************************************************
00534 
00535 #ifdef XMILL
00536 void VPathExprMan::AddNewVPathExpr(char * &str,char *endptr)
00537    // Adds a new path expression to the set of paths
00538 {
00539    // Create the path expression
00540    VPathExpr *item=new(&mainmem) VPathExpr();
00541 
00542    item->idx=pathexprnum+1;
00543    pathexprnum++;
00544 
00545    // Parse the path expression string
00546    item->CreateFromString(str,endptr);
00547 
00548    // Add the path expression to the list
00549    if(pathexprs==NULL)
00550       pathexprs=lastpathexpr=item;
00551    else
00552    {
00553       lastpathexpr->next=item;
00554       lastpathexpr=item;
00555    }
00556 }
00557 #endif
00558 
00559 //***************************************************************************************
00560 //***************************************************************************************
00561 //***************************************************************************************
00562 
00563 #ifdef XMILL
00564 inline void VPathExpr::Store(MemStreamer *output)
00565    // Stores the path expression in 'output'
00566 {
00567    // We only store the user compressor expression
00568    // This can even be the empty string, if there is no user compressor string
00569    output->StoreUInt32(regexprendptr-regexprusercompressptr);
00570    output->StoreData(regexprusercompressptr,regexprendptr-regexprusercompressptr);
00571 }
00572 #endif
00573 
00574 #ifdef XDEMILL
00575 void VPathExpr::Load(SmallBlockUncompressor *uncompress)
00576    // Loads the user compressor string from 'uncompress'
00577    // It parses the user compressor string and creates the corresponding user compressor
00578 {
00579    char           *ptr;
00580    unsigned long  len=uncompress->LoadString(&ptr);
00581 
00582    // We allocate some memory for the user compressor string
00583    regexprusercompressptr=mainmem.GetByteBlock(len);
00584    mainmem.WordAlign();
00585 
00586    memcpy(regexprusercompressptr,ptr,len);
00587 
00588    regexprendptr=regexprusercompressptr+len;
00589    regexprstr=NULL;
00590 
00591    char *str=regexprusercompressptr;
00592 
00593    // The default user compressor
00594    char *textstring="t";
00595    useruncompressor=compressman.CreateUncompressorInstance(textstring,textstring+1);
00596 
00597    // Let's parse the user compressor
00598    ParseUserCompressorString(str,regexprendptr);
00599 }
00600 #endif
00601 
00602 //************************************************************************
00603 //************************************************************************
00604 
00605 #ifdef XMILL
00606 void VPathExprMan::Store(MemStreamer *memstream)
00607    // Stores all path expressions
00608 {
00609    // Store the number of path expressions
00610    memstream->StoreUInt32(pathexprnum);
00611 
00612    VPathExpr   *curpathexpr=pathexprs;
00613 
00614    // We store all paths
00615    while(curpathexpr!=NULL)
00616    {
00617       curpathexpr->Store(memstream);
00618       curpathexpr=curpathexpr->next;
00619    }
00620 }
00621 #endif
00622 
00623 #ifdef XDEMILL
00624 void VPathExprMan::Load(SmallBlockUncompressor *uncompress)
00625    // Load the set of path expressions from 'uncompress'
00626 {
00627    // Load the number
00628    pathexprnum=uncompress->LoadUInt32();
00629 
00630    VPathExpr **pathexprref=&pathexprs;
00631 
00632    // Load all path expressions
00633 
00634    for(unsigned long i=0;i<pathexprnum;i++)
00635    {
00636       *pathexprref=new(&mainmem) VPathExpr();
00637 
00638       (*pathexprref)->idx=i;
00639 
00640       (*pathexprref)->Load(uncompress);
00641 
00642       pathexprref=&((*pathexprref)->next);
00643    }
00644 }
00645 #endif

Generated on Sat Oct 13 16:08:41 2001 for XMILL by doxygen1.2.11.1 written by Dimitri van Heesch, © 1997-2001