00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 #include "VPathExprMan.hpp"
00034 
00035 #ifdef XMILL
00036 #ifdef FULL_PATHEXPR
00037 #include "VRegExpr.hpp"
00038 #endif
00039 #include "PathTree.hpp"
00040 #include "ContMan.hpp"
00041 #endif
00042 
00043 #ifdef XDEMILL
00044 #include "SmallUncompress.hpp"
00045 #endif
00046 
00047 #include "Load.hpp"
00048 
00049 
00050 extern MemStreamer mainmem;
00051 extern MemStreamer tmpmem;
00052 
00053 
00054 extern char globalleftwhitespacescompress;
00055 extern char globalrightwhitespacescompress;
00056 
00057 #ifdef XMILL
00058 extern UserCompressor   *plaincompressorptr; 
00059 
00060 extern CompressContainer *globalwhitespacecont;
00061    
00062 #endif
00063 
00064 inline void VPathExpr::PathParseError(char *errmsg,char *errptr)
00065    
00066 {
00067    Error("Error while parsing path expression:\n\n   ");
00068    ErrorCont(regexprstr,regexprendptr-regexprstr);
00069    ErrorCont("\n");
00070    for(int i=0;i<errptr-regexprstr+3;i++)
00071       ErrorCont(" ",1);
00072    ErrorCont("^\n");
00073    ErrorCont(errmsg);
00074    Exit();
00075 }
00076 
00077 inline void VPathExpr::HandlePathExprOption(char * &str,char *endptr)
00078    
00079    
00080 {
00081    
00082    
00083    
00084    if(str+2<=endptr)
00085    {
00086       switch(*str)
00087       {
00088       case 'l':switch(str[1]) 
00089                {
00090                case 'i':      leftwhitespacescompress=WHITESPACE_IGNORE;str+=2;return;
00091                case 'g':      leftwhitespacescompress=WHITESPACE_STOREGLOBAL;str+=2;return;
00092                case 't':      leftwhitespacescompress=WHITESPACE_STORETEXT;str+=2;return;
00093                }
00094                break;
00095       case 'r':switch(str[1]) 
00096                {
00097                case 'i':      rightwhitespacescompress=WHITESPACE_IGNORE;str+=2;return;
00098                case 'g':      rightwhitespacescompress=WHITESPACE_STOREGLOBAL;str+=2;return;
00099                case 't':      rightwhitespacescompress=WHITESPACE_STORETEXT;str+=2;return;
00100                }
00101                break;
00102 
00103 
00104 
00105 
00106 
00107 
00108 
00109 
00110 
00111       }
00112    }
00113 
00114    
00115 
00116 #ifdef XMILL
00117    usercompressor=compressman.CreateCompressorInstance(str,endptr);
00118 #endif
00119 #ifdef XDEMILL
00120    useruncompressor=compressman.CreateUncompressorInstance(str,endptr);
00121 #endif
00122 }
00123 
00124 inline void VPathExpr::ParseUserCompressorString(char * &str,char *endptr)
00125    
00126    
00127    
00128 {
00129    
00130    while(str<endptr)
00131    {
00132       
00133       if((*str==0)||(*str==' ')||(*str=='\t')||(*str=='\r')||(*str=='\n'))
00134       {
00135          regexprendptr=str;
00136          return;
00137       }
00138 
00139       
00140       HandlePathExprOption(str,endptr);
00141 
00142       if(str==endptr)
00143       {
00144          regexprendptr=str;
00145          return;
00146       }
00147 
00148       
00149       if((*str==0)||(*str==' ')||(*str=='\t')||(*str=='\r')||(*str=='\n'))
00150       {
00151          regexprendptr=str;
00152          return;
00153       }
00154 
00155       if(*str!=':')
00156       {
00157          Error("Character ':' expected at '...'");
00158          if(endptr-str>5)
00159          {
00160             ErrorCont(str,5);
00161             ErrorCont("...'");
00162          }
00163          else
00164             ErrorCont(str,endptr-str);
00165          Exit();
00166       }
00167       str++;
00168 
00169    }
00170 }
00171 
00172 #ifdef XMILL
00173 
00174 inline void VPathExpr::CreateXPathEdge(char *from,char *to,FSM *fsm,FSMState *fromstate,FSMState *tostate,char ignore_pounds)
00175    
00176    
00177    
00178 {
00179    if(from==to)   
00180       PathParseError("Unexpected character",from);
00181 
00182    switch(*from)
00183    {
00184    case '@':   
00185                
00186       if((from+2==to)&&(from[1]=='#'))
00187          fsm->CreateLabelEdge(fromstate,tostate,attribpoundlabelid);
00188       else
00189          fsm->CreateLabelEdge(fromstate,tostate,globallabeldict.GetLabelOrAttrib(from+1,to-from-1,1));
00190       return;
00191 
00192    case '#':   
00193       if((from+2==to)&&(from[1]=='#'))
00194          
00195       {
00196          FSMState *middlestate=fsm->CreateState();
00197          fsm->CreateEmptyEdge(fromstate,middlestate);
00198          fsm->CreateEmptyEdge(middlestate,tostate);
00199          if(ignore_pounds)
00200             fsm->CreateNegEdge(middlestate,middlestate);
00201          else
00202          {
00203             fsm->CreateLabelEdge(middlestate,middlestate,elementpoundlabelid);
00204             fsm->CreateLabelEdge(middlestate,middlestate,attribpoundlabelid);
00205          }
00206       }
00207       else  
00208       {
00209          if(from+1!=to)
00210             PathParseError("Symbol '/' or '|' expected after '#'",from+1);
00211 
00212          if(ignore_pounds)
00213             fsm->CreateNegEdge(fromstate,tostate);
00214          else
00215          {
00216             fsm->CreateLabelEdge(fromstate,tostate,elementpoundlabelid);
00217             fsm->CreateLabelEdge(fromstate,tostate,attribpoundlabelid);
00218          }
00219       }
00220       return;
00221 
00222    case '*':   
00223       if(from+1!=to)
00224          PathParseError("Symbol '/' or '|' expected after '*'",from+1);
00225   
00226       fsm->CreateNegEdge(fromstate,tostate);
00227       return;
00228 
00229    default:
00230          fsm->CreateLabelEdge(fromstate,tostate,globallabeldict.GetLabelOrAttrib(from,to-from,0));
00231    }
00232 }
00233 
00234 inline void VPathExpr::ParseXPathItem(char * &startptr,char *endptr,FSM *fsm,FSMState *fromstate,FSMState *tostate,char ignore_pounds)
00235    
00236    
00237    
00238 {
00239    FSMState *curfromstate=fromstate;
00240    FSMState *curtostate=fsm->CreateState();
00241    char     *to;
00242 
00243    do
00244    {
00245       if(*startptr=='(')
00246          
00247       {
00248          startptr++;
00249          ParseXPathItem(startptr,endptr,fsm,curfromstate,curtostate,ignore_pounds);
00250 
00251          
00252          if((startptr==endptr)||(*startptr!=')'))
00253             PathParseError("Missing closed parenthesis ')'",startptr);
00254          startptr++;
00255       }
00256       else
00257       {
00258          
00259          to=startptr;
00260          while((to<endptr)&&(*to!='/')&&(*to!='=')&&(*to!='|')&&(*to!=')'))
00261             to++;
00262 
00263          
00264          CreateXPathEdge(startptr,to,fsm,curfromstate,curtostate,ignore_pounds);
00265 
00266          startptr=to;
00267       }
00268 
00269       if(startptr==endptr) 
00270          break;
00271 
00272       
00273       switch(*startptr)
00274       {
00275       case '/':   
00276          startptr++;
00277 
00278          if((startptr<endptr)&&(*startptr=='/'))   
00279                                                    
00280          {
00281             
00282             
00283             
00284             FSMState *middlestate=fsm->CreateState();
00285             fsm->CreateEmptyEdge(curtostate,middlestate);
00286             fsm->CreateNegEdge(middlestate,middlestate,NULL);
00287 
00288             curtostate=fsm->CreateState();
00289             fsm->CreateEmptyEdge(middlestate,curtostate);
00290             startptr++;
00291          }
00292          if((startptr==endptr)||(*startptr=='=')||(*startptr==')'))
00293             
00294          {
00295             fsm->CreateEmptyEdge(curtostate,tostate);
00296             return;
00297          }
00298 
00299          curfromstate=curtostate;
00300          curtostate=fsm->CreateState();
00301          break;
00302 
00303       case '|':   
00304          startptr++;
00305          break;
00306 
00307       case ')':
00308       case '=':
00309          fsm->CreateEmptyEdge(curtostate,tostate);
00310          return;
00311 
00312       default:
00313          PathParseError("Invalid symbol",startptr);
00314       }
00315    }
00316    while(1);
00317 
00318    fsm->CreateEmptyEdge(curtostate,tostate);
00319 }
00320 
00321 inline FSM *VPathExpr::ParseXPath(char * &str,char *endptr,char ignore_pounds)
00322    
00323    
00324 {
00325    FSM *fsm=new(fsmmem) FSM();
00326    FSMState *startstate=fsm->CreateState();
00327 
00328    fsm->SetStartState(startstate);
00329 
00330    str++;   
00331 
00332    if(str==endptr)   
00333    {
00334       startstate->SetFinal();
00335       return fsm;
00336    }
00337 
00338    if(str[0]=='/')   
00339    {
00340       
00341       fsm->CreateNegEdge(startstate,startstate,NULL);
00342 
00343       if(str+1==endptr)
00344          
00345       {
00346          str++;
00347          startstate->SetFinal();
00348          return fsm;
00349       }
00350       FSMState *middlestate=fsm->CreateState();
00351       fsm->CreateEmptyEdge(startstate,middlestate);
00352       startstate=middlestate;
00353       str++;
00354    }
00355 
00356    
00357    FSMState *finalstate=fsm->CreateState();
00358    finalstate->SetFinal();
00359 
00360    
00361    
00362    
00363    if(*str!='=')
00364       ParseXPathItem(str,endptr,fsm,startstate,finalstate,ignore_pounds);
00365    else
00366       fsm->CreateEmptyEdge(startstate,finalstate);
00367 
00368    
00369    if(str<endptr)
00370    {
00371       if((str+1==endptr)||(*str!='=')||(str[1]!='>'))
00372          PathParseError("Unexpected character",str);
00373 
00374       str+=2;  
00375                
00376    }
00377    return fsm;
00378 }
00379 
00380 void VPathExpr::CreateFromString(char * &str,char *endptr)
00381    
00382    
00383    
00384    
00385 {
00386 #ifdef FULL_PATHEXPR
00387    VRegExpr       *regexpr;
00388 #endif
00389    FSM            *tmpforwardfsm;
00390    char           *savestr=str;
00391 
00392    regexprstr=str;
00393    regexprendptr=endptr;   
00394 
00395    
00396    tmpmem.StartNewMemBlock();
00397 
00398    
00399    fsmmem=&tmpmem;
00400    fsmtmpmem=&tmpmem;
00401 
00402    
00403    if(*str=='/')
00404       tmpforwardfsm=ParseXPath(str,endptr,0);
00405    else
00406    {
00407 #ifdef FULL_PATHEXPR
00408       
00409 
00410       vregexprmem=&tmpmem;
00411       regexpr=VRegExpr::ParseVRegExpr(str,endptr);
00412 
00413       
00414       
00415       tmpforwardfsm=regexpr->CreateNonDetFSM();
00416 #else
00417       PathParseError("Character '/' expected",str);
00418 #endif
00419    }
00420 
00421    
00422    tmpforwardfsm=tmpforwardfsm->MakeDeterministic();
00423 
00424    
00425    tmpforwardfsm=tmpforwardfsm->Minimize();
00426 
00427    
00428    
00429 
00430    
00431    fsmmem=&tmpmem;
00432 
00433    
00434    reversefsm=tmpforwardfsm->CreateReverseFSM();
00435 
00436    reversefsm=reversefsm->MakeDeterministic();
00437 
00438    
00439    fsmmem=&mainmem;
00440    mainmem.WordAlign();
00441 
00442    
00443    reversefsm=reversefsm->Minimize();
00444 
00445    
00446    reversefsm->FindAcceptingStates();
00447 
00448    
00449    
00450    reversefsm->ComputeStatesHasPoundsAhead();
00451 
00452 #ifdef USE_FORWARD_DATAGUIDE
00453    if(*savestr=='/')
00454       forwardfsm=ParseXPath(savestr,endptr,1);
00455    else
00456    {
00457       Error("Fatal Error in VPathExpr::CreateFromString\n");
00458       Exit();
00459    }
00460 
00461    
00462    
00463    forwardfsm=forwardfsm->MakeDeterministic();
00464 
00465    fsmmem=&mainmem;
00466    mainmem.WordAlign();
00467 
00468    
00469    forwardfsm=forwardfsm->Minimize();
00470 
00471 #endif
00472 
00473    
00474    tmpmem.RemoveLastMemBlock();
00475 
00476 
00477 
00478    
00479    
00480    
00481    
00482    leftwhitespacescompress=WHITESPACE_DEFAULT;
00483    rightwhitespacescompress=WHITESPACE_DEFAULT;
00484 
00485    
00486    char *textstring="t";
00487    usercompressor=compressman.CreateCompressorInstance(textstring,textstring+1);
00488 
00489    regexprusercompressptr=str;
00490 
00491    
00492    ParseUserCompressorString(str,endptr);
00493 
00494    regexprendptr=str;
00495 }
00496 
00497 void VPathExpr::InitWhitespaceHandling()
00498    
00499    
00500    
00501 {
00502    if(leftwhitespacescompress==WHITESPACE_DEFAULT)
00503       leftwhitespacescompress=globalleftwhitespacescompress;
00504 
00505    if(rightwhitespacescompress==WHITESPACE_DEFAULT)
00506       rightwhitespacescompress=globalrightwhitespacescompress;
00507 }
00508 
00509 void VPathExprMan::InitWhitespaceHandling()
00510    
00511    
00512    
00513 {
00514    VPathExpr *pathexpr=pathexprs;
00515    while(pathexpr!=NULL)
00516    {
00517       pathexpr->InitWhitespaceHandling();
00518       pathexpr=pathexpr->next;
00519    }
00520 }
00521 
00522 #endif
00523 
00524 void VPathExpr::PrintRegExpr()
00525    
00526 {
00527    fwrite(regexprstr,regexprendptr-regexprstr,1,stdout);
00528 }
00529 
00530 
00531 
00532 
00533 
00534 
00535 #ifdef XMILL
00536 void VPathExprMan::AddNewVPathExpr(char * &str,char *endptr)
00537    
00538 {
00539    
00540    VPathExpr *item=new(&mainmem) VPathExpr();
00541 
00542    item->idx=pathexprnum+1;
00543    pathexprnum++;
00544 
00545    
00546    item->CreateFromString(str,endptr);
00547 
00548    
00549    if(pathexprs==NULL)
00550       pathexprs=lastpathexpr=item;
00551    else
00552    {
00553       lastpathexpr->next=item;
00554       lastpathexpr=item;
00555    }
00556 }
00557 #endif
00558 
00559 
00560 
00561 
00562 
00563 #ifdef XMILL
00564 inline void VPathExpr::Store(MemStreamer *output)
00565    
00566 {
00567    
00568    
00569    output->StoreUInt32(regexprendptr-regexprusercompressptr);
00570    output->StoreData(regexprusercompressptr,regexprendptr-regexprusercompressptr);
00571 }
00572 #endif
00573 
00574 #ifdef XDEMILL
00575 void VPathExpr::Load(SmallBlockUncompressor *uncompress)
00576    
00577    
00578 {
00579    char           *ptr;
00580    unsigned long  len=uncompress->LoadString(&ptr);
00581 
00582    
00583    regexprusercompressptr=mainmem.GetByteBlock(len);
00584    mainmem.WordAlign();
00585 
00586    memcpy(regexprusercompressptr,ptr,len);
00587 
00588    regexprendptr=regexprusercompressptr+len;
00589    regexprstr=NULL;
00590 
00591    char *str=regexprusercompressptr;
00592 
00593    
00594    char *textstring="t";
00595    useruncompressor=compressman.CreateUncompressorInstance(textstring,textstring+1);
00596 
00597    
00598    ParseUserCompressorString(str,regexprendptr);
00599 }
00600 #endif
00601 
00602 
00603 
00604 
00605 #ifdef XMILL
00606 void VPathExprMan::Store(MemStreamer *memstream)
00607    
00608 {
00609    
00610    memstream->StoreUInt32(pathexprnum);
00611 
00612    VPathExpr   *curpathexpr=pathexprs;
00613 
00614    
00615    while(curpathexpr!=NULL)
00616    {
00617       curpathexpr->Store(memstream);
00618       curpathexpr=curpathexpr->next;
00619    }
00620 }
00621 #endif
00622 
00623 #ifdef XDEMILL
00624 void VPathExprMan::Load(SmallBlockUncompressor *uncompress)
00625    
00626 {
00627    
00628    pathexprnum=uncompress->LoadUInt32();
00629 
00630    VPathExpr **pathexprref=&pathexprs;
00631 
00632    
00633 
00634    for(unsigned long i=0;i<pathexprnum;i++)
00635    {
00636       *pathexprref=new(&mainmem) VPathExpr();
00637 
00638       (*pathexprref)->idx=i;
00639 
00640       (*pathexprref)->Load(uncompress);
00641 
00642       pathexprref=&((*pathexprref)->next);
00643    }
00644 }
00645 #endif