00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include "VPathExprMan.hpp"
00034
00035 #ifdef XMILL
00036 #ifdef FULL_PATHEXPR
00037 #include "VRegExpr.hpp"
00038 #endif
00039 #include "PathTree.hpp"
00040 #include "ContMan.hpp"
00041 #endif
00042
00043 #ifdef XDEMILL
00044 #include "SmallUncompress.hpp"
00045 #endif
00046
00047 #include "Load.hpp"
00048
00049
00050 extern MemStreamer mainmem;
00051 extern MemStreamer tmpmem;
00052
00053
00054 extern char globalleftwhitespacescompress;
00055 extern char globalrightwhitespacescompress;
00056
00057 #ifdef XMILL
00058 extern UserCompressor *plaincompressorptr;
00059
00060 extern CompressContainer *globalwhitespacecont;
00061
00062 #endif
00063
00064 inline void VPathExpr::PathParseError(char *errmsg,char *errptr)
00065
00066 {
00067 Error("Error while parsing path expression:\n\n ");
00068 ErrorCont(regexprstr,regexprendptr-regexprstr);
00069 ErrorCont("\n");
00070 for(int i=0;i<errptr-regexprstr+3;i++)
00071 ErrorCont(" ",1);
00072 ErrorCont("^\n");
00073 ErrorCont(errmsg);
00074 Exit();
00075 }
00076
00077 inline void VPathExpr::HandlePathExprOption(char * &str,char *endptr)
00078
00079
00080 {
00081
00082
00083
00084 if(str+2<=endptr)
00085 {
00086 switch(*str)
00087 {
00088 case 'l':switch(str[1])
00089 {
00090 case 'i': leftwhitespacescompress=WHITESPACE_IGNORE;str+=2;return;
00091 case 'g': leftwhitespacescompress=WHITESPACE_STOREGLOBAL;str+=2;return;
00092 case 't': leftwhitespacescompress=WHITESPACE_STORETEXT;str+=2;return;
00093 }
00094 break;
00095 case 'r':switch(str[1])
00096 {
00097 case 'i': rightwhitespacescompress=WHITESPACE_IGNORE;str+=2;return;
00098 case 'g': rightwhitespacescompress=WHITESPACE_STOREGLOBAL;str+=2;return;
00099 case 't': rightwhitespacescompress=WHITESPACE_STORETEXT;str+=2;return;
00100 }
00101 break;
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111 }
00112 }
00113
00114
00115
00116 #ifdef XMILL
00117 usercompressor=compressman.CreateCompressorInstance(str,endptr);
00118 #endif
00119 #ifdef XDEMILL
00120 useruncompressor=compressman.CreateUncompressorInstance(str,endptr);
00121 #endif
00122 }
00123
00124 inline void VPathExpr::ParseUserCompressorString(char * &str,char *endptr)
00125
00126
00127
00128 {
00129
00130 while(str<endptr)
00131 {
00132
00133 if((*str==0)||(*str==' ')||(*str=='\t')||(*str=='\r')||(*str=='\n'))
00134 {
00135 regexprendptr=str;
00136 return;
00137 }
00138
00139
00140 HandlePathExprOption(str,endptr);
00141
00142 if(str==endptr)
00143 {
00144 regexprendptr=str;
00145 return;
00146 }
00147
00148
00149 if((*str==0)||(*str==' ')||(*str=='\t')||(*str=='\r')||(*str=='\n'))
00150 {
00151 regexprendptr=str;
00152 return;
00153 }
00154
00155 if(*str!=':')
00156 {
00157 Error("Character ':' expected at '...'");
00158 if(endptr-str>5)
00159 {
00160 ErrorCont(str,5);
00161 ErrorCont("...'");
00162 }
00163 else
00164 ErrorCont(str,endptr-str);
00165 Exit();
00166 }
00167 str++;
00168
00169 }
00170 }
00171
00172 #ifdef XMILL
00173
00174 inline void VPathExpr::CreateXPathEdge(char *from,char *to,FSM *fsm,FSMState *fromstate,FSMState *tostate,char ignore_pounds)
00175
00176
00177
00178 {
00179 if(from==to)
00180 PathParseError("Unexpected character",from);
00181
00182 switch(*from)
00183 {
00184 case '@':
00185
00186 if((from+2==to)&&(from[1]=='#'))
00187 fsm->CreateLabelEdge(fromstate,tostate,attribpoundlabelid);
00188 else
00189 fsm->CreateLabelEdge(fromstate,tostate,globallabeldict.GetLabelOrAttrib(from+1,to-from-1,1));
00190 return;
00191
00192 case '#':
00193 if((from+2==to)&&(from[1]=='#'))
00194
00195 {
00196 FSMState *middlestate=fsm->CreateState();
00197 fsm->CreateEmptyEdge(fromstate,middlestate);
00198 fsm->CreateEmptyEdge(middlestate,tostate);
00199 if(ignore_pounds)
00200 fsm->CreateNegEdge(middlestate,middlestate);
00201 else
00202 {
00203 fsm->CreateLabelEdge(middlestate,middlestate,elementpoundlabelid);
00204 fsm->CreateLabelEdge(middlestate,middlestate,attribpoundlabelid);
00205 }
00206 }
00207 else
00208 {
00209 if(from+1!=to)
00210 PathParseError("Symbol '/' or '|' expected after '#'",from+1);
00211
00212 if(ignore_pounds)
00213 fsm->CreateNegEdge(fromstate,tostate);
00214 else
00215 {
00216 fsm->CreateLabelEdge(fromstate,tostate,elementpoundlabelid);
00217 fsm->CreateLabelEdge(fromstate,tostate,attribpoundlabelid);
00218 }
00219 }
00220 return;
00221
00222 case '*':
00223 if(from+1!=to)
00224 PathParseError("Symbol '/' or '|' expected after '*'",from+1);
00225
00226 fsm->CreateNegEdge(fromstate,tostate);
00227 return;
00228
00229 default:
00230 fsm->CreateLabelEdge(fromstate,tostate,globallabeldict.GetLabelOrAttrib(from,to-from,0));
00231 }
00232 }
00233
00234 inline void VPathExpr::ParseXPathItem(char * &startptr,char *endptr,FSM *fsm,FSMState *fromstate,FSMState *tostate,char ignore_pounds)
00235
00236
00237
00238 {
00239 FSMState *curfromstate=fromstate;
00240 FSMState *curtostate=fsm->CreateState();
00241 char *to;
00242
00243 do
00244 {
00245 if(*startptr=='(')
00246
00247 {
00248 startptr++;
00249 ParseXPathItem(startptr,endptr,fsm,curfromstate,curtostate,ignore_pounds);
00250
00251
00252 if((startptr==endptr)||(*startptr!=')'))
00253 PathParseError("Missing closed parenthesis ')'",startptr);
00254 startptr++;
00255 }
00256 else
00257 {
00258
00259 to=startptr;
00260 while((to<endptr)&&(*to!='/')&&(*to!='=')&&(*to!='|')&&(*to!=')'))
00261 to++;
00262
00263
00264 CreateXPathEdge(startptr,to,fsm,curfromstate,curtostate,ignore_pounds);
00265
00266 startptr=to;
00267 }
00268
00269 if(startptr==endptr)
00270 break;
00271
00272
00273 switch(*startptr)
00274 {
00275 case '/':
00276 startptr++;
00277
00278 if((startptr<endptr)&&(*startptr=='/'))
00279
00280 {
00281
00282
00283
00284 FSMState *middlestate=fsm->CreateState();
00285 fsm->CreateEmptyEdge(curtostate,middlestate);
00286 fsm->CreateNegEdge(middlestate,middlestate,NULL);
00287
00288 curtostate=fsm->CreateState();
00289 fsm->CreateEmptyEdge(middlestate,curtostate);
00290 startptr++;
00291 }
00292 if((startptr==endptr)||(*startptr=='=')||(*startptr==')'))
00293
00294 {
00295 fsm->CreateEmptyEdge(curtostate,tostate);
00296 return;
00297 }
00298
00299 curfromstate=curtostate;
00300 curtostate=fsm->CreateState();
00301 break;
00302
00303 case '|':
00304 startptr++;
00305 break;
00306
00307 case ')':
00308 case '=':
00309 fsm->CreateEmptyEdge(curtostate,tostate);
00310 return;
00311
00312 default:
00313 PathParseError("Invalid symbol",startptr);
00314 }
00315 }
00316 while(1);
00317
00318 fsm->CreateEmptyEdge(curtostate,tostate);
00319 }
00320
00321 inline FSM *VPathExpr::ParseXPath(char * &str,char *endptr,char ignore_pounds)
00322
00323
00324 {
00325 FSM *fsm=new(fsmmem) FSM();
00326 FSMState *startstate=fsm->CreateState();
00327
00328 fsm->SetStartState(startstate);
00329
00330 str++;
00331
00332 if(str==endptr)
00333 {
00334 startstate->SetFinal();
00335 return fsm;
00336 }
00337
00338 if(str[0]=='/')
00339 {
00340
00341 fsm->CreateNegEdge(startstate,startstate,NULL);
00342
00343 if(str+1==endptr)
00344
00345 {
00346 str++;
00347 startstate->SetFinal();
00348 return fsm;
00349 }
00350 FSMState *middlestate=fsm->CreateState();
00351 fsm->CreateEmptyEdge(startstate,middlestate);
00352 startstate=middlestate;
00353 str++;
00354 }
00355
00356
00357 FSMState *finalstate=fsm->CreateState();
00358 finalstate->SetFinal();
00359
00360
00361
00362
00363 if(*str!='=')
00364 ParseXPathItem(str,endptr,fsm,startstate,finalstate,ignore_pounds);
00365 else
00366 fsm->CreateEmptyEdge(startstate,finalstate);
00367
00368
00369 if(str<endptr)
00370 {
00371 if((str+1==endptr)||(*str!='=')||(str[1]!='>'))
00372 PathParseError("Unexpected character",str);
00373
00374 str+=2;
00375
00376 }
00377 return fsm;
00378 }
00379
00380 void VPathExpr::CreateFromString(char * &str,char *endptr)
00381
00382
00383
00384
00385 {
00386 #ifdef FULL_PATHEXPR
00387 VRegExpr *regexpr;
00388 #endif
00389 FSM *tmpforwardfsm;
00390 char *savestr=str;
00391
00392 regexprstr=str;
00393 regexprendptr=endptr;
00394
00395
00396 tmpmem.StartNewMemBlock();
00397
00398
00399 fsmmem=&tmpmem;
00400 fsmtmpmem=&tmpmem;
00401
00402
00403 if(*str=='/')
00404 tmpforwardfsm=ParseXPath(str,endptr,0);
00405 else
00406 {
00407 #ifdef FULL_PATHEXPR
00408
00409
00410 vregexprmem=&tmpmem;
00411 regexpr=VRegExpr::ParseVRegExpr(str,endptr);
00412
00413
00414
00415 tmpforwardfsm=regexpr->CreateNonDetFSM();
00416 #else
00417 PathParseError("Character '/' expected",str);
00418 #endif
00419 }
00420
00421
00422 tmpforwardfsm=tmpforwardfsm->MakeDeterministic();
00423
00424
00425 tmpforwardfsm=tmpforwardfsm->Minimize();
00426
00427
00428
00429
00430
00431 fsmmem=&tmpmem;
00432
00433
00434 reversefsm=tmpforwardfsm->CreateReverseFSM();
00435
00436 reversefsm=reversefsm->MakeDeterministic();
00437
00438
00439 fsmmem=&mainmem;
00440 mainmem.WordAlign();
00441
00442
00443 reversefsm=reversefsm->Minimize();
00444
00445
00446 reversefsm->FindAcceptingStates();
00447
00448
00449
00450 reversefsm->ComputeStatesHasPoundsAhead();
00451
00452 #ifdef USE_FORWARD_DATAGUIDE
00453 if(*savestr=='/')
00454 forwardfsm=ParseXPath(savestr,endptr,1);
00455 else
00456 {
00457 Error("Fatal Error in VPathExpr::CreateFromString\n");
00458 Exit();
00459 }
00460
00461
00462
00463 forwardfsm=forwardfsm->MakeDeterministic();
00464
00465 fsmmem=&mainmem;
00466 mainmem.WordAlign();
00467
00468
00469 forwardfsm=forwardfsm->Minimize();
00470
00471 #endif
00472
00473
00474 tmpmem.RemoveLastMemBlock();
00475
00476
00477
00478
00479
00480
00481
00482 leftwhitespacescompress=WHITESPACE_DEFAULT;
00483 rightwhitespacescompress=WHITESPACE_DEFAULT;
00484
00485
00486 char *textstring="t";
00487 usercompressor=compressman.CreateCompressorInstance(textstring,textstring+1);
00488
00489 regexprusercompressptr=str;
00490
00491
00492 ParseUserCompressorString(str,endptr);
00493
00494 regexprendptr=str;
00495 }
00496
00497 void VPathExpr::InitWhitespaceHandling()
00498
00499
00500
00501 {
00502 if(leftwhitespacescompress==WHITESPACE_DEFAULT)
00503 leftwhitespacescompress=globalleftwhitespacescompress;
00504
00505 if(rightwhitespacescompress==WHITESPACE_DEFAULT)
00506 rightwhitespacescompress=globalrightwhitespacescompress;
00507 }
00508
00509 void VPathExprMan::InitWhitespaceHandling()
00510
00511
00512
00513 {
00514 VPathExpr *pathexpr=pathexprs;
00515 while(pathexpr!=NULL)
00516 {
00517 pathexpr->InitWhitespaceHandling();
00518 pathexpr=pathexpr->next;
00519 }
00520 }
00521
00522 #endif
00523
00524 void VPathExpr::PrintRegExpr()
00525
00526 {
00527 fwrite(regexprstr,regexprendptr-regexprstr,1,stdout);
00528 }
00529
00530
00531
00532
00533
00534
00535 #ifdef XMILL
00536 void VPathExprMan::AddNewVPathExpr(char * &str,char *endptr)
00537
00538 {
00539
00540 VPathExpr *item=new(&mainmem) VPathExpr();
00541
00542 item->idx=pathexprnum+1;
00543 pathexprnum++;
00544
00545
00546 item->CreateFromString(str,endptr);
00547
00548
00549 if(pathexprs==NULL)
00550 pathexprs=lastpathexpr=item;
00551 else
00552 {
00553 lastpathexpr->next=item;
00554 lastpathexpr=item;
00555 }
00556 }
00557 #endif
00558
00559
00560
00561
00562
00563 #ifdef XMILL
00564 inline void VPathExpr::Store(MemStreamer *output)
00565
00566 {
00567
00568
00569 output->StoreUInt32(regexprendptr-regexprusercompressptr);
00570 output->StoreData(regexprusercompressptr,regexprendptr-regexprusercompressptr);
00571 }
00572 #endif
00573
00574 #ifdef XDEMILL
00575 void VPathExpr::Load(SmallBlockUncompressor *uncompress)
00576
00577
00578 {
00579 char *ptr;
00580 unsigned long len=uncompress->LoadString(&ptr);
00581
00582
00583 regexprusercompressptr=mainmem.GetByteBlock(len);
00584 mainmem.WordAlign();
00585
00586 memcpy(regexprusercompressptr,ptr,len);
00587
00588 regexprendptr=regexprusercompressptr+len;
00589 regexprstr=NULL;
00590
00591 char *str=regexprusercompressptr;
00592
00593
00594 char *textstring="t";
00595 useruncompressor=compressman.CreateUncompressorInstance(textstring,textstring+1);
00596
00597
00598 ParseUserCompressorString(str,regexprendptr);
00599 }
00600 #endif
00601
00602
00603
00604
00605 #ifdef XMILL
00606 void VPathExprMan::Store(MemStreamer *memstream)
00607
00608 {
00609
00610 memstream->StoreUInt32(pathexprnum);
00611
00612 VPathExpr *curpathexpr=pathexprs;
00613
00614
00615 while(curpathexpr!=NULL)
00616 {
00617 curpathexpr->Store(memstream);
00618 curpathexpr=curpathexpr->next;
00619 }
00620 }
00621 #endif
00622
00623 #ifdef XDEMILL
00624 void VPathExprMan::Load(SmallBlockUncompressor *uncompress)
00625
00626 {
00627
00628 pathexprnum=uncompress->LoadUInt32();
00629
00630 VPathExpr **pathexprref=&pathexprs;
00631
00632
00633
00634 for(unsigned long i=0;i<pathexprnum;i++)
00635 {
00636 *pathexprref=new(&mainmem) VPathExpr();
00637
00638 (*pathexprref)->idx=i;
00639
00640 (*pathexprref)->Load(uncompress);
00641
00642 pathexprref=&((*pathexprref)->next);
00643 }
00644 }
00645 #endif