00001 #include "VRegExpr.hpp"
00002 #include "FSM.hpp"
00003
00004 extern MemStreamer tmpmem;
00005 extern MemStreamer mainmem;
00006
00007 MemStreamer *vregexprmem;
00008
00009 extern TLabelID elementpoundlabelid;
00010 extern TLabelID attribpoundlabelid;
00011
00012
00013
00014
00015 void VRegExpr::Print()
00016 {
00017 printf("{");
00018 switch(type)
00019 {
00020 case VP_ITEMTYPE_LABEL: globallabeldict.PrintLabel(labelid);break;
00021 case VP_ITEMTYPE_DONTCARE: printf("_");break;
00022 case VP_ITEMTYPE_POUND: printf("#");break;
00023 case VP_ITEMTYPE_STAR: children.child1->Print();printf("*");break;
00024 case VP_ITEMTYPE_PLUS: children.child1->Print();printf("+");break;
00025 case VP_ITEMTYPE_OPT: children.child1->Print();printf("?");break;
00026 case VP_ITEMTYPE_NOT: children.child1->Print();printf("~");break;
00027 case VP_ITEMTYPE_EMPTY: printf("()");;
00028 case VP_ITEMTYPE_GROUP: printf("(");children.child1->Print();printf(")");break;
00029 case VP_ITEMTYPE_MINUS: children.child1->Print();
00030 printf("-");
00031 children.child2->Print();
00032 break;
00033
00034 case VP_ITEMTYPE_SEQ: children.child1->Print();
00035 printf(",");
00036 children.child2->Print();
00037 break;
00038
00039 case VP_ITEMTYPE_ALT: children.child1->Print();
00040 printf("|");
00041 children.child2->Print();
00042 break;
00043
00044 case VP_ITEMTYPE_AND: children.child1->Print();
00045 printf("&");
00046 children.child2->Print();
00047 break;
00048 }
00049 printf("}");
00050 }
00051
00052
00053
00054
00055 FSM *VRegExpr::CreateNonDetFSM()
00056 {
00057 FSM *fsm;
00058 FSMState *startstate,*finalstate;
00059
00060
00061 fsm=new(fsmtmpmem) FSM();
00062
00063 startstate=fsm->CreateState();
00064 finalstate=fsm->CreateState(1);
00065
00066 fsm->SetStartState(startstate);
00067
00068 CreateFSMEdges(fsm,startstate,finalstate);
00069
00070 return fsm;
00071 }
00072
00073 void VRegExpr::CreateFSMEdges(FSM *newfsm,FSMState *fromstate,FSMState *tostate)
00074 {
00075 FSMState *newstate;
00076
00077 switch(type)
00078 {
00079 case VP_ITEMTYPE_LABEL:
00080 newfsm->CreateLabelEdge(fromstate,tostate,labelid);
00081 return;
00082
00083 case VP_ITEMTYPE_DONTCARE:
00084 newfsm->CreateNegEdge(fromstate,tostate);
00085 return;
00086
00087 case VP_ITEMTYPE_POUND:
00088 newfsm->CreateLabelEdge(fromstate,tostate,elementpoundlabelid);
00089 newfsm->CreateLabelEdge(fromstate,tostate,attribpoundlabelid);
00090 return;
00091
00092 case VP_ITEMTYPE_STAR:
00093 newstate=newfsm->CreateState();
00094
00095 newfsm->CreateEmptyEdge(fromstate,newstate);
00096
00097 children.child1->CreateFSMEdges(newfsm,newstate,newstate);
00098
00099 newfsm->CreateEmptyEdge(newstate,tostate);
00100 return;
00101
00102 case VP_ITEMTYPE_PLUS:
00103 newstate=newfsm->CreateState();
00104
00105 children.child1->CreateFSMEdges(newfsm,fromstate,newstate);
00106 children.child1->CreateFSMEdges(newfsm,newstate,newstate);
00107
00108 newfsm->CreateEmptyEdge(newstate,tostate);
00109 return;
00110
00111 case VP_ITEMTYPE_OPT:
00112 children.child1->CreateFSMEdges(newfsm,fromstate,tostate);
00113 newfsm->CreateEmptyEdge(fromstate,tostate);
00114 return;
00115
00116 case VP_ITEMTYPE_NOT:
00117 {
00118 FSM *fsm1,*negfsm1;
00119 fsm1=children.child1->CreateNonDetFSM();
00120
00121 negfsm1=fsm1->CreateNegateFSM();
00122 newfsm->AddFSM(fromstate,tostate,negfsm1);
00123 return;
00124 }
00125
00126 case VP_ITEMTYPE_EMPTY:
00127 newfsm->CreateEmptyEdge(fromstate,tostate);
00128 return;
00129
00130 case VP_ITEMTYPE_GROUP:
00131 children.child1->CreateFSMEdges(newfsm,fromstate,tostate);
00132 return;
00133
00134 case VP_ITEMTYPE_MINUS:
00135 {
00136 FSM *fsm1,*fsm2,*negfsm1,*combinefsm;
00137 FSMState *startstate,*finalstate;
00138
00139 fsm1=children.child1->CreateNonDetFSM();
00140 fsm2=children.child2->CreateNonDetFSM();
00141
00142 negfsm1=fsm1->CreateNegateFSM();
00143
00144 combinefsm=new(fsmtmpmem) FSM();
00145
00146 startstate=combinefsm->CreateState();
00147 finalstate=combinefsm->CreateState(1);
00148
00149 combinefsm->SetStartState(startstate);
00150
00151 combinefsm->AddFSM(startstate,finalstate,negfsm1);
00152 combinefsm->AddFSM(startstate,finalstate,fsm2);
00153
00154 combinefsm=combinefsm->CreateNegateFSM();
00155
00156 newfsm->AddFSM(fromstate,tostate,combinefsm);
00157 return;
00158 }
00159
00160 case VP_ITEMTYPE_SEQ:
00161 {
00162 FSMState *middlestate=newfsm->CreateState();
00163
00164 children.child1->CreateFSMEdges(newfsm,fromstate,middlestate);
00165 children.child2->CreateFSMEdges(newfsm,middlestate,tostate);
00166 return;
00167 }
00168 case VP_ITEMTYPE_ALT:
00169 children.child1->CreateFSMEdges(newfsm,fromstate,tostate);
00170 children.child2->CreateFSMEdges(newfsm,fromstate,tostate);
00171 return;
00172
00173 case VP_ITEMTYPE_AND:
00174 {
00175 FSM *fsm1,*fsm2,*negfsm1,*negfsm2,*combinefsm;
00176 FSMState *startstate,*finalstate;
00177
00178 fsm1=children.child1->CreateNonDetFSM();
00179 fsm2=children.child2->CreateNonDetFSM();
00180
00181 negfsm1=fsm1->CreateNegateFSM();
00182 negfsm2=fsm2->CreateNegateFSM();
00183
00184 combinefsm=new(fsmtmpmem) FSM();
00185
00186 startstate=combinefsm->CreateState();
00187 finalstate=combinefsm->CreateState(1);
00188
00189 combinefsm->AddFSM(startstate,finalstate,negfsm1);
00190 combinefsm->AddFSM(startstate,finalstate,negfsm2);
00191
00192 combinefsm=combinefsm->CreateNegateFSM();
00193
00194 newfsm->AddFSM(fromstate,tostate,combinefsm);
00195 return;
00196 }
00197 }
00198 }
00199
00200
00201
00202
00203
00204
00205 char *vregexprerrptr;
00206 char *vregexprerrstr;
00207
00208 TLabelID VRegExpr::ParseLabel(char **str,char *endptr)
00209
00210
00211 {
00212 char isattrib=0;
00213
00214 if(**str=='<')
00215 isattrib=0;
00216 else
00217 {
00218 if(**str=='@')
00219 isattrib=1;
00220 }
00221 (*str)++;
00222
00223 char *saveptr=*str;
00224
00225 while(*str<endptr)
00226 {
00227 if(((isattrib==0)&&(**str=='>'))||
00228 ((isattrib==1)&&(**str=='@')))
00229 {
00230 (*str)++;
00231 return globallabeldict.GetLabelOrAttrib(saveptr,*str-saveptr-1,isattrib);
00232 }
00233 (*str)++;
00234 }
00235
00236 Error("Could not find ending '>' in '");
00237 ErrorCont(saveptr,10);
00238 Exit();
00239 return 0;
00240 }
00241
00242
00243
00244
00245
00246
00247 #define STACKITEM_REGEXPR 0
00248 #define STACKITEM_OPENPARENC 1
00249 #define STACKITEM_BINOPSTART 2
00250 #define STACKITEM_SEQ 2
00251 #define STACKITEM_MINUS 3
00252 #define STACKITEM_ALT 4
00253 #define STACKITEM_AND 5
00254
00255 struct RegExprStackItem
00256
00257
00258
00259 {
00260 RegExprStackItem *prev;
00261 unsigned long type;
00262 VRegExpr *regexpr;
00263
00264 void *operator new(size_t size) { return vregexprmem->GetByteBlock(size); }
00265 void operator delete(void *ptr) {}
00266
00267 RegExprStackItem(VRegExpr *myregexpr,RegExprStackItem *myprev)
00268 {
00269 prev=myprev;
00270 type=STACKITEM_REGEXPR;
00271 regexpr=myregexpr;
00272 }
00273
00274 RegExprStackItem(unsigned long mytype,RegExprStackItem *myprev)
00275 {
00276 prev=myprev;
00277 type=mytype;
00278 regexpr=NULL;
00279 }
00280 };
00281
00282 inline char ConvertOpCode(unsigned long type)
00283 {
00284 switch(type)
00285 {
00286 case STACKITEM_SEQ: return VP_ITEMTYPE_SEQ;
00287 case STACKITEM_MINUS:return VP_ITEMTYPE_MINUS;
00288 case STACKITEM_ALT: return VP_ITEMTYPE_ALT;
00289 case STACKITEM_AND: return VP_ITEMTYPE_AND;
00290 }
00291 return 0;
00292 }
00293
00294 inline void ReduceAll(RegExprStackItem **stackptr)
00295 {
00296 VRegExpr *regexpr;
00297
00298 while(((*stackptr)->prev!=NULL)&&
00299 ((*stackptr)->prev->type>=STACKITEM_BINOPSTART))
00300 {
00301 regexpr=new VRegExpr(ConvertOpCode((*stackptr)->prev->type),
00302 (*stackptr)->prev->prev->regexpr,
00303 (*stackptr)->regexpr);
00304 *stackptr=new RegExprStackItem(regexpr,(*stackptr)->prev->prev->prev);
00305 }
00306 }
00307
00308 inline void HandleBinOp(RegExprStackItem **stackptr,unsigned opcode)
00309 {
00310 VRegExpr *regexpr;
00311
00312 if((*stackptr==NULL)||((*stackptr)->type!=STACKITEM_REGEXPR))
00313 {
00314 Error("Binary operation !!\n");
00315 Exit();
00316 }
00317
00318 while(((*stackptr)->prev!=NULL)&&
00319 ((*stackptr)->prev->type>=opcode))
00320 {
00321 regexpr=new VRegExpr(ConvertOpCode((*stackptr)->prev->type),
00322 (*stackptr)->prev->prev->regexpr,
00323 (*stackptr)->regexpr);
00324 *stackptr=new RegExprStackItem(regexpr,(*stackptr)->prev->prev->prev);
00325 }
00326 *stackptr=new RegExprStackItem(opcode,*stackptr);
00327 }
00328
00329 inline void HandleRegExpr(RegExprStackItem **stackptr,VRegExpr *regexpr)
00330
00331 {
00332 if((*stackptr!=NULL)&&
00333 ((*stackptr)->type==STACKITEM_REGEXPR))
00334 HandleBinOp(stackptr,STACKITEM_SEQ);
00335
00336 *stackptr=new RegExprStackItem(regexpr,*stackptr);
00337 }
00338
00339 inline void HandleUnaryOp(RegExprStackItem **stackptr,char opcode,char *curptr)
00340 {
00341 if((*stackptr==NULL)||
00342 ((*stackptr)->type!=STACKITEM_REGEXPR))
00343 {
00344 Error("Unexpected unary operator");
00345 Exit();
00346 }
00347
00348 VRegExpr *regexpr=new VRegExpr(opcode,(*stackptr)->regexpr);
00349 *stackptr=new RegExprStackItem(regexpr,(*stackptr)->prev);
00350 }
00351
00352 inline void HandleOpenParenc(RegExprStackItem **stackptr)
00353 {
00354 if((*stackptr!=NULL)&&
00355 ((*stackptr)->type==STACKITEM_REGEXPR))
00356 HandleBinOp(stackptr,STACKITEM_SEQ);
00357
00358 *stackptr=new RegExprStackItem(STACKITEM_OPENPARENC,*stackptr);
00359 }
00360
00361 inline void HandleCloseParenc(RegExprStackItem **stackptr,char *curptr)
00362 {
00363 ReduceAll(stackptr);
00364
00365 if(((*stackptr)->prev==NULL)||
00366 ((*stackptr)->prev->type!=STACKITEM_OPENPARENC))
00367 {
00368 Error("Unexpected closing bracket");
00369 Exit();
00370 }
00371 VRegExpr *regexpr=new VRegExpr(VP_ITEMTYPE_GROUP,(*stackptr)->regexpr,NULL);
00372
00373 *stackptr=new RegExprStackItem(regexpr,(*stackptr)->prev->prev);
00374 }
00375
00376 VRegExpr *VRegExpr::ParseVRegExpr(char * &str,char *endptr)
00377 {
00378 char reachedend=false;
00379 RegExprStackItem *stack=NULL;
00380
00381 vregexprerrstr=NULL;
00382 vregexprerrptr=NULL;
00383
00384 do
00385 {
00386 switch(*str)
00387 {
00388 case '|': HandleBinOp(&stack,STACKITEM_ALT);
00389 str++;
00390 break;
00391 case '&': HandleBinOp(&stack,STACKITEM_AND);
00392 str++;
00393 break;
00394 case '-': HandleBinOp(&stack,STACKITEM_MINUS);
00395 str++;
00396 break;
00397 case '<':
00398 case '@':{
00399 TLabelID labelid=ParseLabel(&str,endptr);
00400 VRegExpr *regexpr=new VRegExpr(labelid);
00401 HandleRegExpr(&stack,regexpr);
00402 break;
00403 }
00404
00405 case '*': HandleUnaryOp(&stack,VP_ITEMTYPE_STAR,str);
00406 str++;
00407 break;
00408
00409 case '+': HandleUnaryOp(&stack,VP_ITEMTYPE_PLUS,str);
00410 str++;
00411 break;
00412
00413 case '?': HandleUnaryOp(&stack,VP_ITEMTYPE_OPT,str);
00414 str++;
00415 break;
00416
00417 case '~': HandleUnaryOp(&stack,VP_ITEMTYPE_NOT,str);
00418 str++;
00419 break;
00420
00421 case '(': if(str[1]==')')
00422 {
00423 VRegExpr *regexpr=new VRegExpr(VP_ITEMTYPE_EMPTY);
00424 HandleRegExpr(&stack,regexpr);
00425 str+=2;
00426 }
00427 else
00428 {
00429 HandleOpenParenc(&stack);
00430 str++;
00431 }
00432 break;
00433
00434 case ')': HandleCloseParenc(&stack,str);
00435 str++;
00436 break;
00437
00438 case '_':{ VRegExpr *regexpr=new VRegExpr(VP_ITEMTYPE_DONTCARE);
00439 HandleRegExpr(&stack,regexpr);
00440 str++;
00441 break;
00442 }
00443
00444 case '#':{ VRegExpr *regexpr=new VRegExpr(VP_ITEMTYPE_POUND);
00445 HandleRegExpr(&stack,regexpr);
00446 str++;
00447 break;
00448 }
00449 default: reachedend=1;
00450 break;
00451 }
00452 if(reachedend==1)
00453 break;
00454 }
00455 while(str<endptr);
00456
00457 if(stack==NULL)
00458 {
00459 Error("Invalid path expression");
00460 Exit();
00461 }
00462
00463 ReduceAll(&stack);
00464
00465 if(stack->prev!=NULL)
00466 {
00467
00468
00469 Error("Closing parenthesis expected");
00470 Exit();
00471
00472 vregexprerrstr="Closing parenthesis expected";
00473 vregexprerrptr=str;
00474 return NULL;
00475 }
00476 return stack->regexpr;
00477 }
00478
00479
00480
00481
00482 FSM *VRegExpr::CreateFSM()
00483 {
00484 FSM *fsm;
00485
00486 fsmtmpmem=&tmpmem;
00487 fsmmem=&tmpmem;
00488
00489 fsm=CreateNonDetFSM();
00490
00491
00492
00493
00494 fsm=fsm->MakeDeterministic();
00495
00496
00497
00498
00499 fsmmem=&mainmem;
00500 mainmem.WordAlign();
00501
00502
00503 fsm=fsm->Minimize();
00504
00505
00506
00507 return fsm;
00508 }
00509
00510
00511
00512
00513