Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members  

VRegExpr.cpp

Go to the documentation of this file.
00001 #include "VRegExpr.hpp"
00002 #include "FSM.hpp"
00003 
00004 extern MemStreamer tmpmem;
00005 extern MemStreamer mainmem;
00006 
00007 MemStreamer *vregexprmem;
00008 
00009 extern TLabelID elementpoundlabelid;
00010 extern TLabelID attribpoundlabelid;
00011 
00012 //****************************************************************************
00013 //****************************************************************************
00014 
00015 void VRegExpr::Print()
00016 {
00017    printf("{");
00018    switch(type)
00019    {
00020    case VP_ITEMTYPE_LABEL:    globallabeldict.PrintLabel(labelid);break;
00021    case VP_ITEMTYPE_DONTCARE: printf("_");break;
00022    case VP_ITEMTYPE_POUND:    printf("#");break;
00023    case VP_ITEMTYPE_STAR:     children.child1->Print();printf("*");break;
00024    case VP_ITEMTYPE_PLUS:     children.child1->Print();printf("+");break;
00025    case VP_ITEMTYPE_OPT:      children.child1->Print();printf("?");break;
00026    case VP_ITEMTYPE_NOT:      children.child1->Print();printf("~");break;
00027    case VP_ITEMTYPE_EMPTY:    printf("()");;
00028    case VP_ITEMTYPE_GROUP:    printf("(");children.child1->Print();printf(")");break;
00029    case VP_ITEMTYPE_MINUS:    children.child1->Print();
00030                               printf("-");
00031                               children.child2->Print();
00032                               break;
00033 
00034    case VP_ITEMTYPE_SEQ:      children.child1->Print();
00035                               printf(",");
00036                               children.child2->Print();
00037                               break;
00038 
00039    case VP_ITEMTYPE_ALT:      children.child1->Print();
00040                               printf("|");
00041                               children.child2->Print();
00042                               break;
00043 
00044    case VP_ITEMTYPE_AND:      children.child1->Print();
00045                               printf("&");
00046                               children.child2->Print();
00047                               break;
00048    }
00049    printf("}");
00050 }
00051 
00052 //****************************************************************************
00053 //****************************************************************************
00054 
00055 FSM *VRegExpr::CreateNonDetFSM()
00056 {
00057    FSM *fsm;
00058    FSMState *startstate,*finalstate;
00059 
00060    // We create the finite state automaton
00061    fsm=new(fsmtmpmem) FSM();
00062 
00063    startstate=fsm->CreateState();
00064    finalstate=fsm->CreateState(1);
00065 
00066    fsm->SetStartState(startstate);
00067 
00068    CreateFSMEdges(fsm,startstate,finalstate);
00069 
00070    return fsm;
00071 }
00072 
00073 void VRegExpr::CreateFSMEdges(FSM *newfsm,FSMState *fromstate,FSMState *tostate)
00074 {
00075    FSMState       *newstate;
00076 
00077    switch(type)
00078    {
00079    case VP_ITEMTYPE_LABEL:
00080       newfsm->CreateLabelEdge(fromstate,tostate,labelid);
00081       return;
00082 
00083    case VP_ITEMTYPE_DONTCARE:
00084       newfsm->CreateNegEdge(fromstate,tostate);
00085       return;
00086 
00087    case VP_ITEMTYPE_POUND:
00088       newfsm->CreateLabelEdge(fromstate,tostate,elementpoundlabelid);
00089       newfsm->CreateLabelEdge(fromstate,tostate,attribpoundlabelid);
00090       return;
00091 
00092    case VP_ITEMTYPE_STAR:
00093       newstate=newfsm->CreateState();
00094 
00095       newfsm->CreateEmptyEdge(fromstate,newstate);
00096 
00097       children.child1->CreateFSMEdges(newfsm,newstate,newstate);
00098 
00099       newfsm->CreateEmptyEdge(newstate,tostate);
00100       return;
00101 
00102    case VP_ITEMTYPE_PLUS:
00103       newstate=newfsm->CreateState();
00104 
00105       children.child1->CreateFSMEdges(newfsm,fromstate,newstate);
00106       children.child1->CreateFSMEdges(newfsm,newstate,newstate);
00107 
00108       newfsm->CreateEmptyEdge(newstate,tostate);
00109       return;
00110 
00111    case VP_ITEMTYPE_OPT:
00112       children.child1->CreateFSMEdges(newfsm,fromstate,tostate);
00113       newfsm->CreateEmptyEdge(fromstate,tostate);
00114       return;
00115 
00116    case VP_ITEMTYPE_NOT:
00117    {
00118       FSM *fsm1,*negfsm1;
00119       fsm1=children.child1->CreateNonDetFSM();
00120 
00121       negfsm1=fsm1->CreateNegateFSM();
00122       newfsm->AddFSM(fromstate,tostate,negfsm1);
00123       return;
00124    }
00125 
00126    case VP_ITEMTYPE_EMPTY:
00127       newfsm->CreateEmptyEdge(fromstate,tostate);
00128       return;
00129 
00130    case VP_ITEMTYPE_GROUP:
00131       children.child1->CreateFSMEdges(newfsm,fromstate,tostate);
00132       return;
00133 
00134    case VP_ITEMTYPE_MINUS:
00135    {
00136       FSM *fsm1,*fsm2,*negfsm1,*combinefsm;
00137       FSMState *startstate,*finalstate;
00138       
00139       fsm1=children.child1->CreateNonDetFSM();
00140       fsm2=children.child2->CreateNonDetFSM();
00141 
00142       negfsm1=fsm1->CreateNegateFSM();
00143 
00144       combinefsm=new(fsmtmpmem) FSM();
00145 
00146       startstate=combinefsm->CreateState();
00147       finalstate=combinefsm->CreateState(1);
00148 
00149       combinefsm->SetStartState(startstate);
00150 
00151       combinefsm->AddFSM(startstate,finalstate,negfsm1);
00152       combinefsm->AddFSM(startstate,finalstate,fsm2);
00153 
00154       combinefsm=combinefsm->CreateNegateFSM();
00155 
00156       newfsm->AddFSM(fromstate,tostate,combinefsm);
00157       return;
00158    }
00159 
00160    case VP_ITEMTYPE_SEQ:
00161    {
00162       FSMState *middlestate=newfsm->CreateState();
00163       
00164       children.child1->CreateFSMEdges(newfsm,fromstate,middlestate);
00165       children.child2->CreateFSMEdges(newfsm,middlestate,tostate);
00166       return;
00167    }
00168    case VP_ITEMTYPE_ALT:
00169       children.child1->CreateFSMEdges(newfsm,fromstate,tostate);
00170       children.child2->CreateFSMEdges(newfsm,fromstate,tostate);
00171       return;
00172 
00173    case VP_ITEMTYPE_AND:
00174    {
00175       FSM *fsm1,*fsm2,*negfsm1,*negfsm2,*combinefsm;
00176       FSMState *startstate,*finalstate;
00177       
00178       fsm1=children.child1->CreateNonDetFSM();
00179       fsm2=children.child2->CreateNonDetFSM();
00180 
00181       negfsm1=fsm1->CreateNegateFSM();
00182       negfsm2=fsm2->CreateNegateFSM();
00183 
00184       combinefsm=new(fsmtmpmem) FSM();
00185 
00186       startstate=combinefsm->CreateState();
00187       finalstate=combinefsm->CreateState(1);
00188 
00189       combinefsm->AddFSM(startstate,finalstate,negfsm1);
00190       combinefsm->AddFSM(startstate,finalstate,negfsm2);
00191 
00192       combinefsm=combinefsm->CreateNegateFSM();
00193 
00194       newfsm->AddFSM(fromstate,tostate,combinefsm);
00195       return;
00196    }
00197    }
00198 }
00199 
00200 //****************************************************************************
00201 //****************************************************************************
00202 //****************************************************************************
00203 
00204 // Saves the pointer to position where the error occurred
00205 char *vregexprerrptr;   
00206 char *vregexprerrstr;
00207 
00208 TLabelID VRegExpr::ParseLabel(char **str,char *endptr)
00209    // Parses a single label - either of the form
00210    // <label> (for elements) or @label@ (for attributes)
00211 {
00212    char  isattrib=0;
00213 
00214    if(**str=='<')
00215       isattrib=0;
00216    else
00217    {
00218       if(**str=='@')
00219          isattrib=1;
00220    }
00221    (*str)++;
00222 
00223    char *saveptr=*str;
00224 
00225    while(*str<endptr)
00226    {
00227       if(((isattrib==0)&&(**str=='>'))||
00228          ((isattrib==1)&&(**str=='@')))
00229       {
00230          (*str)++;
00231          return globallabeldict.GetLabelOrAttrib(saveptr,*str-saveptr-1,isattrib);
00232       }
00233       (*str)++;
00234    }
00235 
00236    Error("Could not find ending '>' in '");
00237    ErrorCont(saveptr,10);  // !!!
00238    Exit();
00239    return 0;
00240 }
00241 
00242 //*************************************************************************************
00243 //*************************************************************************************
00244 //*************************************************************************************
00245 //*************************************************************************************
00246 
00247 #define STACKITEM_REGEXPR     0
00248 #define STACKITEM_OPENPARENC  1
00249 #define STACKITEM_BINOPSTART  2
00250 #define STACKITEM_SEQ         2
00251 #define STACKITEM_MINUS       3
00252 #define STACKITEM_ALT         4
00253 #define STACKITEM_AND         5
00254 
00255 struct RegExprStackItem
00256    // This data structure is used to represent a single item on
00257    // the operator stack. An item is either a regular expression
00258    // or an operator (see STACKITEM_...)
00259 {
00260    RegExprStackItem  *prev;
00261    unsigned long     type;
00262    VRegExpr          *regexpr;
00263 
00264    void *operator new(size_t size)  {  return vregexprmem->GetByteBlock(size);  }
00265    void operator delete(void *ptr)  {}
00266 
00267    RegExprStackItem(VRegExpr *myregexpr,RegExprStackItem *myprev)
00268    {
00269       prev=myprev;
00270       type=STACKITEM_REGEXPR;
00271       regexpr=myregexpr;
00272    }
00273 
00274    RegExprStackItem(unsigned long mytype,RegExprStackItem *myprev)
00275    {
00276       prev=myprev;
00277       type=mytype;
00278       regexpr=NULL;
00279    }
00280 };
00281 
00282 inline char ConvertOpCode(unsigned long type)
00283 {
00284    switch(type)
00285    {
00286    case STACKITEM_SEQ:  return VP_ITEMTYPE_SEQ;
00287    case STACKITEM_MINUS:return VP_ITEMTYPE_MINUS;
00288    case STACKITEM_ALT:  return VP_ITEMTYPE_ALT;
00289    case STACKITEM_AND:  return VP_ITEMTYPE_AND;
00290    }
00291    return 0;
00292 }
00293 
00294 inline void ReduceAll(RegExprStackItem **stackptr)
00295 {
00296    VRegExpr *regexpr;
00297 
00298    while(((*stackptr)->prev!=NULL)&&
00299          ((*stackptr)->prev->type>=STACKITEM_BINOPSTART))
00300    {
00301       regexpr=new VRegExpr(ConvertOpCode((*stackptr)->prev->type),
00302                            (*stackptr)->prev->prev->regexpr,
00303                            (*stackptr)->regexpr);
00304       *stackptr=new RegExprStackItem(regexpr,(*stackptr)->prev->prev->prev);
00305    }
00306 }
00307 
00308 inline void HandleBinOp(RegExprStackItem  **stackptr,unsigned opcode)
00309 {
00310    VRegExpr *regexpr;
00311 
00312    if((*stackptr==NULL)||((*stackptr)->type!=STACKITEM_REGEXPR))
00313    {
00314       Error("Binary operation !!\n");
00315       Exit();
00316    }
00317 
00318    while(((*stackptr)->prev!=NULL)&&
00319          ((*stackptr)->prev->type>=opcode))
00320    {
00321       regexpr=new VRegExpr(ConvertOpCode((*stackptr)->prev->type),
00322                            (*stackptr)->prev->prev->regexpr,
00323                            (*stackptr)->regexpr);
00324       *stackptr=new RegExprStackItem(regexpr,(*stackptr)->prev->prev->prev);
00325    }
00326    *stackptr=new RegExprStackItem(opcode,*stackptr);
00327 }
00328 
00329 inline void HandleRegExpr(RegExprStackItem  **stackptr,VRegExpr *regexpr)
00330    // Adds a new expression to the stack
00331 {
00332    if((*stackptr!=NULL)&&
00333       ((*stackptr)->type==STACKITEM_REGEXPR))
00334       HandleBinOp(stackptr,STACKITEM_SEQ);
00335 
00336    *stackptr=new RegExprStackItem(regexpr,*stackptr);
00337 }
00338 
00339 inline void HandleUnaryOp(RegExprStackItem  **stackptr,char opcode,char *curptr)
00340 {
00341    if((*stackptr==NULL)||
00342       ((*stackptr)->type!=STACKITEM_REGEXPR))
00343    {
00344       Error("Unexpected unary operator");
00345       Exit();
00346    }
00347 
00348    VRegExpr *regexpr=new VRegExpr(opcode,(*stackptr)->regexpr);
00349    *stackptr=new RegExprStackItem(regexpr,(*stackptr)->prev);
00350 }
00351 
00352 inline void HandleOpenParenc(RegExprStackItem  **stackptr)
00353 {
00354    if((*stackptr!=NULL)&&
00355       ((*stackptr)->type==STACKITEM_REGEXPR))
00356       HandleBinOp(stackptr,STACKITEM_SEQ);
00357 
00358    *stackptr=new RegExprStackItem(STACKITEM_OPENPARENC,*stackptr);
00359 }
00360 
00361 inline void HandleCloseParenc(RegExprStackItem  **stackptr,char *curptr)
00362 {
00363    ReduceAll(stackptr);
00364 
00365    if(((*stackptr)->prev==NULL)||
00366       ((*stackptr)->prev->type!=STACKITEM_OPENPARENC))
00367    {
00368       Error("Unexpected closing bracket");
00369       Exit();
00370    }
00371    VRegExpr *regexpr=new VRegExpr(VP_ITEMTYPE_GROUP,(*stackptr)->regexpr,NULL);
00372 
00373    *stackptr=new RegExprStackItem(regexpr,(*stackptr)->prev->prev);
00374 }
00375 
00376 VRegExpr *VRegExpr::ParseVRegExpr(char * &str,char *endptr)
00377 {
00378    char              reachedend=false;
00379    RegExprStackItem  *stack=NULL;
00380 
00381    vregexprerrstr=NULL;
00382    vregexprerrptr=NULL;
00383 
00384    do
00385    {
00386       switch(*str)
00387       {
00388       case '|':   HandleBinOp(&stack,STACKITEM_ALT);
00389                   str++;
00390                   break;
00391       case '&':   HandleBinOp(&stack,STACKITEM_AND);
00392                   str++;
00393                   break;
00394       case '-':   HandleBinOp(&stack,STACKITEM_MINUS);
00395                   str++;
00396                   break;
00397       case '<':
00398       case '@':{
00399                   TLabelID labelid=ParseLabel(&str,endptr);
00400                   VRegExpr *regexpr=new VRegExpr(labelid);
00401                   HandleRegExpr(&stack,regexpr);
00402                   break;
00403                }
00404 
00405       case '*':   HandleUnaryOp(&stack,VP_ITEMTYPE_STAR,str);
00406                   str++;
00407                   break;
00408 
00409       case '+':   HandleUnaryOp(&stack,VP_ITEMTYPE_PLUS,str);
00410                   str++;
00411                   break;
00412 
00413       case '?':   HandleUnaryOp(&stack,VP_ITEMTYPE_OPT,str);
00414                   str++;
00415                   break;
00416 
00417       case '~':   HandleUnaryOp(&stack,VP_ITEMTYPE_NOT,str);
00418                   str++;
00419                   break;
00420 
00421       case '(':   if(str[1]==')')
00422                   {
00423                      VRegExpr *regexpr=new VRegExpr(VP_ITEMTYPE_EMPTY);
00424                      HandleRegExpr(&stack,regexpr);
00425                      str+=2;
00426                   }
00427                   else
00428                   {
00429                      HandleOpenParenc(&stack);
00430                      str++;
00431                   }
00432                   break;
00433 
00434       case ')':   HandleCloseParenc(&stack,str);
00435                   str++;
00436                   break;
00437 
00438       case '_':{  VRegExpr *regexpr=new VRegExpr(VP_ITEMTYPE_DONTCARE);
00439                   HandleRegExpr(&stack,regexpr);
00440                   str++;
00441                   break;
00442                }
00443 
00444       case '#':{  VRegExpr *regexpr=new VRegExpr(VP_ITEMTYPE_POUND);
00445                   HandleRegExpr(&stack,regexpr);
00446                   str++;
00447                   break;
00448                }
00449       default:    reachedend=1;
00450                   break;
00451       }
00452       if(reachedend==1)
00453          break;
00454    }
00455    while(str<endptr);
00456 
00457    if(stack==NULL)
00458    {
00459       Error("Invalid path expression");
00460       Exit();
00461    }
00462 
00463    ReduceAll(&stack);
00464 
00465    if(stack->prev!=NULL)
00466    {
00467       // stackptr->prev is a open parenthesis !
00468 
00469       Error("Closing parenthesis expected");
00470       Exit();
00471 
00472       vregexprerrstr="Closing parenthesis expected";
00473       vregexprerrptr=str;
00474       return NULL;
00475    }
00476    return stack->regexpr;
00477 }
00478 
00479 //******************************************************************************
00480 //******************************************************************************
00481 
00482 FSM *VRegExpr::CreateFSM()
00483 {
00484    FSM      *fsm;
00485 
00486    fsmtmpmem=&tmpmem;
00487    fsmmem=&tmpmem;
00488 
00489    fsm=CreateNonDetFSM();
00490 
00491 //   fsm->Print();
00492 
00493    // Let's make the FSM deterministic
00494    fsm=fsm->MakeDeterministic();
00495 
00496 //   fsm->Print();
00497 
00498    // Now, we store the FSM in the main memory
00499    fsmmem=&mainmem;
00500    mainmem.WordAlign();
00501 
00502    // Let's minimize
00503    fsm=fsm->Minimize();
00504 
00505 //   fsm->Print();
00506 
00507    return fsm;
00508 }
00509 
00510 //******************************************************************************
00511 //******************************************************************************
00512 //******************************************************************************
00513 //******************************************************************************

Generated on Sat Oct 13 16:08:41 2001 for XMILL by doxygen1.2.11.1 written by Dimitri van Heesch, © 1997-2001