Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members  

FSM.cpp

Go to the documentation of this file.
00001 /*
00002 This product contains certain software code or other information
00003 ("AT&T Software") proprietary to AT&T Corp. ("AT&T").  The AT&T
00004 Software is provided to you "AS IS".  YOU ASSUME TOTAL RESPONSIBILITY
00005 AND RISK FOR USE OF THE AT&T SOFTWARE.  AT&T DOES NOT MAKE, AND
00006 EXPRESSLY DISCLAIMS, ANY EXPRESS OR IMPLIED WARRANTIES OF ANY KIND
00007 WHATSOEVER, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
00008 MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, WARRANTIES OF
00009 TITLE OR NON-INFRINGEMENT OF ANY INTELLECTUAL PROPERTY RIGHTS, ANY
00010 WARRANTIES ARISING BY USAGE OF TRADE, COURSE OF DEALING OR COURSE OF
00011 PERFORMANCE, OR ANY WARRANTY THAT THE AT&T SOFTWARE IS "ERROR FREE" OR
00012 WILL MEET YOUR REQUIREMENTS.
00013 
00014 Unless you accept a license to use the AT&T Software, you shall not
00015 reverse compile, disassemble or otherwise reverse engineer this
00016 product to ascertain the source code for any AT&T Software.
00017 
00018 (c) AT&T Corp. All rights reserved.  AT&T is a registered trademark of AT&T Corp.
00019 
00020 ***********************************************************************
00021 
00022 History:
00023 
00024       24/11/99  - initial release by Hartmut Liefke, liefke@seas.upenn.edu
00025                                      Dan Suciu,      suciu@research.att.com
00026 */
00027 
00028 //**************************************************************************
00029 //**************************************************************************
00030 
00031 // This module contains the implementation of finite state machines
00032 
00033 #include <stdio.h>
00034 
00035 #include "LabelDict.hpp"
00036 #include "FSM.hpp"
00037 #include "Load.hpp"
00038 
00039 //#include "Load.hpp"
00040 
00041 // We reserve two labels for '#' and '@#'
00042 TLabelID elementpoundlabelid=LABEL_UNDEFINED;
00043 TLabelID attribpoundlabelid=LABEL_UNDEFINED;
00044 
00045 void FSMInit()
00046    // This assigns the first two label IDs to '#' and '@#'
00047 {
00048    elementpoundlabelid=globallabeldict.CreateLabelOrAttrib("#",1,0);
00049    attribpoundlabelid=globallabeldict.CreateLabelOrAttrib("#",1,1);
00050 }
00051 
00052 MemStreamer *fsmtmpmem,*fsmmem;
00053 
00054 inline void *FSMLabel::operator new(size_t size, MemStreamer *mem)
00055 {
00056    return mem->GetByteBlock(size);
00057 }
00058 
00059 inline void *FSMEdge::operator new(size_t size, MemStreamer *mem)
00060 {
00061    return mem->GetByteBlock(size);
00062 }
00063 
00064 //******************************************************************************************
00065 
00066 inline char IsInLabelList(FSMLabel *labellist,TLabelID labelid)
00067    // Checks whether 'labelid' is in 'labellist'
00068 {
00069    FSMLabel *curitem=labellist;
00070    while(curitem!=NULL)
00071    {
00072       if(curitem->labelid==labelid)
00073          return 1;
00074       curitem=curitem->next;
00075    }
00076    return 0;
00077 }
00078 
00079 inline void RemoveFromLabelList(FSMLabel **listref,int labelid)
00080    // Removes a certain label from a labellist *listref
00081 {
00082    while(*listref!=NULL)
00083    {
00084       if((*listref)->labelid==labelid)
00085       {
00086          *listref=(*listref)->next;
00087          return;
00088       }
00089       listref=&((*listref)->next);
00090    }
00091 }
00092 
00093 inline void DupLabelList(FSMLabel *labellist,FSMLabel **newlabellist)
00094    // Stores the list of labels in 'labellist' to the list in 'newlabellist
00095 {
00096    while(labellist!=NULL)
00097    {
00098       *newlabellist=new(fsmmem) FSMLabel(labellist->labelid);
00099       newlabellist=&((*newlabellist)->next);
00100       labellist=labellist->next;
00101    }
00102 }
00103 
00104 //******************************************************************************************
00105 
00106 inline FSMEdge::FSMEdge(FSMState *tostate)
00107 {
00108    type=EDGETYPE_EMPTY;
00109    nextstate=tostate;
00110 }
00111 
00112 inline FSMEdge::FSMEdge(FSMState *tostate,TLabelID mylabelid)
00113 {
00114    type=EDGETYPE_LABEL;
00115    nextstate=tostate;
00116    labelid=mylabelid;
00117 }
00118 
00119 inline FSMEdge::FSMEdge(FSMState *tostate,FSMLabel *mylabels)
00120 {
00121    type=EDGETYPE_NEGLABELLIST;
00122    nextstate=tostate;
00123    labellist=mylabels;
00124 }
00125 
00126 inline char FSMEdge::DoesMatchLabel(TLabelID mylabelid)
00127    // Checks whether some edge matches a given label
00128 {
00129    if(labelid==LABEL_UNDEFINED)  // This means it matches everything
00130       return type==EDGETYPE_NEGLABELLIST;
00131 
00132    switch(type)
00133    {
00134    case EDGETYPE_LABEL:       return labelid==mylabelid;
00135    case EDGETYPE_NEGLABELLIST:return !IsInLabelList(labellist,mylabelid);
00136    case EDGETYPE_EMPTY:       return 0;
00137    }
00138    return 0;
00139 }
00140 
00141 //******************************************************************************************
00142 
00143 inline FSMState::FSMState(unsigned mystateid,char myisfinal)
00144 {
00145    stateid=mystateid;
00146    isfinal=myisfinal;
00147    outedges=NULL;
00148    next=NULL;
00149    origstateset=NULL;
00150 }
00151 
00152 inline FSMEdge *FSMState::FindNegEdge()
00153    // Find the negative outgoing edge in this state
00154    // (if there is one)
00155 {
00156    FSMEdge *edge=outedges;
00157 
00158    while(edge!=NULL)
00159    {
00160       if(edge->GetType()==EDGETYPE_NEGLABELLIST)
00161          return edge;
00162       edge=edge->next;
00163    }
00164    return NULL;
00165 }
00166 
00167 FSMState *FSMState::GetNextState(TLabelID labelid,char *overpoundedge)
00168    // Returns the next state after label 'labelid' is consumed.
00169    // isattrib must describe whether the label is an attribute or not.
00170    // 'overpoundedge' will indicated afterwards whether the FSM moved over a 'pound'-edge
00171    // This only works for deterministic automata
00172 {
00173    FSMEdge  *edge=outedges;
00174    FSMEdge  *defaultedge=NULL;
00175    char     isattrib=ISATTRIB(labelid);
00176 
00177    if(overpoundedge!=NULL)
00178       *overpoundedge=0;
00179 
00180    while(edge!=NULL)
00181    {
00182       switch(edge->GetType())
00183       {
00184       case EDGETYPE_LABEL:
00185          // If a label edge matches the label exactly, then we can return immediately
00186          if(edge->labelid==labelid)
00187             return edge->GetNextState();
00188 
00189          // If we have a pound-edge, we save its pointer
00190          // (if we don't have a default edge already)
00191          if((((edge->labelid==elementpoundlabelid)&&(isattrib==0))||
00192             ((edge->labelid==attribpoundlabelid)&&(isattrib==1)))&&
00193             (defaultedge==NULL))
00194          {
00195             defaultedge=edge;
00196             if(overpoundedge!=NULL)
00197                *overpoundedge=1;
00198          }
00199          break;
00200 
00201       case EDGETYPE_NEGLABELLIST:
00202       {
00203          // A negative edge automatically becomes the default edge,
00204          // (And we overwrite any previous pound-edge!)
00205          // if 'labelid' is not contained in the negative list
00206          
00207          if(!IsInLabelList(edge->labellist,labelid))
00208             // 'labelid' is not in the negative list?
00209          {
00210             defaultedge=edge;
00211             if(overpoundedge!=NULL)
00212                *overpoundedge=0;
00213          }
00214          break;
00215       }
00216 //      case EDGETYPE_EMPTY:
00217 //         return NULL;   // This should never happen !
00218       }
00219       edge=edge->next;
00220    }
00221    // If we didn't find any exactly matching label edge, then we choose
00222    // the default edge. If there is no default edge, then there is no match
00223    if(defaultedge!=NULL)
00224       return defaultedge->GetNextState();
00225    else
00226       return NULL;
00227 }
00228 
00229 //**************************************************************************
00230 //**************************************************************************
00231 
00232 FSMEdge *FSM::CreateLabelEdge(FSMState *fromstate,FSMState *tostate,TLabelID labelid)
00233    // Creates an EDGETYPE_LABEL edge
00234 {
00235    FSMEdge *edge=new(fsmmem) FSMEdge(tostate,labelid);
00236 
00237    edge->next=fromstate->outedges;
00238    fromstate->outedges=edge;
00239    return edge;
00240 }
00241 
00242 FSMEdge *FSM::CreateNegEdge(FSMState *fromstate,FSMState *tostate,FSMLabel *labellist)
00243    // Creates an EDGETYPE_NEGLABELLIST edge
00244 {
00245    FSMEdge *edge=new(fsmmem) FSMEdge(tostate,labellist);
00246 
00247    edge->next=fromstate->outedges;
00248    fromstate->outedges=edge;
00249    return edge;
00250 }
00251 
00252 FSMEdge *FSM::CreateEmptyEdge(FSMState *fromstate,FSMState *tostate)
00253    // Creates an EDGETYPE_EMPTY edge
00254 {
00255    FSMEdge *edge=new(fsmmem) FSMEdge(tostate);
00256 
00257    edge->next=fromstate->outedges;
00258    fromstate->outedges=edge;
00259    return edge;
00260 }
00261 
00262 FSMState *FSM::CreateState(char isfinal)
00263 {
00264    FSMState *newstate=new(fsmmem) FSMState(curidx,isfinal);
00265 
00266    curidx++;
00267 
00268    *laststatelistref=newstate;
00269    laststatelistref=&((*laststatelistref)->next);
00270    return newstate;
00271 }
00272 
00273 //****************************************************************************
00274 //****************************************************************************
00275 //****************************************************************************
00276 //****************************************************************************
00277 
00278 //******** These are functions for making an FSM deterministic ********
00279 
00280 struct FSMStateSetItem
00281    // This structure is used for representing a set of (non-deterministic states)
00282 {
00283    FSMStateSetItem   *next;   // The next non-deterministic state
00284    FSMState          *state;  // The actual state
00285 
00286    FSMStateSetItem(FSMState *mystate,FSMStateSetItem *mynext)
00287    {
00288       state=mystate;
00289       next=mynext;
00290    }
00291 
00292    void *operator new(size_t size)     {  return fsmtmpmem->GetByteBlock(size); }
00293    void operator delete(void *ptr)     {}
00294    void *operator new[] (size_t size)  {  return fsmtmpmem->GetByteBlock(size); }
00295    void operator delete[] (void *ptr)  {}
00296 };
00297 
00298 inline FSMState::FSMState(unsigned mystateid,FSMStateSetItem *origset)
00299    // Creates a new (deterministic) state based on the set of (nondeterministic)
00300    // states in 'origset'. We also compute whether the new state is final or not
00301 {
00302    outedges    =NULL;
00303    origstateset=origset;
00304    stateid     =mystateid;
00305    next        =NULL;
00306    isfinal     =0;
00307 
00308    while(origset!=NULL)
00309    {
00310       if(origset->state->IsFinal())
00311       {
00312          isfinal=1;
00313          break;
00314       }
00315       origset=origset->next;
00316    }
00317 }
00318 
00319 inline FSMState *FSM::CreateState(FSMStateSetItem *list)
00320 // Creates a determinsitic state with a set of corresponding nondet. states
00321 {
00322   FSMState *newstate=new(fsmmem) FSMState(curidx,list);
00323 
00324   curidx++;
00325   *laststatelistref=newstate;
00326   laststatelistref=&((*laststatelistref)->next);
00327 
00328   return newstate;
00329 }
00330 
00331 inline char IsInStateSet(FSMStateSetItem *set,FSMState *state)
00332    // Checks whether a given state is already in the state set
00333 {
00334    FSMStateSetItem   *curitem=set;
00335 
00336    // Check if the state is not already in the set
00337    while(curitem!=NULL)
00338    {
00339       if(curitem->state==state)
00340          return 1;
00341       curitem=curitem->next;
00342    }
00343    return 0;
00344 }
00345 
00346 inline FSMStateSetItem *AddToStateSet(FSMStateSetItem *set,FSMState *state)
00347    // Adds a new state to the current set of states, if the state is
00348    // not already in the set.
00349    // If the state is inserted, then we also insert all states that
00350    // are reachable over empty edges
00351 
00352    // This function is static
00353 {
00354    // Check if the state is already in the set
00355    // In this case, we don't have to do anything
00356    if(IsInStateSet(set,state))
00357       return set;
00358 
00359    set=new FSMStateSetItem(state,set);
00360 
00361    // We also add the states that are reachable by empty paths
00362 
00363    FSMEdge *outedges=state->GetOutEdges();
00364    while(outedges!=NULL)
00365    {
00366       if(outedges->GetType()==EDGETYPE_EMPTY)
00367          set=AddToStateSet(set,outedges->GetNextState());
00368       outedges=outedges->next;
00369    }
00370    return set;
00371 }
00372 
00373 inline void CreateStateSet(FSMStateSetItem *fromstateset,int labelid,FSMStateSetItem **newset)
00374    // Starting from a set of states, this function produces a new set of states
00375    // that are reachable from the current set of states via label 'labelid'
00376    // If 'labelid' is -1, it means that we are looking for everything except the
00377    // given labels
00378 {
00379    FSMEdge           *curedge;
00380 
00381    *newset=NULL;
00382 
00383    while(fromstateset!=NULL)
00384    {
00385       curedge=fromstateset->state->GetOutEdges();
00386 
00387       while(curedge!=NULL)
00388       {
00389          if(curedge->DoesMatchLabel(labelid))
00390             *newset=AddToStateSet(*newset,curedge->GetNextState());
00391 
00392          curedge=curedge->next;
00393       }
00394       fromstateset=fromstateset->next;
00395    }
00396 }
00397 
00398 inline char CompareStateSets(FSMStateSetItem *set1,FSMStateSetItem *set2)
00399    // Compares two sets of states
00400    // If they are equal, the result is 1, otherwise 0
00401 {
00402    int               count1=0;
00403    FSMStateSetItem   *curitem=set2;
00404 
00405    // For every element of 'set1', we check whether it is in 'set2'
00406    // If not, we return 0
00407    // We also count the number of elements in 'count1'
00408 
00409   while(set1!=NULL)
00410   {
00411       if(!IsInStateSet(set2,set1->state)) // Not in set?
00412          return 0;
00413 
00414       count1++;
00415       set1=set1->next;
00416    }
00417 
00418    // Now we check whether the two sets have the same number of elements
00419    curitem=set2;
00420    while(curitem!=NULL)
00421    {
00422       count1--;
00423       if(count1<0)
00424          return 0;
00425       curitem=curitem->next;
00426    }
00427    return 1;
00428 }
00429 
00430 inline void FindAllOutgoingLabels(FSMState *state,FSMLabel **dest)
00431    // This function enumerates all outgoing labels from state 'state'
00432    // - this includes all labels mentioned in EDGETYPE_LABEL
00433    // and EDGETYPE_NEGLABELLIST edges.
00434    // The accepted labels are added to label list 'dest'
00435 {
00436    FSMLabel *curlabel;
00437    FSMEdge  *outedge;
00438 
00439    outedge=state->GetOutEdges();
00440    while(outedge!=NULL)
00441    {
00442       switch(outedge->GetType())
00443       {
00444       case EDGETYPE_LABEL:
00445          if(!IsInLabelList(*dest,outedge->GetLabelID()))
00446             // We only insert if the label is not already in the list
00447             *dest=new(fsmtmpmem) FSMLabel(outedge->GetLabelID(),*dest);
00448 
00449          break;
00450 
00451       case EDGETYPE_NEGLABELLIST:
00452          curlabel=outedge->GetLabelList();
00453          while(curlabel!=NULL)
00454          {
00455             if(!IsInLabelList(*dest,curlabel->labelid))
00456                // We only insert if the label is not already in the list
00457                *dest=new(fsmtmpmem) FSMLabel(curlabel->labelid,*dest);
00458 
00459             curlabel=curlabel->next;
00460          }
00461       }
00462       outedge=outedge->next;
00463    }
00464 }
00465 
00466 inline void FindAllOutgoingLabels(FSMStateSetItem *set,FSMLabel **dest)
00467    // This function enumerates all outgoing labels from a given set
00468    // of states.
00469 {
00470    // We iterate over all states and all outgoing edges
00471    while(set!=NULL)
00472    {
00473       // We enumerate for a specific state
00474       FindAllOutgoingLabels(set->state,dest);
00475 
00476       set=set->next;
00477    }
00478 }
00479 
00480 inline char ConsiderNegEdgeAtState(FSM *newfsm,FSMState *curstate,FSMLabel *mylabellist,FSMState *curstatelist,FSMState **newstate)
00481    // Creates an negedge that leads from 'curstate' to some potential new state
00482    // The function stores the new state in *newstate
00483    // Returns 0, if an existing state can be used
00484    //         1, if a new next state is created
00485 {
00486    FSMStateSetItem   *nextstateset;
00487 
00488    // First, we find all nextstates that are reachable from origstateset over 'labelid'
00489    CreateStateSet(curstate->GetOrigStateSet(),LABEL_UNDEFINED,&nextstateset);
00490 
00491    // If the new state set is empty, then we do not have to do anything
00492    if(nextstateset==NULL)
00493       return 0;
00494 
00495    // Now, we check whether we already have a state with the same statelist
00496    while(curstatelist!=NULL)
00497    {
00498       if(CompareStateSets(nextstateset,curstatelist->GetOrigStateSet()))
00499          // If checkstate has the same stateset, then we use that existing one
00500       {
00501          newfsm->CreateNegEdge(curstate,curstatelist,mylabellist);
00502 
00503          *newstate=curstatelist;
00504          return 0;
00505       }
00506       curstatelist=curstatelist->GetNextStateInList();
00507    }
00508 
00509    // We didn't find any state with the same state set
00510    // ==> then we must create one
00511 
00512    *newstate=newfsm->CreateState(nextstateset);
00513    newfsm->CreateNegEdge(curstate,*newstate,mylabellist);
00514 
00515    return 1;
00516 }
00517 
00518 inline char ConsiderLabelIDAtState(FSM *newfsm,FSMState *curstate,int labelid,FSMState *curstatelist,FSMState **newstate)
00519    // Creates an edge that leads from 'curstate' to some potential new state
00520    // The function stores the new state in *newstate
00521    // Returns 0, if an existing state can be used
00522    //         1, if a new next state is created
00523 {
00524    FSMStateSetItem   *nextstateset;
00525    FSMEdge           *edge;
00526 
00527    // First, we find all nextstates that are reachable from origstateset over 'labelid'
00528    CreateStateSet(curstate->GetOrigStateSet(),labelid,&nextstateset);
00529 
00530    // If the new state set is empty, then we do not have to do anything
00531    if(nextstateset==NULL)
00532       return 0;
00533 
00534    // Now, we check whether we already have a state with the same statelist
00535    while(curstatelist!=NULL)
00536    {
00537       if(CompareStateSets(nextstateset,curstatelist->GetOrigStateSet()))
00538          // If checkstate has the same stateset, then we use that existing one
00539       {
00540          *newstate=curstatelist;
00541 
00542          // We check if we already have a negedge between the two
00543          // existing states
00544          edge=curstate->FindNegEdge();
00545          if((edge!=NULL)&&(edge->GetNextState()==curstatelist))
00546             // If yes, then we only need to remove the label from the negative list
00547             RemoveFromLabelList(edge->GetLabelListRef(),labelid);
00548          else
00549             newfsm->CreateLabelEdge(curstate,curstatelist,labelid);
00550 
00551          return 0;
00552       }
00553       curstatelist=curstatelist->GetNextStateInList();
00554    }
00555 
00556    // We didn't find any state with the same state set
00557    // ==> then we must create one
00558 
00559    *newstate=newfsm->CreateState(nextstateset);
00560 
00561    newfsm->CreateLabelEdge(curstate,*newstate,labelid);
00562    return 1;
00563 }
00564 
00565 FSM *FSM::MakeDeterministic()
00566    // This is the main function for making an FSM deterministic
00567 {
00568    FSMStateSetItem   *todolist=NULL,**todoref=&todolist;
00569       // The 'todolist' contains the list of deterministic states that still
00570       // have to be considered
00571    FSMLabel          *tmplabellist,*curlabel;
00572    FSMState          *curstate,*newnextstate;
00573    FSM               *newfsm=new(fsmmem) FSM();
00574 
00575    FSMStateSetItem   *startstateset=NULL;
00576 
00577    // The start state set only contains the start state
00578    startstateset=AddToStateSet(startstateset,startstate);
00579 
00580    // We create a corresponding deterministic start state
00581    curstate=newfsm->CreateState(startstateset);
00582 
00583    newfsm->SetStartState(curstate);
00584 
00585    // We now always consider 'curstate' - the previously created deterministic state
00586    do
00587    {
00588       // First, we find all distinct outgoing labels for 'curstate'
00589       tmplabellist=NULL;
00590       FindAllOutgoingLabels(curstate->GetOrigStateSet(),&tmplabellist);
00591 
00592       // We consider the negedge for the current state
00593 
00594       if(ConsiderNegEdgeAtState(newfsm,curstate,tmplabellist,newfsm->GetStateList(),&newnextstate)==1)
00595          // A new state has been created?
00596          // ==> We add the state to the 'todo'-list
00597          todolist=new FSMStateSetItem(newnextstate,todolist);
00598 
00599       // Now, we also consider the label edges of the current state
00600 
00601       curlabel=tmplabellist;
00602       while(curlabel!=NULL)
00603       {
00604          if(ConsiderLabelIDAtState(newfsm,curstate,curlabel->labelid,newfsm->GetStateList(),&newnextstate)==1)
00605             // A new state has been created?
00606             // ==> We add the state to the 'todo'-list
00607             todolist=new FSMStateSetItem(newnextstate,todolist);
00608 
00609          curlabel=curlabel->next;
00610       }
00611 
00612       if(todolist==NULL)
00613          break;
00614 
00615       // We go to next element of todo-list
00616       curstate=todolist->state;
00617       todolist=todolist->next;
00618    }
00619    while(1);
00620 
00621    return newfsm;
00622 }
00623 
00624 //*****************************************************************************
00625 //*****************************************************************************
00626 
00627 // The following functions are for the minimization of deterministic FSMs
00628 
00629 struct StateEqualPairListItem
00630    // This structure is used to represent a list of state-pairs
00631 {
00632    StateEqualPair          *statepair; // The state-pair
00633    StateEqualPairListItem  *next;      // The next state-pair
00634 
00635    StateEqualPairListItem(StateEqualPair *mystatepair,StateEqualPairListItem *mynext)
00636    {
00637       statepair=mystatepair;
00638       next=mynext;
00639    }
00640 
00641    void *operator new(size_t size)  {  return fsmtmpmem->GetByteBlock(size); }
00642    void operator delete(void *ptr)  {}
00643 };
00644 
00645 struct StateEqualPair
00646    // This structure represents a pair of two states. We keep information about
00647    // whether the two states are not equal and we also keep a list of
00648    // state pairs, which become 'not-equal', if this state pair becomes 'not-equal'.
00649 {
00650    unsigned long           isnotequal:1;     // Tells us if those two states are definitely not equal
00651    FSMState                *state1,*state2;  // The two states in this pair
00652    StateEqualPairListItem  *dependlist;      // If the two states are not equal, then all the
00653                                              // state-pairs in the 'dependlist' become not equal
00654 
00655    void *operator new[] (size_t size)  {  return fsmtmpmem->GetByteBlock(size); }
00656    void operator delete[] (void *ptr)  {}
00657 };
00658 
00659 inline char CheckEqual(FSMState *nextstate1,FSMState *nextstate2,StateEqualPair **array)
00660    // Checks whether the two states 'nextstate1' and 'nextstate2' could be equal
00661    // We also return '1', if both states are NULL, i.e. the label 'curlabel'
00662    // was not accepted by any of the two states
00663 {
00664    if(nextstate1!=NULL)
00665    {
00666       if(nextstate2==NULL) // 'curlabel' is accepted in one state, but not
00667                            // in the other ? ==> states are not equal !
00668          return 0;
00669 
00670       unsigned sid1=nextstate1->GetStateID();
00671       unsigned sid2=nextstate2->GetStateID();
00672       if(sid1==sid2)
00673          return 1;   // Are the two states identical?
00674 
00675       if(sid2<sid1)
00676          return !(array[sid1][sid2].isnotequal);
00677       else
00678          return !(array[sid2][sid1].isnotequal);
00679    }
00680    else
00681       return nextstate2==NULL;
00682 }
00683 
00684 inline void AddDependency(FSMState *nextstate1,FSMState *nextstate2,StateEqualPair *curpair,StateEqualPair **array)
00685    // Adds the state pair 'curpair' into the dependency list for pair 'nextstate1' and 'nextstate2'
00686 {
00687    StateEqualPair *nextpair;
00688 
00689    if((nextstate1==NULL)||(nextstate2==NULL))
00690       return;
00691 
00692    unsigned sid1=nextstate1->GetStateID();
00693    unsigned sid2=nextstate2->GetStateID();
00694    if(sid1==sid2)
00695       return;
00696 
00697    nextpair= (sid2<sid1) ? (array[sid1]+sid2) : (array[sid2]+sid1);
00698 
00699    if((nextpair->dependlist==NULL)||
00700       (nextpair->dependlist->statepair!=curpair))
00701       // We only insert if 'curpair' is not already at the beginning of the list
00702       nextpair->dependlist=new StateEqualPairListItem(curpair,nextpair->dependlist);
00703 }
00704 
00705 void MarkDependentNotEqual(StateEqualPairListItem *dependlist)
00706    // This recursive functions marks the dependents in 'dependlist' and
00707    // their dependents as 'not equal'
00708 {
00709    while(dependlist!=NULL)
00710    {
00711       if(dependlist->statepair->isnotequal==0)
00712       {
00713          dependlist->statepair->isnotequal=1;
00714          if(dependlist->statepair->dependlist!=NULL)
00715             MarkDependentNotEqual(dependlist->statepair->dependlist);
00716       }
00717       dependlist=dependlist->next;
00718    }
00719 }
00720 
00721 //**********************************
00722 
00723 FSM *FSM::Minimize() // This is the main function for minimizing a deterministic FSM
00724 {
00725    int            statenum,i,j;
00726    FSMState       *curstate1,*curstate2;
00727    StateEqualPair *pairptr;
00728    FSMLabel       *tmplabellist,*curlabel;
00729 
00730    // Let's get rid of redundant states first
00731    PruneRedundantStates();
00732 
00733    // We create a NxN array where N is the number of states in the FSM
00734    // Each array field represents a state-pair and we keep track of when
00735    // two states could be equivalent.
00736 
00737    statenum=GetStateCount();
00738 
00739    StateEqualPair **array=(StateEqualPair **)fsmtmpmem->GetByteBlock(statenum*sizeof(StateEqualPair *));
00740 
00741    curstate1=statelist;
00742 
00743    // We initialize the array of states
00744    // Two states can definitely not be equal, if one of them is a final state and the other not
00745    // Note that we only need to consider array fields [i,j] with j<i
00746    for(i=0;i<statenum;i++)
00747    {
00748       array[i]=new StateEqualPair[i];
00749 
00750       pairptr=array[i];
00751 
00752       curstate2=statelist;
00753 
00754       for(j=0;j<i;j++)
00755       {
00756          pairptr->state1      =curstate1;
00757          pairptr->state2      =curstate2;
00758          pairptr->dependlist  =NULL;
00759          pairptr->isnotequal  =(curstate1->IsFinal() != curstate2->IsFinal());
00760 
00761          pairptr++;
00762          curstate2=curstate2->next;
00763       }
00764       curstate1=curstate1->next;
00765    }
00766 
00767    curstate1=statelist;
00768 
00769    // Now we look at each state-pair separately
00770    // We only consider array fields [i,j] with j<i
00771    for(i=0;i<statenum;i++)
00772    {
00773       pairptr=array[i];
00774 
00775       curstate2=statelist;
00776 
00777       for(j=0;j<i;j++)
00778       {
00779          // We first find the labels on all outgoing edges
00780 
00781          tmplabellist=NULL;
00782          FindAllOutgoingLabels(curstate1,&tmplabellist);
00783          FindAllOutgoingLabels(curstate2,&tmplabellist);
00784 
00785          // Now, for each label in 'tmplabellist', we check whether
00786          // the states that would be reached for that label are equal.
00787          // If for some label they are not equal, then we need to add a dependency
00788          curlabel=tmplabellist;
00789          while(curlabel!=NULL)
00790          {
00791             if(CheckEqual( curstate1->GetNextState(curlabel->labelid),
00792                            curstate2->GetNextState(curlabel->labelid),array)==0)
00793                break;
00794             curlabel=curlabel->next;
00795          }
00796 
00797 
00798          if(curlabel==NULL)   // Only if all labels passed well, we try to
00799                               // look at the "EVERYTHING_ELSE" case
00800          {
00801             if(CheckEqual( curstate1->GetNextState(LABEL_UNDEFINED),
00802                            curstate2->GetNextState(LABEL_UNDEFINED),array)==0)
00803                curlabel=(FSMLabel *)1; // We set curlabel to a value!=NULL
00804          }
00805 
00806          if(curlabel!=NULL)   // Did one of the labels lead to non-equal states?
00807                               // Then the current state pair is also marked "non-equal"
00808          {
00809             // We mark the pair (and all dependent pairs) as not-equal 
00810             pairptr->isnotequal=1;
00811 
00812             if(pairptr->dependlist!=NULL)
00813                MarkDependentNotEqual(pairptr->dependlist);
00814          }
00815          else  // We don't know whether the states are equivalent
00816                // ==> We build dependency chains to each of the successor states
00817                // If the successor states become non-equal, then we change this statepair
00818          {
00819             curlabel=tmplabellist;
00820             while(curlabel!=NULL)
00821             {
00822                AddDependency( curstate1->GetNextState(curlabel->labelid),
00823                               curstate2->GetNextState(curlabel->labelid),
00824                               pairptr,
00825                               array);
00826 
00827                curlabel=curlabel->next;
00828             }
00829             AddDependency( curstate1->GetNextState(LABEL_UNDEFINED),
00830                            curstate2->GetNextState(LABEL_UNDEFINED),
00831                            pairptr,
00832                            array);
00833          }
00834          pairptr++;
00835          curstate2=curstate2->next;
00836       }
00837       curstate1=curstate1->next;
00838    }
00839 
00840    // In the final phase, we create a new automaton and copy the states
00841 
00842    FSM *newfsm=new(fsmmem) FSM();
00843    FSMEdge  *edge,*negedge;
00844 
00845    // First, we create all states
00846 
00847    curstate1=statelist;
00848 
00849    for(i=0;i<statenum;i++)
00850    {
00851       pairptr=array[i];
00852       curstate2=statelist;
00853 
00854       for(j=0;j<i;j++)
00855       {
00856          if(pairptr->isnotequal==0)
00857             // We found a previous state that is equal? ==> We store a pointer
00858             // to the corresponding state and we set array[i]=NULL
00859          {
00860             curstate1->data=curstate2->data;
00861             array[i]=NULL;
00862             break;
00863          }
00864          curstate2=curstate2->next;
00865          pairptr++;
00866       }
00867 
00868       if(i==j) // We didn't find any previous state that is equal
00869                // ==> We create a new state
00870          curstate1->data=newfsm->CreateState(curstate1->IsFinal());
00871 
00872       curstate1=curstate1->next;
00873    }
00874 
00875    // We also need to take care of the start-state
00876    newfsm->SetStartState((FSMState *)startstate->data);
00877 
00878    // Now, we need to take care of the edges
00879    curstate1=statelist;
00880    for(i=0;i<statenum;i++)
00881    {
00882       if(array[i]!=NULL)
00883       {
00884          // Let's take care of the negedge first
00885 
00886          edge=curstate1->FindNegEdge();
00887          if(edge!=NULL)
00888          {
00889             tmplabellist=NULL;
00890             DupLabelList(edge->GetLabelList(),&tmplabellist);
00891 
00892             negedge=newfsm->CreateNegEdge((FSMState *)curstate1->data,
00893                                           (FSMState *)edge->GetNextState()->data,
00894                                           tmplabellist);
00895          }
00896          else
00897             negedge=NULL;
00898 
00899          // Let's consider all outgoing edges
00900          edge=curstate1->GetOutEdges();
00901          while(edge!=NULL)
00902          {
00903             if(edge->GetType()==EDGETYPE_LABEL)
00904             {
00905                if((negedge!=NULL)&&
00906                   (negedge->GetNextState()==(FSMState *)edge->nextstate->data))
00907                   // Is the target state the same as the one of the existing negedge ?
00908                   // ==> We simply remove the element from the negedge labellist
00909                   RemoveFromLabelList(&(negedge->labellist),edge->labelid);
00910                else
00911                   newfsm->CreateLabelEdge((FSMState *)curstate1->data,
00912                                           (FSMState *)edge->GetNextState()->data,
00913                                           edge->labelid);
00914             }
00915             edge=edge->next;
00916          }
00917       }
00918       curstate1=curstate1->next;
00919    }
00920    return newfsm;
00921 }
00922 
00923 //**********************************************************************
00924 //**********************************************************************
00925 
00926 char FSMState::PruneRedundantStatesRecurs()
00927    // Eliminates states that are either not reachable
00928    // from the start state or states that never lead
00929    // to final states
00930    // The function returns a value !=0, if the state leads to
00931    // a final state
00932 {
00933    char reachesfinalstate=0;
00934 
00935    // We use 'tmpval' to represent the status information.
00936    // If bit 2 is set, then it means that the
00937    // state reaches a final state or is a final state itself.
00938    // If bit 1 is set, then the state can be reached from
00939    // the start state. Each state that is reached over this
00940    // recursive function is reachable from the start state
00941    if(isfinal)
00942       tmpval=3;
00943    else
00944       tmpval=1;
00945 
00946    FSMEdge *outedge=outedges;
00947 
00948    while(outedge!=NULL)
00949    {
00950       if((outedge->GetNextState()->tmpval&1)==0)
00951          // We haven't visited the next state so far ?
00952       {
00953          if(outedge->GetNextState()->PruneRedundantStatesRecurs())
00954             tmpval=3;
00955       }
00956       else
00957       {
00958          if(outedge->GetNextState()->tmpval==3)
00959             tmpval=3;
00960       }
00961       outedge=outedge->next;
00962    }
00963    return (tmpval==3);
00964 }
00965 
00966 void FSM::PruneRedundantStates()
00967 // Eliminates states that will never lead to final states
00968 // and states that are not reachable from the start state
00969 {
00970    FSMState *curstate=statelist;
00971    FSMEdge  **outedgeref;
00972 
00973    // Let's set the 'tmpval' of each state first
00974    while(curstate!=NULL)
00975    {
00976       curstate->tmpval=0;
00977       curstate=curstate->next;
00978    }
00979    startstate->PruneRedundantStatesRecurs();
00980 
00981    FSMState **curstateref=&statelist;
00982 
00983    curidx=0;
00984 
00985    while(*curstateref!=NULL)
00986    {
00987       if((*curstateref)->tmpval!=3)
00988          // The state is either not reachable from the startstate or
00989          // it does not lead to a final state?
00990       {
00991          *curstateref=(*curstateref)->next;
00992       }
00993       else
00994       {
00995          (*curstateref)->stateid=curidx;
00996 
00997          outedgeref=&((*curstateref)->outedges);
00998          while(*outedgeref!=NULL)
00999          {
01000             if((*outedgeref)->GetNextState()->tmpval!=3)
01001                // Edge leads to a redundant state? ==> Delete edge
01002                *outedgeref=(*outedgeref)->next;
01003             else
01004                outedgeref=&((*outedgeref)->next);
01005          }
01006          curidx++;
01007          curstateref=&((*curstateref)->next);
01008       }
01009    }
01010    *curstateref=NULL;
01011 }
01012 
01013 //**********************************************************************
01014 //**********************************************************************
01015 
01016 void FSMState::EliminateRedundantPoundEdges()
01017    // This function eliminate redundant outgoing pound-edges
01018    // Pound-edges are not needed, if all cases are captured
01019    // by an outgoing negative edge
01020 {
01021    FSMEdge  *outedge=GetOutEdges();
01022    FSMLabel *curlabel;
01023 
01024    FSMEdge  *elementpoundedge=NULL;
01025    FSMEdge  *attribpoundedge=NULL;
01026    FSMEdge  *negedge=NULL;
01027 
01028    // First, we find out whether there are pound edges
01029    // and a negative edge
01030    // we store this information in 'elementpoundedge', 'attribpoundedge'
01031    // and 'negedge'
01032 
01033    outedge=GetOutEdges();
01034 
01035    while(outedge!=NULL)
01036    {
01037       switch(outedge->GetType())
01038       {
01039       case EDGETYPE_LABEL:
01040          if(outedge->GetLabelID()==elementpoundlabelid)
01041             elementpoundedge=outedge;
01042          else
01043          {
01044             if(outedge->GetLabelID()==attribpoundlabelid)
01045                attribpoundedge=outedge;
01046          }
01047          break;
01048 
01049       case EDGETYPE_NEGLABELLIST:
01050          negedge=outedge;
01051          break;
01052       }
01053       outedge=outedge->next;
01054    }
01055 
01056    // We only need to consider the case when there is a negative edge
01057    if(negedge==NULL)
01058       return;
01059 
01060    // Nothing to delete ?
01061    if((attribpoundedge==NULL)&&
01062       (elementpoundedge==NULL))
01063       return;  
01064 
01065    // We traverse the list of labels in the negative list
01066    // and check whether there is a corresponding positive label
01067    // If not, then the label is not captured by negative or positive labels
01068    // but will be captured by the pound!
01069    curlabel=negedge->labellist;
01070    while(curlabel==NULL)
01071    {
01072       outedge=GetOutEdges();
01073       while(outedge!=NULL)
01074       {
01075          if(outedge->GetType()==EDGETYPE_LABEL)
01076          {
01077             if(outedge->GetLabelID()==curlabel->labelid)
01078                break;
01079          }
01080          outedge=outedge->next;
01081       }
01082       if(outedge==NULL) // We didn't find the label ?
01083       {
01084          if(ISATTRIB(curlabel->labelid))  // ==> We don't have to remove the
01085                                           // the corresponding pound edge, since
01086                                           // it captures that label
01087             attribpoundedge=NULL;
01088          else
01089             elementpoundedge=NULL;
01090       }
01091       curlabel=curlabel->next;
01092    }
01093 
01094    // Now, we need to remove the pound-edges
01095 
01096    if((attribpoundedge==NULL)&&
01097       (elementpoundedge==NULL))
01098       return;  // Nothing to do ?
01099 
01100    FSMEdge **edgeref=&outedges;
01101 
01102    while(*edgeref!=NULL)
01103    {
01104       if((*edgeref==attribpoundedge)||
01105          (*edgeref==elementpoundedge))
01106          *edgeref=(*edgeref)->next;
01107       else
01108          edgeref=&((*edgeref)->next);
01109    }
01110 }
01111 
01112 void FSM::EliminateRedundantPoundEdges()
01113    // This function eliminate redundant outgoing pound-edges
01114    // Pound-edges are not needed, if all cases are captured
01115    // by an outgoing negative edge
01116 {
01117    FSMState *state=statelist;
01118    while(state!=NULL)
01119    {
01120       state->EliminateRedundantPoundEdges();
01121       state=state->next;
01122    }
01123 }
01124 //**********************************************************************
01125 //**********************************************************************
01126 
01127 void FSMState::ComputeOutCompleteness()
01128    // Computes for the state whether it is out-complete,
01129    // i.e. whether there exists an edge for each possible label
01130 {
01131    FSMEdge  *outedge=GetOutEdges();
01132    FSMLabel *curlabel;
01133    FSMEdge  *elementpoundedge=NULL,
01134             *attribpoundedge=NULL,
01135             *negedge=NULL;
01136 
01137    while(outedge!=NULL)
01138    {
01139       switch(outedge->GetType())
01140       {
01141       case EDGETYPE_LABEL:
01142          if(outedge->GetLabelID()==elementpoundlabelid)  // Do we have an element-pound sign? => The state is out-complete
01143          {
01144             if(attribpoundedge!=NULL)  // Do we also have an attribute-pound edge ?
01145                                        // ==> We are complete
01146             {
01147                isoutcomplete=1;
01148                return;
01149             }
01150             elementpoundedge=outedge;
01151          }
01152          else
01153          {
01154             if(outedge->GetLabelID()==attribpoundlabelid)  // Do we have a attribute-pound sign? => The state is out-complete
01155             {
01156                if(elementpoundedge!=NULL)  // Do we also have an element-pound edge ?
01157                                           // ==> We are complete
01158                {
01159                   isoutcomplete=1;
01160                   return;
01161                }
01162                attribpoundedge=outedge;
01163             }
01164          }
01165          break;
01166 
01167       case EDGETYPE_NEGLABELLIST:
01168          negedge=outedge;
01169          break;
01170       }
01171       outedge=outedge->next;
01172    }
01173 
01174    if(negedge==NULL) // We didn't have a negative edge
01175                      // (And we didn't find *two* pound edges ? ==> Incomplete
01176    {
01177       isoutcomplete=0;
01178       return;
01179    }
01180 
01181    // We test whether each negative label occurs as a positive one
01182    curlabel=negedge->GetLabelList();
01183    while(curlabel!=NULL)
01184    {
01185       outedge=GetOutEdges();
01186       while(outedge!=NULL)
01187       {
01188          if(outedge->GetType()==EDGETYPE_LABEL)
01189          {
01190             if(outedge->GetLabelID()==curlabel->labelid)
01191                break;
01192          }
01193          outedge=outedge->next;
01194       }
01195       if(outedge==NULL)
01196          // We didn't find a corresponding positive label?
01197          // We check if we have a corresponding pound-edge
01198       {
01199          // There is no corresponding pound-edge ? ==> Incomplete
01200          if((ISATTRIB(curlabel->labelid)&&(attribpoundedge==NULL))||
01201             ((!ISATTRIB(curlabel->labelid))&&(elementpoundedge==NULL)))
01202          { 
01203             isoutcomplete=0;
01204             return;
01205          }
01206       }
01207       curlabel=curlabel->next;
01208    }
01209    // Each of the negative labels either occurred as a positive label
01210    // or there exists a corresponding pound-edge
01211    // I.e. the state is out-complete
01212 
01213    isoutcomplete=1;
01214 }
01215 
01216 void FSM::ComputeOutCompleteness()
01217    // Computes for each state whether it is out-complete,
01218    // i.e. whether there exists an edge for each possible label
01219 {
01220    FSMState *curstate=statelist;
01221 
01222    while(curstate!=NULL)
01223    {
01224       curstate->ComputeOutCompleteness();
01225       curstate=curstate->next;
01226    }
01227 }
01228 
01229 char FSMState::FindAcceptingStatesRecurs()
01230    // Sets attribute 'isaccepting' depending on whether
01231    // all reachable states for this state are final
01232 {
01233    tmpval=1;
01234 
01235    isaccepting=(isfinal && IsOutComplete());
01236       // For now, we simply assume that the state is accepting
01237       // (Only final, out-complete states can be accepting!)
01238       // This is important for cycles in the recursion !
01239 
01240    FSMEdge *outedge=outedges;
01241 
01242    while(outedge!=NULL)
01243    {
01244       if((outedge->GetNextState()->tmpval&1)==0)
01245          // We haven't visited the next state so far ?
01246       {
01247          if(outedge->GetNextState()->FindAcceptingStatesRecurs()==0)
01248             // If the next state is not accepting, then this one is not too
01249             isaccepting=0;
01250       }
01251       else
01252       {
01253          if(outedge->GetNextState()->isaccepting==0)
01254             // If the next state is not accepting, then this one is not too
01255             isaccepting=0;
01256       }
01257       outedge=outedge->next;
01258    }
01259 
01260    // All outgoing edges lead to accepting states ==> We are also accepting
01261    return isaccepting;
01262 }
01263 
01264 void FSM::FindAcceptingStates()
01265    // Determines all (final) states that will definitely lead to an acceptance of
01266    // the word, no matter what comes afterwards
01267    // In other words: all following states are final states
01268 {
01269    FSMState *curstate=statelist;
01270 
01271    // First, let's compute the out-completeness of all states
01272    // States that are not out-complete can never be accepting
01273    ComputeOutCompleteness();
01274 
01275    // We initialize the 'tmpval' elements to '0'
01276    while(curstate!=NULL)
01277    {
01278       curstate->tmpval=0;
01279       curstate=curstate->next;
01280    }
01281    // We compute the accepting status, starting with 'startstate'
01282    startstate->FindAcceptingStatesRecurs();
01283 }
01284 
01285 //**********************************************************************
01286 //**********************************************************************
01287 
01288 char FSMState::HasPoundsAheadRecurs()
01289    // Determines for the state whether there are pound-edges
01290    // reachable from that state
01291 {
01292    tmpval=1;
01293 
01294    haspoundsahead=0;
01295 
01296    FSMEdge *outedge=outedges;
01297 
01298    while(outedge!=NULL)
01299    {
01300       if((outedge->GetNextState()->tmpval&1)==0)
01301          // We haven't visited the next state so far ?
01302       {
01303          if(outedge->GetNextState()->HasPoundsAheadRecurs()!=0)
01304             // If the next state has a pound connection, then this one has too
01305             haspoundsahead=1;
01306       }
01307       else
01308       {
01309          if(outedge->GetNextState()->haspoundsahead)
01310             // If the next state is a pound connection, then this one has too
01311             haspoundsahead=1;
01312       }
01313 
01314       if(outedge->GetType()==EDGETYPE_LABEL)
01315       {
01316          if((outedge->GetLabelID()==elementpoundlabelid)||
01317             (outedge->GetLabelID()==attribpoundlabelid))
01318 
01319             haspoundsahead=1;
01320       }
01321       outedge=outedge->next;
01322    }
01323 
01324    // No outgoing edges has pounds ahead
01325    return haspoundsahead;
01326 }
01327 
01328 void FSM::ComputeStatesHasPoundsAhead()
01329 {
01330    FSMState *curstate=statelist;
01331 
01332    // We initialize the 'tmpval' elements to '0'
01333    while(curstate!=NULL)
01334    {
01335       curstate->tmpval=0;
01336       curstate=curstate->next;
01337    }
01338    // We compute the accepting status, starting with 'startstate'
01339    startstate->HasPoundsAheadRecurs();
01340 }
01341 
01342 //**********************************************************************
01343 //**********************************************************************
01344 
01345 #ifdef FSM_NEGATE
01346 
01347 void FSM::CompleteDetState(FSMState *state,FSMState *deadstate)
01348    // Completes the deterministic state by adding edges for the missing
01349    // labels. The edges lead to 'deadstate'
01350 {
01351    FSMEdge *outedge=state->GetOutEdges();
01352    FSMLabel *labellist=NULL,**ref;
01353    FSMLabel *curlabel;
01354    char     isneglist=0,deleted;
01355 
01356    while(outedge!=NULL)
01357    {
01358       switch(outedge->GetType())
01359       {
01360       case EDGETYPE_LABEL:
01361          if(isneglist==0)  // We have a list of positive labels?
01362          {
01363             if(!IsInLabelList(labellist,outedge->labelid))
01364                // We only insert if the label is not already in the list
01365                labellist=new(fsmtmpmem) FSMLabel(outedge->labelid,labellist);
01366          }
01367          else
01368          {
01369             // In this case, the label *must* be in 'labellist'
01370             // and we remove it from the list
01371             deleted=0;
01372 
01373             ref=&labellist;
01374             while(*ref!=NULL)
01375             {
01376                if((*ref)->labelid==outedge->labelid)
01377                {
01378                   *ref=(*ref)->next;   // We simply eliminate the item
01379                   deleted=1;
01380                   break;
01381                }
01382                ref=&((*ref)->next);
01383             }
01384             if(deleted==0) // If the label did not occur in the list, then
01385                            // we have a problem !
01386             {
01387                Error("Fatal error in FSM::CompleteDetState()");
01388                Exit();
01389             }
01390          }
01391          break;
01392 
01393       case EDGETYPE_NEGLABELLIST:
01394          if(isneglist)  // If we already have a negative list, then something
01395                         // is severely wrong !!!
01396          {
01397             Error("Fatal error in FSM::CompleteDetState()");
01398             Exit();
01399          }
01400          isneglist=1;
01401 
01402          curlabel=outedge->GetLabelList();
01403          while(curlabel!=NULL)
01404          {
01405             deleted=0;
01406 
01407             ref=&labellist;
01408             while(*ref!=NULL)
01409             {
01410                if((*ref)->labelid==curlabel->labelid)
01411                {
01412                   *ref=(*ref)->next;   // We simply eliminate the item
01413                   deleted=1;
01414                   break;
01415                }
01416                ref=&((*ref)->next);
01417             }
01418             if(deleted==0) // If the label did not occur in the list, then
01419                            // we add it to the list
01420                labellist=new (fsmtmpmem) FSMLabel(curlabel->labelid,labellist);
01421 
01422             curlabel=curlabel->next;
01423          }
01424       }
01425       outedge=outedge->next;
01426    }
01427 
01428    if(isneglist==1)
01429       // Negative list? => We create set of edges to 'deadstate'
01430    {
01431       ref=&labellist;
01432       while(*ref!=NULL)
01433       {
01434          CreateLabelEdge(state,deadstate,(*ref)->labelid);
01435          ref=&((*ref)->next);
01436       }
01437    }
01438    else
01439       CreateNegEdge(state,deadstate,labellist);
01440 }
01441 
01442 void FSM::CompleteDetFSM()
01443    // This function adds edges (and one state) to an FSM so that it
01444    // becomes complete -- i.e. there is an outgoing edge for every state
01445    // and every possible label
01446 {
01447    // We create a non-final state, which is a deadend-state
01448    FSMState *deadstate,*curstate;
01449    FSMEdge  *loopedge;
01450 
01451    deadstate=CreateState((char)0);
01452 
01453    // We create a loop edge for deadstate, so that we
01454    // stay in 'deadstate' forever.
01455    loopedge=CreateNegEdge(deadstate,deadstate,NULL);
01456 
01457    // For every state, we must add another edge to 'deadstate'
01458    // which takes care of all labels that are not mentioned in
01459    // already existing edges
01460    curstate=statelist;
01461 
01462    while(curstate!=NULL)
01463    {
01464       // We complete each of the states
01465       CompleteDetState(curstate,deadstate);
01466       curstate=curstate->next;
01467    }
01468 }
01469 
01470 FSM *FSM::CreateNegateFSM()
01471    // This function negates the given FSM
01472    // I.e. it creates an FSM that accepts exactly the words
01473    // that are not accepted by 'this'
01474 {
01475    FSM      *detfsm;
01476    FSMState *curstate;
01477 
01478    // First, we make the FSM deterministic
01479    detfsm=MakeDeterministic();
01480 
01481    // We make the FSM complete
01482    detfsm->CompleteDetFSM();
01483 
01484    // We consider each single state and toggle the final status
01485 
01486    curstate=detfsm->GetStateList();
01487 
01488    while(curstate!=NULL)
01489    {
01490       if(curstate->IsFinal())
01491          curstate->isfinal=0;
01492       else
01493          curstate->isfinal=1;
01494 
01495       curstate=curstate->next;
01496    }
01497 
01498    detfsm=detfsm->Minimize();
01499 
01500    return detfsm;
01501 }
01502 
01503 #endif
01504 
01505 //**********************************************************************
01506 //**********************************************************************
01507 
01508 void FSM::AddFSM(FSMState *fromstate,FSMState *tostate,FSM *fsm)
01509    // Adds the FSM 'fsm' into the current FSM between 'fromstate' and 'tostate'
01510 {
01511    FSMState *curstate=fsm->GetStateList(),*newstate;
01512 
01513    // First, we create all the states and
01514    // we set 'curstate->data' to the new state
01515    while(curstate!=NULL)
01516    {
01517       // For the start state, we simply use the 'fromstate' state
01518       if(curstate==fsm->GetStartState())
01519          curstate=fromstate;
01520       else
01521       {
01522          newstate=CreateState((char)0);
01523          curstate->data=newstate;
01524       }
01525       if(curstate->IsFinal())
01526          // For final states, we create an empty edge to 'tostate'
01527          CreateEmptyEdge((FSMState *)(curstate->data),tostate);
01528 
01529       curstate=curstate->next;
01530    }
01531 
01532    // Now, we can take care of the edges
01533    curstate=fsm->GetStateList();
01534 
01535    FSMEdge  *outedge;
01536 
01537    // We only need to copy the edges
01538 
01539    while(curstate!=NULL)
01540    {
01541       outedge=curstate->GetOutEdges();
01542       while(outedge!=NULL)
01543       {
01544          switch(outedge->GetType())
01545          {
01546          case EDGETYPE_LABEL:
01547             CreateLabelEdge((FSMState *)(curstate->data),
01548                             (FSMState *)(outedge->GetNextState()->data),
01549                             outedge->labelid);
01550             break;
01551 
01552          case EDGETYPE_NEGLABELLIST:
01553          {
01554             FSMLabel *newlabellist=NULL;
01555 
01556             DupLabelList(outedge->labellist,&newlabellist);
01557 
01558             CreateNegEdge((FSMState *)(curstate->data),
01559                               (FSMState *)(outedge->GetNextState()->data),
01560                               newlabellist);
01561             break;
01562          }
01563 
01564          case EDGETYPE_EMPTY:
01565             CreateEmptyEdge((FSMState *)(curstate->data),
01566                               (FSMState *)(outedge->GetNextState()->data));
01567             break;
01568          }
01569 
01570          outedge=outedge->next;
01571       }
01572       curstate=curstate->next;
01573    }
01574 }
01575 
01576 //**********************************************************************
01577 //**********************************************************************
01578 //**********************************************************************
01579 
01580 FSM *FSM::CreateReverseFSM()
01581    // Reverses a non-deterministic FSM - i.e. the resulting FSM
01582    // accepts the reverse language L^R, if and only if FSM accepts L.
01583 {
01584    FSMState *newstate;
01585    FSMEdge  *edge;
01586 
01587    FSM *newfsm=new(fsmmem) FSM();
01588 
01589    // We create a new start state
01590    // This start state will be connected to all final states 
01591    // of the original FSM
01592    FSMState *newstartstate=newfsm->CreateState();
01593 
01594    newfsm->SetStartState(newstartstate);
01595 
01596    FSMState *curstate=statelist;
01597 
01598    // First, we copy all the states
01599    // The start state becomes the new final state
01600    // Furthermore, if the original state is final, then it becomes
01601    // a new start state - we simulate this by creating an empty edge
01602    // between 'newstartstate' and 'newstate'
01603    while(curstate!=NULL)
01604    {
01605       newstate=newfsm->CreateState(curstate==startstate);
01606 
01607       if(curstate->IsFinal())
01608          newfsm->CreateEmptyEdge(newstartstate,newstate);
01609 
01610       curstate->data=newstate;
01611       curstate=curstate->next;
01612    }
01613 
01614    // Now, we consider all states and we create the edges
01615    curstate=statelist;
01616    
01617    while(curstate!=NULL)
01618    {
01619       edge=curstate->GetOutEdges();
01620       while(edge!=NULL)
01621       {
01622          switch(edge->GetType())
01623          {
01624          case EDGETYPE_LABEL:
01625             newfsm->CreateLabelEdge((FSMState *)(edge->GetNextState()->data),
01626                                     (FSMState *)(curstate->data),
01627                                     edge->GetLabelID());
01628             break;
01629 
01630          case EDGETYPE_NEGLABELLIST:
01631          {
01632             FSMLabel *newlabellist=NULL;
01633 
01634             DupLabelList(edge->GetLabelList(),&newlabellist);
01635 
01636             newfsm->CreateNegEdge(  (FSMState *)(edge->GetNextState()->data),
01637                                     (FSMState *)(curstate->data),
01638                                     newlabellist);
01639             break;
01640          }
01641          case EDGETYPE_EMPTY:
01642             newfsm->CreateEmptyEdge((FSMState *)(edge->GetNextState()->data),
01643                                     (FSMState *)(curstate->data));
01644             break;
01645          }
01646          edge=edge->next;
01647       }
01648       curstate=curstate->next;
01649    }
01650    return newfsm;
01651 }
01652 
01653 //****************************************************************************
01654 //****************************************************************************
01655 //****************************************************************************
01656 //****************************************************************************
01657 
01658 #ifdef FSM_PRINT
01659 
01660 void FSMState::Print()
01661    // Prints information about the state
01662 {
01663    printf("%lu",stateid);
01664    if(isfinal)
01665       printf(":F");
01666 
01667    if(isoutcomplete)
01668       printf(":C");
01669 
01670    if(isaccepting)
01671       printf(":A");
01672 
01673    if(haspoundsahead)
01674       printf(":P");
01675 
01676 /*
01677    if(origstateset!=NULL)
01678    {
01679       FSMStateSetItem *state=origstateset;
01680       printf("  {");
01681 
01682       while(state!=NULL)
01683       {
01684          state->state->Print();
01685          state=state->next;
01686          if(state!=NULL)
01687             printf(",");
01688       }
01689       printf("}");
01690    }
01691 */
01692 }
01693 
01694 void FSMEdge::PrintLabelList()
01695    // Prints the labellist of an EDGETYPE_LABELLIST edge
01696 {
01697    FSMLabel *curlabel=labellist;
01698 
01699    while(curlabel!=NULL)
01700    {
01701          globallabeldict.PrintLabel(curlabel->labelid);
01702 
01703       curlabel=curlabel->next;
01704       if(curlabel!=NULL)
01705          printf("|");
01706    }
01707 }
01708 
01709 void FSMEdge::Print()
01710    // Prints the information about an edge
01711 {
01712    switch(type)
01713    {
01714    case EDGETYPE_LABEL:
01715          globallabeldict.PrintLabel(labelid);
01716       break;
01717       
01718    case EDGETYPE_NEGLABELLIST:
01719       printf("ALL BUT ");
01720       PrintLabelList();
01721       break;
01722 
01723    case EDGETYPE_EMPTY:
01724       break;
01725    }
01726 }
01727 
01728 //*********************************************************************
01729 //*********************************************************************
01730 
01731 void FSM::Print()
01732    // Prints the structure of the FSM
01733 {
01734    FSMState *curstate=statelist;
01735    FSMEdge  *edge;
01736 
01737    while(curstate!=NULL)
01738    {
01739       edge=curstate->GetOutEdges();
01740 
01741       while(edge!=NULL)
01742       {
01743          curstate->Print();
01744          printf(" -- ");
01745          edge->Print();
01746          printf(" --> ");
01747          edge->GetNextState()->Print();
01748          printf("\n");
01749 
01750          edge=edge->next;
01751       }
01752       curstate=curstate->next;
01753    }
01754 }
01755 
01756 #endif // FSM_PRINT
01757 
01758 //*********************************************************************
01759 //*********************************************************************
01760 //*********************************************************************
01761 
01762 #ifdef FSM_STORE
01763 
01764 void FSMEdge::Store(MemStreamer *mem)
01765 // Stores the information about the edge in 'mem'
01766 {
01767    // We store the edge type
01768    // EDGETYPE_LABEL, EDGETYPE_NEGLABELLIST (, or EDGETYPE_EMPTY)
01769 
01770    mem->StoreUInt32(type);
01771    mem->StoreUInt32(nextstate->GetStateID());
01772 
01773    switch(type)
01774    {
01775    case EDGETYPE_LABEL:
01776       mem->StoreUInt32(labelid);
01777       break;
01778 
01779    case EDGETYPE_NEGLABELLIST:
01780    {
01781       FSMLabel *curlabel=labellist;
01782       int      labelcount=0;
01783 
01784       // First, let's count the number of labels
01785 
01786       while(curlabel!=NULL)
01787       {
01788          labelcount++;
01789          curlabel=curlabel->next;
01790       }
01791 
01792       // We store the labelcount
01793       mem->StoreUInt32(labelcount);
01794 
01795       // Now, we can store the label ID's
01796       curlabel=labellist;
01797 
01798       while(curlabel!=NULL)
01799       {
01800          mem->StoreUInt32(curlabel->labelid);
01801          curlabel=curlabel->next;
01802       }
01803    }
01804    }
01805 }
01806 
01807 void FSMEdge::Load(unsigned char * &ptr,FSMState *statearray,MemStreamer *fsmmem)
01808 {
01809    type=LoadUInt32(ptr);
01810 
01811    nextstate=statearray+LoadUInt32(ptr);
01812 
01813    switch(type)
01814    {
01815    case EDGETYPE_LABEL:
01816       labelid=(unsigned short)LoadUInt32(ptr);;
01817       break;
01818 
01819    case EDGETYPE_NEGLABELLIST:
01820    {
01821       FSMLabel **labelref=&labellist;
01822       unsigned long labelcount;
01823       
01824       labelcount=LoadUInt32(ptr);
01825 
01826       while(labelcount--)
01827       {
01828          *labelref=new(fsmmem) FSMLabel((unsigned short)LoadUInt32(ptr));
01829          labelref=&((*labelref)->next);
01830       }
01831       *labelref=NULL;
01832    }
01833    }
01834 }
01835 
01836 //*********************************************************************
01837 //*********************************************************************
01838 
01839 void FSMState::Store(MemStreamer *mem)
01840 {
01841    // We store whether the state is final
01842    mem->StoreUInt32(isfinal);
01843 
01844    // Let's count the number of outgoing edges
01845 
01846    FSMEdge *curedge=outedges;
01847    unsigned long edgecount=0;
01848 
01849    while(curedge!=NULL)
01850    {
01851       edgecount++;
01852       curedge=curedge->next;
01853    }
01854 
01855    // We store the number of outgoing edges
01856    mem->StoreUInt32(edgecount);
01857 
01858    // Now we store the actual edges
01859    curedge=outedges;
01860    while(curedge!=NULL)
01861    {
01862       curedge->Store(mem);
01863       curedge=curedge->next;
01864    }
01865 }
01866 
01867 void FSMState::Load(unsigned char * &ptr,FSMState *statearray,MemStreamer *fsmmem)
01868 {
01869    unsigned long edgecount;
01870 
01871    // We load the status bit
01872    isfinal=LoadUInt32(ptr);
01873 
01874    FSMEdge  **edgeref =&outedges;
01875 
01876    // We load the number of outgoing edges
01877 
01878    edgecount=LoadUInt32(ptr);
01879 
01880    // We load all edges
01881    while(edgecount--)
01882    {
01883       *edgeref=new(fsmmem) FSMEdge();
01884 
01885       (*edgeref)->Load(ptr,statearray,fsmmem);
01886       edgeref=&((*edgeref)->next);
01887    }
01888    *edgeref=NULL;
01889 }
01890 
01891 //*********************************************************************
01892 //*********************************************************************
01893 
01894 void FSM::Store(MemStreamer *mem)
01895 // Stores the information in the FSM in 'mem'
01896 {
01897    mem->StoreUInt32(curidx);
01898 
01899    // We store the ID of the start state
01900    mem->StoreUInt32(startstate->GetStateID());
01901 
01902    FSMState *curstate=statelist;
01903 
01904    // We store all the states and their outgoing edges
01905    while(curstate!=NULL)
01906    {
01907       curstate->Store(mem);
01908       curstate=curstate->next;
01909    }
01910 }
01911 
01912 void FSM::Load(unsigned char * &ptr,MemStreamer *fsmmem)
01913 {
01914    // We load the number of states
01915    curidx=LoadUInt32(ptr);
01916 
01917    unsigned long startstateidx;
01918 
01919    startstateidx=LoadUInt32(ptr);
01920 
01921    FSMState **stateref=&statelist;
01922    FSMState *statearray=new(fsmmem) FSMState[curidx];
01923 
01924    for(unsigned i=0;i<curidx;i++)
01925    {
01926       *stateref=statearray+i;
01927       (*stateref)->stateid=i;
01928       (*stateref)->next=NULL;
01929       (*stateref)->origstateset=NULL;
01930 
01931       (*stateref)->Load(ptr,statearray,fsmmem);
01932 
01933       stateref=&((*stateref)->next);
01934    }
01935    *stateref=NULL;
01936 
01937    startstate=statearray+startstateidx;
01938 }
01939 
01940 #endif // FSM_STORE
01941 
01942 //**********************************************************************
01943 //**********************************************************************
01944 //**********************************************************************
01945 //**********************************************************************
01946 /*
01947 struct IntersectState
01948 {
01949    FSMState       *intersectstate;
01950    FSMState       *state1;
01951    FSMState       *state2;
01952    IntersectState *next_todo;
01953 };
01954 
01955 IntersectState *intersect_statearray;
01956 IntersectState *todo_list;
01957 unsigned       statecount2;
01958 
01959 inline FSMState *FSM::CreateIntersectState(FSMState *origstate1,FSMState *origstate2)
01960 {
01961    IntersectState *ref=intersect_statearray+origstate1->GetStateID()*statecount2+origstate2->GetStateID();
01962    if(ref->intersectstate==NULL)
01963    {
01964       ref->intersectstate=CreateState();
01965       ref->next_todo=todo_list;
01966       todo_list=ref;
01967    }
01968    return ref->intersectstate;
01969 }
01970 
01971 void FSM::IntersectFSMS(FSM *fsm1,FSM *fsm2)
01972 {
01973    FSMState *state1,*state2,*newstate;
01974 
01975    unsigned statecount1=fsm1->GetStateCount();
01976 
01977    statecount2=fsm2->GetStateCount();
01978 
01979    intersect_statearray=new IntersectState[statecount1*statecount2];
01980 
01981    todo_list=NULL;
01982 
01983    SetStartState(
01984       CreateIntersectState(fsm1->GetStartState(),fsm2->GetStartState())
01985    );
01986 
01987    while(todo_list!=NULL)
01988    {
01989       state1=todo_list->state1;
01990       state2=todo_list->state2;
01991       newstate=todo_list->intersectstate;
01992 
01993       todo_list=todo_list->next_todo;
01994 
01995       curedge1=state1->GetOutEdges();
01996 
01997       while(curedge1!=NULL)
01998       {
01999          switch(curedge1->GetType())
02000          {
02001          case EDGETYPE_LABEL:
02002             curedge2=state2->GetOutEdges();
02003             while(curedge2!=NULL)
02004             {
02005                if(curedge2->DoesMatchLabel(curedge1->GetLabelID())
02006                {
02007                   CreateLabelEdge(
02008                         newstate,
02009                         CreateIntersectState(curedge1->GetNextState(),curedge2->GetNextState()),
02010                         curedge1->GetLabelID());
02011                }
02012                curedge2=curedge2->next;
02013             }
02014             break;
02015 
02016          case EDGETYPE_NEGLABELLIST:
02017             curedge2=state2->GetOutEdges();
02018             while(curedge2!=NULL)
02019             {
02020                switch(curedge2->type)
02021                {
02022                case EDGETYPE_LABEL:
02023                   if(curedge1->DoesMatchLabel(curedge2->GetLabelID())
02024                   {
02025                      CreateLabelEdge(
02026                         newstate,
02027                         CreateIntersectState(curedge1->GetNextState(),curedge2->GetNextState()),
02028                         curedge2->GetLabelID());
02029                   }
02030                   break;
02031                case EDGETYPE_NEGLABELLIST:
02032                   CreateNegLabelEdge(newstate,
02033                      CreateIntersectState(curedge1->GetNextState(),curedge2->GetNextState()),
02034                      UnionNegLabelList(curedge1->GetLabelList(),curedge2->GetLabelList()));
02035                }
02036                curedge2=curedge2->next;
02037             }
02038          }
02039          curedge1=curedge1->next
02040       }
02041    }
02042 
02043    delete[] intersect_statearray;
02044 }
02045 
02046 */

Generated on Sat Oct 13 16:08:37 2001 for XMILL by doxygen1.2.11.1 written by Dimitri van Heesch, © 1997-2001