#include <stdio.h>
#include "Error.hpp"
#include "Output.hpp"
#include "SAXClient.hpp"
#include "ContMan.hpp"
#include "CurPath.hpp"
#include "XMLParse.hpp"
Go to the source code of this file.
Functions | |
void | StoreEndLabel () |
void | StoreEmptyEndLabel () |
void | StoreStartLabel (TLabelID labelid) |
void | StoreTextToken (unsigned blockid) |
void | CompressTextItem (char *str, int len, int leftwslen, int rightwslen) |
Variables | |
CurPath | curpath |
char | globalfullwhitespacescompress |
char | globalattribwhitespacescompress |
CompressContainer * | globalwhitespacecont |
char | ignore_comment |
char | ignore_cdata |
char | ignore_doctype |
char | ignore_pi |
XMLParse * | xmlparser |
|
CompressTextItem(): Definition at line 454 of file SAXClient.cpp. Referenced by SAXClient::HandleText().
00461 { 00462 CurPathIterator it,saveit; 00463 FSMManStateItem *fsmstate; 00464 TLabelID labelid; 00465 char overpoundedge; 00466 FSMState *curstate; 00467 PathDictNode *pathdictnode; 00468 00469 // We iterate over the current path 00470 curpath.InitIterator(&it); 00471 00472 PathTreeNode *curpathtreenode=pathtree.GetRootNode(); 00473 00474 // We start at the root-node of the reverse data guide 00475 // and traverse the path backward as long as no accepting state 00476 // has been reached 00477 while(curpathtreenode->IsAccepting()==0) 00478 { 00479 labelid=it.GotoPrev(); 00480 if(labelid==LABEL_UNDEFINED) 00481 break; 00482 00483 curpathtreenode=pathtree.ExtendCurPath(curpathtreenode,labelid); 00484 } 00485 00486 // After we reached an accepting state, we look at each 00487 // single regular expression and the corresponding state of the FSM 00488 // Note that each of the states already accepted the word! 00489 // Therefore, we only check whether there are additional pound-signs that 00490 // come afterwards 00491 00492 fsmstate=curpathtreenode->GetFSMStates(); 00493 00494 // Did we reach the end of the path? 00495 if(labelid==LABEL_UNDEFINED) 00496 { 00497 // We look for an FSM whose state is final for that path 00498 while(fsmstate!=NULL) 00499 { 00500 if(fsmstate->curstate->IsFinal()) 00501 // Did we find a final state => We send the text to the 00502 // corresponding path expression 00503 { 00504 if(fsmstate->pathexpr->CompressTextItem( 00505 str,len,fsmstate->GetPathDictNode(), 00506 leftwslen,rightwslen)) 00507 return; 00508 } 00509 fsmstate=fsmstate->next; 00510 } 00511 } 00512 else 00513 { 00514 // We haven't reached the end of the path, but we found 00515 // an accepting state? 
00516 00517 // Let's save the iterator 00518 saveit=it; 00519 00520 // Let's find the FSMs whose states are accepting 00521 while(fsmstate!=NULL) 00522 { 00523 if(fsmstate->curstate->IsAccepting()==0) 00524 { 00525 fsmstate=fsmstate->next; 00526 continue; 00527 } 00528 00529 // For each state, go over the rest of the path and 00530 // traverse the rest of the FSM and we instantiate 00531 // the # symbols. 00532 00533 pathdictnode=fsmstate->GetPathDictNode(); 00534 00535 curstate=fsmstate->curstate; 00536 00537 // Let's go the starting point in the iterator 00538 saveit=it; 00539 00540 // Let's instantiate the #'s as long as we still have 00541 // #'s ahead 00542 while(curstate->HasPoundsAhead()) 00543 { 00544 labelid=it.GotoPrev(); 00545 if(labelid==LABEL_UNDEFINED) // We reached the beginning of the path? 00546 break; 00547 curstate=curstate->GetNextState(labelid,&overpoundedge); 00548 00549 // Did we jump over a pound-edge ? 00550 // ==> We must advance the 'pathdictnode' item 00551 if(overpoundedge) 00552 pathdictnode=pathdict.FindOrCreatePath(pathdictnode,labelid); 00553 } 00554 00555 // Let's now try to compress the text with the compressor 00556 if(fsmstate->pathexpr->CompressTextItem(str,len,pathdictnode,leftwslen,rightwslen)) 00557 return; 00558 00559 fsmstate=fsmstate->next; 00560 } 00561 } 00562 // No FSM accepts the path? ==> Something is wrong 00563 Error("Fatal error: no automaton accepts current path !\n"); 00564 Exit(); 00565 } 00566 |
|
StoreEmptyEndLabel(): Definition at line 88 of file SAXClient.cpp. 00090 { 00091 globaltreecont->StoreCompressedSInt(0,TREETOKEN_EMPTYENDLABEL); 00092 00093 #ifdef USE_FORWARD_DATAGUIDE 00094 curpathtreenode=curpathtreenode->parent; 00095 00096 #ifdef USE_NO_DATAGUIDE 00097 pathtreemem.RemoveLastMemBlock(); 00098 #endif 00099 #endif 00100 } |
|
StoreEndLabel(): Definition at line 74 of file SAXClient.cpp. 00076 { 00077 globaltreecont->StoreCompressedSInt(0,TREETOKEN_ENDLABEL); 00078 00079 #ifdef USE_FORWARD_DATAGUIDE 00080 curpathtreenode=curpathtreenode->parent; 00081 00082 #ifdef USE_NO_DATAGUIDE 00083 pathtreemem.RemoveLastMemBlock(); 00084 #endif 00085 #endif 00086 } |
|
StoreStartLabel(): Definition at line 102 of file SAXClient.cpp. 00106 { 00107 globaltreecont->StoreCompressedSInt(0,GET_LABELID(labelid)+LABELIDX_TOKENOFFS); 00108 00109 #ifdef USE_FORWARD_DATAGUIDE 00110 #ifdef USE_NO_DATAGUIDE 00111 pathtreemem.StartNewMemBlock(); 00112 #endif 00113 00114 curpathtreenode=pathtree.ExtendCurPath(curpathtreenode,labelid); 00115 #endif 00116 } |
|
StoreTextToken(): Definition at line 118 of file SAXClient.cpp. 00120 { 00121 globaltreecont->StoreCompressedSInt(1,blockid); 00122 /* 00123 #ifdef USE_FORWARD_DATAGUIDE 00124 CurPathIterator it; 00125 TLabelID labelid; 00126 PathTreeNode *mycurnode=reversedataguide.GetRootNode(); 00127 00128 curpath.InitIterator(&it); 00129 while((labelid=it.GotoPrev())!=LABEL_UNDEFINED) 00130 mycurnode=reversedataguide.ExtendCurPath(mycurnode,labelid); 00131 #endif 00132 */ 00133 } |
|
curpath: Definition at line 43 of file SAXClient.cpp. |
|
globalattribwhitespacescompress: Definition at line 46 of file SAXClient.cpp. |
|
globalfullwhitespacescompress: Definition at line 45 of file SAXClient.cpp. |
|
globalwhitespacecont: Definition at line 48 of file SAXClient.cpp. |
|
ignore_cdata: Definition at line 52 of file SAXClient.cpp. |
|
ignore_comment: Definition at line 51 of file SAXClient.cpp. |
|
ignore_doctype: Definition at line 53 of file SAXClient.cpp. |
|
ignore_pi: Definition at line 54 of file SAXClient.cpp. |
|
xmlparser: Definition at line 57 of file SAXClient.cpp. |