Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members  

SAXClient.cpp File Reference

#include <stdio.h>
#include "Error.hpp"
#include "Output.hpp"
#include "SAXClient.hpp"
#include "ContMan.hpp"
#include "CurPath.hpp"
#include "XMLParse.hpp"

Go to the source code of this file.

Functions

void StoreEndLabel ()
void StoreEmptyEndLabel ()
void StoreStartLabel (TLabelID labelid)
void StoreTextToken (unsigned blockid)
void CompressTextItem (char *str, int len, int leftwslen, int rightwslen)

Variables

CurPath curpath
char globalfullwhitespacescompress
char globalattribwhitespacescompress
CompressContainer * globalwhitespacecont
char ignore_comment
char ignore_cdata
char ignore_doctype
char ignore_pi
XMLParse * xmlparser


Function Documentation

void CompressTextItem (char *str, int len, int leftwslen, int rightwslen)
 

Definition at line 454 of file SAXClient.cpp.

Referenced by SAXClient::HandleText().

00461 {
00462    CurPathIterator         it,saveit;
00463    FSMManStateItem         *fsmstate;
00464    TLabelID                labelid;
00465    char                    overpoundedge;
00466    FSMState                *curstate;
00467    PathDictNode            *pathdictnode;
00468 
00469    // We iterate over the current path
00470    curpath.InitIterator(&it);
00471 
00472    PathTreeNode *curpathtreenode=pathtree.GetRootNode();
00473 
00474    // We start at the root-node of the reverse data guide
00475    // and traverse the path backward as long as no accepting state
00476    // has been reached
00477    while(curpathtreenode->IsAccepting()==0)
00478    {
00479       labelid=it.GotoPrev();
00480       if(labelid==LABEL_UNDEFINED)
00481          break;
00482 
00483       curpathtreenode=pathtree.ExtendCurPath(curpathtreenode,labelid);
00484    }
00485 
00486    // After we reached an accepting state, we look at each
00487    // single regular expression and the corresponding state of the FSM
00488    // Note that each of the states already accepted the word!
00489    // Therefore, we only check whether there are additional pound-signs that
00490    // come afterwards
00491 
00492    fsmstate=curpathtreenode->GetFSMStates();
00493 
00494    // Did we reach the end of the path?
00495    if(labelid==LABEL_UNDEFINED)
00496    {
00497       // We look for an FSM whose state is final for that path
00498       while(fsmstate!=NULL)
00499       {
00500          if(fsmstate->curstate->IsFinal())
00501             // Did we find a final state => We send the text to the
00502             // corresponding path expression
00503          {
00504             if(fsmstate->pathexpr->CompressTextItem(
00505                   str,len,fsmstate->GetPathDictNode(),
00506                   leftwslen,rightwslen))
00507                return;
00508          }
00509          fsmstate=fsmstate->next;
00510       }
00511    }
00512    else
00513    {
00514       // We haven't reached the end of the path, but we found
00515       // an accepting state?
00516 
00517       // Let's save the iterator
00518       saveit=it;
00519 
00520       // Let's find the FSMs whose states are accepting
00521       while(fsmstate!=NULL)
00522       {
00523          if(fsmstate->curstate->IsAccepting()==0)
00524          {
00525             fsmstate=fsmstate->next;
00526             continue;
00527          }
00528 
00529          // For each state, go over the rest of the path and
00530          // traverse the rest of the FSM and we instantiate
00531          // the # symbols.
00532 
00533          pathdictnode=fsmstate->GetPathDictNode();
00534 
00535          curstate=fsmstate->curstate;
00536 
00537          // Let's go the starting point in the iterator
00538          saveit=it;
00539 
00540          // Let's instantiate the #'s as long as we still have
00541          // #'s ahead
00542          while(curstate->HasPoundsAhead())
00543          {
00544             labelid=it.GotoPrev();
00545             if(labelid==LABEL_UNDEFINED)  // We reached the beginning of the path?
00546                break;
00547             curstate=curstate->GetNextState(labelid,&overpoundedge);
00548 
00549             // Did we jump over a pound-edge ?
00550             // ==> We must advance the 'pathdictnode' item
00551             if(overpoundedge)
00552                pathdictnode=pathdict.FindOrCreatePath(pathdictnode,labelid);
00553          }
00554 
00555          // Let's now try to compress the text with the compressor
00556          if(fsmstate->pathexpr->CompressTextItem(str,len,pathdictnode,leftwslen,rightwslen))
00557             return;
00558       
00559          fsmstate=fsmstate->next;
00560       }
00561    }
00562    // No FSM accepts the path? ==> Something is wrong
00563    Error("Fatal error: no automaton accepts current path !\n");
00564    Exit();
00565 }
00566 

void StoreEmptyEndLabel ()   [inline]
 

Definition at line 88 of file SAXClient.cpp.

00090 {
00091    globaltreecont->StoreCompressedSInt(0,TREETOKEN_EMPTYENDLABEL);
00092 
00093 #ifdef USE_FORWARD_DATAGUIDE
00094    curpathtreenode=curpathtreenode->parent;
00095 
00096 #ifdef USE_NO_DATAGUIDE
00097    pathtreemem.RemoveLastMemBlock();
00098 #endif
00099 #endif
00100 }

void StoreEndLabel ()   [inline]
 

Definition at line 74 of file SAXClient.cpp.

00076 {
00077    globaltreecont->StoreCompressedSInt(0,TREETOKEN_ENDLABEL);
00078 
00079 #ifdef USE_FORWARD_DATAGUIDE
00080    curpathtreenode=curpathtreenode->parent;
00081 
00082 #ifdef USE_NO_DATAGUIDE
00083    pathtreemem.RemoveLastMemBlock();
00084 #endif
00085 #endif
00086 }

void StoreStartLabel (TLabelID labelid)   [inline]
 

Definition at line 102 of file SAXClient.cpp.

00106 {
00107    globaltreecont->StoreCompressedSInt(0,GET_LABELID(labelid)+LABELIDX_TOKENOFFS);
00108 
00109 #ifdef USE_FORWARD_DATAGUIDE
00110 #ifdef USE_NO_DATAGUIDE
00111    pathtreemem.StartNewMemBlock();
00112 #endif
00113 
00114    curpathtreenode=pathtree.ExtendCurPath(curpathtreenode,labelid);
00115 #endif
00116 }

void StoreTextToken (unsigned blockid)   [inline]
 

Definition at line 118 of file SAXClient.cpp.

00120 {
00121    globaltreecont->StoreCompressedSInt(1,blockid);
00122 /*
00123 #ifdef USE_FORWARD_DATAGUIDE
00124    CurPathIterator it;
00125    TLabelID labelid;
00126    PathTreeNode *mycurnode=reversedataguide.GetRootNode();
00127 
00128    curpath.InitIterator(&it);
00129    while((labelid=it.GotoPrev())!=LABEL_UNDEFINED)
00130       mycurnode=reversedataguide.ExtendCurPath(mycurnode,labelid);
00131 #endif
00132 */
00133 }


Variable Documentation

CurPath curpath
 

Definition at line 43 of file SAXClient.cpp.

char globalattribwhitespacescompress
 

Definition at line 46 of file SAXClient.cpp.

char globalfullwhitespacescompress
 

Definition at line 45 of file SAXClient.cpp.

CompressContainer* globalwhitespacecont
 

Definition at line 48 of file SAXClient.cpp.

char ignore_cdata
 

Definition at line 52 of file SAXClient.cpp.

char ignore_comment
 

Definition at line 51 of file SAXClient.cpp.

char ignore_doctype
 

Definition at line 53 of file SAXClient.cpp.

char ignore_pi
 

Definition at line 54 of file SAXClient.cpp.

XMLParse* xmlparser
 

Definition at line 57 of file SAXClient.cpp.


Generated on Sat Oct 13 16:08:50 2001 for XMILL by doxygen 1.2.11.1, written by Dimitri van Heesch, © 1997-2001