00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #include <stdio.h>
00035
00036 #include "Error.hpp"
00037 #include "Output.hpp"
00038 #include "SAXClient.hpp"
00039 #include "ContMan.hpp"
00040 #include "CurPath.hpp"
00041 #include "XMLParse.hpp"
00042
00043 extern CurPath curpath;
00044
00045 extern char globalfullwhitespacescompress;
00046 extern char globalattribwhitespacescompress;
00047
00048 extern CompressContainer *globalwhitespacecont;
00049
00050
00051 extern char ignore_comment;
00052 extern char ignore_cdata;
00053 extern char ignore_doctype;
00054 extern char ignore_pi;
00055
00056
00057 XMLParse *xmlparser;
00058
00059
#if defined(USE_FORWARD_DATAGUIDE)

// Cursor into the path tree used by the forward data guide.
// StoreStartLabel() moves it to a child node, StoreEndLabel() and
// StoreEmptyEndLabel() move it back to the parent node.
PathTreeNode *curpathtreenode;

// Places the forward data guide cursor on the root node of the path tree.
void InitForwardDataGuide()
{
   curpathtreenode = pathtree.GetRootNode();
}

#endif // USE_FORWARD_DATAGUIDE
00068
00069
00070
00071
00072
00073
00074 inline void StoreEndLabel()
00075
00076 {
00077 globaltreecont->StoreCompressedSInt(0,TREETOKEN_ENDLABEL);
00078
00079 #ifdef USE_FORWARD_DATAGUIDE
00080 curpathtreenode=curpathtreenode->parent;
00081
00082 #ifdef USE_NO_DATAGUIDE
00083 pathtreemem.RemoveLastMemBlock();
00084 #endif
00085 #endif
00086 }
00087
00088 inline void StoreEmptyEndLabel()
00089
00090 {
00091 globaltreecont->StoreCompressedSInt(0,TREETOKEN_EMPTYENDLABEL);
00092
00093 #ifdef USE_FORWARD_DATAGUIDE
00094 curpathtreenode=curpathtreenode->parent;
00095
00096 #ifdef USE_NO_DATAGUIDE
00097 pathtreemem.RemoveLastMemBlock();
00098 #endif
00099 #endif
00100 }
00101
00102 inline void StoreStartLabel(TLabelID labelid)
00103
00104
00105
00106 {
00107 globaltreecont->StoreCompressedSInt(0,GET_LABELID(labelid)+LABELIDX_TOKENOFFS);
00108
00109 #ifdef USE_FORWARD_DATAGUIDE
00110 #ifdef USE_NO_DATAGUIDE
00111 pathtreemem.StartNewMemBlock();
00112 #endif
00113
00114 curpathtreenode=pathtree.ExtendCurPath(curpathtreenode,labelid);
00115 #endif
00116 }
00117
00118 inline void StoreTextToken(unsigned blockid)
00119
00120 {
00121 globaltreecont->StoreCompressedSInt(1,blockid);
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133 }
00134
00135
00136
00137
00138 void CompressTextItem(char *str,int len,int leftwslen,int rightwslen);
00139
00140 void SAXClient::HandleAttribName(char *str,int len,char iscont)
00141
00142 {
00143
00144 TLabelID labelid=globallabeldict.FindLabelOrAttrib(str,len,1);
00145
00146 if(labelid==LABEL_UNDEFINED)
00147 labelid=globallabeldict.CreateLabelOrAttrib(str,len,1);
00148
00149
00150 curpath.AddLabel(labelid);
00151
00152
00153 StoreStartLabel(labelid);
00154 }
00155
00156 void SAXClient::HandleAttribValue(char *str,int len,char iscont)
00157
00158 {
00159
00160 CompressTextItem(str,len,0,0);
00161
00162
00163 curpath.RemoveLabel();
00164
00165
00166 StoreEndLabel();
00167 }
00168
00169 void SAXClient::HandleAttribWhiteSpaces(char *str,int len,char iscont)
00170 {
00171 if(globalattribwhitespacescompress!=WHITESPACE_IGNORE)
00172 {
00173 if(len>0)
00174 {
00175 globalwhitespacecont->StoreUInt32(len);
00176 globalwhitespacecont->StoreData(str,len);
00177 globaltreecont->StoreCompressedSInt(0,TREETOKEN_ATTRIBWHITESPACE);
00178 }
00179 }
00180 }
00181
00182 void SAXClient::HandleStartLabel(char *str,int len,char iscont)
00183
00184 {
00185
00186 TLabelID labelid=globallabeldict.FindLabelOrAttrib(str,len,0);
00187
00188 if((len==9)&&(memcmp(str,"CARBOHYD ",9)==0))
00189 labelid=labelid;
00190
00191 if(labelid==LABEL_UNDEFINED)
00192 {
00193 labelid=globallabeldict.CreateLabelOrAttrib(str,len,0);
00194 if(labelid==LABEL_UNDEFINED)
00195 labelid=LABEL_UNDEFINED;
00196 }
00197
00198
00199 curpath.AddLabel(labelid);
00200
00201
00202 StoreStartLabel(labelid);
00203 }
00204
00205 void SAXClient::HandleEndLabel(char *str,int len,char iscont)
00206
00207 {
00208 TLabelID labelid=curpath.RemoveLabel();
00209 TLabelID endlabelid;
00210
00211
00212 while((len>0)&&
00213 ((str[len-1]=='\n')||(str[len-1]=='\r')||(str[len-1]=='\t')||(str[len-1]==' ')))
00214 {
00215 Error("End label has trailing white spaces!");
00216 PrintErrorMsg();
00217 len--;
00218 }
00219
00220
00221
00222 if(labelid==LABEL_UNDEFINED)
00223 {
00224 Error("Unexpected end label '");
00225 ErrorCont(str,len);
00226 ErrorCont("' !");
00227 xmlparser->XMLParseError("");
00228 }
00229
00230 if(str==NULL)
00231 StoreEmptyEndLabel();
00232 else
00233 {
00234
00235 endlabelid=globallabeldict.FindLabelOrAttrib(str,len,0);
00236
00237 if(endlabelid!=labelid)
00238
00239
00240 {
00241 char *ptr;
00242 unsigned long startlen=globallabeldict.LookupCompressLabel(labelid,&ptr);
00243
00244 TLabelID prevlabelid=curpath.RemoveLabel();
00245 if(prevlabelid!=endlabelid)
00246 {
00247 Error("End label '");
00248 ErrorCont(str,len);
00249 ErrorCont("' does not match start label '");
00250 ErrorCont(ptr,startlen);
00251 ErrorCont("' !");
00252 xmlparser->XMLParseError("");
00253 }
00254
00255
00256 char tmpstr[100];
00257
00258 Error("Warning: End label '");
00259 ErrorCont(str,len);
00260 sprintf(tmpstr,"' in line %lu does not match start label '",xmlparser->GetCurLineNo());
00261 ErrorCont(tmpstr);
00262 ErrorCont(ptr,startlen);
00263 ErrorCont("'!\n => Additional end label inserted!");
00264 PrintErrorMsg();
00265
00266
00267 StoreEndLabel();
00268 }
00269 StoreEndLabel();
00270 }
00271 }
00272
00273 void SAXClient::HandleText(char *str,int len,char iscont,int leftwslen,int rightwslen)
00274
00275 {
00276 if((leftwslen==len)&&(rightwslen==len))
00277
00278 {
00279
00280
00281 switch(globalfullwhitespacescompress)
00282 {
00283 case WHITESPACE_IGNORE:
00284 return;
00285
00286 case WHITESPACE_STOREGLOBAL:
00287 globaltreecont->StoreCompressedSInt(0,TREETOKEN_WHITESPACE);
00288 globalwhitespacecont->StoreUInt32(len);
00289 globalwhitespacecont->StoreData(str,len);
00290 return;
00291
00292 case WHITESPACE_STORETEXT:
00293 CompressTextItem(str,len,0,0);
00294 return;
00295 }
00296 }
00297
00298
00299
00300 CompressTextItem(str,len,leftwslen,rightwslen);
00301 }
00302
00303 void SAXClient::HandleComment(char *str,int len,char iscont)
00304
00305 {
00306 if(!ignore_comment)
00307 {
00308 globaltreecont->StoreCompressedSInt(0,TREETOKEN_SPECIAL);
00309 globalspecialcont->StoreUInt32(len);
00310 globalspecialcont->StoreData(str,len);
00311 }
00312 }
00313
00314 void SAXClient::HandlePI(char *str,int len,char iscont)
00315
00316 {
00317 if(!ignore_pi)
00318 {
00319 globaltreecont->StoreCompressedSInt(0,TREETOKEN_SPECIAL);
00320 globalspecialcont->StoreUInt32(len);
00321 globalspecialcont->StoreData(str,len);
00322 }
00323 }
00324
00325 void SAXClient::HandleDOCTYPE(char *str,int len,char iscont)
00326
00327 {
00328 if(!ignore_doctype)
00329 {
00330 globaltreecont->StoreCompressedSInt(0,TREETOKEN_SPECIAL);
00331 globalspecialcont->StoreUInt32(len);
00332 globalspecialcont->StoreData(str,len);
00333 }
00334 }
00335
00336 void SAXClient::HandleCDATA(char *str,int len,char iscont)
00337
00338 {
00339 if(!ignore_cdata)
00340 {
00341 globaltreecont->StoreCompressedSInt(0,TREETOKEN_SPECIAL);
00342 globalspecialcont->StoreUInt32(len);
00343 globalspecialcont->StoreData(str,len);
00344 }
00345 }
00346
00347
00348
00349
00350 inline char VPathExpr::CompressTextItem(char *str,int len,PathDictNode *pathdictnode,int wsleftlen,int wsrightlen)
00351
00352
00353
00354
00355 {
00356 CompressContainerBlock *contblock=pathdictnode->GetCompressContainerBlock();
00357 CompressContainer *cont;
00358 char *dataptr;
00359
00360
00361 if(contblock==NULL)
00362 {
00363 contblock=pathdictnode->AssignCompressContainerBlock(
00364 GetUserContNum(),
00365 GetUserDataSize(),this);
00366
00367 cont=contblock->GetContainer(0);
00368 dataptr=contblock->GetUserDataPtr();
00369
00370
00371
00372 InitCompress(cont,dataptr);
00373 }
00374 else
00375 {
00376 cont=contblock->GetContainer(0);
00377 dataptr=contblock->GetUserDataPtr();
00378 }
00379
00380 if(wsleftlen>0)
00381 {
00382
00383
00384
00385 switch(leftwhitespacescompress)
00386 {
00387 case WHITESPACE_IGNORE:
00388 case WHITESPACE_STOREGLOBAL:
00389 str+=wsleftlen;
00390 len-=wsleftlen;
00391 break;
00392 case WHITESPACE_STORETEXT:
00393 wsleftlen=0;
00394 }
00395 }
00396 if(wsrightlen>0)
00397 {
00398
00399
00400
00401 switch(rightwhitespacescompress)
00402 {
00403 case WHITESPACE_IGNORE:
00404 case WHITESPACE_STOREGLOBAL:
00405 len-=wsrightlen;
00406 break;
00407 case WHITESPACE_STORETEXT:
00408 wsrightlen=0;
00409 }
00410 }
00411
00412 char *savedataptr=dataptr;
00413
00414
00415
00416 if(len>0)
00417 {
00418 if(usercompressor->ParseString(str,len,dataptr)==0)
00419 return 0;
00420
00421 usercompressor->CompressString(str,len,cont,savedataptr);
00422 }
00423
00424
00425
00426
00427 if((wsleftlen>0)&&(leftwhitespacescompress==WHITESPACE_STOREGLOBAL))
00428 {
00429 globaltreecont->StoreCompressedSInt(0,TREETOKEN_WHITESPACE);
00430 globalwhitespacecont->StoreUInt32(wsleftlen);
00431 globalwhitespacecont->StoreData(str-wsleftlen,wsleftlen);
00432 }
00433
00434
00435
00436
00437 if(len>0)
00438 StoreTextToken(contblock->GetID());
00439
00440
00441 if((wsrightlen>0)&&(rightwhitespacescompress==WHITESPACE_STOREGLOBAL))
00442 {
00443 globaltreecont->StoreCompressedSInt(0,TREETOKEN_WHITESPACE);
00444 globalwhitespacecont->StoreUInt32(wsrightlen);
00445 globalwhitespacecont->StoreData(str+len,wsrightlen);
00446 }
00447 return 1;
00448 }
00449
00450
00451
00452 #ifndef USE_FORWARD_DATAGUIDE
00453
00454 void CompressTextItem(char *str,int len,int leftwslen,int rightwslen)
00455
00456
00457
00458
00459
00460
00461 {
00462 CurPathIterator it,saveit;
00463 FSMManStateItem *fsmstate;
00464 TLabelID labelid;
00465 char overpoundedge;
00466 FSMState *curstate;
00467 PathDictNode *pathdictnode;
00468
00469
00470 curpath.InitIterator(&it);
00471
00472 PathTreeNode *curpathtreenode=pathtree.GetRootNode();
00473
00474
00475
00476
00477 while(curpathtreenode->IsAccepting()==0)
00478 {
00479 labelid=it.GotoPrev();
00480 if(labelid==LABEL_UNDEFINED)
00481 break;
00482
00483 curpathtreenode=pathtree.ExtendCurPath(curpathtreenode,labelid);
00484 }
00485
00486
00487
00488
00489
00490
00491
00492 fsmstate=curpathtreenode->GetFSMStates();
00493
00494
00495 if(labelid==LABEL_UNDEFINED)
00496 {
00497
00498 while(fsmstate!=NULL)
00499 {
00500 if(fsmstate->curstate->IsFinal())
00501
00502
00503 {
00504 if(fsmstate->pathexpr->CompressTextItem(
00505 str,len,fsmstate->GetPathDictNode(),
00506 leftwslen,rightwslen))
00507 return;
00508 }
00509 fsmstate=fsmstate->next;
00510 }
00511 }
00512 else
00513 {
00514
00515
00516
00517
00518 saveit=it;
00519
00520
00521 while(fsmstate!=NULL)
00522 {
00523 if(fsmstate->curstate->IsAccepting()==0)
00524 {
00525 fsmstate=fsmstate->next;
00526 continue;
00527 }
00528
00529
00530
00531
00532
00533 pathdictnode=fsmstate->GetPathDictNode();
00534
00535 curstate=fsmstate->curstate;
00536
00537
00538 saveit=it;
00539
00540
00541
00542 while(curstate->HasPoundsAhead())
00543 {
00544 labelid=it.GotoPrev();
00545 if(labelid==LABEL_UNDEFINED)
00546 break;
00547 curstate=curstate->GetNextState(labelid,&overpoundedge);
00548
00549
00550
00551 if(overpoundedge)
00552 pathdictnode=pathdict.FindOrCreatePath(pathdictnode,labelid);
00553 }
00554
00555
00556 if(fsmstate->pathexpr->CompressTextItem(str,len,pathdictnode,leftwslen,rightwslen))
00557 return;
00558
00559 fsmstate=fsmstate->next;
00560 }
00561 }
00562
00563 Error("Fatal error: no automaton accepts current path !\n");
00564 Exit();
00565 }
00566
00567
00568
00569
00570
00571
00572 #else // Should we use a forward data guide ?
00573
00574 void CompressTextItem(char *str,int len,int leftwslen,int rightwslen)
00575 {
00576 FSMManStateItem *fsmstate;
00577
00578
00579
00580
00581 fsmstate=curpathtreenode->GetFSMStates();
00582
00583 while(fsmstate!=NULL)
00584 {
00585 if(fsmstate->curstate->IsFinal())
00586 {
00587 if(fsmstate->pathexpr->CompressTextItem(
00588 str,len,fsmstate->GetPathDictNode(),
00589 leftwslen,rightwslen))
00590 return;
00591 }
00592 fsmstate=fsmstate->next;
00593 }
00594
00595 Error("No automaton accepts string !\n");
00596 Exit();
00597 }
00598
00599 #endif