00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #include "Error.hpp"
00036 #include "FileParser.hpp"
00037 #include "SAXClient.hpp"
00038
00039 extern unsigned long memory_cutoff;
00040
00041
00042
00043
00044
// Returns the first position in [ptr,endptr) that does not hold a blank,
// tab, carriage return or line feed. If every character in the range is
// white space (or the range is empty), 'endptr' itself is returned;
// 'endptr' is never dereferenced.
inline char *TraverseWhiteSpaces(char *ptr,char *endptr)
{
   for(;ptr<endptr;ptr++)
   {
      const char ch=*ptr;

      if((ch!=' ')&&(ch!='\t')&&(ch!='\r')&&(ch!='\n'))
         break;
   }
   return ptr;
}
00052
00053 class XMLParse : public FileParser
00054 {
00055 SAXClient *saxclient;
00056
00057 public:
00058 void XMLParseError(char *errmsg)
00059
00060 {
00061 char tmpstr[50];
00062 sprintf(tmpstr,"Parse error in line %lu:\n",GetCurLineNo());
00063 Error(tmpstr);
00064 ErrorCont(errmsg);
00065 Exit();
00066 }
00067
00068 void XMLParseError(char *errmsg,int savelineno)
00069
00070 {
00071 char tmpstr[50];
00072 sprintf(tmpstr,errmsg,savelineno);
00073 Error(tmpstr);
00074 Exit();
00075 }
00076
00077 private:
00078 char SkipWhiteSpaces()
00079 {
00080 char c;
00081 do
00082 {
00083 PeekChar(&c);
00084 if((c!=' ')&&(c!='\t')&&(c!='\r')&&(c!='\n'))
00085 break;
00086 SkipChar();
00087 }
00088 while(1);
00089
00090 return c;
00091 }
00092
00093 char ParseAttribs()
00094
00095
00096 {
00097 char c;
00098 char *strptr;
00099 int len;
00100
00101 do
00102 {
00103 while(ReadWhiteSpaces(&strptr,&len)==0)
00104
00105 saxclient->HandleAttribWhiteSpaces(strptr,len,1);
00106
00107 saxclient->HandleAttribWhiteSpaces(strptr,len,0);
00108
00109
00110
00111 PeekChar(&c);
00112 if((c=='>')||(c=='/'))
00113 {
00114 SkipChar();
00115 return c;
00116 }
00117
00118 while(ReadStringUntil(&strptr,&len,1,'=',0)==0)
00119
00120 saxclient->HandleAttribName(strptr,len,1);
00121
00122
00123 saxclient->HandleAttribName(strptr,len-1,0);
00124
00125 if(strptr[len-1]!='=')
00126
00127 {
00128 c=SkipWhiteSpaces();
00129 if(c!='=')
00130 XMLParseError("Symbol '=' expected !");
00131 SkipChar();
00132 }
00133
00134 c=SkipWhiteSpaces();
00135
00136
00137
00138
00139
00140 if(c!='"')
00141 {
00142 while(ReadStringUntil(&strptr,&len,1,'>','/')==0)
00143 saxclient->HandleAttribValue(strptr,len,0);
00144
00145 saxclient->HandleAttribValue(strptr,len-1,1);
00146
00147 c=strptr[len-1];
00148 if((c=='/')||(c=='>'))
00149 return c;
00150 }
00151 else
00152 {
00153 SkipChar();
00154
00155 while(ReadStringUntil(&strptr,&len,0,'"','>')==0)
00156 saxclient->HandleAttribValue(strptr,len,0);
00157
00158 if(strptr[len-1]=='>')
00159 {
00160 char tmpstr[100];
00161 sprintf(tmpstr,"Line %lu: Missing '\"' at the end of attribute value '",GetCurLineNo());
00162 Error(tmpstr);
00163 ErrorCont(strptr,len-1);
00164 ErrorCont("'!");
00165 PrintErrorMsg();
00166 UndoReadChar();
00167 len--;
00168 }
00169
00170 saxclient->HandleAttribValue(strptr,len-1,1);
00171
00172 PeekChar(&c);
00173 if((c!='>')&&(c!=' ')&&(c!='\t')&&(c!='\n')&&(c!='\r')&&(c!='/'))
00174 {
00175 char tmpstr[50];
00176 sprintf(tmpstr,"Skip invalid character '%c' in line %lu",c,GetCurLineNo());
00177 Error(tmpstr);
00178 PrintErrorMsg();
00179 SkipChar();
00180 }
00181 }
00182 }
00183 while(1);
00184 }
00185
00186 void ParseLabel()
00187
00188 {
00189 char c,*ptr;
00190 int len;
00191
00192 PeekChar(&c);
00193
00194 if(c=='/')
00195 {
00196 GetChar(&c);
00197
00198 while(ReadStringUntil(&ptr,&len,0,'>','<')==0)
00199
00200
00201 saxclient->HandleEndLabel(ptr,len,1);
00202
00203 if(ptr[len-1]=='<')
00204 {
00205 Error("Unfinished end label!");
00206 PrintErrorMsg();
00207 UndoReadChar();
00208 }
00209
00210 saxclient->HandleEndLabel(ptr,len-1,0);
00211 return;
00212 }
00213
00214 while(ReadStringUntil(&ptr,&len,1,'>','/')==0)
00215
00216 saxclient->HandleStartLabel(ptr,len,1);
00217
00218 switch(ptr[len-1])
00219 {
00220 case '>':
00221 saxclient->HandleStartLabel(ptr,len-1,0);
00222 return;
00223
00224 case '/':
00225 saxclient->HandleStartLabel(ptr,len-1,0);
00226 GetChar(&c);
00227 if(c!='>')
00228 XMLParseError("Symbol '/' in label must be followed by '>' !");
00229
00230 saxclient->HandleEndLabel(NULL,0,0);
00231 return;
00232
00233 default:
00234 saxclient->HandleStartLabel(ptr,len,0);
00235 c=ParseAttribs();
00236 if(c=='/')
00237 {
00238
00239 saxclient->HandleEndLabel(NULL,0,0);
00240 GetChar(&c);
00241 }
00242 if(c!='>')
00243 XMLParseError("Symbol '>' expected after '/' in tag!");
00244 }
00245 }
00246
00247 void ParsePI()
00248
00249 {
00250 int len,savelineno=GetCurLineNo();
00251 char *ptr;
00252
00253 do
00254 {
00255 if(ReadStringUntil(&ptr,&len,"?>"))
00256 break;
00257
00258 if(len==0)
00259 XMLParseError("Could not find closing '?>' for processing instruction in line %lu !",savelineno);
00260
00261 saxclient->HandlePI(ptr,len,1);
00262 }
00263 while(1);
00264
00265 saxclient->HandlePI(ptr,len,0);
00266 }
00267
00268 void ParseCDATA()
00269
00270 {
00271 int len,savelineno=GetCurLineNo();
00272 char *ptr;
00273
00274 while(ReadStringUntil(&ptr,&len,"]]>")==0)
00275 {
00276 if(len==0)
00277 XMLParseError("Could not find closing ']]>' for CDATA section starting in line %lu !",savelineno);
00278
00279 saxclient->HandleCDATA(ptr,len,1);
00280 }
00281
00282 saxclient->HandleCDATA(ptr,len,0);
00283 }
00284
00285 void ParseComment()
00286
00287 {
00288 int len,savelineno=GetCurLineNo();
00289 char *ptr;
00290
00291 while(ReadStringUntil(&ptr,&len,"-->")==0)
00292 {
00293 if(len==0)
00294 XMLParseError("Could not find closing '-->' for comment starting in line %lu !",savelineno);
00295
00296 saxclient->HandleComment(ptr,len,1);
00297 }
00298
00299 saxclient->HandleComment(ptr,len,0);
00300 }
00301
00302 void ParseText()
00303
00304 {
00305 char err;
00306 int len;
00307 char *ptr,*leftwsptr,*rightwsptr,*endptr;
00308
00309
00310 err=ReadStringUntil(&ptr,&len,'<');
00311
00312 if((err==0)&&(len==0))
00313 return;
00314
00315 endptr=ptr+len;
00316
00317
00318 leftwsptr=ptr;
00319
00320 while((leftwsptr<endptr)&&
00321 (*leftwsptr==' ')||(*leftwsptr=='\t')||
00322 (*leftwsptr=='\r')||(*leftwsptr=='\n'))
00323 leftwsptr++;
00324
00325 while(err==0)
00326
00327 {
00328 if(len>0)
00329 {
00330 if(IsEndOfFile()&&(len==leftwsptr-ptr))
00331
00332
00333 saxclient->HandleText(ptr,len,0,len,len);
00334 else
00335 saxclient->HandleText(ptr,len,1,leftwsptr-ptr,0);
00336 }
00337
00338 if(leftwsptr==endptr)
00339
00340 {
00341 err=ReadStringUntil(&ptr,&len,'<');
00342
00343 if((err==0)&&(len==0))
00344 return;
00345
00346 leftwsptr=ptr;
00347
00348 while((leftwsptr<endptr)&&
00349 (*leftwsptr==' ')||(*leftwsptr=='\t')||
00350 (*leftwsptr=='\r')||(*leftwsptr=='\n'))
00351 leftwsptr++;
00352 }
00353 else
00354 {
00355 err=ReadStringUntil(&ptr,&len,'<');
00356 if((err==0)&&(len==0))
00357 return;
00358
00359 leftwsptr=ptr;
00360 }
00361 }
00362
00363
00364
00365
00366 UndoReadChar();
00367 len--;
00368
00369 endptr=ptr+len;
00370
00371
00372 rightwsptr=endptr-1;
00373
00374 while((rightwsptr>=ptr)&&
00375 (*rightwsptr==' ')||(*rightwsptr=='\t')||
00376 (*rightwsptr=='\r')||(*rightwsptr=='\n'))
00377 rightwsptr--;
00378
00379 if(len>0)
00380 saxclient->HandleText(ptr,len,0,leftwsptr-ptr,endptr-rightwsptr-1);
00381 }
00382
00383 void ParseDOCTYPE()
00384
00385
00386 {
00387 int len,savelineno=GetCurLineNo();
00388 char *ptr;
00389 char *myendptr,*curptr;
00390
00391
00392 len=GetCurBlockPtr(&ptr);
00393 if(len==0)
00394 RefillAndGetCurBlockPtr(&ptr,&len);
00395
00396 myendptr=ptr+len;
00397 curptr=ptr;
00398
00399 do
00400 {
00401 if(*curptr=='[')
00402 {
00403 do
00404 {
00405 curptr++;
00406 if(curptr==myendptr)
00407 {
00408 saxclient->HandleDOCTYPE(ptr,len,1);
00409 FastSkipData(len);
00410 RefillAndGetCurBlockPtr(&ptr,&len);
00411 if(len==0)
00412 XMLParseError("Could not find closing ']>' for DOCTYPE section starting in line %lu !",savelineno);
00413
00414 myendptr=ptr+len;
00415 curptr=ptr;
00416 }
00417 }
00418 while(*curptr!=']');
00419 }
00420 if(*curptr=='>')
00421 break;
00422
00423 curptr++;
00424 if(curptr==myendptr)
00425 {
00426 saxclient->HandleDOCTYPE(ptr,len,1);
00427 FastSkipData(len);
00428 RefillAndGetCurBlockPtr(&ptr,&len);
00429 if(len==0)
00430 XMLParseError("Could not find closing ']>' for DOCTYPE section starting in line %lu !",savelineno);
00431 myendptr=ptr+len;
00432 curptr=ptr;
00433 }
00434 }
00435 while(1);
00436
00437 saxclient->HandleDOCTYPE(ptr,curptr+1-ptr,0);
00438 FastSkipData(curptr+1-ptr);
00439 }
00440
00441
00442
00443 public:
00444
00445 char DoParsing(SAXClient *myclient)
00446
00447 {
00448 saxclient=myclient;
00449
00450 xmlparser=this;
00451
00452 char c[9];
00453
00454 do
00455 {
00456
00457 ParseText();
00458
00459
00460 if(IsEndOfFile())
00461 return 1;
00462
00463
00464 PeekChar(c);
00465 if(*c!='<')
00466 {
00467 Error("Character '<' expected !");
00468 XMLParseError("");
00469 }
00470
00471
00472 PeekData(c,3);
00473
00474 switch(c[1])
00475 {
00476 case '?':
00477 if(c[2]=='>')
00478 {
00479 SkipChar();
00480 ParseLabel();
00481 }
00482 else
00483 ParsePI();
00484 break;
00485
00486 case '!':
00487 switch(c[2])
00488 {
00489 case '[':
00490 PeekData(c,9);
00491 if(memcmp(c,"<![CDATA[",9)!=0)
00492 {
00493 Error("Invalid tag '");
00494 ErrorCont(c,9);
00495 ErrorCont("...' should probably be '<![CDATA ...' !");
00496 XMLParseError("");
00497 }
00498 ParseCDATA();
00499 break;
00500
00501 case 'D':
00502 {
00503 PeekData(c,9);
00504 if(memcmp(c,"<!DOCTYPE",9)!=0)
00505 {
00506 Error("Invalid tag '");
00507 ErrorCont(c,9);
00508 ErrorCont("...' should probably be '<!DOCTYPE ...' !");
00509 XMLParseError("");
00510 }
00511 ParseDOCTYPE();
00512 }
00513 break;
00514
00515 case '-':
00516 PeekData(c,4);
00517
00518 if(c[3]!='-')
00519 {
00520 Error("Invalid tag '");
00521 ErrorCont(c,4);
00522 ErrorCont("...' should probably be '<!-- ...' !");
00523 XMLParseError("");
00524 }
00525 ParseComment();
00526 break;
00527
00528 default:
00529 Error("Invalid tag '");
00530 ErrorCont(c,3);
00531 ErrorCont("...' !");
00532 XMLParseError("");
00533 }
00534 break;
00535
00536 case '=':
00537 Error("Invalid label '<=...'!");
00538 PrintErrorMsg();
00539 SkipChar();
00540 saxclient->HandleText("<",1,0,0,0);
00541 break;
00542
00543 default:
00544
00545 SkipChar();
00546 ParseLabel();
00547 }
00548 }
00549 while(allocatedmemory<memory_cutoff);
00550
00551
00552
00553 return 0;
00554 }
00555 };