00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 #include "Error.hpp"
00036 #include "FileParser.hpp"
00037 #include "SAXClient.hpp"
00038 
00039 extern unsigned long memory_cutoff;
00040    
00041    
00042    
00043    
00044 
// Skips over leading whitespace (blank, tab, carriage return, line feed)
// in the range [ptr,endptr).
//
// Returns a pointer to the first character that is not whitespace, or
// 'endptr' if the whole range consists of whitespace. If ptr>=endptr,
// 'ptr' is returned unchanged and nothing is read.
inline char *TraverseWhiteSpaces(char *ptr,char *endptr)
{
   for(;ptr<endptr;ptr++)
   {
      const char cur=*ptr;
      const bool whitespace=
         (cur==' ')||(cur=='\t')||(cur=='\r')||(cur=='\n');

      if(!whitespace)
         break;
   }
   return ptr;
}
00052 
00053 class XMLParse : public FileParser
00054 { 
00055    SAXClient *saxclient;   
00056 
00057 public:
00058    void XMLParseError(char *errmsg)
00059       
00060    {
00061       char tmpstr[50];
00062       sprintf(tmpstr,"Parse error in line %lu:\n",GetCurLineNo());
00063       Error(tmpstr);
00064       ErrorCont(errmsg);
00065       Exit();
00066    }
00067 
00068    void XMLParseError(char *errmsg,int savelineno)
00069       
00070    {
00071       char tmpstr[50];
00072       sprintf(tmpstr,errmsg,savelineno);
00073       Error(tmpstr);
00074       Exit();
00075    }
00076 
00077 private:
00078    char SkipWhiteSpaces()
00079    {
00080       char c;
00081       do
00082       {
00083          PeekChar(&c);
00084          if((c!=' ')&&(c!='\t')&&(c!='\r')&&(c!='\n'))
00085             break;
00086          SkipChar();
00087       }
00088       while(1);
00089 
00090       return c;
00091    }
00092 
00093    char ParseAttribs()
00094    
00095    
00096    {
00097       char c;
00098       char *strptr;
00099       int  len;
00100 
00101       do
00102       {
00103          while(ReadWhiteSpaces(&strptr,&len)==0)
00104             
00105             saxclient->HandleAttribWhiteSpaces(strptr,len,1);
00106 
00107          saxclient->HandleAttribWhiteSpaces(strptr,len,0);
00108 
00109          
00110          
00111          PeekChar(&c);
00112          if((c=='>')||(c=='/'))  
00113          {
00114             SkipChar();
00115             return c;
00116          }
00117          
00118          while(ReadStringUntil(&strptr,&len,1,'=',0)==0)
00119             
00120             saxclient->HandleAttribName(strptr,len,1);
00121 
00122          
00123          saxclient->HandleAttribName(strptr,len-1,0);
00124 
00125          if(strptr[len-1]!='=')
00126             
00127          {
00128             c=SkipWhiteSpaces();
00129             if(c!='=')
00130                XMLParseError("Symbol '=' expected !");
00131             SkipChar();
00132          }
00133          
00134          c=SkipWhiteSpaces();
00135 
00136          
00137          
00138          
00139 
00140          if(c!='"')
00141          {
00142             while(ReadStringUntil(&strptr,&len,1,'>','/')==0)
00143                saxclient->HandleAttribValue(strptr,len,0);
00144 
00145             saxclient->HandleAttribValue(strptr,len-1,1);
00146 
00147             c=strptr[len-1];
00148             if((c=='/')||(c=='>'))
00149                return c;
00150          }
00151          else
00152          {
00153             SkipChar();
00154 
00155             while(ReadStringUntil(&strptr,&len,0,'"','>')==0)
00156                saxclient->HandleAttribValue(strptr,len,0);
00157 
00158             if(strptr[len-1]=='>')
00159             {
00160                char tmpstr[100];
00161                sprintf(tmpstr,"Line %lu: Missing '\"' at the end of attribute value '",GetCurLineNo());
00162                Error(tmpstr);
00163                ErrorCont(strptr,len-1);
00164                ErrorCont("'!");
00165                PrintErrorMsg();
00166                UndoReadChar();
00167                len--;
00168             }
00169 
00170             saxclient->HandleAttribValue(strptr,len-1,1);
00171 
00172             PeekChar(&c);
00173             if((c!='>')&&(c!=' ')&&(c!='\t')&&(c!='\n')&&(c!='\r')&&(c!='/'))
00174             {
00175                char tmpstr[50];
00176                sprintf(tmpstr,"Skip invalid character '%c' in line %lu",c,GetCurLineNo());
00177                Error(tmpstr);
00178                PrintErrorMsg();
00179                SkipChar();
00180             }
00181          }
00182       }
00183       while(1);
00184    }
00185 
00186    void ParseLabel()
00187       
00188    {
00189       char c,*ptr;
00190       int  len;
00191 
00192       PeekChar(&c);
00193 
00194       if(c=='/') 
00195       {
00196          GetChar(&c);
00197 
00198          while(ReadStringUntil(&ptr,&len,0,'>','<')==0)
00199 
00200             
00201             saxclient->HandleEndLabel(ptr,len,1);
00202 
00203          if(ptr[len-1]=='<')
00204          {
00205             Error("Unfinished end label!");
00206             PrintErrorMsg();
00207             UndoReadChar();
00208          }
00209 
00210          saxclient->HandleEndLabel(ptr,len-1,0);
00211          return;
00212       }
00213 
00214       while(ReadStringUntil(&ptr,&len,1,'>','/')==0)
00215          
00216          saxclient->HandleStartLabel(ptr,len,1);
00217 
00218       switch(ptr[len-1])
00219       {
00220       case '>':
00221          saxclient->HandleStartLabel(ptr,len-1,0);
00222          return;
00223 
00224       case '/':
00225          saxclient->HandleStartLabel(ptr,len-1,0);
00226          GetChar(&c);
00227          if(c!='>')
00228             XMLParseError("Symbol '/' in label must be followed by '>' !");
00229 
00230          saxclient->HandleEndLabel(NULL,0,0);
00231          return;
00232 
00233       default: 
00234          saxclient->HandleStartLabel(ptr,len,0);
00235          c=ParseAttribs();
00236          if(c=='/')
00237          {
00238             
00239             saxclient->HandleEndLabel(NULL,0,0);
00240             GetChar(&c);
00241          }
00242          if(c!='>')
00243             XMLParseError("Symbol '>' expected after '/' in tag!");
00244       }
00245    }
00246 
00247    void ParsePI()
00248       
00249    {
00250       int len,savelineno=GetCurLineNo();
00251       char *ptr;
00252 
00253       do
00254       {
00255          if(ReadStringUntil(&ptr,&len,"?>"))
00256             break;
00257 
00258          if(len==0)
00259             XMLParseError("Could not find closing '?>' for processing instruction in line %lu !",savelineno);
00260 
00261          saxclient->HandlePI(ptr,len,1);
00262       }
00263       while(1);
00264 
00265       saxclient->HandlePI(ptr,len,0);
00266    }
00267 
00268    void ParseCDATA()
00269       
00270    {
00271       int len,savelineno=GetCurLineNo();
00272       char *ptr;
00273 
00274       while(ReadStringUntil(&ptr,&len,"]]>")==0)
00275       {
00276          if(len==0)
00277             XMLParseError("Could not find closing ']]>' for CDATA section starting in line %lu !",savelineno);
00278 
00279          saxclient->HandleCDATA(ptr,len,1);
00280       }
00281     
00282       saxclient->HandleCDATA(ptr,len,0);
00283    }
00284 
00285    void ParseComment()
00286       
00287    {
00288       int len,savelineno=GetCurLineNo();
00289       char *ptr;
00290 
00291       while(ReadStringUntil(&ptr,&len,"-->")==0)
00292       {
00293          if(len==0)
00294             XMLParseError("Could not find closing '-->' for comment starting in line %lu !",savelineno);
00295 
00296          saxclient->HandleComment(ptr,len,1);
00297       }
00298 
00299       saxclient->HandleComment(ptr,len,0);
00300    }
00301 
00302    void ParseText()
00303       
00304    {
00305       char err;
00306       int len;
00307       char *ptr,*leftwsptr,*rightwsptr,*endptr;
00308 
00309       
00310       err=ReadStringUntil(&ptr,&len,'<');
00311 
00312       if((err==0)&&(len==0))
00313          return;
00314 
00315       endptr=ptr+len;
00316 
00317       
00318       leftwsptr=ptr;
00319 
00320       while((leftwsptr<endptr)&&
00321             (*leftwsptr==' ')||(*leftwsptr=='\t')||
00322             (*leftwsptr=='\r')||(*leftwsptr=='\n'))
00323          leftwsptr++;
00324 
00325       while(err==0)  
00326                      
00327       {
00328          if(len>0)
00329          {
00330             if(IsEndOfFile()&&(len==leftwsptr-ptr))
00331                
00332                
00333                saxclient->HandleText(ptr,len,0,len,len);
00334             else
00335                saxclient->HandleText(ptr,len,1,leftwsptr-ptr,0);
00336          }
00337 
00338          if(leftwsptr==endptr)   
00339                                  
00340          {
00341             err=ReadStringUntil(&ptr,&len,'<');
00342 
00343             if((err==0)&&(len==0))  
00344                return;
00345 
00346             leftwsptr=ptr;
00347 
00348             while((leftwsptr<endptr)&&
00349                   (*leftwsptr==' ')||(*leftwsptr=='\t')||
00350                   (*leftwsptr=='\r')||(*leftwsptr=='\n'))
00351                leftwsptr++;
00352          }
00353          else
00354          {
00355             err=ReadStringUntil(&ptr,&len,'<');
00356             if((err==0)&&(len==0))
00357                return;
00358          
00359             leftwsptr=ptr; 
00360          }
00361       }
00362 
00363       
00364 
00365       
00366       UndoReadChar();
00367       len--;
00368 
00369       endptr=ptr+len;
00370 
00371       
00372       rightwsptr=endptr-1;
00373 
00374       while((rightwsptr>=ptr)&&
00375             (*rightwsptr==' ')||(*rightwsptr=='\t')||
00376             (*rightwsptr=='\r')||(*rightwsptr=='\n'))
00377          rightwsptr--;
00378 
00379       if(len>0)
00380          saxclient->HandleText(ptr,len,0,leftwsptr-ptr,endptr-rightwsptr-1);
00381    }
00382 
00383    void ParseDOCTYPE()
00384       
00385       
00386    {
00387       int   len,savelineno=GetCurLineNo(); 
00388       char  *ptr;
00389       char  *myendptr,*curptr;
00390 
00391       
00392       len=GetCurBlockPtr(&ptr);
00393       if(len==0)
00394          RefillAndGetCurBlockPtr(&ptr,&len);
00395 
00396       myendptr=ptr+len;
00397       curptr=ptr;
00398 
00399       do
00400       {
00401          if(*curptr=='[')
00402          {
00403             do
00404             {
00405                curptr++;
00406                if(curptr==myendptr)
00407                {
00408                   saxclient->HandleDOCTYPE(ptr,len,1);
00409                   FastSkipData(len);
00410                   RefillAndGetCurBlockPtr(&ptr,&len);
00411                   if(len==0)
00412                      XMLParseError("Could not find closing ']>' for DOCTYPE section starting in line %lu !",savelineno);
00413 
00414                   myendptr=ptr+len;
00415                   curptr=ptr;
00416                }
00417             }
00418             while(*curptr!=']');
00419          }
00420          if(*curptr=='>')
00421             break;
00422 
00423          curptr++;
00424          if(curptr==myendptr)
00425          {
00426             saxclient->HandleDOCTYPE(ptr,len,1);
00427             FastSkipData(len);
00428             RefillAndGetCurBlockPtr(&ptr,&len);
00429             if(len==0)
00430                XMLParseError("Could not find closing ']>' for DOCTYPE section starting in line %lu !",savelineno);
00431             myendptr=ptr+len;
00432             curptr=ptr;
00433          }
00434       }
00435       while(1);
00436 
00437       saxclient->HandleDOCTYPE(ptr,curptr+1-ptr,0);
00438       FastSkipData(curptr+1-ptr);
00439    }
00440 
00441 
00442 
00443 public:
00444 
00445    char DoParsing(SAXClient *myclient)
00446       
00447    {
00448       saxclient=myclient;
00449 
00450       xmlparser=this;
00451 
00452       char c[9];
00453 
00454       do
00455       {
00456          
00457          ParseText();
00458 
00459          
00460          if(IsEndOfFile())
00461             return 1;
00462 
00463          
00464          PeekChar(c);
00465          if(*c!='<') 
00466          {
00467             Error("Character '<' expected !");
00468             XMLParseError("");
00469          }
00470 
00471          
00472          PeekData(c,3);
00473 
00474          switch(c[1])
00475          {
00476             case '?': 
00477                if(c[2]=='>')
00478                {
00479                   SkipChar();
00480                   ParseLabel();
00481                }
00482                else
00483                   ParsePI();
00484                break;
00485 
00486             case '!':
00487                switch(c[2])
00488                {
00489                case '[': 
00490                   PeekData(c,9);
00491                   if(memcmp(c,"<![CDATA[",9)!=0)
00492                   {
00493                      Error("Invalid tag '");
00494                      ErrorCont(c,9);
00495                      ErrorCont("...' should probably be '<![CDATA ...' !");
00496                      XMLParseError("");
00497                   }
00498                   ParseCDATA();
00499                   break;
00500       
00501                case 'D': 
00502                {
00503                   PeekData(c,9);
00504                   if(memcmp(c,"<!DOCTYPE",9)!=0)
00505                   {
00506                      Error("Invalid tag '");
00507                      ErrorCont(c,9);
00508                      ErrorCont("...' should probably be '<!DOCTYPE ...' !");
00509                      XMLParseError("");
00510                   }
00511                   ParseDOCTYPE();
00512                }
00513                break;
00514 
00515                case '-': 
00516                   PeekData(c,4);
00517 
00518                   if(c[3]!='-')
00519                   {
00520                      Error("Invalid tag '");
00521                      ErrorCont(c,4);
00522                      ErrorCont("...' should probably be '<!-- ...' !");
00523                      XMLParseError("");
00524                   }
00525                   ParseComment();
00526                   break;
00527 
00528                default:
00529                   Error("Invalid tag '");
00530                   ErrorCont(c,3);
00531                   ErrorCont("...' !");
00532                   XMLParseError("");
00533                }
00534                break;
00535 
00536          case '=':
00537             Error("Invalid label '<=...'!");
00538             PrintErrorMsg();
00539             SkipChar();
00540             saxclient->HandleText("<",1,0,0,0);
00541             break;
00542 
00543          default: 
00544                   
00545             SkipChar();
00546             ParseLabel();
00547          }
00548       }
00549       while(allocatedmemory<memory_cutoff);
00550          
00551          
00552 
00553       return 0;
00554    }
00555 };