00001 /* 00002 This product contains certain software code or other information 00003 ("AT&T Software") proprietary to AT&T Corp. ("AT&T"). The AT&T 00004 Software is provided to you "AS IS". YOU ASSUME TOTAL RESPONSIBILITY 00005 AND RISK FOR USE OF THE AT&T SOFTWARE. AT&T DOES NOT MAKE, AND 00006 EXPRESSLY DISCLAIMS, ANY EXPRESS OR IMPLIED WARRANTIES OF ANY KIND 00007 WHATSOEVER, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 00008 MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, WARRANTIES OF 00009 TITLE OR NON-INFRINGEMENT OF ANY INTELLECTUAL PROPERTY RIGHTS, ANY 00010 WARRANTIES ARISING BY USAGE OF TRADE, COURSE OF DEALING OR COURSE OF 00011 PERFORMANCE, OR ANY WARRANTY THAT THE AT&T SOFTWARE IS "ERROR FREE" OR 00012 WILL MEET YOUR REQUIREMENTS. 00013 00014 Unless you accept a license to use the AT&T Software, you shall not 00015 reverse compile, disassemble or otherwise reverse engineer this 00016 product to ascertain the source code for any AT&T Software. 00017 00018 (c) AT&T Corp. All rights reserved. AT&T is a registered trademark of AT&T Corp. 00019 00020 *********************************************************************** 00021 00022 History: 00023 00024 24/11/99 - initial release by Hartmut Liefke, liefke@seas.upenn.edu 00025 Dan Suciu, suciu@research.att.com 00026 */ 00027 00028 //*********************************************************************** 00029 //*********************************************************************** 00030 00031 // This module contains the handling of options specified at the command line 00032 00033 #include <stdio.h> 00034 #include <stdlib.h> 00035 00036 #include "Types.hpp" 00037 #include "Input.hpp" 00038 #include "VPathExprMan.hpp" 00039 00040 #ifdef XDEMILL 00041 #include "XMLOutput.hpp" 00042 #include "BXMLOutput.hpp" 00043 #endif 00044 00045 // Determines whether CR/LF (dos) or just LF (unix) should be used 00046 // if the XML is printed using formatting 00047 #ifdef WIN32 00048 char usedosnewline=1; 00049 #else 00050 char usedosnewline=0; 00051 #endif 00052 00053 // The following flags determine how white spaces should be stored 00054 char globalfullwhitespacescompress =WHITESPACE_IGNORE; 00055 00056 extern VPathExprMan pathexprman; // The path manager 00057 00058 // The memory limit for the compressor 00059 // For the decompressor, it contains a size of the buffer needed to decompress 00060 // the header 00061 unsigned long memory_cutoff=8L*1024L*1024L; 00062 00063 // Determines whether to keep the input file 00064 char delete_inputfiles=0; 00065 00066 char overwrite_files=0; // Is 1, if the user wants to overwrite all files 00067 char skip_all_files=0; // Is 1, if the user want to skip all remaining files 00068 00069 00070 00071 //********** Flags for compression ************ 00072 #ifdef XMILL 00073 // Describe the handling of left, right, and attribute white spaces 00074 char globalleftwhitespacescompress =WHITESPACE_IGNORE; 00075 char globalrightwhitespacescompress =WHITESPACE_IGNORE; 00076 char globalattribwhitespacescompress =WHITESPACE_IGNORE; 00077 00078 // Flags for ignoring comment, CDATA, DOCTYPE, and PI sections 00079 char ignore_comment=0; 00080 char ignore_cdata=0; 00081 char ignore_doctype=0; 00082 char ignore_pi=0; 00083 00084 // The compression ratio index for the zlib library 00085 unsigned char zlib_compressidx=6; 00086 00087 #endif 00088 00089 #ifdef XDEMILL 00090 extern XMLOutput output; 00091 extern BXMLOutput boutput; 00092 00093 char g_bBinary = 0; // For binary output 00094 00095 #endif 00096 00097 // *********** Common flags 00098 char no_output=0; // No output 00099 char usestdout=0; // Use the standard output 00100 char verbose=0; // Verbose mode 00101 char output_initialized=0; // output has been initalized 00102 00103 00104 00105 #ifdef TIMING 00106 char timing=0; // Do timing 00107 #endif 00108 00109 //********************************************************** 00110 //********************************************************** 00111 00112 // Several auxiliary function for managing 00113 00114 // We keep a global state while traversing the sequence of options 00115 char **argstrings; // The set of option strings 00116 int argnum, // The number of option stringd 00117 curargidx; // The index of the current option string 00118 char *curargptr; // The current pointer into the current option string 00119 00120 char *optfiledata; // If we loaded a file, then this pointer refers to the file data 00121 char *curfileptr; // The current pointer in the file 00122 00123 inline void InitArguments(char **argv,int argc) 00124 // Initializes the option reader state above 00125 { 00126 argstrings=argv; 00127 argnum=argc; 00128 00129 curargidx=0; 00130 curargptr=argstrings[curargidx]; 00131 00132 optfiledata=NULL; 00133 curfileptr=NULL; 00134 } 00135 00136 inline char *GetNextArgument(int *len) 00137 // Reads the next argument using the option reader state 00138 // and stores the (maximal) length (to the next white space) in *len. 00139 // The actual length of an option can be smaller 00140 { 00141 do 00142 { 00143 // We don't have a file loaded 00144 if(curfileptr==NULL) 00145 { 00146 // Let's skip white spaces 00147 while((*curargptr==' ')||(*curargptr=='\t')||(*curargptr=='\n')||(*curargptr=='\r')) 00148 curargptr++; 00149 00150 // If we have some actual option strings left, then scan up to the 00151 // next white space 00152 if(*curargptr!=0) 00153 { 00154 char *endptr=curargptr+1; 00155 while((*endptr!=0)&& 00156 (*endptr!=' ')&&(*endptr!='\t')&& 00157 (*endptr!='\n')&&(*endptr!='\r')) 00158 endptr++; 00159 00160 *len=(endptr-curargptr); 00161 return curargptr; 00162 } 00163 00164 // We didn't find an option string ==> Go to next string 00165 curargidx++; 00166 if(curargidx==argnum) // No more string? => Exit 00167 return NULL; 00168 00169 curargptr=argstrings[curargidx]; 00170 } 00171 else // If we have a file, then we do something very similar 00172 { 00173 // Let's skip white spaces 00174 while((*curfileptr==' ')||(*curfileptr=='\t')||(*curfileptr=='\n')||(*curfileptr=='\r')) 00175 curfileptr++; 00176 00177 if(*curfileptr!=0) 00178 return curfileptr; 00179 00180 // Let's get rid of the file data 00181 // delete[] optfiledata; 00182 // We keep the file, since in VPathExprMan, the pathstrings 00183 // are direct pointers into the memory of the file data 00184 // This is a bad solution! 00185 00186 curfileptr=NULL; 00187 curfileptr=NULL; 00188 } 00189 } 00190 while(1); 00191 } 00192 00193 inline void SkipArgumentString(int len) 00194 // After parsing an option, this function is called to go to string 00195 // data after the option 00196 { 00197 if(curfileptr!=NULL) 00198 curfileptr+=len; 00199 else 00200 curargptr+=len; 00201 } 00202 00203 //********************************************************************* 00204 00205 inline void ParseOptionFile(char *filename) 00206 // Loads the option file 00207 { 00208 Input input; 00209 char *ptr; 00210 int len; 00211 00212 // We exit, if there is already an option file 00213 if(optfiledata!=NULL) 00214 { 00215 Error("Only one option file allowed!"); 00216 Exit(); 00217 } 00218 00219 if(input.OpenFile(filename)==0) 00220 { 00221 Error("Could not open parameter file '"); 00222 ErrorCont(filename); 00223 ErrorCont("'!"); 00224 Exit(); 00225 } 00226 00227 len=input.GetCurBlockPtr(&ptr); 00228 00229 if(len>30000) // Just to make sure that the file fits into one 00230 // single buffer -- i.e. we don't read another block 00231 { 00232 Error("Input file '"); 00233 ErrorCont(filename); 00234 ErrorCont("' is too large!"); 00235 Exit(); 00236 } 00237 00238 // Let's allocate and copy the data 00239 optfiledata=new char[len+1]; 00240 memcpy(optfiledata,ptr,len); 00241 optfiledata[len]=0; 00242 00243 curfileptr=optfiledata; 00244 00245 input.CloseFile(); 00246 } 00247 00248 //********************************************************************* 00249 00250 void InterpretOptionString(char *option) 00251 // Interprets a specific option 00252 { 00253 int len; 00254 00255 switch(*option) 00256 { 00257 // Includes an option file 00258 case 'i':SkipArgumentString(1); 00259 option=GetNextArgument(&len); 00260 if(option[len]!=0) // white space in file name? 00261 { 00262 Error("Invalid filename for option '-f'"); 00263 Exit(); 00264 } 00265 SkipArgumentString(len); 00266 ParseOptionFile(option); 00267 return; 00268 00269 case 'v': verbose=1;SkipArgumentString(1);return; 00270 case 't': no_output=1;SkipArgumentString(1);return; 00271 00272 00273 #ifdef XDEMILL 00274 case 'b': g_bBinary=1;SkipArgumentString(1);return; 00275 #endif 00276 00277 case 'c': usestdout=1;SkipArgumentString(1);return; 00278 // case 'k': delete_inputfiles=0;SkipArgumentString(1);return; 00279 case 'd': delete_inputfiles=1;SkipArgumentString(1);return; 00280 case 'f': overwrite_files=1;SkipArgumentString(1);return; 00281 00282 #ifdef TIMING 00283 case 'T': timing=1;SkipArgumentString(1);return; 00284 #endif 00285 00286 #ifdef XMILL 00287 // Sets the memory window size 00288 case 'm':SkipArgumentString(1); 00289 option=GetNextArgument(&len); 00290 SkipArgumentString(len); 00291 memory_cutoff=atoi(option); 00292 if(memory_cutoff<1) 00293 { 00294 Error("Option '-m' must be followed be a number >=1"); 00295 Exit(); 00296 } 00297 memory_cutoff*=1024L*1024L; 00298 return; 00299 00300 // Reads a path expression 00301 case 'p': SkipArgumentString(1); 00302 option=GetNextArgument(&len); 00303 { 00304 char *ptr=option; 00305 pathexprman.AddNewVPathExpr(ptr,option+strlen(option)); 00306 // 'ptr' is moved to the characters after the path expression 00307 SkipArgumentString(ptr-option); 00308 } 00309 00310 return; 00311 00312 // Sets the left white space handling 00313 case 'l': option++; 00314 switch(*option) 00315 { 00316 case 'i': globalleftwhitespacescompress=WHITESPACE_IGNORE;SkipArgumentString(2);return; 00317 case 'g': globalleftwhitespacescompress=WHITESPACE_STOREGLOBAL;SkipArgumentString(2);return; 00318 case 't': globalleftwhitespacescompress=WHITESPACE_STORETEXT;SkipArgumentString(2);return; 00319 } 00320 break; 00321 00322 // Sets the right white space handling 00323 case 'r': option++; 00324 switch(*option) 00325 { 00326 case 'i': globalrightwhitespacescompress=WHITESPACE_IGNORE;SkipArgumentString(2);return; 00327 case 'g': globalrightwhitespacescompress=WHITESPACE_STOREGLOBAL;SkipArgumentString(2);return; 00328 case 't': globalrightwhitespacescompress=WHITESPACE_STORETEXT;SkipArgumentString(2);return; 00329 } 00330 break; 00331 00332 // Sets the full white space handling 00333 case 'w': option++; 00334 switch(*option) 00335 { 00336 case 'i': globalfullwhitespacescompress=WHITESPACE_IGNORE;SkipArgumentString(2);return; 00337 case 'g': globalfullwhitespacescompress=WHITESPACE_STOREGLOBAL;SkipArgumentString(2);return; 00338 case 't': globalfullwhitespacescompress=WHITESPACE_STORETEXT;SkipArgumentString(2);return; 00339 default: 00340 globalleftwhitespacescompress=WHITESPACE_STOREGLOBAL; 00341 globalrightwhitespacescompress=WHITESPACE_STOREGLOBAL; 00342 globalfullwhitespacescompress=WHITESPACE_STOREGLOBAL; 00343 globalattribwhitespacescompress=WHITESPACE_STOREGLOBAL; 00344 SkipArgumentString(1);return; 00345 } 00346 break; 00347 00348 // Sets the attribute white space handling 00349 case 'a': option++; 00350 switch(*option) 00351 { 00352 case 'i': globalattribwhitespacescompress=WHITESPACE_IGNORE;SkipArgumentString(2);return; 00353 case 'g': globalattribwhitespacescompress=WHITESPACE_STOREGLOBAL;SkipArgumentString(2);return; 00354 } 00355 break; 00356 00357 // Sets the handling of special sections (comment, CDATA, DOCTYPE, PI) 00358 case 'n': option++; 00359 switch(*option) 00360 { 00361 case 'c': ignore_comment=1;SkipArgumentString(2);return; 00362 case 't': ignore_doctype=1;SkipArgumentString(2);return; 00363 case 'p': ignore_pi=1;SkipArgumentString(2);return; 00364 case 'd': ignore_cdata=1;;SkipArgumentString(2);return; 00365 } 00366 break; 00367 #endif 00368 #ifdef XDEMILL 00369 // All options for output formatting 00370 00371 00372 00373 case 'o': option++; 00374 switch(*option) 00375 { 00376 case 's': // Use space indentation 00377 { 00378 SkipArgumentString(2); 00379 option=GetNextArgument(&len); 00380 SkipArgumentString(len); 00381 00382 int spccount=atoi(option); 00383 if(spccount<=0) 00384 { 00385 Error("Option '-os' must be followed be a positive integer"); 00386 Exit(); 00387 } 00388 output.Init(XMLINTENT_SPACES,0,spccount); 00389 output_initialized=1; 00390 return; 00391 } 00392 // Use tab indentation 00393 case 't': SkipArgumentString(2); 00394 output.Init(XMLINTENT_TABS); 00395 output_initialized=1; 00396 return; 00397 00398 // Use no indentation 00399 case 'n': SkipArgumentString(2); 00400 output.Init(XMLINTENT_NONE); 00401 output_initialized=1; 00402 return; 00403 00404 // Determines type of newline (DOS or UNIX) 00405 case 'd': usedosnewline=1; 00406 SkipArgumentString(2); 00407 return; 00408 case 'u': usedosnewline=0; 00409 SkipArgumentString(2); 00410 return; 00411 00412 /* 00413 case 'w': 00414 { 00415 int wrapcount; 00416 option+=2; 00417 wrapcount=atoi(option); 00418 if(wrapcount<=0) 00419 { 00420 Error("Option '-ow' must be followed be a positive integer"); 00421 Exit(); 00422 } 00423 while((*option>='0')&&(*option<='9')) 00424 option++; 00425 output.Init(XMLINTENT_WRAP,0,wrapcount); 00426 output_initialized=1; 00427 return; 00428 } 00429 */ 00430 00431 } 00432 00433 00434 #endif // XDEMILL 00435 } 00436 #ifdef XMILL 00437 // Determines the compression rate index for zlib 00438 if((*option>='1')&&(*option<='9')) 00439 { 00440 zlib_compressidx=(unsigned char)(*option-'0'); 00441 SkipArgumentString(1); 00442 return; 00443 } 00444 #endif 00445 00446 Error("Invalid option '-"); 00447 ErrorCont(option); 00448 ErrorCont("'"); 00449 Exit(); 00450 } 00451 00452 int HandleAllOptions(char **argv,int argc) 00453 // Reads all the options from 'argv'. 00454 // It returns the index of the first non-option string 00455 // i.e. the string not starting with '-' 00456 { 00457 char *option; 00458 int len; 00459 00460 InitArguments(argv,argc); 00461 00462 while((option=GetNextArgument(&len))!=NULL) 00463 { 00464 if(*option!='-') 00465 break; 00466 00467 SkipArgumentString(1); 00468 option++; 00469 00470 InterpretOptionString(option); 00471 } 00472 00473 return curargidx; 00474 } 00475 00476 //******************************************************************** 00477 //******************************************************************** 00478 00479 void PrintUsage(char showmoreoptions) 00480 { 00481 printf("XMill 0.7 (30 Nov 99) - a compressor for XML\n"); 00482 printf("Copyright (C) 1999 AT&T Labs Research\n"); 00483 00484 #ifdef XMILL 00485 00486 if(showmoreoptions==0) 00487 printf("\nUsage:\n\n xmill [-i file] [-v] [-p path] [-m num] [-1..9] [-c] [-d] [-r] [-w] [-h] file ...\n\n"); 00488 else 00489 { 00490 printf("\nUsage:\n\n xmill [-i file] [-v] [-p path] [-m num] [-1..9] [-c] [-d] [-r] [-w] [-h]\n"); 00491 printf(" [-w(i|g|t)] [-l(i|g|t)] [-r(i|g|t)] [-a(i|g)] [-n(c|t|p|d)] file ...\n\n"); 00492 } 00493 00494 printf(" -i file - include options from file\n"); 00495 printf(" -v - verbose mode\n"); 00496 printf(" -p path - define path expression\n"); 00497 printf(" -m num - set memory limit\n"); 00498 printf(" -1..9 - set the compression factor of zlib (default=6)\n"); 00499 // printf(" -t - test mode (no output)\n"); 00500 printf(" -c - write on standard output\n"); 00501 // printf(" -k - keep original files unchanged (default)\n"); 00502 printf(" -d - delete input files\n"); 00503 printf(" -f - force overwrite of output files\n"); 00504 printf(" -w - preserve white spaces\n"); 00505 printf(" -h - show extended white space options and user compressors\n"); 00506 00507 if(showmoreoptions) 00508 { 00509 printf("\n Extended options:\n\n"); 00510 printf(" -wi - ignore complete white spaces (default)\n"); 00511 printf(" -wg - store complete white spaces in global container\n"); 00512 printf(" -wt - store complete white spaces as normal text\n"); 00513 printf(" -li - ignore left white spaces (default)\n"); 00514 printf(" -lg - store left white spaces in global container\n"); 00515 printf(" -lt - store left white spaces as normal text\n"); 00516 printf(" -ri - ignore right white spaces (default)\n"); 00517 printf(" -rg - store right white spaces in global container\n"); 00518 printf(" -rt - store right white spaces as normal text\n"); 00519 printf(" -ai - ignore attribute white spaces (default)\n"); 00520 printf(" -ag - store attribute white spaces in global container\n"); 00521 printf("\n"); 00522 printf(" -nc - ignore comments\n"); 00523 printf(" -nt - ignore DOCTYPE sections\n"); 00524 printf(" -np - ignore PI sections\n"); 00525 printf(" -nd - ignore CDATA sections\n"); 00526 printf("\n"); 00527 printf("\n User compressors:\n\n"); 00528 compressman.PrintCompressorInfo(); 00529 } 00530 #endif 00531 00532 #ifdef XDEMILL 00533 printf("Usage:\n\n\t xdemill [-i file] [-v] [-c] [-d] [-r] [-os num] [-ot] [-oz] [-od] [-ou] file ...\n\n"); 00534 printf(" -i file - include options from file\n"); 00535 printf(" -v - verbose mode\n"); 00536 printf(" -c - write on standard output\n"); 00537 // printf(" -k - keep original files unchanged\n"); 00538 printf(" -d - delete input files\n"); 00539 printf(" -f - force overwrite of output files\n"); 00540 // printf(" -t - test mode (no output)\n"); 00541 printf(" -os num - output formatted XML with space intentation\n"); 00542 printf(" -ot - output formatted XML with tabular intentation\n"); 00543 printf(" -oz - output unformatted XML (without white spaces)\n"); 00544 #ifdef WIN32 00545 printf(" -od - uses DOS newline convention (default)\n"); 00546 printf(" -ou - uses UNIX newline convention\n"); 00547 #else 00548 printf(" -od - uses DOS newline convention\n"); 00549 printf(" -ou - uses UNIX newline convention (default)\n"); 00550 #endif 00551 // printf(" -ow num - wrap XML output after specified number of characters\n"); 00552 #endif 00553 }