00001 /* 00002 This product contains certain software code or other information 00003 ("AT&T Software") proprietary to AT&T Corp. ("AT&T"). The AT&T 00004 Software is provided to you "AS IS". YOU ASSUME TOTAL RESPONSIBILITY 00005 AND RISK FOR USE OF THE AT&T SOFTWARE. AT&T DOES NOT MAKE, AND 00006 EXPRESSLY DISCLAIMS, ANY EXPRESS OR IMPLIED WARRANTIES OF ANY KIND 00007 WHATSOEVER, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 00008 MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, WARRANTIES OF 00009 TITLE OR NON-INFRINGEMENT OF ANY INTELLECTUAL PROPERTY RIGHTS, ANY 00010 WARRANTIES ARISING BY USAGE OF TRADE, COURSE OF DEALING OR COURSE OF 00011 PERFORMANCE, OR ANY WARRANTY THAT THE AT&T SOFTWARE IS "ERROR FREE" OR 00012 WILL MEET YOUR REQUIREMENTS. 00013 00014 Unless you accept a license to use the AT&T Software, you shall not 00015 reverse compile, disassemble or otherwise reverse engineer this 00016 product to ascertain the source code for any AT&T Software. 00017 00018 (c) AT&T Corp. All rights reserved. AT&T is a registered trademark of AT&T Corp. 00019 00020 *********************************************************************** 00021 00022 History: 00023 00024 24/11/99 - initial release by Hartmut Liefke, liefke@seas.upenn.edu 00025 Dan Suciu, suciu@research.att.com 00026 */ 00027 00028 //*********************************************************************** 00029 //*********************************************************************** 00030 00031 // This module contains the handling of options specified at the command line 00032 00033 #include <stdio.h> 00034 #include <stdlib.h> 00035 00036 #include "Types.hpp" 00037 #include "Input.hpp" 00038 #include "VPathExprMan.hpp" 00039 00040 #ifdef XDEMILL 00041 #include "XMLOutput.hpp" 00042 #endif 00043 00044 // Determines whether CR/LF (dos) or just LF (unix) should be used 00045 // if the XML is printed using formatting 00046 #ifdef WIN32 00047 char usedosnewline=1; 00048 #else 00049 char usedosnewline=0; 00050 #endif 00051 00052 // The following flags determine how white spaces should be stored 00053 char globalfullwhitespacescompress =WHITESPACE_IGNORE; 00054 00055 extern VPathExprMan pathexprman; // The path manager 00056 00057 // The memory limit for the compressor 00058 // For the decompressor, it contains a size of the buffer needed to decompress 00059 // the header 00060 unsigned long memory_cutoff=8L*1024L*1024L; 00061 00062 // Determines whether to keep the input file 00063 char delete_inputfiles=0; 00064 00065 char overwrite_files=0; // Is 1, if the user wants to overwrite all files 00066 char skip_all_files=0; // Is 1, if the user want to skip all remaining files 00067 00068 //********** Flags for compression ************ 00069 #ifdef XMILL 00070 // Describe the handling of left, right, and attribute white spaces 00071 char globalleftwhitespacescompress =WHITESPACE_IGNORE; 00072 char globalrightwhitespacescompress =WHITESPACE_IGNORE; 00073 char globalattribwhitespacescompress =WHITESPACE_IGNORE; 00074 00075 // Flags for ignoring comment, CDATA, DOCTYPE, and PI sections 00076 char ignore_comment=0; 00077 char ignore_cdata=0; 00078 char ignore_doctype=0; 00079 char ignore_pi=0; 00080 00081 // The compression ratio index for the zlib library 00082 unsigned char zlib_compressidx=6; 00083 00084 #endif 00085 00086 #ifdef XDEMILL 00087 extern XMLOutput output; 00088 #endif 00089 00090 // *********** Common flags 00091 char no_output=0; // No output 00092 char usestdout=0; // Use the standard output 00093 char verbose=0; // Verbose mode 00094 char output_initialized=0; // output has been initalized 00095 00096 00097 #ifdef TIMING 00098 char timing=0; // Do timing 00099 #endif 00100 00101 //********************************************************** 00102 //********************************************************** 00103 00104 // Several auxiliary function for managing 00105 00106 // We keep a global state while traversing the sequence of options 00107 char **argstrings; // The set of option strings 00108 int argnum, // The number of option stringd 00109 curargidx; // The index of the current option string 00110 char *curargptr; // The current pointer into the current option string 00111 00112 char *optfiledata; // If we loaded a file, then this pointer refers to the file data 00113 char *curfileptr; // The current pointer in the file 00114 00115 inline void InitArguments(char **argv,int argc) 00116 // Initializes the option reader state above 00117 { 00118 argstrings=argv; 00119 argnum=argc; 00120 00121 curargidx=0; 00122 curargptr=argstrings[curargidx]; 00123 00124 optfiledata=NULL; 00125 curfileptr=NULL; 00126 } 00127 00128 inline char *GetNextArgument(int *len) 00129 // Reads the next argument using the option reader state 00130 // and stores the (maximal) length (to the next white space) in *len. 00131 // The actual length of an option can be smaller 00132 { 00133 do 00134 { 00135 // We don't have a file loaded 00136 if(curfileptr==NULL) 00137 { 00138 // Let's skip white spaces 00139 while((*curargptr==' ')||(*curargptr=='\t')||(*curargptr=='\n')||(*curargptr=='\r')) 00140 curargptr++; 00141 00142 // If we have some actual option strings left, then scan up to the 00143 // next white space 00144 if(*curargptr!=0) 00145 { 00146 char *endptr=curargptr+1; 00147 while((*endptr!=0)&& 00148 (*endptr!=' ')&&(*endptr!='\t')&& 00149 (*endptr!='\n')&&(*endptr!='\r')) 00150 endptr++; 00151 00152 *len=(endptr-curargptr); 00153 return curargptr; 00154 } 00155 00156 // We didn't find an option string ==> Go to next string 00157 curargidx++; 00158 if(curargidx==argnum) // No more string? => Exit 00159 return NULL; 00160 00161 curargptr=argstrings[curargidx]; 00162 } 00163 else // If we have a file, then we do something very similar 00164 { 00165 // Let's skip white spaces 00166 while((*curfileptr==' ')||(*curfileptr=='\t')||(*curfileptr=='\n')||(*curfileptr=='\r')) 00167 curfileptr++; 00168 00169 if(*curfileptr!=0) 00170 return curfileptr; 00171 00172 // Let's get rid of the file data 00173 // delete[] optfiledata; 00174 // We keep the file, since in VPathExprMan, the pathstrings 00175 // are direct pointers into the memory of the file data 00176 // This is a bad solution! 00177 00178 curfileptr=NULL; 00179 curfileptr=NULL; 00180 } 00181 } 00182 while(1); 00183 } 00184 00185 inline void SkipArgumentString(int len) 00186 // After parsing an option, this function is called to go to string 00187 // data after the option 00188 { 00189 if(curfileptr!=NULL) 00190 curfileptr+=len; 00191 else 00192 curargptr+=len; 00193 } 00194 00195 //********************************************************************* 00196 00197 inline void ParseOptionFile(char *filename) 00198 // Loads the option file 00199 { 00200 Input input; 00201 char *ptr; 00202 int len; 00203 00204 // We exit, if there is already an option file 00205 if(optfiledata!=NULL) 00206 { 00207 Error("Only one option file allowed!"); 00208 Exit(); 00209 } 00210 00211 if(input.OpenFile(filename)==0) 00212 { 00213 Error("Could not open parameter file '"); 00214 ErrorCont(filename); 00215 ErrorCont("'!"); 00216 Exit(); 00217 } 00218 00219 len=input.GetCurBlockPtr(&ptr); 00220 00221 if(len>30000) // Just to make sure that the file fits into one 00222 // single buffer -- i.e. we don't read another block 00223 { 00224 Error("Input file '"); 00225 ErrorCont(filename); 00226 ErrorCont("' is too large!"); 00227 Exit(); 00228 } 00229 00230 // Let's allocate and copy the data 00231 optfiledata=new char[len+1]; 00232 memcpy(optfiledata,ptr,len); 00233 optfiledata[len]=0; 00234 00235 curfileptr=optfiledata; 00236 00237 input.CloseFile(); 00238 } 00239 00240 //********************************************************************* 00241 00242 void InterpretOptionString(char *option) 00243 // Interprets a specific option 00244 { 00245 int len; 00246 00247 switch(*option) 00248 { 00249 // Includes an option file 00250 case 'i':SkipArgumentString(1); 00251 option=GetNextArgument(&len); 00252 if(option[len]!=0) // white space in file name? 00253 { 00254 Error("Invalid filename for option '-f'"); 00255 Exit(); 00256 } 00257 SkipArgumentString(len); 00258 ParseOptionFile(option); 00259 return; 00260 00261 case 'v': verbose=1;SkipArgumentString(1);return; 00262 case 't': no_output=1;SkipArgumentString(1);return; 00263 case 'c': usestdout=1;SkipArgumentString(1);return; 00264 // case 'k': delete_inputfiles=0;SkipArgumentString(1);return; 00265 case 'd': delete_inputfiles=1;SkipArgumentString(1);return; 00266 case 'f': overwrite_files=1;SkipArgumentString(1);return; 00267 #ifdef TIMING 00268 case 'T': timing=1;SkipArgumentString(1);return; 00269 #endif 00270 00271 #ifdef XMILL 00272 // Sets the memory window size 00273 case 'm':SkipArgumentString(1); 00274 option=GetNextArgument(&len); 00275 SkipArgumentString(len); 00276 memory_cutoff=atoi(option); 00277 if(memory_cutoff<1) 00278 { 00279 Error("Option '-m' must be followed be a number >=1"); 00280 Exit(); 00281 } 00282 memory_cutoff*=1024L*1024L; 00283 return; 00284 00285 // Reads a path expression 00286 case 'p': SkipArgumentString(1); 00287 option=GetNextArgument(&len); 00288 { 00289 char *ptr=option; 00290 pathexprman.AddNewVPathExpr(ptr,option+strlen(option)); 00291 // 'ptr' is moved to the characters after the path expression 00292 SkipArgumentString(ptr-option); 00293 } 00294 00295 return; 00296 00297 // Sets the left white space handling 00298 case 'l': option++; 00299 switch(*option) 00300 { 00301 case 'i': globalleftwhitespacescompress=WHITESPACE_IGNORE;SkipArgumentString(2);return; 00302 case 'g': globalleftwhitespacescompress=WHITESPACE_STOREGLOBAL;SkipArgumentString(2);return; 00303 case 't': globalleftwhitespacescompress=WHITESPACE_STORETEXT;SkipArgumentString(2);return; 00304 } 00305 break; 00306 00307 // Sets the right white space handling 00308 case 'r': option++; 00309 switch(*option) 00310 { 00311 case 'i': globalrightwhitespacescompress=WHITESPACE_IGNORE;SkipArgumentString(2);return; 00312 case 'g': globalrightwhitespacescompress=WHITESPACE_STOREGLOBAL;SkipArgumentString(2);return; 00313 case 't': globalrightwhitespacescompress=WHITESPACE_STORETEXT;SkipArgumentString(2);return; 00314 } 00315 break; 00316 00317 // Sets the full white space handling 00318 case 'w': option++; 00319 switch(*option) 00320 { 00321 case 'i': globalfullwhitespacescompress=WHITESPACE_IGNORE;SkipArgumentString(2);return; 00322 case 'g': globalfullwhitespacescompress=WHITESPACE_STOREGLOBAL;SkipArgumentString(2);return; 00323 case 't': globalfullwhitespacescompress=WHITESPACE_STORETEXT;SkipArgumentString(2);return; 00324 default: 00325 globalleftwhitespacescompress=WHITESPACE_STOREGLOBAL; 00326 globalrightwhitespacescompress=WHITESPACE_STOREGLOBAL; 00327 globalfullwhitespacescompress=WHITESPACE_STOREGLOBAL; 00328 globalattribwhitespacescompress=WHITESPACE_STOREGLOBAL; 00329 SkipArgumentString(1);return; 00330 } 00331 break; 00332 00333 // Sets the attribute white space handling 00334 case 'a': option++; 00335 switch(*option) 00336 { 00337 case 'i': globalattribwhitespacescompress=WHITESPACE_IGNORE;SkipArgumentString(2);return; 00338 case 'g': globalattribwhitespacescompress=WHITESPACE_STOREGLOBAL;SkipArgumentString(2);return; 00339 } 00340 break; 00341 00342 // Sets the handling of special sections (comment, CDATA, DOCTYPE, PI) 00343 case 'n': option++; 00344 switch(*option) 00345 { 00346 case 'c': ignore_comment=1;SkipArgumentString(2);return; 00347 case 't': ignore_doctype=1;SkipArgumentString(2);return; 00348 case 'p': ignore_pi=1;SkipArgumentString(2);return; 00349 case 'd': ignore_cdata=1;;SkipArgumentString(2);return; 00350 } 00351 break; 00352 #endif 00353 #ifdef XDEMILL 00354 // All options for output formatting 00355 case 'o': option++; 00356 switch(*option) 00357 { 00358 case 's': // Use space indentation 00359 { 00360 SkipArgumentString(2); 00361 option=GetNextArgument(&len); 00362 SkipArgumentString(len); 00363 00364 int spccount=atoi(option); 00365 if(spccount<=0) 00366 { 00367 Error("Option '-os' must be followed be a positive integer"); 00368 Exit(); 00369 } 00370 output.Init(XMLINTENT_SPACES,0,spccount); 00371 output_initialized=1; 00372 return; 00373 } 00374 // Use tab indentation 00375 case 't': SkipArgumentString(2); 00376 output.Init(XMLINTENT_TABS); 00377 output_initialized=1; 00378 return; 00379 00380 // Use no indentation 00381 case 'n': SkipArgumentString(2); 00382 output.Init(XMLINTENT_NONE); 00383 output_initialized=1; 00384 return; 00385 00386 // Determines type of newline (DOS or UNIX) 00387 case 'd': usedosnewline=1; 00388 SkipArgumentString(2); 00389 return; 00390 case 'u': usedosnewline=0; 00391 SkipArgumentString(2); 00392 return; 00393 /* 00394 case 'w': 00395 { 00396 int wrapcount; 00397 option+=2; 00398 wrapcount=atoi(option); 00399 if(wrapcount<=0) 00400 { 00401 Error("Option '-ow' must be followed be a positive integer"); 00402 Exit(); 00403 } 00404 while((*option>='0')&&(*option<='9')) 00405 option++; 00406 output.Init(XMLINTENT_WRAP,0,wrapcount); 00407 output_initialized=1; 00408 return; 00409 } 00410 */ 00411 } 00412 #endif // XDEMILL 00413 } 00414 #ifdef XMILL 00415 // Determines the compression rate index for zlib 00416 if((*option>='1')&&(*option<='9')) 00417 { 00418 zlib_compressidx=(unsigned char)(*option-'0'); 00419 SkipArgumentString(1); 00420 return; 00421 } 00422 #endif 00423 00424 Error("Invalid option '-"); 00425 ErrorCont(option); 00426 ErrorCont("'"); 00427 Exit(); 00428 } 00429 00430 int HandleAllOptions(char **argv,int argc) 00431 // Reads all the options from 'argv'. 00432 // It returns the index of the first non-option string 00433 // i.e. the string not starting with '-' 00434 { 00435 char *option; 00436 int len; 00437 00438 InitArguments(argv,argc); 00439 00440 while((option=GetNextArgument(&len))!=NULL) 00441 { 00442 if(*option!='-') 00443 break; 00444 00445 SkipArgumentString(1); 00446 option++; 00447 00448 InterpretOptionString(option); 00449 } 00450 00451 return curargidx; 00452 } 00453 00454 //******************************************************************** 00455 //******************************************************************** 00456 00457 void PrintUsage(char showmoreoptions) 00458 { 00459 printf("XMill 0.7 (30 Nov 99) - a compressor for XML\n"); 00460 printf("Copyright (C) 1999 AT&T Labs Research\n"); 00461 00462 #ifdef XMILL 00463 00464 if(showmoreoptions==0) 00465 printf("\nUsage:\n\n xmill [-i file] [-v] [-p path] [-m num] [-1..9] [-c] [-d] [-r] [-w] [-h] file ...\n\n"); 00466 else 00467 { 00468 printf("\nUsage:\n\n xmill [-i file] [-v] [-p path] [-m num] [-1..9] [-c] [-d] [-r] [-w] [-h]\n"); 00469 printf(" [-w(i|g|t)] [-l(i|g|t)] [-r(i|g|t)] [-a(i|g)] [-n(c|t|p|d)] file ...\n\n"); 00470 } 00471 00472 printf(" -i file - include options from file\n"); 00473 printf(" -v - verbose mode\n"); 00474 printf(" -p path - define path expression\n"); 00475 printf(" -m num - set memory limit\n"); 00476 printf(" -1..9 - set the compression factor of zlib (default=6)\n"); 00477 // printf(" -t - test mode (no output)\n"); 00478 printf(" -c - write on standard output\n"); 00479 // printf(" -k - keep original files unchanged (default)\n"); 00480 printf(" -d - delete input files\n"); 00481 printf(" -f - force overwrite of output files\n"); 00482 printf(" -w - preserve white spaces\n"); 00483 printf(" -h - show extended white space options and user compressors\n"); 00484 00485 if(showmoreoptions) 00486 { 00487 printf("\n Extended options:\n\n"); 00488 printf(" -wi - ignore complete white spaces (default)\n"); 00489 printf(" -wg - store complete white spaces in global container\n"); 00490 printf(" -wt - store complete white spaces as normal text\n"); 00491 printf(" -li - ignore left white spaces (default)\n"); 00492 printf(" -lg - store left white spaces in global container\n"); 00493 printf(" -lt - store left white spaces as normal text\n"); 00494 printf(" -ri - ignore right white spaces (default)\n"); 00495 printf(" -rg - store right white spaces in global container\n"); 00496 printf(" -rt - store right white spaces as normal text\n"); 00497 printf(" -ai - ignore attribute white spaces (default)\n"); 00498 printf(" -ag - store attribute white spaces in global container\n"); 00499 printf("\n"); 00500 printf(" -nc - ignore comments\n"); 00501 printf(" -nt - ignore DOCTYPE sections\n"); 00502 printf(" -np - ignore PI sections\n"); 00503 printf(" -nd - ignore CDATA sections\n"); 00504 printf("\n"); 00505 printf("\n User compressors:\n\n"); 00506 compressman.PrintCompressorInfo(); 00507 } 00508 #endif 00509 00510 #ifdef XDEMILL 00511 printf("Usage:\n\n\t xdemill [-i file] [-v] [-c] [-d] [-r] [-os num] [-ot] [-oz] [-od] [-ou] file ...\n\n"); 00512 printf(" -i file - include options from file\n"); 00513 printf(" -v - verbose mode\n"); 00514 printf(" -c - write on standard output\n"); 00515 // printf(" -k - keep original files unchanged\n"); 00516 printf(" -d - delete input files\n"); 00517 printf(" -f - force overwrite of output files\n"); 00518 // printf(" -t - test mode (no output)\n"); 00519 printf(" -os num - output formatted XML with space intentation\n"); 00520 printf(" -ot - output formatted XML with tabular intentation\n"); 00521 printf(" -oz - output unformatted XML (without white spaces)\n"); 00522 #ifdef WIN32 00523 printf(" -od - uses DOS newline convention (default)\n"); 00524 printf(" -ou - uses UNIX newline convention\n"); 00525 #else 00526 printf(" -od - uses DOS newline convention\n"); 00527 printf(" -ou - uses UNIX newline convention (default)\n"); 00528 #endif 00529 // printf(" -ow num - wrap XML output after specified number of characters\n"); 00530 #endif 00531 }