#!/usr/bin/env perl # cppp -- Partial CPP (expands specified C preprocessor macros) # Michael Ernst # Time-stamp: <2007-09-07 12:44:28 mernst> # cppp simplifies the conditional compilation structure (#if, #ifdef, etc.) # of a C file by eliminating CPP conditional directives, based on the # values of preprocessor macros specified by the user. cppp makes use of # macro definitions only on its command line, not #define directives in the # input files. If the specified macros are insufficient to fully evaluate # a conditional, it is left as is. Furthermore, in the resulting file, # occurrences of macros are not expanded (in code, macro definitions, or # conditional compilation lines). No expansion or evaluation is ever done # in comments. # This program is similar to the unifdef Unix program. Two differences are # that unifdef only operates on #ifdef lines, whereas cppp operates on #if # lines as well, and that unifdef produces output to standard out, whereas # cppp modifies the file in place. # Call this program by specifying file names on the command line. # Files are modified in place after being backed up with extension ".nocppp". # If ".nocppp" file already exists, it may be used instead; see flags below. ## Flags: # If no .nocppp file is found, the current file is backed up to a .nocppp # version before being modified in place. # These flags control what action is taken if a .nocppp file is found: # -nocppp-overwrite Copy the current file to the .nocppp file and proceed, # exactly as if the .nocppp file had not existed. # -nocppp-skip Do no processing of the current file; leave it (and # the .nocppp file) as is. # -nocppp-use (default) Copy the .nocppp file to the current file # (overwriting the current file), then proceed. # -nocppp-die Abort. # # -preservelines # Causes the output to have the same number of lines as the input; # instead of being removed, lines are made blank. Thus, line n of the # output is either blank or the same as line n of the input. 
# # The following cpp flags are recognized: # # `-D NAME' # Predefine NAME as a macro, with definition `1'. # # `-D NAME=DEFINITION' # Predefine NAME as a macro, with definition DEFINITION. There are # no restrictions on the contents of DEFINITION, but if you are # invoking the preprocessor from a shell or shell-like program you # may need to use the shell's quoting syntax to protect characters # such as spaces that have a meaning in the shell syntax. If you # use more than one `-D' for the same NAME, the rightmost definition # takes effect. # # `-U NAME' # Do not predefine NAME. If both `-U' and `-D' are specified for # one name, the `-U' beats the `-D' and the name is not predefined. # # # `-A PREDICATE(ANSWER)' # Make an assertion with the predicate PREDICATE and answer ANSWER. # *Note Assertions::. # # You can use `-A-' to disable all predefined assertions; it also # undefines all predefined macros that identify the type of target # system. # Specifying -U means that you know that the macro is undefined. If you # don't know what a macro will be set to, then don't specify it at all (as # opposed to using "-U"). # Since cpp treats identifiers that are not macros as zero, "-U var" and # "-D var=0" are equivalent, if "var" appears in an expression (but not if # var appears in an #ifdef). # Example: Suppose "myfile.h" contains # before # #if (COND1 == PRESENT) # middle # #endif /** COND1 **/ # after # Then "cppp -DCOND1=7 -DPRESENT=22 myfile.h" # or "cppp -UCOND1 -DPRESENT=XXX myfile.h" makes myfile.h contain # before # after # and "cppp -DCOND1=XXX -DPRESENT=XXX myfile.h" # or "cppp -UCOND1 -DPRESENT=0 myfile.h" makes myfile.h contain # before # middle # after # and "cppp myfile.h" or "cppp -DPRESENT myfile.h" results in no change to # myfile.h. ## To do: # Check for {re,un}definition of the specified macros. # Implement -h command-line option. # Could use -i.nocppp flag to Perl, but I'm not comfortable with it. 
# Simplifying postpass (eg, #else #endif => #endif) might be nice.
# Some users would like to have an option to use the program as a filter:
# read from standard input and write to standard output.
# Add an option to expand macros inside comments?

## Bugs:
#  * Only copes with conditional compilation; occurrences in code, macro
#    definitions, etc. are not expanded.
#  * I assume that "defined" isn't the name of a macro (can it be?).
#  * Character constants are always assumed to be positive, never negative
#    (the standard doesn't specify signedness).
#  * -D can only define constant-like macros, not function-like ones (true
#    for cpp also).
# This program was solid enough to be used in the development of SCWM
# (http://scwm.sourceforge.net/) in 1999, and many other projects since.

## Alternatives:
#  * The unifdef program, distributed with some Unix variants, is similar
#    to cppp, except that unifdef does not permit specifying a value for a
#    defined symbol and only operates on #ifdef tests.
#  * Emacs's hide-ifdef-mode enables the programmer to specify
#    preprocessor variables as explicitly defined or not defined; the mode then
#    presents a view of the source code corresponding to that configuration,
#    hiding code that is conditionally unincluded.
#  * Other alternatives include scpp and rmifdef.

require 5.003;                  # uses prototypes
use strict;
use Carp;
use English;
$WARNING = 1;
use FindBin ();
use lib "$FindBin::Bin";
use checkargs;
use cline;
use paren;

#require Exporter;
#@ISA = qw(Exporter);
#@EXPORT = qw();

# Dies with a usage message.  Any arguments are joined with spaces and
# printed, on a line of their own, before the usage text.
# The prototype accepts an optional message; calling usage() with no
# arguments (the only form the previous empty prototype allowed) still works.
sub usage ( @ ) {
  my $message = join(" ", @_);
  if ($message ne "") {
    $message .= "\n";
  }
  # The text ends with a newline so that die does not append "at ... line N".
  die $message . <<"END_USAGE";
Usage: $0 [flags] file ...
Simplifies the C preprocessor conditionals in each file, in place, using
only the macro values given on the command line.  Each file is first
backed up with extension ".nocppp".
  -h                  Help.  Display this usage information
  -D NAME             Treat NAME as a macro defined as 1
  -D NAME=DEFINITION  Treat NAME as a macro defined as DEFINITION
  -U NAME             Treat NAME as known to be undefined
  -preservelines      Blank out removed lines rather than deleting them
  -nocppp-overwrite   If a .nocppp file exists, overwrite it and proceed
  -nocppp-skip        If a .nocppp file exists, do not process the file
  -nocppp-use         If a .nocppp file exists, process it (default)
  -nocppp-die         If a .nocppp file exists, abort
END_USAGE
}

# Forward declarations required by Perl 5.6
sub eval_substituted_conditional ( $ );
sub process_file ( $ );


###########################################################################
### Constants
###

### Boolean
my $true = 1;
my $false = 0;
my $unknown = 'unknown';        # not undef: it is a bad arg some places.
my $debug_cppp = $false;
# $debug_cppp = $true;
my $debug_cppp_output = $false;
# $debug_cppp_output = $true;
## From cline package.
# $debug_cline = $true;

# Lifted from em_analyze
# Contents of a C character literal (the part between the quotes): one
# character, optionally preceded by a backslash; an octal escape of one to
# three digits; or a hexadecimal escape.  The single capture group is what
# gets handed to char_literal_as_int.
my $char_literal_contents_re = '(\\\\?.|\\\\[0-7]{1,3}|\\\\x[0-9A-Fa-f]+)';
my $char_literal_re = "L?'" . $char_literal_contents_re . "'";


###########################################################################
### Variables
###

# Name of the file being processed; used only in diagnostics.
my $current_file;

# one of overwrite, skip, use, die.
my $backup_exists_action = "use";

# Nonzero if removed lines should be replaced by blank lines.
my $preservelines = 0;


###########################################################################
### Symbol table
###

# If a symbol does not appear in this hash, we know nothing about it.
# Otherwise, its value (possibly undef) in the hash gives its value.
# An undef value means the symbol is known to be undefined (-U).
my %cpp_macros = ();

# Returns one of $true, $false, or $unknown
sub cpp_macro_definedness ( $ ) {
  my ($symbol) = check_args(1, @_);
  if (!exists($cpp_macros{$symbol})) {
    return $unknown;
  }
  # Use 0 instead of empty string for false values, to ease debugging.
  return defined($cpp_macros{$symbol}) ? 1 : 0;
}

# True if the three-valued argument is $unknown.
sub is_unknown ( $ ) {
  my ($arg) = check_args(1, @_);
  return ($arg eq $unknown);
}

# True if the three-valued argument is $unknown or is a true value.
sub is_nonfalse ( $ ) {
  my ($arg) = check_args(1, @_);
  return (is_unknown($arg) || $arg);
}

# True if the three-valued argument is known and is a false value.
sub is_false ( $ ) {
  my ($arg) = check_args(1, @_);
  return (!is_unknown($arg)) && !$arg;
}

# True if the three-valued argument is known and is a true value.
sub is_true ( $ ) {
  my ($arg) = check_args(1, @_);
  return (!is_unknown($arg)) && $arg;
}

# Returns a textual substitution for "defined($symbol)"; result is one of
# "1", "0", or "defined($symbol)" itself (if the definedness is unknown).
sub eval_defined ( $$ ) {
  my ($symbol, $expression) = check_args(2, @_);
  # Invariant:  $expression eq "defined($symbol)".
  if (!exists($cpp_macros{$symbol})) {
    return $expression;
  }
  # Test definedness, not truth: a macro given as -DNAME=0 or -DNAME= is
  # still defined.  Only -U stores undef.  This agrees with
  # cpp_macro_definedness, which answers the same question for #ifdef.
  return defined($cpp_macros{$symbol}) ? "1" : "0";
}

# In the argument, convert instances of "defined(var)" to 0 or 1 if possible.
sub eval_all_defined ( $ ) {
  my ($string) = check_args(1, @_);
  if ($debug_cppp) { print "eval_all_defined <= $string\n"; }
  # Parenthesized form:  defined(FOO)
  $string =~ s/\bdefined\s*\(\s*(\w+)\s*\)/eval_defined($1,$MATCH)/ge;
  # Unparenthesized form:  defined FOO
  # Whitespace is required, so that an identifier that merely begins with
  # "defined" (such as "definedness") is not mistaken for the operator.
  $string =~ s/\bdefined\s+(\w+)\b/eval_defined($1,$MATCH)/ge;
  if ($debug_cppp) { print "eval_all_defined => $string\n"; }
  return $string;
}

# Helper for expand_macros.  Expands the macros of %cpp_macros in the first
# argument, except for those that are keys of the hash referenced by the
# second argument (the macros whose expansion is currently in progress).
# Excluding those keeps a self-referential definition such as -DFOO=FOO, or
# a cycle such as -DA=B -DB=A, from expanding forever.
sub expand_macros_except {
  my ($string, $in_progress) = check_args(2, @_);
  my $remaining = $string;
  my $result = "";
  while ($remaining =~ /\b\w+\b/) {
    # Copy the match variables before recursing, which overwrites them.
    my ($before, $symbol, $after) = ($PREMATCH, $MATCH, $POSTMATCH);
    $result .= $before;
    $remaining = $after;
    if (!exists($cpp_macros{$symbol}) || $in_progress->{$symbol}) {
      # Unknown symbol, or a macro already being expanded: leave as is.
      $result .= $symbol;
    } elsif (!defined($cpp_macros{$symbol})) {
      # Known to be undefined; cpp treats such identifiers as zero.
      $result .= "0";
    } else {
      # Rescan the replacement, to give nested macros a chance to fire.
      $result .= expand_macros_except($cpp_macros{$symbol},
                                      { %$in_progress, $symbol => 1 });
    }
  }
  return $result . $remaining;
}

# Returns the argument with every macro of %cpp_macros expanded (repeatedly,
# so macros in replacement text are expanded too).  Macros known to be
# undefined are replaced by "0"; unknown identifiers are left unchanged.
sub expand_macros ( $ ) {
  my ($string) = check_args(1, @_);
  # Expand macros; they're constant-like, not function-like.
  my $result = expand_macros_except($string, {});
  if ($debug_cppp) { print "expand_macros $string => $result\n"; }
  return $result;
}

# Takes the character literal CONTENTS (i.e., no quote characters) as its
# argument.  Returns the integer value of the character.
# Handles octal escapes (\101), hexadecimal escapes (\x41), the simple
# escapes (\n, \t, ...), and escaped punctuation (\\, \', \", \?).
sub char_literal_as_int ( $ ) {
  my ($char) = check_args(1, @_);
  if ($char !~ /^\\(.+)$/s) {
    # Not an escape sequence.
    return ord($char);
  }
  my $escape = $1;
  if ($escape =~ /^[0-7]+$/) {
    return oct($escape);
  }
  if ($escape =~ /^x([0-9A-Fa-f]+)$/) {
    return hex($1);
  }
  my %simple_escapes = ('a' => 7, 'b' => 8, 't' => 9, 'n' => 10,
                        'v' => 11, 'f' => 12, 'r' => 13);
  if (exists($simple_escapes{$escape})) {
    return $simple_escapes{$escape};
  }
  # An escaped character that stands for itself, such as \\ or \'.
  return ord($escape);
}


# EXPRESSION is a C expression of integer type, subject to stringent
# restrictions.  It may contain
#
#   * Integer constants, which are all regarded as `long' or `unsigned
#     long'.
#
#   * Character constants, which are interpreted according to the
#     character set and conventions of the machine and operating system
#     on which the preprocessor is running.  The GNU C preprocessor uses
#     the C data type `char' for these character constants; therefore,
#     whether some character codes are negative is determined by the C
#     compiler used to compile the preprocessor.
#     If it treats `char' as
#     signed, then character codes large enough to set the sign bit will
#     be considered negative; otherwise, no character code is considered
#     negative.
#
#   * Arithmetic operators for addition, subtraction, multiplication,
#     division, bitwise operations, shifts, comparisons, and logical
#     operations (`&&' and `||').
#
#   * Identifiers that are not macros, which are all treated as zero(!).
#
#   * Macro calls.  All macro calls in the expression are expanded before
#     actual computation of the expression's value begins.
#
# Note that `sizeof' operators and `enum'-type values are not allowed.
# `enum'-type values, like all other identifiers that are not taken as
# macro calls and expanded, are treated as zero.

# The argument is the conditional expression.
# Each conditional evaluates to one of three values: $true, $false, or $unknown
sub evaluate_conditional ( $ ) {
  my ($exp) = check_args(1, @_);

  # Do the following simplifications:
  #  * replace defined(macro) by 1 or 0
  #  * expand macros
  #  * replace character constants by numeric equivalents
  # This isn't quite right with respect to ordering of
  # expansion vs. defined, but maybe it's close enough.
  # It does cope with "defined(FOO)" in macro expansion.

  # Eliminate "defined(FOO)", for all FOO whose definedness is known.
  $exp = eval_all_defined($exp);
  # Expand constant-like (but not function-like) macros.
  $exp = expand_macros($exp);
  # Convert character, octal, and hexadecimal integer constants into decimal
  $exp =~ s/$char_literal_re/char_literal_as_int($1)/ge;
  $exp =~ s/\b0[0-9]+\b/oct($MATCH)/ge;
  $exp =~ s/\b0x[0-9a-f]+\b/oct($MATCH)/ge;
  # Strip trailing integer suffixes (L, U, UL, ...) from decimal constants.
  # Suffixed octal and hexadecimal constants are handled when the constant
  # itself is evaluated, in eval_substituted_conditional_internal.
  $exp =~ s/\b([0-9]+)[uUlL]+\b/$1/g;

  # Replace function-like macro calls by UNKNOWNMACRO.
  # This also catches "defined(FOO)".
  while ($exp =~ /\b\w+\s*\(/) {
    my $preexp = $PREMATCH . "UNKNOWNMACRO";
    my $post = $POSTMATCH;
    my $close_paren_pos = find_close_delimiter('(', $post);
    # NOTE(review): this treats a false result (including position 0) as
    # "not found"; confirm against find_close_delimiter in the paren module.
    if (!$close_paren_pos) {
      print "File: $current_file\n";
      die "No close paren found to match function call in $exp";
    }
    $exp = $preexp . substr($post, $close_paren_pos+1);
  }
  # Replace "defined FOO" by UNKNOWNMACRO
  # (whitespace required, so "definedness" is not taken for the operator).
  $exp =~ s/\bdefined\s+(\w+)\b/UNKNOWNMACRO/g;

  if ($debug_cppp) { print "conditional expression: $exp\n"; }

  return eval_substituted_conditional($exp);
}

# Expression should contain only numbers, operators, and symbols (= unknown).
# Each conditional evaluates to one of three values: $true, $false, or $unknown
# This is a tracing wrapper around eval_substituted_conditional_internal.
sub eval_substituted_conditional ( $ ) {
  my ($exp) = check_args(1, @_);
  if ($debug_cppp) { print "eval_substituted_conditional $exp\n"; }
  my $result = eval_substituted_conditional_internal($exp);
  if ($debug_cppp) { print "eval_substituted_conditional $exp => $result\n"; }
  return $result;
}

# Returns text that can be spliced into an expression in place of a
# parenthesized subexpression whose value is the argument: a decimal
# integer (possibly with a leading minus sign), or "UNKNOWNMACRO" if the
# value is unknown or is not an integer that the evaluator can read back.
sub value_as_expression_text ( $ ) {
  my ($value) = check_args(1, @_);
  if (is_unknown($value)) {
    return "UNKNOWNMACRO";
  }
  if (!$value) {
    # Covers 0 and the empty string that Perl uses for a false comparison.
    return "0";
  }
  my $text = "" . ($value + 0);
  return ($text =~ /^-?[0-9]+$/) ? $text : "UNKNOWNMACRO";
}

# Evaluates an expression that contains only numbers, operators,
# parentheses, and identifiers (which stand for unknown values).
# Returns a number, or $unknown.  Dies on an expression it cannot parse.
#
# The expression is split at the operator of lowest precedence and each
# side is evaluated recursively.  Left-associative operators are split at
# their LAST occurrence, so that "10 - 3 - 2" is (10 - 3) - 2.
sub eval_substituted_conditional_internal ( $ ) {
  my ($exp) = check_args(1, @_);

  # Remove leading and trailing space.
  $exp =~ s/^\s+//;
  $exp =~ s/\s+$//;

  # Evaluate parenthesized expressions from the inside out.  This does the
  # wrong thing for function calls, but the argument shouldn't contain any.
  {
    my $rparen_pos;
    while (-1 != ($rparen_pos = index($exp, ")"))) {
      my $lparen_pos = rindex($exp, "(", $rparen_pos);
      if ($lparen_pos == -1) {
        print "File: $current_file\n";
        die "Right paren at $rparen_pos but no preceding left paren: $exp";
      }
      if ($debug_cppp) { print "left paren at $lparen_pos, right paren at $rparen_pos\n"; }
      # my $pre = ($lparen_pos == 0) ? "" : substr($exp, 0, $lparen_pos-1);
      my $pre = substr($exp, 0, $lparen_pos);
      my $parenthesized = substr($exp, $lparen_pos+1, $rparen_pos-$lparen_pos-1);
      my $post = substr($exp, $rparen_pos+1);
      if ($debug_cppp) {
        if (($pre eq "") && ($post eq "")) {
          print "evaluating $parenthesized\n";
        } else {
          print "evaluating $parenthesized in $pre<<<$parenthesized>>>$post\n";
        }
      }
      my $paren_evalled = eval_substituted_conditional($parenthesized);
      # Substitute the VALUE of the subexpression, not merely its truth:
      # "(1 << 4) > 8" must become "16 > 8", not "1 > 8".
      $exp = $pre . value_as_expression_text($paren_evalled) . $post;
    }
  }

  # No more parentheses; attack operators in increasing precedence order

  if ($exp =~ /\?/) {
    # Conditional expressions ( ?: ) are officially forbidden, but do occur.
    my $guard = $PREMATCH;
    my $then_and_else = $POSTMATCH;
    my $then_pending = "";
    # Number of nested "?" whose ":" has not yet been seen.
    my $question_mark_depth = 0;
    while ($then_and_else =~ /[\?:]/) {
      if (($MATCH eq ':') && ($question_mark_depth == 0)) {
        my $then_exp = $then_pending . $PREMATCH;
        my $else_exp = $POSTMATCH;
        my $gval = eval_substituted_conditional($guard);
        my $tval = eval_substituted_conditional($then_exp);
        my $elval = eval_substituted_conditional($else_exp);
        if (is_unknown($gval)) {
          # The result is known only if both branches agree.
          if (is_unknown($tval) || is_unknown($elval) || ($tval ne $elval)) {
            return $unknown;
          } else {
            return $tval;
          }
        } else {
          return ($gval ? $tval : $elval);
        }
      } else {
        $question_mark_depth += ($MATCH eq '?') ? 1 : -1;
        $then_pending .= $PREMATCH . $MATCH;
        $then_and_else = $POSTMATCH;
      }
    }
    die "Insufficient : or too many ? in $exp";
  } elsif ($exp =~ /\|\|/) {
    my $left = $PREMATCH;
    my $right = $POSTMATCH;
    my $lval = eval_substituted_conditional($left);
    my $rval = eval_substituted_conditional($right);
    # One true operand decides the result even if the other is unknown.
    if (is_true($lval) || (is_true($rval))) {
      return $true;
    } elsif (is_false($lval) && is_false($rval)) {
      return $false;
    } else {
      return $unknown;
    }
  } elsif ($exp =~ /\&\&/) {
    my $left = $PREMATCH;
    my $right = $POSTMATCH;
    my $lval = eval_substituted_conditional($left);
    my $rval = eval_substituted_conditional($right);
    # One false operand decides the result even if the other is unknown.
    if (is_false($lval) || is_false($rval)) {
      return $false;
    } elsif (is_true($lval) && is_true($rval)) {
      return $true;
    } else {
      return $unknown;
    }
  } elsif ($exp =~ /\|/) {
    my $left = $PREMATCH;
    my $right = $POSTMATCH;
    my $lval = eval_substituted_conditional($left);
    my $rval = eval_substituted_conditional($right);
    if (is_unknown($lval) || is_unknown($rval)) {
      return $unknown;
    } else {
      return ($lval | $rval);
    }
  } elsif ($exp =~ /\^/) {
    my $left = $PREMATCH;
    my $right = $POSTMATCH;
    my $lval = eval_substituted_conditional($left);
    my $rval = eval_substituted_conditional($right);
    if (is_unknown($lval) || is_unknown($rval)) {
      return $unknown;
    } else {
      return ($lval ^ $rval);
    }
  } elsif ($exp =~ /\&/) {
    my $left = $PREMATCH;
    my $right = $POSTMATCH;
    my $lval = eval_substituted_conditional($left);
    my $rval = eval_substituted_conditional($right);
    if (is_unknown($lval) || is_unknown($rval)) {
      return $unknown;
    } else {
      return ($lval & $rval);
    }
  } elsif ($exp =~ /^(.*)([!=]=)(.*)$/s) {
    # The greedy prefix selects the last == or !=.
    my ($left, $op, $right) = ($1, $2, $3);
    $left =~ s/^\s+//;
    $left =~ s/\s+$//;
    $right =~ s/^\s+//;
    $right =~ s/\s+$//;
    # Textually identical operands are equal even if their value is
    # unknown (but two unknown macro calls need not be equal).
    if (($left eq $right) && ($left !~ /UNKNOWNMACRO/)) {
      return $op eq "==";
    }
    my $lval = eval_substituted_conditional($left);
    my $rval = eval_substituted_conditional($right);
    if (is_unknown($lval) || is_unknown($rval)) {
      return $unknown;
    } elsif ($op eq "==") {
      return ($lval == $rval);
    } elsif ($op eq "!=") {
      return ($lval != $rval);
    } else {
      print "File: $current_file\n";
      die "impossible";
    }
  } elsif ($exp =~ /^(.*[\w\$])\s*(<=|>=|<|>)(?![<>=])(.*)$/s) {
    # A relational operator follows the end of an operand (a word
    # character) and is not followed by another <, >, or =; this keeps
    # the shift operators << and >> from being mistaken for it.
    my ($left, $op, $right) = ($1, $2, $3);
    my $lval = eval_substituted_conditional($left);
    my $rval = eval_substituted_conditional($right);
    if (is_unknown($lval) || is_unknown($rval)) {
      return $unknown;
    } elsif ($op eq "<") {
      return ($lval < $rval);
    } elsif ($op eq ">") {
      return ($lval > $rval);
    } elsif ($op eq "<=") {
      return ($lval <= $rval);
    } elsif ($op eq ">=") {
      return ($lval >= $rval);
    } else {
      print "File: $current_file\n";
      die "impossible";
    }
  } elsif ($exp =~ /^(.*)(<<|>>)(.*)$/s) {
    my ($left, $op, $right) = ($1, $2, $3);
    my $lval = eval_substituted_conditional($left);
    my $rval = eval_substituted_conditional($right);
    if (is_unknown($lval) || is_unknown($rval)) {
      return $unknown;
    } elsif ($op eq "<<") {
      return ($lval << $rval);
    } elsif ($op eq ">>") {
      return ($lval >> $rval);
    } else {
      print "File: $current_file\n";
      die "impossible";
    }
  }
  # A binary + or - follows the end of an operand (a word character);
  # a unary plus or minus does not, so it is not captured here.
  elsif ($exp =~ /^(.*[\w\$])\s*([-+])(.*)$/s) {
    my ($left, $op, $right) = ($1, $2, $3);
    my $lval = eval_substituted_conditional($left);
    my $rval = eval_substituted_conditional($right);
    if (is_unknown($lval) || is_unknown($rval)) {
      return $unknown;
    } elsif ($op eq "+") {
      return ($lval + $rval);
    } elsif ($op eq "-") {
      return ($lval - $rval);
    } else {
      print "File: $current_file\n";
      die "impossible";
    }
  } elsif ($exp =~ /^(.*)([*\/%])(.*)$/s) {
    my ($left, $op, $right) = ($1, $2, $3);
    my $lval = eval_substituted_conditional($left);
    my $rval = eval_substituted_conditional($right);
    if (is_unknown($lval) || is_unknown($rval)) {
      return $unknown;
    } elsif ($op eq "*") {
      return ($lval * $rval);
    } elsif ($rval == 0) {
      # Division by zero: leave the conditional alone rather than crash.
      return $unknown;
    } elsif ($op eq "/") {
      return int($lval / $rval);
    } elsif ($op eq "%") {
      # C remainder: the quotient is truncated toward zero.
      return ($lval - $rval * int($lval / $rval));
    } else {
      print "File: $current_file\n";
      die "impossible";
    }
  } elsif ($exp =~ /^([-+~!])(.*)$/s) {
    # Unary operators.
    my ($op, $rand) = ($1, $2);
    my $val = eval_substituted_conditional($rand);
    if (is_unknown($val)) {
      return $unknown;
    } elsif ($op eq "~") {
      return ~$val;
    } elsif ($op eq "!") {
      return !$val;
    } elsif ($op eq "-") {
      return -$val;
    } elsif ($op eq "+") {
      return $val;
    } else {
      print "File: $current_file\n";
      die "impossible";
    }
  } elsif ($exp =~ /[~!]/) {
    print "File: $current_file\n";
    die "Should only find [~!] at beginning of expression: $exp";
  }
  # Signs never reach the constant patterns below: a leading + or - was
  # consumed by the unary-operator branch above.
  elsif ($exp =~ /^0[xX]?[0-9A-Fa-f]+[uUlL]*$/) {
    if ($exp =~ /^0[xX]?[0-9A-Fa-f]{9}/) {
      # There were 9 hexadecimal digits.  That is too many.
      return $unknown;
    } else {
      # Perl does not permit non-hexadecimal digits in the argument to oct.
      $exp =~ s/[uUlL]+$//;
      return oct($exp);
    }
  } elsif ($exp =~ /^[0-9]+$/) {
    return int($exp);
  } elsif ($exp =~ /^[_A-Za-z][_A-Za-z0-9\$]*$/) {
    # An identifier whose value we do not know.
    return $unknown;
  } elsif ($exp =~ /^\s*\#\s*[_A-Za-z][_A-Za-z0-9\$]*$/) {
    # I don't think stringization is legal, nor know how to deal with it.
    return $unknown;
  } else {
    print "File: $current_file\n";
    die "What expression?  $exp";
  }
}

# Returns the three-valued AND of the two conditions.
# cond1 was the previous condition; cond2 is the new one
sub combine_conditional ( $$ ) {
  my ($cond1, $cond2) = check_args(2, @_);

  ## This is wrong:  it combines ($true, $unknown) into $true
  # my $result = ((is_unknown($cond1))
  #               ? $cond2
  #               : ((is_unknown($cond2))
  #                  ? $cond1
  #                  : ($cond1 && $cond2)));
  my $result = ((is_false($cond1) || is_false($cond2))
                ? 0             # $false
                : ((is_true($cond1) && is_true($cond2))
                   ? $true
                   : $unknown));
  if ($debug_cppp) { print "combine_conditional($cond1, $cond2) => $result\n"; }
  return $result;
}


###########################################################################
### Command-line arguments
###

# Consumes the leading flags of @ARGV, setting %cpp_macros,
# $backup_exists_action, and $preservelines.  The last element of @ARGV is
# never treated as a flag, so at least one file name always remains.
sub process_command_line_flags ( ) {
  check_args(0, @_);

  # The loop below never examines the final argument, so check for a
  # leading -h separately; this makes "cppp -h" by itself work.
  if ((scalar(@ARGV) > 0) && ($ARGV[0] eq "-h")) {
    usage();
  }

  while (($#ARGV > 0) && ($ARGV[0] =~ /^-./)) {
    my $thisarg = shift(@ARGV);
    my $thisflag;
    if ($thisarg =~ /^-([AUD])/) {
      $thisflag = $1;
      $thisarg = $POSTMATCH;
      if ($thisarg eq "") {
        # The flag's argument is the next command-line argument.  If only
        # one argument remains, taking it would leave no file to process.
        if ($#ARGV == 0) { die "No argument follows -$thisflag" }
        $thisarg = shift(@ARGV);
      }
    } elsif ($thisarg =~ /^-nocppp-/) {
      $backup_exists_action = $POSTMATCH;
      if ($backup_exists_action !~ /^(overwrite|skip|use|die)$/) {
        die "Bad flag $thisarg -- use overwrite, skip, use, or die, not $backup_exists_action";
      }
      next;
    } elsif ($thisarg eq "-preservelines") {
      $preservelines = 1;
      # Without this, control fell through to the -D/-U/-A dispatch below
      # with $thisflag undefined, and the program died with "Bad flag".
      next;
    } elsif ($thisarg eq "-h") {
      usage();
    } else {
      die "Unrecognized command-line arg `$thisarg' should be one of -D, -U, -A";
    }

    if ($thisflag eq "D") {
      my ($name,$def) = split(/=/, $thisarg, 2);
      if (!defined($def)) {
        $def = "1";
      }
      # If it exists but isn't defined, then don't define it now, because
      # -U takes precedence over -D.  Don't check whether it's defined,
      # because the rightmost -D takes precedence.
      if (! (exists($cpp_macros{$name}) && !defined($cpp_macros{$name}))) {
        $cpp_macros{$name} = $def;
      }
    } elsif ($thisflag eq "U") {
      $cpp_macros{$thisarg} = undef;
    } elsif ($thisflag eq "A") {
      die "-A flag not yet implemented";
    } else {
      die "Bad flag should have been one of D, U, or A:  $thisflag $thisarg";
    }
  }
}


###########################################################################
### Main routine
###

process_command_line_flags();

# Every remaining argument is a file to process.
while (scalar(@ARGV) > 0) {
  my $file = shift(@ARGV);
  if ($debug_cppp) { print "processing file $file; remaining args @ARGV\n"; }
  process_file($file);
}

exit(0);


###########################################################################
### Process one file
###

# Edits argument in-place after making a backup copy.
sub process_file ( $ ) {
  my ($file) = check_args(1, @_);

  $current_file = $file;

  # Make sure the file is readable before touching anything.
  # Three-argument open keeps characters in the file name (such as a
  # leading ">" or a trailing "|") from being taken as an open mode.
  open(INPUT, "<", $file) or die "Can't read $file: $!";
  close(INPUT);

  my $backup = $file . ".nocppp";

  if (-e $backup) {
    if ($backup_exists_action eq "overwrite") {
      # If this rename failed silently, the stale backup would be
      # processed and the current file overwritten with the result.
      rename($file, $backup) or die "Can't rename $file to $backup: $!";
    } elsif ($backup_exists_action eq "skip") {
      return;
    } elsif ($backup_exists_action eq "use") {
      # Nothing to do: the existing backup is the input.
    } elsif ($backup_exists_action eq "die") {
      die "Backup file $backup already exists.";
    }
  } else {
    rename($file, $backup) or die "Can't rename $file to $backup: $!";
  }

  open(INPUT, "<", $backup) or die "Can't read $backup: $!";
  open(OUTPUT, ">", $file) or die "Can't write $file: $!";

  ###
  ### Data structures
  ###

  ## The key data structure is a stack that indicates the value of each #if
  ## condition that is currently active.
  ##  * encountering #if pushes a value onto the stack
  ##  * encountering #else negates the value on the top of the stack
  ##  * encountering #endif pops the stack
  ##  * encountering #elif is a bit tricky; it can't just negate and push,
  ##    but must change the value at the top of the stack, so that the total
  ##    number of #endif lines issued are correct.
  ##
  ## Based on the values on the stack, each line of C code is either
  ## retained or discarded, and each line of cpp is retained, discarded, or
  ## modified.
  ##
  ## The cpp case is more complicated because of the "modified" case.
  ## Whether a line of cpp is retained can depend not just on enclosing
  ## conditions (that held when the corresponding #if was encountered) but
  ## also on previous #if and #elif commands.  In particular, suppose that
  ## a "#endif" is encountered.  If the corresponding "#if" has been
  ## eliminated, it should be removed; otherwise, it should be retained.

  # conditions for all #if we are inside; each is $true, $false, or $unknown
  my @condition_stack = ();
  # the AND of @condition_stack
  my @condition_value_stack = ($true);
  # the top of @condition_value_stack
  my $current_condition_value = $true;
  ## Old implementation
  # # set when "if unknown elif known endif", so final endif is necessary
  # my @elif_needs_endif = ();
  # set when the innermost enclosing #if/#elif sequence was all known
  my @if_removed = ();
  # set when "if true elif **** endif", so no more branches need be output
  my @true_elif_found = ();

  # Invariants:
  #   @condition_stack, @elif_needs_endif, @true_elif_found have the same size
  #   @condition_value_stack is one element larger than those
  #   @condition_value_stack's first element is $true

  ###
  ### Process lines
  ###

  while ($true) {
    # my ($raw, $simple, $phys_lines, $ncnb_lines) = get_fulltoken_cline(\*INPUT);
    my ($raw, $simple) = get_fulltoken_cline(\*INPUT);
    # NOTE(review): this also stops on a final line that is exactly "0"
    # with no newline; what get_fulltoken_cline returns at end of file is
    # not visible here, so the test is left as it was.
    if (!$raw) {
      last;
    }

    # The text to write for this input line; empty if the line is dropped.
    # Every branch below sets $emit rather than printing, so that the
    # -preservelines accounting can be done in one place, at the end of
    # the loop body.  $raw and $simple keep their trailing newline in both
    # modes.
    my $emit = "";

    # For debugging.  (Should this code be unconditionally executed?)
    if (!($current_condition_value eq $condition_value_stack[$#condition_value_stack])) {
      die "bad current_condition_value, condition_value_stack in $file: $current_condition_value; @condition_value_stack";
    }
    if (scalar(@condition_stack) != scalar(@condition_value_stack)-1) {
      die "bad sizes for condition_stack (", scalar(@condition_stack),
        "), condition_value_stack (", scalar(@condition_value_stack), ") in $file";
    }
    # if (scalar(@condition_stack) ne scalar(@elif_needs_endif)) {
    #   die "bad sizes for condition_stack (", scalar(@condition_stack),
    #     "), elif_needs_endif (", scalar(@elif_needs_endif), ") in $file";
    # }
    if (scalar(@condition_stack) != scalar(@if_removed)) {
      die "bad sizes for condition_stack (", scalar(@condition_stack),
        "), if_removed (", scalar(@if_removed), ") in $file";
    }
    if (scalar(@condition_stack) != scalar(@true_elif_found)) {
      die "bad sizes for condition_stack (", scalar(@condition_stack),
        "), true_elif_found (", scalar(@true_elif_found), ") in $file";
    }

    if ($simple =~ /^\s*\#\s*(if(n?def)?|el(se|if)|endif)\b/) {
      $simple =~ s/^\s*\#\s*//;
      if ($debug_cppp) { print "simple preprocessor line: $simple"; } # ends in newline

      if ((scalar(@condition_stack) == 0)
          && ($simple =~ /^(el(se|if)|endif)\b/)) {
        # This is an extra directive, perhaps an error or a file
        # fragment.  Just pass it through.
        if (!is_true($current_condition_value)) {
          die "empty condition_stack but non-true current_condition_value $current_condition_value in file $file";
        }
        $emit = $raw;
      } elsif ($simple =~ /^endif\b/) {
        ## "#endif": pop the condition stack
        my $thisvalue = pop(@condition_stack);
        pop(@condition_value_stack);
        # my $needs_endif = pop(@elif_needs_endif);
        my $needs_endif = ! pop(@if_removed);
        pop(@true_elif_found);
        $current_condition_value = $condition_value_stack[$#condition_value_stack];
        if (($needs_endif || is_unknown($thisvalue))
            && is_nonfalse($current_condition_value)) {
          $emit = $raw;
        }
      } elsif ($simple =~ /^else\b/) {
        ## "#else": negate the value on the top of the condition stack
        # $thisvalue is before negation
        my $thisvalue = $condition_stack[$#condition_stack];
        pop(@condition_value_stack);
        $current_condition_value = $condition_value_stack[$#condition_value_stack];
        if ($debug_cppp) { print "Considering else: thisvalue=`$thisvalue', current_condition_value=`$current_condition_value'\n"; }
        # my $needs_endif = $elif_needs_endif[$#elif_needs_endif];
        # my $needs_endif = ! $if_removed[$#if_removed];
        if ((! $if_removed[$#if_removed])
            && (! $true_elif_found[$#true_elif_found])
            # && is_unknown($thisvalue)
            && is_nonfalse($current_condition_value)) {
          $emit = $raw;
        }
        if ($true_elif_found[$#true_elif_found]) {
          # An earlier branch was taken, so this one cannot be.
          $thisvalue = $false;
        } elsif (!is_unknown($thisvalue)) {
          $thisvalue = !$thisvalue;
        }
        $condition_stack[$#condition_stack] = $thisvalue;
        $current_condition_value = combine_conditional($current_condition_value, $thisvalue);
        push(@condition_value_stack, $current_condition_value);
        if ($debug_cppp) {
          print "After else:\n";
          print "  condition_stack = @condition_stack\n";
          print "  condition_value_stack = @condition_value_stack\n";
        }
      } elsif ($simple =~ /^elif\b/) {
        ## Elif: side-effect the condition stack
        my $ifarg = $POSTMATCH;
        $ifarg =~ s/^\s+//;
        $ifarg =~ s/\s+$//;
        my $oldvalue = $condition_stack[$#condition_stack];
        my $thisvalue = evaluate_conditional($ifarg);
        if ($true_elif_found[$#true_elif_found]) {
          $thisvalue = $false;
        } elsif (is_true($thisvalue)) {
          $true_elif_found[$#true_elif_found] = $true;
        }
        # my $old_current_condition_value = $current_condition_value;
        # The combined value of the conditions that enclose this #if group.
        my $prev_current_condition_value = $condition_value_stack[$#condition_value_stack - 1];

        # Replace the top of condition_stack according to:
        #   if true elif ****    => if false
        #   if false elif ****   => if ****
        #   if unknown elif **** => if ****
        # (and recompute condition_value_stack).
        # If **** = false, this omits the #endif
        #   if unknown elif **** => if ****
        # but this includes extraneous text
        #   if unknown elif **** => if unknown
        # so we need separate conditions for the text and the endif.
        # That is what $if_removed ($elif_needs_endif) does.
        # It is wrong to just do
        #   if true elif ****    => if false
        # because then in
        #   if true elif **** else XXX endif
        # the XXX gets output, because it's in the else of an "if false".
        # So remember if we are in a false branch (regardless
        # of what the current value is).
        # my $old_if_removed = $if_removed[$#if_removed]
        {
          $condition_stack[$#condition_stack] = is_true($oldvalue) ? $false : $thisvalue;
          # $elif_needs_endif[$#elif_needs_endif] |= is_unknown($oldvalue);
          # $if_removed[$#if_removed] &= ! is_unknown($oldvalue);
          pop(@condition_value_stack);
          $current_condition_value
            = combine_conditional($condition_value_stack[$#condition_value_stack],
                                  $condition_stack[$#condition_stack]);
          push(@condition_value_stack, $current_condition_value);
        }
        if ($debug_cppp) {
          my $simple_nonewline = $simple;
          chomp($simple_nonewline);
          print "After simple elif: $simple_nonewline:\n";
          print "  condition_stack = @condition_stack\n";
          print "  condition_value_stack = @condition_value_stack\n";
        }

        # If doing output, use the following instead of the elif line:
        #   if true elif ****        => no output
        #   if false elif true       => no output
        #   if false elif false      => no output
        #   if false elif unknown    => if
        #   if unknown elif true     => else
        #   if unknown elif false    => no output
        #   if unknown elif unknown  => elif
        # We do output approximately if we did for the #if line:
        #   (is_unknown($thisvalue) && is_nonfalse($old_condition_value))

        if ($debug_cppp_output) { print "in elif, pccv=$prev_current_condition_value, oldv=$oldvalue, thisv=$thisvalue for $raw"; }
        if (is_nonfalse($prev_current_condition_value)) {
          # We might perform output for this line.
          if ($debug_cppp_output) { print "We might perform output for this line.\n"; }
          if ($if_removed[$#if_removed]) {
            # No directive has been output for this #if group yet.
            if ((!$oldvalue) && is_unknown($thisvalue)) {
              # if (! $elif_needs_endif[$#elif_needs_endif]) {
              # This #elif becomes the #if that opens the group.
              my $output_raw = $raw;
              # chomp, not chop: a final line without a newline must not
              # lose its last character.
              chomp $output_raw;
              $output_raw =~ s/^(\s*\#\s*)el/$1/; # change elif to if
              $if_removed[$#if_removed] = $false;
              $emit = "$output_raw\t/* cppp replaced: elif */\n";
            }
          } else {
            if (is_unknown($thisvalue)) {
              $emit = $raw;
            } elsif ($thisvalue) {
              # A true #elif after unknown branches becomes "#else".
              my $output_raw = $raw;
              chomp $output_raw;
              # change every "*/" to "* ", as we'll embed this in a comment
              $output_raw =~ s/\*\//* /g;
              # retain same indentation; fall back to a bare "#" if the raw
              # line does not begin with the directive, rather than reuse a
              # capture left over from an earlier match.
              my ($indentation) = ($output_raw =~ /^(\s*\#\s*)/);
              if (!defined($indentation)) {
                $indentation = "#";
              }
              $emit = "${indentation}else\t/* cppp replaced: $output_raw */\n";
            }
          }
        }
      } elsif ($simple =~ /^if(n?def)?\b/) {
        ## "#if": push on the condition stack
        my $iftype = $1;        # Value is undef or "def" or "ndef".
        my $ifarg = $POSTMATCH;
        $ifarg =~ s/^\s+//;
        $ifarg =~ s/\s+$//;
        my $thisvalue;
        if (defined($iftype))   # ie, $iftype != ""
          { # ifdef or ifndef, not plain if
            my $is_ndef = ($iftype eq "ndef");
            if ($ifarg =~ /\s/) {
              print "File: $current_file\n";
              die "Embedded space in if$iftype: $simple";
            }
            my $definedness = cpp_macro_definedness($ifarg);
            $thisvalue = is_unknown($definedness) ? $unknown
              : $is_ndef ? !$definedness : $definedness;
          }
        else
          { $thisvalue = evaluate_conditional($ifarg); }
        my $old_condition_value = $current_condition_value;
        $current_condition_value = combine_conditional($current_condition_value, $thisvalue);
        push(@condition_stack, $thisvalue);
        push(@condition_value_stack, $current_condition_value);
        # push(@elif_needs_endif, $false);
        push(@if_removed, $false);
        push(@true_elif_found, is_true($thisvalue));
        if ($debug_cppp) {
          print "After ifarg $ifarg:\n";
          print "  condition_stack = @condition_stack\n";
          print "  condition_value_stack = @condition_value_stack\n";
        }
        if (is_unknown($thisvalue) && is_nonfalse($old_condition_value)) {
          $emit = $raw;
        } else {
          $if_removed[$#if_removed] = $true;
        }
      } else {
        print "File: $current_file\n";
        die "What cpp conditional directive?  $simple";
      }
    } else {
      # Not a cpp conditional directive
      if ($debug_cppp_output) { print "non-cpp-line (ccv=$current_condition_value): $raw"; }
      if (is_nonfalse($current_condition_value)) {
        $emit = $raw;
      }
    }

    if ($emit ne "") {
      # Kept or rewritten text has as many newlines as $raw does, provided
      # $raw ends in a newline.
      print OUTPUT $emit;
    } elsif ($preservelines) {
      # Replace a dropped line by blank lines, one per newline in $raw
      # ($raw may span several physical lines).
      print OUTPUT "\n" x ($raw =~ tr/\n//);
    }
  }
  close(INPUT);
  # Buffered write errors (for instance, a full disk) surface at close.
  close(OUTPUT) or die "Can't finish writing $file: $!";
}