#!/usr/sbin/perl -s

# This "munge" program summarizes referee reports for conferences.

# Author: Original author unknown.
# Revised by Peter Lee.
# Substantially revised 11/94 by Michael Ernst:
#   fix sort bug; relax input file format; add unconfident, triage, full flags;
#   rewrite documentation; abstract print_summary_line; don't make user set
#   $num_papers; more.

###########################################################################
### Invoking the program
###

# The program can be invoked from the Unix command line as follows:
#
#   % munge [action] [modifiers] file1 file2 ...

# Select an action from the following list.
#
#                   If no action is specified, check referee reports for
#                   syntactic correctness.  See below for the input file format.
#   -summary        List, for each paper, its weighted score, the paper's
#                   reviewers, and the scores they assigned.
#   -controversial  List only papers that have widely disparate reviews,
#                   controlled by the $controversy_limit variable.
#   -unconfident    List papers with no reviews of high confidence,
#                   controlled by the $confidence_limit variable.
#   -sort           List papers in decreasing order of weighted score.
#   -triage         Separate the output into five groups from definite accept
#                   to definite reject.  This can be a better way to split the
#                   papers than simply sorting, when some are controversial.
#   -bypaper        Extract non-private information for each paper into a file
#                   whose name is the paper number.  These files can be used
#                   by the send-letter script.

# The modifiers, which may be used with any of the above, are:
#
#   -latex          Produce output in LaTeX format.
#   -full           Also output the full reports, after the summary.  Page breaks
#                   separate the reports of paper N from those of paper N+1.

###########################################################################
### File format
###

# Each file may contain any number of reports, and the first report may
# be preceded by any amount of junk.
# (This makes it easy to simply save
# email messages containing a report at the end.)
#
# When munge finds a syntax error, it aborts processing and gives an
# error message giving an approximate line number and paper number.  The
# error reporting is not exact, however, so you might have to do some
# searching around to find the exact source of the error.
#
# This program expects reports in one of the following formats (non-fixed
# parts of a report are indicated in angle brackets):
#
#   *
#   Paper number: <paper number>
#   Title: <title>
#   Authors: <authors>
#   PC member: <name>
#   Reviewer: <name>
#   Overall score: <integer, at most 10>
#   Confidence: <integer from 0 to 4>
#   Justification, comments, and recommendations for authors:
#   <text>
#   Comments for PC only (not for authors):
#   <text>
#
# or
#
#   *
#   Paper #: <paper number>
#   Title: <title>
#   Authors: <authors>
#   Overall Evaluation: <integer, at most 10>
#   Confidence: <integer from 0 to 4>
#   PC Member Name: <name>
#   Referee Name: <name>
#   Justification:
#   <text>
#   Comments for Authors:
#   <text>
#   Comments to PC:
#   <text>
#
# Each report entry must start in column one.  The "Referee Name" and
# "Comments to PC" entries may be blank, but the entry keywords must still
# be present.  It is necessary to have either a "Justification" or
# "Comments for Authors" field, but one may be left out if desired.  Most
# importantly, the asterisk must be present to separate reports from each
# other.

###########################################################################
### Bugs
###

# Bugs:
#  * A blank "PC Member:" field isn't flagged as an error.
#  * When there is no space between "Paper number:" and the number,
#    sometimes fields are lost.
#  * Blank lines at the end of the file should not be interpreted as a
#    review missing fields; adjacent report delimiters should be permitted.
#  * It should be possible to signal the end of the file and to include
#    other information after it, like that before the first report.
# Desired features:
#  * Permit processing only of papers with specified numbers.
#  * Check that title and authors are consistent across reviews.

###########################################################################
### User variables
###

# The following constants should be set according to your particular needs.

# Regular expression matching the line that separates one report from the next.
$report_delimiter = '\*';
# -controversial lists a paper when (max score - min score) exceeds this.
$controversy_limit = 4;
# -unconfident lists a paper when no review has a confidence above this.
$confidence_limit = 2;

###########################################################################
### Code
###

# Highest paper number seen so far; papers are numbered 1..$num_papers.
$num_papers = 0;

# Make arrays 1-based rather than 0-based.
# NOTE(review): assigning a non-zero value to $[ is deprecated and is rejected
# outright by recent perls.  It is retained because code later in this script
# may index list-valued arrays from 1; confirm before removing it.
$[ = 1;

###
### Read the files of reports
###

# Data collected while reading (all globals, consumed by the output code):
#   @count, @min, @max, @maxconf, @title, @authors   indexed by paper number
#   %score, %confidence, %pc, %referee, %comments, %private
#                                    keyed by {paper number, report number}

# Open the first report file.
$file = shift;

# The main loop: one iteration per report file named on the command line.
while ($file) {
    # The explicit "<" keeps a strange file name from being taken as a pipe
    # or an output redirection.
    if (!open(REPORT, "< $file")) {
        print STDERR "Cannot open file $file: $!\n";
        exit 1;
    }

    # Skip ahead to the beginning of the first report in the file.
    while (<REPORT>) {
        if (/^${report_delimiter}[ \t]*$/) {
            last;
        }
    }
    if (eof(REPORT)) {
        print STDERR "No reports found in file $file\n";
        exit 1;
    }

    # Now, process each report in the file, recording the information for
    # later analysis.
  readall:
    while (!eof(REPORT)) {

        # Get the paper number of the next report.
        # Note that we assume that the paper number appears before any
        # other field.
        while (<REPORT>) {
            if (/^Paper *(\#|number):/i) {
                # Reduce the line to the number; if there are no digits the
                # line is left alone and int() yields 0, which is rejected.
                s/^Paper *(\#|number):[ \t]*([0-9]+)/$2/i;
                $paper = int($_);
                if ($paper < 1) {
                    print STDERR "Bad paper number in file $file on line $.: $paper\n";
                    exit 1;
                } else {
                    $num_papers = $paper if $paper > $num_papers;
                }
                if (! $count[$paper]) {
                    # First report for this paper: initialize its statistics.
                    $max[$paper] = 0;
                    $min[$paper] = 10;
                    $maxconf[$paper] = 0;
                }
                $count[$paper]++;   # Increment the count of reviews for this paper
                last;
            }
            # Skip blank lines, but nothing else.
            if (/^[ \t]*$/) {
                next;
            }
            # Anything else before the paper number is an error.
            print STDERR "Missing paper number in file $file, near line $.\n";
            exit 1;
        }
        if (eof(REPORT)) {
            print STDERR "Missing paper number in file $file, near line $.\n";
            exit 1;
        }

        # With the paper number in hand, read all of the other fields.
        $score_found = 0;
        $confidence_found = 0;
        $pc_found = 0;
        $referee_found = 0;
        $comments_found = 0;
        $private_found = 0;
        $title_found = 0;
        $authors_found = 0;

        # The tests below are deliberately independent "if"s rather than an
        # if/elsif chain: the text-field readers leave the line that ended
        # them in $_, and that line must still be seen by the tests after them.
        while (<REPORT>) {

            # Data fields
            if (/^Overall *(evaluation|score):/i) {
                $score_found = 1;
                s/^Overall *(evaluation|score):[ \t]*([0-9]+)/$2/i;
                if ($_ > 10) {
                    print STDERR "Bad score in file $file on line $.: $_\n";
                    exit 1;
                }
                $score{$paper,$count[$paper]} = int($_);
                $max[$paper] = int($_) if int($_) > $max[$paper];
                $min[$paper] = int($_) if int($_) < $min[$paper];
            }
            if (/^Confidence:/i) {
                $confidence_found = 1;
                s/^Confidence:[ \t]*([0-9]+)/$1/i;
                if ($_ < 0 || $_ > 4) {
                    print STDERR "Bad confidence score in file $file on line $.: $_\n";
                    exit 1;
                }
                $confidence{$paper,$count[$paper]} = int($_);
                $maxconf[$paper] = int($_) if int($_) > $maxconf[$paper];
            }
            if (/^PC *Member( *Name)?:/i) {
                $pc_found = 1;
                # Attempt to get last name
                s/^PC *Member( *Name)?:.*[ \t]+([a-zA-Z\-]+)\n/$2/i;
                $pc{$paper,$count[$paper]} = $_;
            }
            # A reviewer field with nothing after the colon is legal; note
            # that it was present and go on to the next line.
            if (/^(Referee *Name|Reviewer):[ \t]*$/i) {
                $referee_found = 1;
                next;
            }
            # Any other reviewer field counts as present, whether or not a
            # space follows the colon.
            if (/^(Referee *Name|Reviewer):/i) {
                $referee_found = 1;
                s/^(Referee *Name|Reviewer):[ \t]*([a-zA-Z \.\-]+)\n/$2/i;
                $referee{$paper,$count[$paper]} = $_;
            }

            # Text fields.
            if (/^Justification:/i || /^Comments.*Authors:/i
                || /^Justification.*authors:/i) {
                $comments_found = 1;
                $comments{$paper,$count[$paper]} = $_;
                # Accumulate lines up to the private comments or the end of
                # the report; the terminating line stays in $_.
                while (<REPORT>) {
                    if (/^Comments.*PC(:| only)/i
                        || /^${report_delimiter}[ \t]*$/) {
                        last;
                    }
                    $comments{$paper,$count[$paper]} .= $_;
                }
            }
            if (/^Comments.*PC(:| only)/i) {
                $private_found = 1;
                $private{$paper,$count[$paper]} = $_;
                # Accumulate lines up to the end of the report; the
                # delimiter line stays in $_.
                while (<REPORT>) {
                    if (/^${report_delimiter}[ \t]*$/) {
                        last;
                    }
                    $private{$paper,$count[$paper]} .= $_;
                }
            }
            if (/^Title:/i) {
                $title_found = 1;
                s/^Title:(.*)\n/$1/i;
                $title[$paper] = $_;
            }
            if (/^Authors:/i || /^Author:/i) {
                $authors_found = 1;
                s/^Author.*:(.*)\n/$1/i;
                $authors[$paper] = $_;
            }

            # End of report marker.
            if (/^${report_delimiter}[ \t]*$/) {
                if ($score_found && $confidence_found && $pc_found
                    && $referee_found && $comments_found && $private_found
                    && $title_found && $authors_found) {
                    next readall;
                } else {
                    if (!$score_found) { print STDERR "Missing score\n" }
                    if (!$confidence_found) { print STDERR "Missing confidence\n" }
                    if (!$pc_found) { print STDERR "Missing pc\n" }
                    if (!$referee_found) { print STDERR "Missing referee\n" }
                    if (!$comments_found) { print STDERR "Missing comments\n" }
                    if (!$private_found) { print STDERR "Missing private\n" }
                    if (!$title_found) { print STDERR "Missing title\n" }
                    if (!$authors_found) { print STDERR "Missing authors\n" }
                    print STDERR "Missing fields in file $file on line $.: $_\n";
                    exit 1;
                }
            }
        }

        # If we reach end of file here, then we must be done.
        last;
    }

    # Close explicitly: reopening an open handle does not reset $., which
    # would make the line numbers in error messages for later files wrong.
    close(REPORT);

    $file = shift;
}

# Count the reports actually read (papers without reports contribute 0).
$totalreports = 0;
for ($i=1; $i<=$num_papers; $i++) {
    $totalreports += $count[$i];
}

if ($summary || $controversial || $unconfident || $sort || $triage || $bypaper) {
    print STDERR "Processed $totalreports reports for $num_papers papers.\n";
} else {
    print STDERR "Checked syntax of $totalreports reports for $num_papers papers.\n";
}

# Compute a weighted average of the scores for each paper.
# Each score is weighted by its reviewer's confidence.  A paper whose
# confidences sum to zero (for instance one with no reports) averages 0.
for ($i=1; $i<=$num_papers; $i++) {
    $weighted_sum = 0;
    $weight_sum = 0;
    for ($j=1; $j<=$count[$i]; $j++) {
        $weighted_sum += $score{$i,$j} * $confidence{$i,$j};
        $weight_sum += $confidence{$i,$j};
    }
    if ($weight_sum > 0) {
        $avg[$i] = $weighted_sum/$weight_sum;
    } else {
        $avg[$i] = 0;
    }
}

### Print out the reports, depending on the command-line switches.

# print_report(PAPERNO, REPORTNO)
# Print every recorded field of one report to standard output, including
# the comments intended for the program committee only.
sub print_report {
    local($paperno, $reportno) = @_;
    print "Paper number: $paperno\n";
    print "Title: $title[$paperno]\n";
    print "Authors: $authors[$paperno]\n";
    print "Overall score: $score{$paperno,$reportno}\n";
    print "Confidence: $confidence{$paperno,$reportno}\n";
    print "PC Member Name: $pc{$paperno,$reportno}\n";
    print "Reviewer: $referee{$paperno,$reportno}\n\n";
    print "$comments{$paperno,$reportno}\n";
    print "$private{$paperno,$reportno}\n";
}

# print_summary_line(PNO)
# Print the one-line summary of paper PNO to standard output: the paper
# number, its weighted average, and each report's score with the PC member
# and confidence.  When $latex is set the line is a row for the LaTeX
# tabbing environment, with the average last.
sub print_summary_line {
    local($pno) = @_;
    # Keep the loop index and scratch list private so that callers' own $j
    # is not clobbered.
    local($j, @entries);
    if (!$latex) {
        print "$pno";
        printf(' %2.1f ', $avg[$pno]);
        for ($j=1; $j<=$count[$pno]; $j++) {
            print($score{$pno,$j}, '(', $pc{$pno,$j}, ':', $confidence{$pno,$j}, ') ');
        }
        print("\n");
    } else {
        # Collect the entries first so that they can be comma-separated
        # without special-casing the last one (or an empty list).
        for ($j=1; $j<=$count[$pno]; $j++) {
            push(@entries,
                 '$' . $score{$pno,$j} . '_{' . $pc{$pno,$j} . ','
                 . $confidence{$pno,$j} . '}$');
        }
        print "$pno", ' \> ';
        print join(', ', @entries);
        print ' \> ';
        printf('{\bf %2.1f}', $avg[$pno]);
        print ' \\\\', "\n";
    }
}

### Print non-private information into a file whose name is the paper number.

if ($bypaper) {
    for ($i=1; $i<=$num_papers; $i++) {
        if (!open(FOUT, ">$i")) {
            print STDERR "Cannot write file $i: $!\n";
            exit 1;
        }
        for ($j=1; $j<=$count[$i]; $j++) {
            print FOUT "Paper number: $i\n";
            print FOUT "Title: $title[$i]\n";
            print FOUT "Authors: $authors[$i]\n";
            print FOUT "Overall score: $score{$i,$j}\n";
            print FOUT "Confidence: $confidence{$i,$j}\n";
            print FOUT "$comments{$i,$j}\n";
            print FOUT "-----------------------------------------------------\n";
        }
        close(FOUT);
    }
}

### Sort

if ($sort || $triage) {
    # Paper numbers in decreasing order of weighted average score.  Sorting
    # the range directly gives the same list whatever the array base is.
    @sorted = sort { $avg[$b] <=> $avg[$a] } (1 .. $num_papers);
}

if ($sort) {
    if ($latex) {
        print '\documentstyle[times,12pt,fullpage]{article} '
            . '\pagestyle{myheadings} '
            . '\markboth{Score summary}{Score summary} '
            . '\begin{document} '
            . '\begin{tabbing} '
            . '{\bf Paper} \= {\bf Scores} \hspace*{5.5in}\= {\bf Average} \\\\';
        print "\n";
    } else {
        print "Paper\tAvg\tScores(Reviewer:confidence)\n\n";
    }
    # Called with "&" rather than the obsolete "do SUB(LIST)" form, which
    # newer perls reject as a syntax error.
    foreach $sorted_paper (@sorted) {
        &print_summary_line($sorted_paper);
    }
    if ($latex) {
        print '\end{tabbing} \end{document}';
        print "\n";
    }
}

### Triage

# Categorize papers into one of these groups:
#   1. All reviews 6+: all reviews recommend acceptance
#   2. At least one review 8+: at least one strong accept recommendation
#   3. At least two reviews 6+: two weak accept recommendations
#   4. At least one review 6+: at least one weak accept recommendation
#   5. All reviews 5-: no accept recommendations
# Sorry, no LaTeX version yet.
if ($triage) {
    # Assign every paper to one of the five triage groups.
    for ($i=1; $i<=$num_papers; $i++) {
        if ($min[$i] >= 6) {
            $triage[$i] = 1;
        } elsif ($max[$i] <= 5) {
            $triage[$i] = 5;
        } elsif ($max[$i] >= 8) {
            $triage[$i] = 2;
        } else {
            # Count reviews 6+
            $sixplus = 0;
            for ($j=1; $j<=$count[$i]; $j++) {
                if ($score{$i, $j} >= 6) {
                    $sixplus++;
                }
            }
            if ($sixplus > 1) {
                $triage[$i] = 3;
            } else {
                $triage[$i] = 4;
            }
        }
    }

    print "Paper\tAvg\tScores(Reviewer:confidence)\n\n";

    # Print the groups in order, each under an underlined heading and each
    # in decreasing order of weighted score (the order of @sorted).
    $triage_group = 0;
    foreach $triage_heading ('All reviews 6+', 'One review 8+',
                             'Two reviews 6+', 'One review 6+',
                             'All reviews 5-') {
        $triage_group++;
        print "$triage_heading\n", '=' x length($triage_heading), "\n";
        foreach $sorted_paper (@sorted) {
            if ($triage[$sorted_paper] == $triage_group) {
                &print_summary_line($sorted_paper);
            }
        }
    }
}

# print_selected_papers(PAPER NUMBERS...)
# Print the column header and one summary line for each listed paper.  When
# $full is set, follow that with the full reports, with a page break after
# each paper's reports.
sub print_selected_papers {
    local(@selected) = @_;
    local($selected_paper, $report);
    print "Paper\tAvg\tScores(Reviewer:confidence)\n\n";
    foreach $selected_paper (@selected) {
        &print_summary_line($selected_paper);
    }
    if ($full) {
        print "\nFull reports on following pages:\n\f";
        foreach $selected_paper (@selected) {
            for ($report=1; $report<=$count[$selected_paper]; $report++) {
                &print_report($selected_paper, $report);
            }
            print("\f");
        }
    }
}

### Print summary

if ($summary && $latex) {
    print '\documentstyle[times,12pt,fullpage]{article} '
        . '\pagestyle{myheadings} '
        . '\markboth{Score summary}{Score summary} '
        . '\begin{document} '
        . '\begin{tabbing} '
        . '{\bf Paper} \= {\bf Scores} \hspace*{5.5in}\= {\bf Average} \\\\';
    print "\n";
    for ($i=1; $i<=$num_papers; $i++) {
        &print_summary_line($i);
    }
    print '\end{tabbing} \end{document}';
    print "\n";
}

if ($summary && !$latex) {
    # All papers, in numerical order.  The header names the "Avg" column
    # because every summary line prints the average.
    &print_selected_papers(1 .. $num_papers);
}

###########################################################################
### Controversial papers
###

if ($controversial) {
    # Report the papers with highly divergent scores.
    @controversial_papers = ();
    for ($i=1; $i<=$num_papers; $i++) {
        if (int($max[$i]-$min[$i]) > $controversy_limit) {
            push(@controversial_papers, $i);
        }
    }
    &print_selected_papers(@controversial_papers);
}

###########################################################################
### Low-confidence papers
###

if ($unconfident) {
    # Report the papers without any reviews of high confidence.
    @unconfident_papers = ();
    for ($i=1; $i<=$num_papers; $i++) {
        if ($maxconf[$i] <= $confidence_limit) {
            push(@unconfident_papers, $i);
        }
    }
    &print_selected_papers(@unconfident_papers);
}

### Debugging; show all info about a paper.

if ($debug) {
    for ($i=1; $i<=$num_papers; $i++) {
        print "$i";
        printf(' %2.1f', $avg[$i]);
        print ' min=', "$min[$i]", ' max=', "$max[$i]", ' maxconf=', "$maxconf[$i]";
        print ' triage=', "$triage[$i]", ' ';
        for ($j=1; $j<=$count[$i]; $j++) {
            print($score{$i,$j}, '(', $pc{$i,$j}, ':', $confidence{$i,$j}, ') ');
        }
        print("\n");
    }
}