scripts/copyright-extractor
changeset 0 10291c5dc856
equal deleted inserted replaced
-1:000000000000 0:10291c5dc856
       
     1 #!/usr/perl5/bin/perl
       
     2 #
       
     3 # Script for extracting copyright and licensing information from source code
       
     4 #
       
     5 # CDDL HEADER START
       
     6 #
       
     7 # The contents of this file are subject to the terms of the
       
     8 # Common Development and Distribution License, Version 1.0 only
       
     9 # (the "License").  You may not use this file except in compliance
       
    10 # with the License.
       
    11 #
       
    12 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
       
    13 # or http://www.opensolaris.org/os/licensing.
       
    14 # See the License for the specific language governing permissions
       
    15 # and limitations under the License.
       
    16 #
       
    17 # When distributing Covered Code, include this CDDL HEADER in each
       
    18 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
       
    19 # If applicable, add the following below this CDDL HEADER, with the
       
    20 # fields enclosed by brackets "[]" replaced with your own identifying
       
    21 # information: Portions Copyright [yyyy] [name of copyright owner]
       
    22 #
       
    23 # CDDL HEADER END
       
    24 #
       
    25 #
       
    26 # Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
       
    27 # Use is subject to license terms.
       
    28 #
       
    29 
       
    30 use strict;
       
    31 use warnings;
       
    32 use Cwd;
       
    33 use Getopt::Long qw(:config gnu_compat no_auto_abbrev bundling pass_through);
       
    34 
       
    35 my $min_merge = 10;
       
    36 
       
    37 sub usage() {
       
    38     print "copyright-extractor [options] <source directory>\n";
       
    39     print "\n";
       
    40     print "Options:\n";
       
    41     print "  -r, --raw\n";
       
    42     print "         Print raw comments only, do not attempt to merge,\n";
       
    43     print "         only unify identical comments.\n";
       
    44     print "  -c, --copyright-first\n";
       
    45     print "         Attempt to move copyright statements to the start of the\n";
       
    46     print "         comment block.\n";
       
    47     print "         Note: when using this option, there is a chance that\n";
       
    48     print "         lines get mixed up if a copyright statement extends to\n";
       
    49     print "         more than one line.\n";
       
    50     print "  -m n --min=n\n";
       
    51     print "         only merge if there are at least n consecutive identical\n";
       
    52     print "         lines.  default: $min_merge\n";
       
    53     print "  -g, --gpl\n";
       
    54     print "         Add the disclaimer about GPLv2 to the beginning of the\n";
       
    55     print "         output if any of the comments look like GPL/LGPL\n";
       
    56     print "  -O, --omitted\n";
       
    57     print "         Print a list of files that were not checked\n";
       
    58     print "  -h, --help\n";
       
    59     print "         Print this usage information\n";
       
    60     print "  -d n, --debug=n\n";
       
    61     print "         Turn on debug output.\n";
       
    62 }
       
    63 
       
    64 my %blurbs;
       
    65 
       
    66 my $gpl_found = 0;
       
    67 
       
    68 my @files_omitted;
       
    69 my $debug = 0;
       
    70 my $dumb_mode = 0;
       
    71 my $copyright_first = 0;
       
    72 my $gpl_disclaimer = 0;
       
    73 my $print_omitted = 0;
       
    74 
       
    75 my @dirs;
       
    76 sub process_args {
       
    77     my $arg = shift;
       
    78     
       
    79     if ($arg =~ /^-/) {
       
    80 	print "Unknown option: $arg\n";
       
    81 	print "Try --help for usage.\n";
       
    82 	exit (1);
       
    83     }
       
    84 
       
    85     push (@dirs, $arg);
       
    86 }
       
    87 
       
    88 sub process_options {
       
    89     
       
    90     Getopt::Long::Configure ("bundling");
       
    91       
       
    92     GetOptions ('d|debug=n' => sub { shift; $debug = shift; },
       
    93 		'm|min=n' => sub { shift; $min_merge = shift; },
       
    94 		'r|raw' => sub { $dumb_mode = 1; },
       
    95 		'c|copyright-first' => sub { $copyright_first = 1; },
       
    96 		'O|omitted' => sub { $print_omitted = 1; },
       
    97 		'g|gpl' => sub { $gpl_disclaimer = 1; },
       
    98 		'h|help' => sub { usage (); exit (0); },
       
    99 		'<>' => \&process_args);
       
   100 }
       
   101 
       
   102 use constant FTYPE_IGNORE => 0;
       
   103 use constant FTYPE_C => 1;
       
   104 use constant FTYPE_PERL => 2;
       
   105 use constant FTYPE_PYTHON => 3;
       
   106 use constant FTYPE_SHELL => 4;
       
   107 use constant FTYPE_JAVA => 5;
       
   108 
       
   109 # a very simple file type check based on the file name
       
   110 # fname: the file name to classify
       
   111 # Returns: one of the above contants
       
   112 sub get_file_type ($) {
       
   113     my $fname = shift;
       
   114 
       
   115     if ($fname =~ /([~]$|\/(ChangeLog|configure\.in|Makefile|ltmain\.sh|README|NEWS|INSTALL|HACKING|configure$|config\.)$)/) {
       
   116 	# some file names to ignore
       
   117 	push (@files_omitted, $fname);
       
   118 	return FTYPE_IGNORE;
       
   119     } elsif ($fname =~ /\.(am|ac|o|lo|ps|la|cache|diff|out|log|guess|spec)$/) {
       
   120 	# some more file names to ignore
       
   121 	push (@files_omitted, $fname);
       
   122 	return FTYPE_IGNORE;
       
   123     } elsif ($fname =~ /\.(c|h|hpp|cpp|C|CPP|cc|CC)$/) {
       
   124 	return FTYPE_C;
       
   125     } elsif ($fname =~ /\.pl$/) {
       
   126 	return FTYPE_PERL;
       
   127     } elsif ($fname =~ /\.py$/) {
       
   128 	return FTYPE_PYTHON;
       
   129     } elsif ($fname =~ /\.(sh|ksh|csh)$/) {
       
   130 	return FTYPE_SHELL;
       
   131     } elsif ($fname =~ /\.(java)$/) {
       
   132 	return FTYPE_JAVA;
       
   133     } else {
       
   134 	# FIXME: could do something smart here
       
   135 	push (@files_omitted, $fname);
       
   136 	return FTYPE_IGNORE;
       
   137     }
       
   138 }
       
   139 
       
   140 # return 1 if the string includes words that suggest that the string
       
   141 # is some sort of legal text.  If none of these words appear in the
       
   142 # string, this program will ignore it and assume that it's some other
       
   143 # comment that happens to be at the beginning of the file
       
   144 sub is_legalese ($) {
       
   145     my $str = shift;
       
   146     
       
   147     $str = lc ($str);
       
   148     if ($str =~ /(licen[cs]|legal|terms|condition|copyright|rights|\(c\)|copying|usage|binary|distribut|gpl)/) {
       
   149 	return 1;
       
   150     }
       
   151 
       
   152     return 0;
       
   153 }
       
   154 
       
   155 # extract the comments 
       
   156 sub extract_comments_shell($) {
       
   157     my $fname = shift;
       
   158 
       
   159     my $blurb;
       
   160     my $line;
       
   161     open SRCFILE, "<$fname" or die "failed to open file $fname";
       
   162     while ($line = <SRCFILE>) {
       
   163 	chomp ($line);
       
   164 	next if $line =~ /^#!/;
       
   165 	last if $line =~ /^[^#]/;
       
   166 	$line =~ s/^#//;
       
   167 	# delete certain types of comments, like emacs mode spec, etc
       
   168 	$line =~ s/^\s*-\*-.*-\*-\s*$//;
       
   169 	$line =~ s/^\s\$Id:.*\$\s*$//;
       
   170 	$line =~ s/^\s*vim(:\S+=\S+)+\s*$//;
       
   171 
       
   172 	chomp ($line);
       
   173 
       
   174 	if (defined $blurb) {
       
   175 	    $blurb = $blurb . "\n" . $line;
       
   176 	} elsif ($line ne '') {
       
   177 	    $blurb = $line;
       
   178 	}
       
   179 	$line = undef;
       
   180     }
       
   181     close SRCFILE;
       
   182 
       
   183     if (defined ($blurb) and is_legalese ($blurb)) {
       
   184 	$blurbs{$fname} = $blurb;
       
   185     }
       
   186 }
       
   187 
       
   188 sub extract_comments_c($) {
       
   189     my $fname = shift;
       
   190 
       
   191     my $blurb;
       
   192     my $in_comment_block = 0;
       
   193     open SRCFILE, "<$fname" or die "failed to open file $fname";
       
   194     my $line;
       
   195     while ($line = <SRCFILE>) {
       
   196 	chomp ($line);
       
   197 	if ($in_comment_block) {
       
   198 	    if ($line =~ /\*\//) {
       
   199 		$line =~ s/\*\/.*//;
       
   200 		$in_comment_block = 0;
       
   201 	    } elsif ($line =~ /^\/\//) {
       
   202 		$line =~ s/^\/\///;
       
   203 	    } elsif ($line =~ /^( \*|\*)/) {
       
   204 		$line =~ s/^( \*|\*)//;
       
   205 	    }
       
   206 	} else {
       
   207 	    if ($line =~ /^\s*\/\*(.*)\*\//) {
       
   208 		$line =~ s/^\s*\/\*(.*)\*\//$1/g;
       
   209 	    } elsif ($line =~ /^\s*\/\*/) {
       
   210 		$in_comment_block = 1;
       
   211 		$line =~ s/^\s*\/\*//;
       
   212 	    } elsif ($line =~ /^\/\//) {
       
   213 		$line =~ s/^\s*\/\///;
       
   214 	    } elsif ($line eq '') {
       
   215 		# add to blurb if not the start of the blurb
       
   216 	    } else {
       
   217 		# end of comments, stop processing
       
   218 		last;
       
   219 	    }
       
   220 	}
       
   221 	# delete certain types of comments, like emacs mode spec, etc
       
   222 	$line =~ s/^\s*-\*-.*-\*-\s*$//;
       
   223 	$line =~ s/^\s*vim(:\S+=\S+)+\s*$//;
       
   224 	$line =~ s/^\s\$Id:.*\$\s*$//;
       
   225 	$line =~ s/^\s*\**\s*\\ingroup\s*.*$//;
       
   226 	$line =~ s/^\s*\**\s*\\file\s*.*$//;
       
   227 	$line =~ s/^\s*\**\s*\@-type\@\s*$//;
       
   228 
       
   229 	chomp ($line);
       
   230 
       
   231 	if (defined $blurb) {
       
   232 	    $blurb = $blurb . "\n" . $line;
       
   233 	} elsif ($line ne '') {
       
   234 	    $blurb = $line;
       
   235 	}
       
   236 	$line = undef;
       
   237     }
       
   238     close SRCFILE;
       
   239     if (defined ($blurb) and is_legalese ($blurb)) {
       
   240 	$blurbs{$fname} = $blurb;
       
   241     }
       
   242 }
       
   243 
       
   244 sub extract_comments_python($) {
       
   245     my $fname = shift;
       
   246 
       
   247     my $blurb;
       
   248     my $in_comment_block = 0;
       
   249     open SRCFILE, "<$fname" or die "failed to open file $fname";
       
   250     my $line;
       
   251     while ($line = <SRCFILE>) {
       
   252 	chomp ($line);
       
   253 	if ($in_comment_block) {
       
   254 	    if ($line =~ /"""/) {
       
   255 		$line =~ s/"""//;
       
   256 		$in_comment_block = 0;
       
   257 	    } elsif ($line =~ /#/) {
       
   258 		$line =~ s/^#//;
       
   259 	    }
       
   260 	} else {
       
   261 	    if ($line =~ /^\s*"""(.*)"""/) {
       
   262 		$line =~ s/^\s*"""(.*)"""/$1/g;
       
   263 	    } elsif ($line =~ /^\s*"""/) {
       
   264 		$in_comment_block = 1;
       
   265 		$line =~ s/^\s*"""//;
       
   266 	    } elsif ($line =~ /^\/\//) {
       
   267 		$line =~ s/^\s*"""//;
       
   268 	    } elsif ($line eq '') {
       
   269 		# add to blurb if not the start of the blurb
       
   270 	    } else {
       
   271 		# end of comments, stop processing
       
   272 		last;
       
   273 	    }
       
   274 	}
       
   275 	# delete certain types of comments, like emacs mode spec, etc
       
   276 	$line =~ s/^\s*-\*-.*-\*-\s*$//;
       
   277 	$line =~ s/^\s*vim(:\S+=\S+)+\s*$//;
       
   278 	$line =~ s/^\s\$Id:.*\$\s*$//;
       
   279 
       
   280 	chomp ($line);
       
   281 
       
   282 	if (defined $blurb) {
       
   283 	    $blurb = $blurb . "\n" . $line;
       
   284 	} elsif ($line ne '') {
       
   285 	    $blurb = $line;
       
   286 	}
       
   287 	$line = undef;
       
   288     }
       
   289     close SRCFILE;
       
   290     if (defined ($blurb) and is_legalese ($blurb)) {
       
   291 	$blurbs{$fname} = $blurb;
       
   292     }
       
   293 }
       
   294 
       
   295 sub extract_comments($);
       
   296 
       
   297 # process a directory or a file recursively: extract the comments
       
   298 # from the beginning of each file and save them in @blurbs
       
   299 sub extract_comments($) {
       
   300     my $fname = shift;
       
   301     if (-d $fname) {
       
   302 	# directory -> process recursively
       
   303 	opendir(DIR, $fname) || die("Cannot open directory $fname");
       
   304 	my @thefiles= readdir(DIR);
       
   305 	closedir(DIR);
       
   306 	foreach my $f (@thefiles) {
       
   307 	    next if $f eq '.';
       
   308 	    next if $f eq '..';
       
   309 	    next if $f eq '.libs';
       
   310 	    next if $f eq 'intl';
       
   311 	    extract_comments ("$fname/$f");
       
   312 	}
       
   313     } elsif (-f $fname) {
       
   314 	# regular file -> identify file type and read comments
       
   315 	my $ftype = get_file_type ($fname);
       
   316 	return if $ftype == FTYPE_IGNORE;
       
   317 	if ($ftype == FTYPE_C) {
       
   318 	    extract_comments_c ($fname);
       
   319 	} elsif ($ftype == FTYPE_PERL) {
       
   320 	    extract_comments_shell ($fname);
       
   321 	} elsif ($ftype == FTYPE_SHELL) {
       
   322 	    extract_comments_shell ($fname);
       
   323 	} elsif ($ftype == FTYPE_PYTHON) {
       
   324 	    extract_comments_python ($fname);
       
   325 	} elsif ($ftype == FTYPE_JAVA) {
       
   326 	    extract_comments_c ($fname);
       
   327 	}
       
   328     } else {
       
   329 	print STDERR "ERROR: $fname: no such file or directory\n";
       
   330     }
       
   331 }
       
   332 
       
   333 # like uniq(1)
       
   334 sub uniq (@) {
       
   335     my @list = @_;
       
   336     my $prev;
       
   337     if (not @list) {
       
   338 	return @list;
       
   339     }
       
   340     $prev = $list[0];
       
   341     my @uniq_list = ($prev);
       
   342     foreach my $str (@list) {
       
   343 	next if $str eq $prev;
       
   344 	push (@uniq_list, $str);
       
   345 	$prev = $str;
       
   346     }
       
   347     return @uniq_list;
       
   348 }
       
   349 
       
   350 # return the number of lines in str
       
   351 sub line_count ($) {
       
   352     my $str = shift;
       
   353 
       
   354     return ($str =~ tr/\n//) + 1;
       
   355 }
       
   356 
       
   357 # return 1 if str is a member of the list, 0 otherwise
       
   358 sub is_member ($@) {
       
   359     my $str = shift;
       
   360     my @list = @_;
       
   361 
       
   362     foreach my $s (@list) {
       
   363 	if ($str eq $s) {
       
   364 	    return 1;
       
   365 	}
       
   366     }
       
   367     
       
   368     return 0;
       
   369 }
       
   370 
       
   371 sub do_merge_comments ($$$$$);
       
   372 
       
   373 # Args: references to lists of strings (lines of the texts)
       
   374 #
       
   375 # ml1: lines from the first text already processed
       
   376 # l1:  remaining lines of the 1st text
       
   377 # nl1: remaining normalised lines of the 1st text
       
   378 # l2:  remaining lines of the 2nd text
       
   379 # nl2: remaining normalised lines of the 1st text
       
   380 #
       
   381 # Return: list of merged lines
       
   382 sub do_merge_comments ($$$$$) {
       
   383     my $ml1_ref = shift;
       
   384     my $l1_ref = shift;
       
   385     my $nl1_ref = shift;
       
   386     my $l2_ref = shift;
       
   387     my $nl2_ref = shift;
       
   388 
       
   389     my @mlines1 = @$ml1_ref;
       
   390     my @nmlines1;
       
   391     my @lines1 = @$l1_ref;
       
   392     my @norm_lines1 = @$nl1_ref;
       
   393     my @lines2 = @$l2_ref;
       
   394     my @norm_lines2 = @$nl2_ref;
       
   395     my @nmlines2;
       
   396     my @mlines2;
       
   397 
       
   398     my @merged_lines;
       
   399     my $line1;
       
   400     my $norm_line1;
       
   401     my $line2;
       
   402     my $norm_line2;
       
   403 
       
   404     if ($debug > 2) {
       
   405 	print "DEBUG: attempting to merge\n";
       
   406 	if (@mlines1) {
       
   407 	    print "DEBUG: lines already processed from 1st text:\n";
       
   408 	    print "DEBUG: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n";
       
   409 	    foreach my $l (@mlines1) {
       
   410 		print "DEBUG: $l\n";
       
   411 	    }
       
   412 	}
       
   413 	print "DEBUG: 1st text:\n";
       
   414 	print "DEBUG: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n";
       
   415 	foreach my $l (@lines1) {
       
   416 	    print "DEBUG: $l\n";
       
   417 	}
       
   418 	print "DEBUG: 2nd text:\n";
       
   419 	print "DEBUG: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n";
       
   420 	foreach my $l (@lines2) {
       
   421 	    print "DEBUG: $l\n";
       
   422 	}
       
   423 	print "DEBUG: <<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n";
       
   424     }
       
   425 
       
   426     if (not @lines1) {
       
   427 	push (@merged_lines, @mlines1);
       
   428 	push (@merged_lines, @lines2);
       
   429 	return @merged_lines;
       
   430     }
       
   431 
       
   432     if (not @lines2) {
       
   433 	push (@merged_lines, @mlines1);
       
   434 	push (@merged_lines, @lines1);
       
   435 	return @merged_lines;
       
   436     }
       
   437 
       
   438     # first save the lines only appearing in lines1,
       
   439     # stop at the first 2 common lines that are not empty
       
   440     while (@lines1) {
       
   441 	$line1 = shift (@lines1);
       
   442 	$norm_line1 = shift (@norm_lines1);
       
   443 	if (($norm_line1 ne '') and
       
   444 	    is_member ($norm_line1, @norm_lines2)) {
       
   445 	    last;
       
   446 	} else {
       
   447 	    push (@mlines1, $line1);
       
   448 	    push (@nmlines1, $norm_line1);
       
   449 	}
       
   450     }
       
   451     # now save the lines appearing in lines2 before the common line
       
   452     while (@lines2) {
       
   453 	$line2 = shift (@lines2);
       
   454 	$norm_line2 = shift (@norm_lines2);
       
   455 
       
   456 	if ($norm_line2 ne $norm_line1) {
       
   457 	    push (@mlines2, $line2);
       
   458 	    push (@nmlines2, $line2);
       
   459 	} else {
       
   460             last;
       
   461         }
       
   462     }
       
   463     my @common_lines;
       
   464     my @ncommon_lines;
       
   465     # now save the first common line
       
   466     if ($norm_line1 eq $norm_line2) {
       
   467 	if ($debug > 3) {
       
   468 	    print "DEBUG: 1st common line:\n";
       
   469 	    print "DEBUG: $line1\n";
       
   470 	}
       
   471 	@common_lines = ($line1);
       
   472 	@ncommon_lines = ($norm_line2);
       
   473     } else {
       
   474 	# no common lines were found
       
   475 	# lines1 should be empty, all lines moved to mlines1
       
   476 	push (@merged_lines, @mlines1);
       
   477 	push (@merged_lines, @mlines2);
       
   478 	return @merged_lines;
       
   479     }
       
   480     # save all common lines
       
   481     while (@lines1 and @lines2) {
       
   482 	$line1 = shift (@lines1);
       
   483 	$norm_line1 = shift (@norm_lines1);
       
   484 	$line2 = shift (@lines2);
       
   485 	$norm_line2 = shift (@norm_lines2);
       
   486 	if ($norm_line1 ne $norm_line2) {
       
   487 	    if ($debug > 3) {
       
   488 		print "DEBUG: no more common lines.\n";
       
   489 	    }
       
   490 	    unshift (@lines1, $line1);
       
   491 	    unshift (@norm_lines1, $norm_line1);
       
   492 	    unshift (@lines2, $line2);
       
   493 	    unshift (@norm_lines2, $norm_line2);
       
   494 	    last;
       
   495 	} else {
       
   496 	    if ($debug > 3) {
       
   497 		print "DEBUG: common line:\n";
       
   498 		print "DEBUG: $line1\n";
       
   499 	    }
       
   500 	    push (@common_lines, $line1);
       
   501 	    push (@ncommon_lines, $norm_line1);
       
   502 	}
       
   503     }
       
   504 
       
   505     # only merge if the number of common lines is at least $min_merge
       
   506     # or we are at the end of one of the texts or if at the
       
   507     # beginning of the 2nd text
       
   508     if (($#common_lines >= $min_merge) or 
       
   509 	(not @lines1) or (not @lines2) or
       
   510 	(not @mlines2)) {
       
   511 	if ($debug > 1) {
       
   512 	    print "DEBUG: common lines:\n";
       
   513 	    foreach my $l (@common_lines) {
       
   514 		print "DEBUG: $l\n";
       
   515 	    }
       
   516 	    print "DEBUG: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n";
       
   517 	}
       
   518 	# first the lines from the 1st text
       
   519 	push (@merged_lines, @mlines1);
       
   520 	# then the lines from the 2nd text
       
   521 	push (@merged_lines, @mlines2);
       
   522 	# finally the common lines
       
   523 	push (@merged_lines, @common_lines);
       
   524     } else {
       
   525 	# don't merge
       
   526 
       
   527 	# add the common lines to the processed part of the 1st text
       
   528 	push (@mlines1, @common_lines);
       
   529 
       
   530 	# add the common lines back to the unprocessed part of the 2nd text
       
   531 	unshift (@lines2, @common_lines);
       
   532 	# add the lines before the common lines back to the unprocessed
       
   533 	# part of the 2nd text
       
   534 	unshift (@lines2, @mlines2);
       
   535 	# add the normalised common lines back to
       
   536 	# the unprocessed part of the 2nd text
       
   537 	unshift (@norm_lines2, @ncommon_lines);
       
   538 	# add the normalised lines before the common lines back to
       
   539         # the unprocessed part of the 2nd text
       
   540 	unshift (@norm_lines2, @nmlines2);
       
   541 
       
   542 	# add the normalised common lines back to 
       
   543 	# try to merge the rest of the texts
       
   544 	my @more_merged_lines = do_merge_comments (\@mlines1,
       
   545 	    \@lines1, \@norm_lines1, \@lines2, \@norm_lines2);
       
   546 	push (@merged_lines, @more_merged_lines);	
       
   547 	return @merged_lines;
       
   548     }
       
   549 
       
   550     if (not @lines1) {
       
   551 	push (@merged_lines, @lines2);
       
   552     } elsif (not @lines2) {
       
   553 	push (@merged_lines, @lines1);
       
   554     } else {
       
   555 	# repeat the process for the remaining lines
       
   556 	my @l1;
       
   557 	my @more_merged_lines = do_merge_comments (\@l1,
       
   558 	    \@lines1, \@norm_lines1, \@lines2, \@norm_lines2);
       
   559 	push (@merged_lines, @more_merged_lines);
       
   560     }
       
   561 
       
   562     return @merged_lines;
       
   563 }
       
   564 
       
   565 sub merge_comments ($$) {
       
   566     my $str1 = shift;
       
   567     my $str2 = shift;
       
   568     my @lines1 = split /\n/, $str1;
       
   569     my @lines2 = split /\n/, $str2;
       
   570     my @norm_lines1;
       
   571     my @norm_lines2;
       
   572 
       
   573     foreach my $l0 (@lines1) {
       
   574 	# ignore whitespace differences
       
   575 	my $l1 = "$l0";
       
   576 	$l1 =~ s/\s+/ /g;
       
   577 	$l1 =~ s/^ //g;
       
   578 	chomp ($l1);
       
   579 	$l1 =~ s/ $//g;
       
   580 	$l1 = lc ($l1);
       
   581 	push (@norm_lines1, $l1);
       
   582     }
       
   583     foreach my $l0 (@lines2) {
       
   584 	# ignore whitespace differences
       
   585 	my $l2 = "$l0";
       
   586 	$l2 =~ s/\s+/ /g;
       
   587 	$l2 =~ s/^ //g;
       
   588 	chomp ($l2);
       
   589 	$l2 =~ s/ $//g;
       
   590 	$l2 = lc ($l2);
       
   591 	push (@norm_lines2, $l2);
       
   592     }
       
   593 
       
   594     my @l0;
       
   595     my @merged_lines = do_merge_comments (\@l0, \@lines1, \@norm_lines1,
       
   596 					  \@lines2, \@norm_lines2);
       
   597     my $merged_str;
       
   598     if ($copyright_first) {
       
   599 	my @copyright_lines;
       
   600 	my @non_cr_lines;
       
   601 
       
   602 	foreach my $line (@merged_lines) {
       
   603 	    if ($line =~ /^\s*(copyright|\(c\)|©|author:|all rights reserved)/i) {
       
   604 		push (@copyright_lines, $line);
       
   605 	    } else {
       
   606 		push (@non_cr_lines, $line);
       
   607 	    }
       
   608 	}
       
   609 	@copyright_lines = sort (@copyright_lines);
       
   610 	@copyright_lines = uniq (@copyright_lines);
       
   611 	$merged_str = join ("\n", (@copyright_lines, @non_cr_lines));
       
   612     } else {
       
   613 	$merged_str = join ("\n", @merged_lines);
       
   614     }
       
   615     return $merged_str;
       
   616 }
       
   617 
       
   618 my @all_comments;
       
   619 my %comments;
       
   620 
       
   621 sub unify_comments () {
       
   622     foreach my $fname (keys %blurbs) {
       
   623 	if ($blurbs{$fname} =~ /\b(gpl|lgpl|gnu\s+(library\s+|lesser\s+|)general\s+public\s+license)\b/si) {
       
   624 	    # looks like GNU GPL/LGPL
       
   625 	    $gpl_found = 1;
       
   626 	}
       
   627 	if (defined ($comments{$blurbs{$fname}})) {
       
   628 	    $comments{$blurbs{$fname}} = $comments{$blurbs{$fname}} .
       
   629 		", $fname";
       
   630 	} else {
       
   631 	    $comments{$blurbs{$fname}} = $fname;
       
   632 	}
       
   633     }
       
   634     @all_comments = (keys %comments);
       
   635 }
       
   636 
       
   637 sub smart_merge_comments () {
       
   638     my @temp_all_comments = @all_comments;
       
   639     @all_comments = ();
       
   640 
       
   641     my $i = 0;
       
   642     while ($i <= $#temp_all_comments) {
       
   643 	my $did_merge = 0;
       
   644 	my $c1 = $temp_all_comments[$i];
       
   645 	for (my $j = $i+1; $j <= $#temp_all_comments; $j++) {
       
   646 	    my $c2 = $temp_all_comments[$j];
       
   647 	    my $c1_lc = line_count ($c1);
       
   648 	    my $c2_lc = line_count ($c2);
       
   649 	    my $c12_merged = merge_comments ($c1, $c2);
       
   650 	    my $c12_lc = line_count ($c12_merged);
       
   651 	    # if more than 10 lines or more than 25% saved then
       
   652 	    # keep the merged comments
       
   653 	    my $diff_lc = $c1_lc + $c2_lc - $c12_lc;
       
   654 	    if (($diff_lc > 10) or ($c12_lc <= ($c1_lc + $c2_lc)*0.75)) {
       
   655 		if ($debug > 0) {
       
   656 		    print "DEBUG*****************************************\n";
       
   657 		    print "$c1\n";
       
   658 		    print "++++++++++++++++++++++++++++++++++++++++++++++\n";
       
   659 		    print "$c2\n";
       
   660 		    print "==============================================\n";
       
   661 		    print "$c12_merged\n";
       
   662 		    print "*****************************************DEBUG\n";
       
   663 		}
       
   664 		$temp_all_comments[$j] = $c12_merged;
       
   665 		$did_merge = 1;
       
   666 		$comments{$c12_merged} = "$comments{$c1}, $comments{$c2}";
       
   667 		last;
       
   668 	    }
       
   669 	}
       
   670 	if (not $did_merge) {
       
   671 	    push (@all_comments, $c1);
       
   672 	}
       
   673 	$i++;
       
   674     }
       
   675 }
       
   676 
       
   677 sub print_comments () {
       
   678     if ($gpl_found and $gpl_disclaimer) {
       
   679 	print << "__EOF"
       
   680 For the avoidance of doubt, except that if any license choice other
       
   681 than GPL or LGPL is available it will apply instead, Sun elects to
       
   682 use only the General Public License version 2 (GPLv2) at this time
       
   683 for any software where a choice of GPL license versions is made
       
   684 available with the language indicating that GPLv2 or any later
       
   685 version may be used, or where a choice of which version of the GPL
       
   686 is applied is otherwise unspecified.
       
   687 
       
   688 --------------------------------------------------------------------
       
   689 
       
   690 __EOF
       
   691     }
       
   692     foreach my $comment (@all_comments) {
       
   693 	print "$comments{$comment}:\n";
       
   694 	print $comment;
       
   695 	print "\n\n" .
       
   696 	    "--------------------------------------------------------------------" .
       
   697 	    "\n\n";
       
   698     }
       
   699 }
       
   700 
       
   701 sub main() {
       
   702     my $srcdir;
       
   703 
       
   704     process_options ();
       
   705 
       
   706     if (not @dirs) {
       
   707 	usage();
       
   708 	exit (1);
       
   709     }
       
   710 
       
   711     foreach my $srcdir (@dirs) {
       
   712 	if ($srcdir =~ /^\./) {
       
   713 	    $srcdir = getcwd();
       
   714 	}
       
   715 	extract_comments ($srcdir);
       
   716     }
       
   717 
       
   718     unify_comments ();
       
   719     if (not $dumb_mode) {
       
   720 	smart_merge_comments ();
       
   721     }
       
   722 
       
   723     print_comments ();
       
   724 
       
   725     if ($print_omitted and @files_omitted) {
       
   726 	print "\nThe following files were not checked:\n\n";
       
   727 	foreach my $fname (@files_omitted) {
       
   728 	    print "    $fname\n";
       
   729 	}
       
   730     }
       
   731 }
       
   732 
       
   733 main();