|
1 #!/usr/perl5/bin/perl |
|
2 # |
|
3 # Script for extracting copyright and licensing information from source code |
|
4 # |
|
5 # CDDL HEADER START |
|
6 # |
|
7 # The contents of this file are subject to the terms of the |
|
8 # Common Development and Distribution License, Version 1.0 only |
|
9 # (the "License"). You may not use this file except in compliance |
|
10 # with the License. |
|
11 # |
|
12 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
13 # or http://www.opensolaris.org/os/licensing. |
|
14 # See the License for the specific language governing permissions |
|
15 # and limitations under the License. |
|
16 # |
|
17 # When distributing Covered Code, include this CDDL HEADER in each |
|
18 # file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
19 # If applicable, add the following below this CDDL HEADER, with the |
|
20 # fields enclosed by brackets "[]" replaced with your own identifying |
|
21 # information: Portions Copyright [yyyy] [name of copyright owner] |
|
22 # |
|
23 # CDDL HEADER END |
|
24 # |
|
25 # |
|
26 # Copyright 2008 Sun Microsystems, Inc. All rights reserved. |
|
27 # Use is subject to license terms. |
|
28 # |
|
29 |
|
30 use strict; |
|
31 use warnings; |
|
32 use Cwd; |
|
33 use Getopt::Long qw(:config gnu_compat no_auto_abbrev bundling pass_through); |
|
34 |
|
35 my $min_merge = 10; |
|
36 |
|
37 sub usage() { |
|
38 print "copyright-extractor [options] <source directory>\n"; |
|
39 print "\n"; |
|
40 print "Options:\n"; |
|
41 print " -r, --raw\n"; |
|
42 print " Print raw comments only, do not attempt to merge,\n"; |
|
43 print " only unify identical comments.\n"; |
|
44 print " -c, --copyright-first\n"; |
|
45 print " Attempt to move copyright statements to the start of the\n"; |
|
46 print " comment block.\n"; |
|
47 print " Note: when using this option, there is a chance that\n"; |
|
48 print " lines get mixed up if a copyright statement extends to\n"; |
|
49 print " more than one line.\n"; |
|
50 print " -m n --min=n\n"; |
|
51 print " only merge if there are at least n consecutive identical\n"; |
|
52 print " lines. default: $min_merge\n"; |
|
53 print " -g, --gpl\n"; |
|
54 print " Add the disclaimer about GPLv2 to the beginning of the\n"; |
|
55 print " output if any of the comments look like GPL/LGPL\n"; |
|
56 print " -O, --omitted\n"; |
|
57 print " Print a list of files that were not checked\n"; |
|
58 print " -h, --help\n"; |
|
59 print " Print this usage information\n"; |
|
60 print " -d n, --debug=n\n"; |
|
61 print " Turn on debug output.\n"; |
|
62 } |
|
63 |
|
64 my %blurbs; |
|
65 |
|
66 my $gpl_found = 0; |
|
67 |
|
68 my @files_omitted; |
|
69 my $debug = 0; |
|
70 my $dumb_mode = 0; |
|
71 my $copyright_first = 0; |
|
72 my $gpl_disclaimer = 0; |
|
73 my $print_omitted = 0; |
|
74 |
|
75 my @dirs; |
|
76 sub process_args { |
|
77 my $arg = shift; |
|
78 |
|
79 if ($arg =~ /^-/) { |
|
80 print "Unknown option: $arg\n"; |
|
81 print "Try --help for usage.\n"; |
|
82 exit (1); |
|
83 } |
|
84 |
|
85 push (@dirs, $arg); |
|
86 } |
|
87 |
|
88 sub process_options { |
|
89 |
|
90 Getopt::Long::Configure ("bundling"); |
|
91 |
|
92 GetOptions ('d|debug=n' => sub { shift; $debug = shift; }, |
|
93 'm|min=n' => sub { shift; $min_merge = shift; }, |
|
94 'r|raw' => sub { $dumb_mode = 1; }, |
|
95 'c|copyright-first' => sub { $copyright_first = 1; }, |
|
96 'O|omitted' => sub { $print_omitted = 1; }, |
|
97 'g|gpl' => sub { $gpl_disclaimer = 1; }, |
|
98 'h|help' => sub { usage (); exit (0); }, |
|
99 '<>' => \&process_args); |
|
100 } |
|
101 |
|
102 use constant FTYPE_IGNORE => 0; |
|
103 use constant FTYPE_C => 1; |
|
104 use constant FTYPE_PERL => 2; |
|
105 use constant FTYPE_PYTHON => 3; |
|
106 use constant FTYPE_SHELL => 4; |
|
107 use constant FTYPE_JAVA => 5; |
|
108 |
|
109 # a very simple file type check based on the file name |
|
110 # fname: the file name to classify |
|
111 # Returns: one of the above contants |
|
112 sub get_file_type ($) { |
|
113 my $fname = shift; |
|
114 |
|
115 if ($fname =~ /([~]$|\/(ChangeLog|configure\.in|Makefile|ltmain\.sh|README|NEWS|INSTALL|HACKING|configure$|config\.)$)/) { |
|
116 # some file names to ignore |
|
117 push (@files_omitted, $fname); |
|
118 return FTYPE_IGNORE; |
|
119 } elsif ($fname =~ /\.(am|ac|o|lo|ps|la|cache|diff|out|log|guess|spec)$/) { |
|
120 # some more file names to ignore |
|
121 push (@files_omitted, $fname); |
|
122 return FTYPE_IGNORE; |
|
123 } elsif ($fname =~ /\.(c|h|hpp|cpp|C|CPP|cc|CC)$/) { |
|
124 return FTYPE_C; |
|
125 } elsif ($fname =~ /\.pl$/) { |
|
126 return FTYPE_PERL; |
|
127 } elsif ($fname =~ /\.py$/) { |
|
128 return FTYPE_PYTHON; |
|
129 } elsif ($fname =~ /\.(sh|ksh|csh)$/) { |
|
130 return FTYPE_SHELL; |
|
131 } elsif ($fname =~ /\.(java)$/) { |
|
132 return FTYPE_JAVA; |
|
133 } else { |
|
134 # FIXME: could do something smart here |
|
135 push (@files_omitted, $fname); |
|
136 return FTYPE_IGNORE; |
|
137 } |
|
138 } |
|
139 |
|
140 # return 1 if the string includes words that suggest that the string |
|
141 # is some sort of legal text. If none of these words appear in the |
|
142 # string, this program will ignore it and assume that it's some other |
|
143 # comment that happens to be at the beginning of the file |
|
144 sub is_legalese ($) { |
|
145 my $str = shift; |
|
146 |
|
147 $str = lc ($str); |
|
148 if ($str =~ /(licen[cs]|legal|terms|condition|copyright|rights|\(c\)|copying|usage|binary|distribut|gpl)/) { |
|
149 return 1; |
|
150 } |
|
151 |
|
152 return 0; |
|
153 } |
|
154 |
|
155 # extract the comments |
|
156 sub extract_comments_shell($) { |
|
157 my $fname = shift; |
|
158 |
|
159 my $blurb; |
|
160 my $line; |
|
161 open SRCFILE, "<$fname" or die "failed to open file $fname"; |
|
162 while ($line = <SRCFILE>) { |
|
163 chomp ($line); |
|
164 next if $line =~ /^#!/; |
|
165 last if $line =~ /^[^#]/; |
|
166 $line =~ s/^#//; |
|
167 # delete certain types of comments, like emacs mode spec, etc |
|
168 $line =~ s/^\s*-\*-.*-\*-\s*$//; |
|
169 $line =~ s/^\s\$Id:.*\$\s*$//; |
|
170 $line =~ s/^\s*vim(:\S+=\S+)+\s*$//; |
|
171 |
|
172 chomp ($line); |
|
173 |
|
174 if (defined $blurb) { |
|
175 $blurb = $blurb . "\n" . $line; |
|
176 } elsif ($line ne '') { |
|
177 $blurb = $line; |
|
178 } |
|
179 $line = undef; |
|
180 } |
|
181 close SRCFILE; |
|
182 |
|
183 if (defined ($blurb) and is_legalese ($blurb)) { |
|
184 $blurbs{$fname} = $blurb; |
|
185 } |
|
186 } |
|
187 |
|
188 sub extract_comments_c($) { |
|
189 my $fname = shift; |
|
190 |
|
191 my $blurb; |
|
192 my $in_comment_block = 0; |
|
193 open SRCFILE, "<$fname" or die "failed to open file $fname"; |
|
194 my $line; |
|
195 while ($line = <SRCFILE>) { |
|
196 chomp ($line); |
|
197 if ($in_comment_block) { |
|
198 if ($line =~ /\*\//) { |
|
199 $line =~ s/\*\/.*//; |
|
200 $in_comment_block = 0; |
|
201 } elsif ($line =~ /^\/\//) { |
|
202 $line =~ s/^\/\///; |
|
203 } elsif ($line =~ /^( \*|\*)/) { |
|
204 $line =~ s/^( \*|\*)//; |
|
205 } |
|
206 } else { |
|
207 if ($line =~ /^\s*\/\*(.*)\*\//) { |
|
208 $line =~ s/^\s*\/\*(.*)\*\//$1/g; |
|
209 } elsif ($line =~ /^\s*\/\*/) { |
|
210 $in_comment_block = 1; |
|
211 $line =~ s/^\s*\/\*//; |
|
212 } elsif ($line =~ /^\/\//) { |
|
213 $line =~ s/^\s*\/\///; |
|
214 } elsif ($line eq '') { |
|
215 # add to blurb if not the start of the blurb |
|
216 } else { |
|
217 # end of comments, stop processing |
|
218 last; |
|
219 } |
|
220 } |
|
221 # delete certain types of comments, like emacs mode spec, etc |
|
222 $line =~ s/^\s*-\*-.*-\*-\s*$//; |
|
223 $line =~ s/^\s*vim(:\S+=\S+)+\s*$//; |
|
224 $line =~ s/^\s\$Id:.*\$\s*$//; |
|
225 $line =~ s/^\s*\**\s*\\ingroup\s*.*$//; |
|
226 $line =~ s/^\s*\**\s*\\file\s*.*$//; |
|
227 $line =~ s/^\s*\**\s*\@-type\@\s*$//; |
|
228 |
|
229 chomp ($line); |
|
230 |
|
231 if (defined $blurb) { |
|
232 $blurb = $blurb . "\n" . $line; |
|
233 } elsif ($line ne '') { |
|
234 $blurb = $line; |
|
235 } |
|
236 $line = undef; |
|
237 } |
|
238 close SRCFILE; |
|
239 if (defined ($blurb) and is_legalese ($blurb)) { |
|
240 $blurbs{$fname} = $blurb; |
|
241 } |
|
242 } |
|
243 |
|
244 sub extract_comments_python($) { |
|
245 my $fname = shift; |
|
246 |
|
247 my $blurb; |
|
248 my $in_comment_block = 0; |
|
249 open SRCFILE, "<$fname" or die "failed to open file $fname"; |
|
250 my $line; |
|
251 while ($line = <SRCFILE>) { |
|
252 chomp ($line); |
|
253 if ($in_comment_block) { |
|
254 if ($line =~ /"""/) { |
|
255 $line =~ s/"""//; |
|
256 $in_comment_block = 0; |
|
257 } elsif ($line =~ /#/) { |
|
258 $line =~ s/^#//; |
|
259 } |
|
260 } else { |
|
261 if ($line =~ /^\s*"""(.*)"""/) { |
|
262 $line =~ s/^\s*"""(.*)"""/$1/g; |
|
263 } elsif ($line =~ /^\s*"""/) { |
|
264 $in_comment_block = 1; |
|
265 $line =~ s/^\s*"""//; |
|
266 } elsif ($line =~ /^\/\//) { |
|
267 $line =~ s/^\s*"""//; |
|
268 } elsif ($line eq '') { |
|
269 # add to blurb if not the start of the blurb |
|
270 } else { |
|
271 # end of comments, stop processing |
|
272 last; |
|
273 } |
|
274 } |
|
275 # delete certain types of comments, like emacs mode spec, etc |
|
276 $line =~ s/^\s*-\*-.*-\*-\s*$//; |
|
277 $line =~ s/^\s*vim(:\S+=\S+)+\s*$//; |
|
278 $line =~ s/^\s\$Id:.*\$\s*$//; |
|
279 |
|
280 chomp ($line); |
|
281 |
|
282 if (defined $blurb) { |
|
283 $blurb = $blurb . "\n" . $line; |
|
284 } elsif ($line ne '') { |
|
285 $blurb = $line; |
|
286 } |
|
287 $line = undef; |
|
288 } |
|
289 close SRCFILE; |
|
290 if (defined ($blurb) and is_legalese ($blurb)) { |
|
291 $blurbs{$fname} = $blurb; |
|
292 } |
|
293 } |
|
294 |
|
295 sub extract_comments($); |
|
296 |
|
297 # process a directory or a file recursively: extract the comments |
|
298 # from the beginning of each file and save them in @blurbs |
|
299 sub extract_comments($) { |
|
300 my $fname = shift; |
|
301 if (-d $fname) { |
|
302 # directory -> process recursively |
|
303 opendir(DIR, $fname) || die("Cannot open directory $fname"); |
|
304 my @thefiles= readdir(DIR); |
|
305 closedir(DIR); |
|
306 foreach my $f (@thefiles) { |
|
307 next if $f eq '.'; |
|
308 next if $f eq '..'; |
|
309 next if $f eq '.libs'; |
|
310 next if $f eq 'intl'; |
|
311 extract_comments ("$fname/$f"); |
|
312 } |
|
313 } elsif (-f $fname) { |
|
314 # regular file -> identify file type and read comments |
|
315 my $ftype = get_file_type ($fname); |
|
316 return if $ftype == FTYPE_IGNORE; |
|
317 if ($ftype == FTYPE_C) { |
|
318 extract_comments_c ($fname); |
|
319 } elsif ($ftype == FTYPE_PERL) { |
|
320 extract_comments_shell ($fname); |
|
321 } elsif ($ftype == FTYPE_SHELL) { |
|
322 extract_comments_shell ($fname); |
|
323 } elsif ($ftype == FTYPE_PYTHON) { |
|
324 extract_comments_python ($fname); |
|
325 } elsif ($ftype == FTYPE_JAVA) { |
|
326 extract_comments_c ($fname); |
|
327 } |
|
328 } else { |
|
329 print STDERR "ERROR: $fname: no such file or directory\n"; |
|
330 } |
|
331 } |
|
332 |
|
333 # like uniq(1) |
|
334 sub uniq (@) { |
|
335 my @list = @_; |
|
336 my $prev; |
|
337 if (not @list) { |
|
338 return @list; |
|
339 } |
|
340 $prev = $list[0]; |
|
341 my @uniq_list = ($prev); |
|
342 foreach my $str (@list) { |
|
343 next if $str eq $prev; |
|
344 push (@uniq_list, $str); |
|
345 $prev = $str; |
|
346 } |
|
347 return @uniq_list; |
|
348 } |
|
349 |
|
350 # return the number of lines in str |
|
351 sub line_count ($) { |
|
352 my $str = shift; |
|
353 |
|
354 return ($str =~ tr/\n//) + 1; |
|
355 } |
|
356 |
|
357 # return 1 if str is a member of the list, 0 otherwise |
|
358 sub is_member ($@) { |
|
359 my $str = shift; |
|
360 my @list = @_; |
|
361 |
|
362 foreach my $s (@list) { |
|
363 if ($str eq $s) { |
|
364 return 1; |
|
365 } |
|
366 } |
|
367 |
|
368 return 0; |
|
369 } |
|
370 |
|
371 sub do_merge_comments ($$$$$); |
|
372 |
|
373 # Args: references to lists of strings (lines of the texts) |
|
374 # |
|
375 # ml1: lines from the first text already processed |
|
376 # l1: remaining lines of the 1st text |
|
377 # nl1: remaining normalised lines of the 1st text |
|
378 # l2: remaining lines of the 2nd text |
|
379 # nl2: remaining normalised lines of the 1st text |
|
380 # |
|
381 # Return: list of merged lines |
|
382 sub do_merge_comments ($$$$$) { |
|
383 my $ml1_ref = shift; |
|
384 my $l1_ref = shift; |
|
385 my $nl1_ref = shift; |
|
386 my $l2_ref = shift; |
|
387 my $nl2_ref = shift; |
|
388 |
|
389 my @mlines1 = @$ml1_ref; |
|
390 my @nmlines1; |
|
391 my @lines1 = @$l1_ref; |
|
392 my @norm_lines1 = @$nl1_ref; |
|
393 my @lines2 = @$l2_ref; |
|
394 my @norm_lines2 = @$nl2_ref; |
|
395 my @nmlines2; |
|
396 my @mlines2; |
|
397 |
|
398 my @merged_lines; |
|
399 my $line1; |
|
400 my $norm_line1; |
|
401 my $line2; |
|
402 my $norm_line2; |
|
403 |
|
404 if ($debug > 2) { |
|
405 print "DEBUG: attempting to merge\n"; |
|
406 if (@mlines1) { |
|
407 print "DEBUG: lines already processed from 1st text:\n"; |
|
408 print "DEBUG: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"; |
|
409 foreach my $l (@mlines1) { |
|
410 print "DEBUG: $l\n"; |
|
411 } |
|
412 } |
|
413 print "DEBUG: 1st text:\n"; |
|
414 print "DEBUG: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"; |
|
415 foreach my $l (@lines1) { |
|
416 print "DEBUG: $l\n"; |
|
417 } |
|
418 print "DEBUG: 2nd text:\n"; |
|
419 print "DEBUG: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"; |
|
420 foreach my $l (@lines2) { |
|
421 print "DEBUG: $l\n"; |
|
422 } |
|
423 print "DEBUG: <<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n"; |
|
424 } |
|
425 |
|
426 if (not @lines1) { |
|
427 push (@merged_lines, @mlines1); |
|
428 push (@merged_lines, @lines2); |
|
429 return @merged_lines; |
|
430 } |
|
431 |
|
432 if (not @lines2) { |
|
433 push (@merged_lines, @mlines1); |
|
434 push (@merged_lines, @lines1); |
|
435 return @merged_lines; |
|
436 } |
|
437 |
|
438 # first save the lines only appearing in lines1, |
|
439 # stop at the first 2 common lines that are not empty |
|
440 while (@lines1) { |
|
441 $line1 = shift (@lines1); |
|
442 $norm_line1 = shift (@norm_lines1); |
|
443 if (($norm_line1 ne '') and |
|
444 is_member ($norm_line1, @norm_lines2)) { |
|
445 last; |
|
446 } else { |
|
447 push (@mlines1, $line1); |
|
448 push (@nmlines1, $norm_line1); |
|
449 } |
|
450 } |
|
451 # now save the lines appearing in lines2 before the common line |
|
452 while (@lines2) { |
|
453 $line2 = shift (@lines2); |
|
454 $norm_line2 = shift (@norm_lines2); |
|
455 |
|
456 if ($norm_line2 ne $norm_line1) { |
|
457 push (@mlines2, $line2); |
|
458 push (@nmlines2, $line2); |
|
459 } else { |
|
460 last; |
|
461 } |
|
462 } |
|
463 my @common_lines; |
|
464 my @ncommon_lines; |
|
465 # now save the first common line |
|
466 if ($norm_line1 eq $norm_line2) { |
|
467 if ($debug > 3) { |
|
468 print "DEBUG: 1st common line:\n"; |
|
469 print "DEBUG: $line1\n"; |
|
470 } |
|
471 @common_lines = ($line1); |
|
472 @ncommon_lines = ($norm_line2); |
|
473 } else { |
|
474 # no common lines were found |
|
475 # lines1 should be empty, all lines moved to mlines1 |
|
476 push (@merged_lines, @mlines1); |
|
477 push (@merged_lines, @mlines2); |
|
478 return @merged_lines; |
|
479 } |
|
480 # save all common lines |
|
481 while (@lines1 and @lines2) { |
|
482 $line1 = shift (@lines1); |
|
483 $norm_line1 = shift (@norm_lines1); |
|
484 $line2 = shift (@lines2); |
|
485 $norm_line2 = shift (@norm_lines2); |
|
486 if ($norm_line1 ne $norm_line2) { |
|
487 if ($debug > 3) { |
|
488 print "DEBUG: no more common lines.\n"; |
|
489 } |
|
490 unshift (@lines1, $line1); |
|
491 unshift (@norm_lines1, $norm_line1); |
|
492 unshift (@lines2, $line2); |
|
493 unshift (@norm_lines2, $norm_line2); |
|
494 last; |
|
495 } else { |
|
496 if ($debug > 3) { |
|
497 print "DEBUG: common line:\n"; |
|
498 print "DEBUG: $line1\n"; |
|
499 } |
|
500 push (@common_lines, $line1); |
|
501 push (@ncommon_lines, $norm_line1); |
|
502 } |
|
503 } |
|
504 |
|
505 # only merge if the number of common lines is at least $min_merge |
|
506 # or we are at the end of one of the texts or if at the |
|
507 # beginning of the 2nd text |
|
508 if (($#common_lines >= $min_merge) or |
|
509 (not @lines1) or (not @lines2) or |
|
510 (not @mlines2)) { |
|
511 if ($debug > 1) { |
|
512 print "DEBUG: common lines:\n"; |
|
513 foreach my $l (@common_lines) { |
|
514 print "DEBUG: $l\n"; |
|
515 } |
|
516 print "DEBUG: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n"; |
|
517 } |
|
518 # first the lines from the 1st text |
|
519 push (@merged_lines, @mlines1); |
|
520 # then the lines from the 2nd text |
|
521 push (@merged_lines, @mlines2); |
|
522 # finally the common lines |
|
523 push (@merged_lines, @common_lines); |
|
524 } else { |
|
525 # don't merge |
|
526 |
|
527 # add the common lines to the processed part of the 1st text |
|
528 push (@mlines1, @common_lines); |
|
529 |
|
530 # add the common lines back to the unprocessed part of the 2nd text |
|
531 unshift (@lines2, @common_lines); |
|
532 # add the lines before the common lines back to the unprocessed |
|
533 # part of the 2nd text |
|
534 unshift (@lines2, @mlines2); |
|
535 # add the normalised common lines back to |
|
536 # the unprocessed part of the 2nd text |
|
537 unshift (@norm_lines2, @ncommon_lines); |
|
538 # add the normalised lines before the common lines back to |
|
539 # the unprocessed part of the 2nd text |
|
540 unshift (@norm_lines2, @nmlines2); |
|
541 |
|
542 # add the normalised common lines back to |
|
543 # try to merge the rest of the texts |
|
544 my @more_merged_lines = do_merge_comments (\@mlines1, |
|
545 \@lines1, \@norm_lines1, \@lines2, \@norm_lines2); |
|
546 push (@merged_lines, @more_merged_lines); |
|
547 return @merged_lines; |
|
548 } |
|
549 |
|
550 if (not @lines1) { |
|
551 push (@merged_lines, @lines2); |
|
552 } elsif (not @lines2) { |
|
553 push (@merged_lines, @lines1); |
|
554 } else { |
|
555 # repeat the process for the remaining lines |
|
556 my @l1; |
|
557 my @more_merged_lines = do_merge_comments (\@l1, |
|
558 \@lines1, \@norm_lines1, \@lines2, \@norm_lines2); |
|
559 push (@merged_lines, @more_merged_lines); |
|
560 } |
|
561 |
|
562 return @merged_lines; |
|
563 } |
|
564 |
|
565 sub merge_comments ($$) { |
|
566 my $str1 = shift; |
|
567 my $str2 = shift; |
|
568 my @lines1 = split /\n/, $str1; |
|
569 my @lines2 = split /\n/, $str2; |
|
570 my @norm_lines1; |
|
571 my @norm_lines2; |
|
572 |
|
573 foreach my $l0 (@lines1) { |
|
574 # ignore whitespace differences |
|
575 my $l1 = "$l0"; |
|
576 $l1 =~ s/\s+/ /g; |
|
577 $l1 =~ s/^ //g; |
|
578 chomp ($l1); |
|
579 $l1 =~ s/ $//g; |
|
580 $l1 = lc ($l1); |
|
581 push (@norm_lines1, $l1); |
|
582 } |
|
583 foreach my $l0 (@lines2) { |
|
584 # ignore whitespace differences |
|
585 my $l2 = "$l0"; |
|
586 $l2 =~ s/\s+/ /g; |
|
587 $l2 =~ s/^ //g; |
|
588 chomp ($l2); |
|
589 $l2 =~ s/ $//g; |
|
590 $l2 = lc ($l2); |
|
591 push (@norm_lines2, $l2); |
|
592 } |
|
593 |
|
594 my @l0; |
|
595 my @merged_lines = do_merge_comments (\@l0, \@lines1, \@norm_lines1, |
|
596 \@lines2, \@norm_lines2); |
|
597 my $merged_str; |
|
598 if ($copyright_first) { |
|
599 my @copyright_lines; |
|
600 my @non_cr_lines; |
|
601 |
|
602 foreach my $line (@merged_lines) { |
|
603 if ($line =~ /^\s*(copyright|\(c\)|©|author:|all rights reserved)/i) { |
|
604 push (@copyright_lines, $line); |
|
605 } else { |
|
606 push (@non_cr_lines, $line); |
|
607 } |
|
608 } |
|
609 @copyright_lines = sort (@copyright_lines); |
|
610 @copyright_lines = uniq (@copyright_lines); |
|
611 $merged_str = join ("\n", (@copyright_lines, @non_cr_lines)); |
|
612 } else { |
|
613 $merged_str = join ("\n", @merged_lines); |
|
614 } |
|
615 return $merged_str; |
|
616 } |
|
617 |
|
618 my @all_comments; |
|
619 my %comments; |
|
620 |
|
621 sub unify_comments () { |
|
622 foreach my $fname (keys %blurbs) { |
|
623 if ($blurbs{$fname} =~ /\b(gpl|lgpl|gnu\s+(library\s+|lesser\s+|)general\s+public\s+license)\b/si) { |
|
624 # looks like GNU GPL/LGPL |
|
625 $gpl_found = 1; |
|
626 } |
|
627 if (defined ($comments{$blurbs{$fname}})) { |
|
628 $comments{$blurbs{$fname}} = $comments{$blurbs{$fname}} . |
|
629 ", $fname"; |
|
630 } else { |
|
631 $comments{$blurbs{$fname}} = $fname; |
|
632 } |
|
633 } |
|
634 @all_comments = (keys %comments); |
|
635 } |
|
636 |
|
637 sub smart_merge_comments () { |
|
638 my @temp_all_comments = @all_comments; |
|
639 @all_comments = (); |
|
640 |
|
641 my $i = 0; |
|
642 while ($i <= $#temp_all_comments) { |
|
643 my $did_merge = 0; |
|
644 my $c1 = $temp_all_comments[$i]; |
|
645 for (my $j = $i+1; $j <= $#temp_all_comments; $j++) { |
|
646 my $c2 = $temp_all_comments[$j]; |
|
647 my $c1_lc = line_count ($c1); |
|
648 my $c2_lc = line_count ($c2); |
|
649 my $c12_merged = merge_comments ($c1, $c2); |
|
650 my $c12_lc = line_count ($c12_merged); |
|
651 # if more than 10 lines or more than 25% saved then |
|
652 # keep the merged comments |
|
653 my $diff_lc = $c1_lc + $c2_lc - $c12_lc; |
|
654 if (($diff_lc > 10) or ($c12_lc <= ($c1_lc + $c2_lc)*0.75)) { |
|
655 if ($debug > 0) { |
|
656 print "DEBUG*****************************************\n"; |
|
657 print "$c1\n"; |
|
658 print "++++++++++++++++++++++++++++++++++++++++++++++\n"; |
|
659 print "$c2\n"; |
|
660 print "==============================================\n"; |
|
661 print "$c12_merged\n"; |
|
662 print "*****************************************DEBUG\n"; |
|
663 } |
|
664 $temp_all_comments[$j] = $c12_merged; |
|
665 $did_merge = 1; |
|
666 $comments{$c12_merged} = "$comments{$c1}, $comments{$c2}"; |
|
667 last; |
|
668 } |
|
669 } |
|
670 if (not $did_merge) { |
|
671 push (@all_comments, $c1); |
|
672 } |
|
673 $i++; |
|
674 } |
|
675 } |
|
676 |
|
677 sub print_comments () { |
|
678 if ($gpl_found and $gpl_disclaimer) { |
|
679 print << "__EOF" |
|
680 For the avoidance of doubt, except that if any license choice other |
|
681 than GPL or LGPL is available it will apply instead, Sun elects to |
|
682 use only the General Public License version 2 (GPLv2) at this time |
|
683 for any software where a choice of GPL license versions is made |
|
684 available with the language indicating that GPLv2 or any later |
|
685 version may be used, or where a choice of which version of the GPL |
|
686 is applied is otherwise unspecified. |
|
687 |
|
688 -------------------------------------------------------------------- |
|
689 |
|
690 __EOF |
|
691 } |
|
692 foreach my $comment (@all_comments) { |
|
693 print "$comments{$comment}:\n"; |
|
694 print $comment; |
|
695 print "\n\n" . |
|
696 "--------------------------------------------------------------------" . |
|
697 "\n\n"; |
|
698 } |
|
699 } |
|
700 |
|
701 sub main() { |
|
702 my $srcdir; |
|
703 |
|
704 process_options (); |
|
705 |
|
706 if (not @dirs) { |
|
707 usage(); |
|
708 exit (1); |
|
709 } |
|
710 |
|
711 foreach my $srcdir (@dirs) { |
|
712 if ($srcdir =~ /^\./) { |
|
713 $srcdir = getcwd(); |
|
714 } |
|
715 extract_comments ($srcdir); |
|
716 } |
|
717 |
|
718 unify_comments (); |
|
719 if (not $dumb_mode) { |
|
720 smart_merge_comments (); |
|
721 } |
|
722 |
|
723 print_comments (); |
|
724 |
|
725 if ($print_omitted and @files_omitted) { |
|
726 print "\nThe following files were not checked:\n\n"; |
|
727 foreach my $fname (@files_omitted) { |
|
728 print " $fname\n"; |
|
729 } |
|
730 } |
|
731 } |
|
732 |
|
733 main(); |