#!/usr/bin/perl
#############################################################################
# Copyright (c) 2004,2005 Novell, Inc.
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.   See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, contact Novell, Inc.
#
# To contact Novell about this file by physical or electronic mail,
# you may find current contact information at www.novell.com
#############################################################################
#
# patch-tag is meant to maintain a set of metadata tags in a diff.  Multiple
# files can be specified on the command line and all options can be
# given more than once.
#
# All options can be abbreviated.  --print is the same as -p
#
# All tags are changed so the first letter is uppercase and the rest are
# lowercase.
#
# Example usage:
#
# patch-tag file
#	print the entire header before the diff starts
#
# patch-tag -e filename
#	Runs $EDITOR on filename.  If there are no tags in the file yet
#	a default set of tags is filled in.  See $default_comment for the
#	list.
#
# patch-tag -p Author -p Subject file
#	print the author and subject tags only
#
# patch-tag -s [-p tag] file
#	prints in summary form, default tags are Subject and References
#
# --print forces everything into readonly mode.  If you specify --tag 
# along with --print, the file won't be changed although the output on stdout
# will.
#
# patch-tag -P file
#	Prints only the comments other than tags in the file.
#
# patch-tag -t author=Mason -t subject="a patch to fix an oops"
#	Add or modify the author and subject tags.  If more than one
#	author tag is already present in the comment, only the first will
#	be changed.
#
# patch-tag -a author -a Subject=patch
#	Add an empty author tag and Subject: patch tag to the patch,
#	but don't overwrite any existing values if these tags were present
#	already.
#
# patch-tag -a filename
#	Read in a list of tags for -a from filename
#
# patch-tag -A works the same as -a, but always adds the new tag, even
#	if one is already present.
#
# patch-tag -c filename
#	Read a whole new comment block from stdin for filename.
# patch-tag -C string filename
#	Replace the non-tag comment with string if it does not exist
# patch-tag -m filename
#	Concatenate multiline tags into one line
#
# The template files for -a can have comments starting with #.  Only lines
# starting with string: will be used as tags.  The tags may have default
# values.  You can also place the template file into ~/.patchtag, it will
# be used automatically
#
use strict;
use Getopt::Long qw(:config no_ignore_case);
use File::Temp;
use IO::File;

my $VERSION = "0.11";

# Colon-separated list of tag names.  When --edit is given and no tags were
# supplied with --add, each name is added as an empty tag.
my $default_comment = "from:subject:patch-mainline:Git-commit:references:signed-off-by:acked-by:reviewed-by:";
# Space-separated list of tag names that are emitted after the comment text
# instead of in the header section.
my $post_comment_tags = "signed-off-by acked-by reviewed-by";

# when these are in the bk comment, and non-bk section, use the non-bk one
# NOTE(review): no use of $non_dup_tags is visible in this file -- confirm
# before relying on it.
my $non_dup_tags = "from subject";

# command line options
my %tags;	  # hash of tags to be replaced
my %print_tags;   # hash of tags for printing to stdout
my %add_tags;	  # hash of tags to be added if not already present
my %always_add_tags; # hash of tags to be added no matter what
my %delete_tags;  # tags to be deleted entirely
my $new_comment;  # boolean, replace comment with data read from stdin
my $edit;         # invoke $EDITOR when done
my $multiline;	  # concatenate multiline tags
my $print_comment_only; # print only the comment block
my $summary;	  # print output tags in summary form
my $guard;	  # pattern to use for pulling guards from the filename
my $replace_empty_comment; # new value for empty non-tag comment

# globals
my @output_array; # the finished comment as printed
my @all_tags;   # array used to do final tag output
my @bk_footer_tags; # holds signed-off-by and acked-by from bk
my %replaced_tags; # copy of %tags so we can detect which ones are found
my $replace = 0;   # should we overwrite the patch file?
my $outfh;	   # current output file handle (could be a temp file)
my $infh;	   # current input file handle
my @files; 	   # list of all the files to be read
# NOTE: the per-file loop at the bottom of the script declares its own
# lexical $input, which shadows this one.
my $input; 	   # the current patch file we're reading
my $ret;           # result of GetOptions, later the editor's system() status
# A tag line: $1 is the tag name (no ':', whitespace or '#'), $2 is the
# whitespace after the colon (empty at end of line), $3 is the value.
my $tag_re = '(^[^:\s#]*):(\s+|$)(.*)';
# $1 is the 40 digit hexadecimal commit id.
my $git_re = '^From ([0-9a-f]{40}) .*'; # mbox From line by git format-patch

# Write the version banner and the option summary to STDERR and terminate
# the program with exit status 1.  This sub never returns.
sub print_usage() {
    my @usage_lines = (
        "patch-tag version $VERSION\n",
        "usage: patch-tag.pl [-cePms ] [-C val] [-aAtpd tag=val] patch ...\n",
        "\t--print a given tag\n",
        "\t--comment replace the comment block with text from stdin\n",
        "\t--Comment val replace non-tag comment with val if it does not exist\n",
        "\t--edit invoke \$EDITOR on each file after processing\n",
        "\t--delete tag delete tag from header\n",
        "\t--tag tag[=value] Replace or add a given tag\n",
        "\t--add tag[=value] Add a tag if not already present\n",
        "\t--Add tag[=value] Unconditionally add a tag\n",
        "\t--add filename containing template of tags to add\n",
        "\t\t~/.patchtag will be used as a default template file\n",
        "\t--multiline concatenate multiline tags into one line\n",
        "\t--Print-comment prints only the comment block without tags\n",
        "\t--summary print output tags in summary form\n",
        "\nAll options can be specified more than once, example\n",
        "usage and additional docs at the top of this script\n",
    );
    print STDERR @usage_lines;
    exit(1);
}

# Normalize a tag hash that came from the command line.
#
# Takes a flattened hash of tag => value pairs and returns a new flattened
# hash in which every tag name is lower case and free of ':' characters.
# A key written as "tag: value" whose own value is the empty string is
# split into a separate tag name and value.
#
sub lc_hash(%) {
    my (%h) = (@_);
    my %normalized;
    foreach my $orig_key (keys(%h)) {
        my ($name, $val) = (lc($orig_key), $h{$orig_key});
        # --opt "tag: value" arrives as one key with an empty value
        if ($val eq "" && $name =~ m/(.+[^:]):\s*(.+)/) {
            ($name, $val) = ($1, $2);
        }
        # remove every ':' still left in the tag name
        $name =~ tr/://d;
        $normalized{$name} = $val;
    }
    return %normalized;
}

# Check for and collect a multi-line tag from the input stream.
#
#   $infh - input handle further lines are read from
#   $buf  - array ref used as push-back stack by read_next_line()
#   $line - first line of the tag, already read by the caller
#
# A continuation line starts with whitespace but is not a bare newline.
# Every continuation line is appended to $line; when the global $multiline
# is set, the newline in front of it is removed first so the tag ends up on
# one line.  The first line that is not a continuation is pushed back onto
# @$buf.  Returns the (possibly extended) tag text.
sub peek_multi_line($$$) {
    my ($infh, $buf, $line) = @_;

    my $next = read_next_line($infh, $buf);
    while (defined($next) && $next =~ m/^\s/ && $next !~ m/^\n/) {
        chomp $line if ($multiline);
        $line .= $next;
        $next = read_next_line($infh, $buf);
    }
    # At end of input there is nothing to give back; queueing undef would
    # leave a bogus entry in the push-back buffer.
    push @$buf, $next if (defined($next));
    return $line;
}

# Handle one tag/value pair found in the patch header: apply a pending
# --tag replacement, decide whether the tag is emitted, and mark the tag
# as seen.
#
#   $t     - lower case tag name
#   $value - value read from the header
#
sub process_tag($$) {
    my ($t, $value) = @_;

    # A replacement is pending only while the tag is still listed in
    # %replaced_tags, so only the first occurrence of a tag is rewritten.
    my $replace_pending = defined($tags{$t}) && defined($replaced_tags{$t});
    $value = $tags{$t} if ($replace_pending);

    # Replaced tags are always emitted; all others only when no --print
    # filter is active or the filter names this tag.
    if ($replace_pending || defined($print_tags{$t}) || !%print_tags) {
        push_output_tag($t, $value);
    }
    delete $replaced_tags{$t};
}

# Tags that get pulled from bk comments get special treatment.
#
#   $tag   - lower case tag name
#   $value - value found in the bk comment
#
# signed-off-by, acked-by and reviewed-by are always queued in
# @bk_footer_tags.  Any other tag is scheduled in %replaced_tags, but only
# when no tag of that name has been emitted yet.
sub process_bk_tag($$) {
    my ($tag, $value) = @_;
    my @always_tags = qw(signed-off-by acked-by reviewed-by);

    # Compare whole names.  Using the tag as a regex would also accept
    # fragments such as "by" and would die on tags containing regex
    # metacharacters.
    if (grep { $_ eq $tag } @always_tags) {
        push @bk_footer_tags, [$tag, $value];
        return;
    }
    # don't pull the bk tag out if it already exists
    foreach my $v (@all_tags) {
        # skip entries that do not parse so a stale $1 is never compared
        next unless ($v =~ m/$tag_re/);
        return if (lc($1) eq $tag);
    }
    $replaced_tags{$tag} = $value;
}

# Look for any tags that we were asked to print or replace from the
# command line and build the array of tags found in the comment.
#
#   $infh - input handle further lines are read from
#   $buf  - array ref used as push-back stack by read_next_line()
#   $line - the header line to examine
#
# A git "From <sha1>" line is copied to @output_array and the following
# line is examined instead.  A line matching $tag_re is completed with its
# continuation lines and passed to process_tag().  Any other line is copied
# to @output_array unless --print tags were requested.
#
# When the line starts a BitKeeper header, the whole BitKeeper comment is
# consumed here: the author and subject are handed to process_bk_tag() as
# 'from' and 'subject', and the post-comment tags found in the remaining
# '#' lines are handed to process_bk_tag() as well.  The line that ends the
# BitKeeper comment is pushed back onto @$buf.
sub check_tags($$$) {
    my ($infh, $buf, $line) = @_;
    my @bk_wanted = split(' ', $post_comment_tags);
    my $bk_comment = 0;
    my $bkold_style = 0;

  LINE:
    while (1) {
        my $orig_line = $line;
        if ($bk_comment) {
            $line =~ s/^#\s*//;
        }
        # Preserve git From line
        if ($line =~ m/$git_re/) {
            push @output_array, $orig_line;
            $line = read_next_line($infh, $buf);
            # the input ended right after the From line
            return if (!defined($line));
            next LINE;
        }
        if ($line =~ m/$tag_re/) {
            $line = peek_multi_line($infh, $buf, $line);
            # evaluate again in case the multi-line string changed
            # check it as a multi line re.  Clean up trailing newlines and
            # ws
            $line =~ s/[\s\n]*$//gs;
            $line =~ m/(^[^:\s#]*):\s*(.*)/s;
            my $lc_tag = lc($1);
            my $value = $2;
            if ($bk_comment) {
                # Only pull out specific tags from the bk comment stream.
                # Whole names are compared: using the tag as a regex would
                # accept fragments and die on regex metacharacters.
                if (grep { $_ eq $lc_tag } @bk_wanted) {
                    process_bk_tag($lc_tag, $value);
                }
                if (!%print_tags) {
                    push @output_array, $orig_line;
                }
            } else {
                process_tag($lc_tag, $value);
            }
        } elsif (!%print_tags) {
            push @output_array, $orig_line;
        }
        # did we find a bitkeeper style patch header?
        # if so, just parse the whole thing here
        if ($line =~ m/^# The following is the BitKeeper ChangeSet Log/ ||
            $line =~ m/^# This is a BitKeeper generated diff -Nru style patch/) {
            # there are two bk patch styles
            # old:
            # # -------------------
            # # date author changset
            # # subject
            # new:
            # #
            # # Changeset
            # # date time author
            # # subject

            $bk_comment = 1;
            my $next = read_next_line($infh, $buf);
            push @output_array, $next if (!%print_tags);
            if ($next =~ m/^# ---------/) {
                $bkold_style = 1;
            } else {
                # read empty line
                $next = read_next_line($infh, $buf);
                push @output_array, $next if (!%print_tags);
            }
            $next = read_next_line($infh, $buf);
            push @output_array, $next if (!%print_tags);
            my @words = split /\s+/, $next;
            if ($bkold_style) {
                process_bk_tag('from', $words[2]);
            } else {
                process_bk_tag('from', $words[3]);
            }
            $next = read_next_line($infh, $buf);
            push @output_array, $next if (!%print_tags);
            chomp($next);
            $next =~ s/^#\s*//;
            # sometimes the bk comment is empty and there is just a filename
            # there's no good way to tell.
            if (!($next =~ m/(.*\/)+?.*?\.(c|h|s)$/)) {
                process_bk_tag('subject', $next);
            }

            # we've read the from tag and subject tag, the loop will
            # now walk through the ChangeSet Log section looking
            # for other tags
        }
        last LINE if (!$bk_comment);

        $line = read_next_line($infh, $buf);
        # End of input inside the bk comment: stop here, otherwise the
        # loop would never terminate.
        return if (!defined($line));
        # old style bk comments end with a line full of dashes
        # new style bk comments end with a # filename or no # at all
        if (($bkold_style && $line =~ m/^# --------/) ||
            (!$bkold_style && $line =~ m/^# \S|^[^#]/)) {
            push @$buf, $line;
            return;
        }
    }
    return;
}

# Print the finished header lines to the current output handle.
#
#   $input_file - name shown to the user for this patch
#   $array      - array ref holding the lines to print
#
# In summary mode every line is prefixed with "# ", continuation lines are
# turned into "#<tab>" lines, and the file name plus an empty line follow
# the block.  Otherwise, when more than one file is processed and --print
# tags were given, every line is prefixed with the file name.
sub print_output_array($$) {
    my ($input_file, $array) = @_;

    my $prefix = "";
    if ($summary) {
        $prefix = "# ";
    } elsif (scalar(@files) > 1 && %print_tags) {
        # more than one file: say which file the tags come from
        $prefix = "$input_file: ";
    }

    foreach my $entry (@$array) {
        # $entry aliases the array element, so the array itself is changed
        $entry =~ s/\n\s/\n#\t/mg if ($summary);
        print $outfh "${prefix}$entry";
    }
    print $outfh "$input_file\n\n" if ($summary);
}

# For -a and -A, look for a filename as an arg instead of a tag.  If found,
# fill in the hash with the contents of the file.
#
#   $h - hash ref with the tags given on the command line
#
# The hash is treated as a template request only when it holds exactly one
# key, that key has no value, and the key names a plain file.  In that case
# the key is removed and every "tag: value" line of the file is stored
# under its lower case tag name.  Text after '#' and leading whitespace are
# ignored.  Returns 1 when a template was loaded and 0 otherwise; dies when
# the file cannot be opened.
#
sub fill_hash_from_file($) {
    my ($h) = @_;

    # a template file can only be given as the one and only entry
    return 0 if (scalar(keys(%$h)) != 1);

    my ($source) = keys(%$h);
    my $v = $h->{$source};
    # "name=value" is always a tag, only a bare name can be a file
    return 0 if (defined($v) && $v ne "");
    return 0 if (!-f $source);

    delete($h->{$source});
    print STDERR "Using $source as tag template source\n";
    # three argument open: the name is never interpreted as a mode
    open(my $template, '<', $source)
        or die "Unable to open $source for reading: $!";
    while (my $line = <$template>) {
        # eat comments
        $line =~ s/#.*//;
        # eat all ws at the start of the line, not just one character
        $line =~ s/^\s+//;
        if ($line =~ m/$tag_re/) {
            $h->{lc($1)} = $3;
        }
    }
    close($template);
    return 1;
}

# Append one formatted "Tag: value" line to the output.
#
#   $tag   - lower case tag name, printed with its first letter upper case
#   $value - tag value
#
# Nothing is added when the tag is listed in %delete_tags or when exactly
# the same tag/value string has been emitted before.  Otherwise the string
# is recorded in @all_tags and appended, with a newline, to @output_array.
sub push_output_tag($$) {
    my ($tag, $value) = @_;

    # tags named with --delete never reach the output
    return if (defined($delete_tags{$tag}));

    my $string = ucfirst($tag) . ": $value";
    # identical tag/value pairs are emitted only once
    return if (grep { $_ eq $string } @all_tags);

    push @all_tags, $string;
    push @output_array, "$string\n";
}

# Helper function to cherry pick output tags from a hash.
# This is used to print the From and Subject tags first.
#
#   $tag - lower case tag name to take out of the hash
#   $h   - hash ref of pending tags; the entry for $tag is removed
#
# Post-comment tags are appended at the very end of @output_array.  Every
# other tag is inserted at the end of the leading tag section, which
# consists of git From lines and tag lines up to the first post-comment
# tag.  Nothing happens when the hash has no defined value for $tag.
sub add_output_tag ($$) {
    my ($tag, $h) = @_;

    return if (!defined($h->{$tag}));
    my $value = delete($h->{$tag});

    # Whole names are compared: using the tag as a regex would accept
    # fragments such as "by" and die on regex metacharacters.
    my @post_tags = split(' ', $post_comment_tags);

    # for post comment tags, just tack it onto the very end.
    if (grep { $_ eq $tag } @post_tags) {
        process_tag($tag, $value);
        return;
    }

    # find the end of the top tag section in the comment.
    my $tag_end = 0;
    foreach my $line (@output_array) {
        # Account git From line as tag header
        if ($line =~ m/$git_re/) {
            $tag_end++;
        } elsif ($line =~ m/$tag_re/) {
            my $t = lc($1);
            # did we find our way into the post comment tag section?
            last if (grep { $_ eq $t } @post_tags);
            $tag_end++;
        } else {
            last;
        }
    }
    # cut off everything behind the header, let process_tag() append the
    # new tag (it may decide to add nothing), then put the rest back
    my @tail = splice(@output_array, $tag_end);
    process_tag($tag, $value);
    push @output_array, @tail;
}

# From the much harder than it should be category.  Walk the comment
# block and divide it into three parts: header, comment, footer.  Make sure
# each part is separated by no more than one blank line.
#
#   $ar - array ref with the lines of the comment block, rewritten in place
#
# The header is the leading run of git From lines and tag lines up to the
# first post-comment tag.  The footer is the trailing run of tag lines.
# With --Print-comment only the comment part is left in the array.  When
# the comment part is empty and --Comment was given, its value becomes the
# comment.  In replace mode a blank line is appended at the end.
#
sub cleanup_blank_lines($) {
    my ($ar) = @_;
    # Whole names are compared: using the tag as a regex would accept
    # fragments such as "by" and die on regex metacharacters.
    my @post_tags = split(' ', $post_comment_tags);

    # find the header
    my $tag_end = 0;
    foreach my $line (@$ar) {
        if ($line =~ m/$git_re/) {
            # the git From line belongs to the header, the same way
            # add_output_tag() counts it
            $tag_end++;
        } elsif ($line =~ m/$tag_re/) {
            my $t = lc($1);
            # did we find our way into the post comment tag section?
            last if (grep { $_ eq $t } @post_tags);
            $tag_end++;
        } else {
            last;
        }
    }
    my @header_ar = @$ar[0 .. $tag_end - 1];

    # eat all the blank lines
    while ($tag_end < scalar(@$ar) && $ar->[$tag_end] =~ m/^\s*\n$/) {
        $tag_end++;
    }
    # $tag_end is now the start of the comment block.
    # pop ws off the end of the output array;
    while (@$ar && $ar->[-1] =~ m/^\s*\n$/) {
        pop @$ar;
    }

    # walk up the array to find the end of the footer.
    my $footer_start = scalar(@$ar) - 1;
    while ($footer_start >= $tag_end &&
           $ar->[$footer_start] =~ m/$tag_re/) {
        $footer_start--;
    }
    my @footer_ar = @$ar[$footer_start + 1 .. scalar(@$ar) - 1];

    # eat ws between the comment and the footer
    while ($footer_start > $tag_end &&
           $ar->[$footer_start] =~ m/^\s*\n$/) {
        $footer_start--;
    }

    my @comment_ar = @$ar[$tag_end .. $footer_start];
    if ($print_comment_only) {
        @$ar = @comment_ar;
        return;
    }
    if (defined($replace_empty_comment) && scalar(@comment_ar) == 0) {
        push @comment_ar, "$replace_empty_comment\n";
    }

    # put the three parts back together, one blank line between them
    @$ar = @header_ar;
    if (scalar(@comment_ar)) {
        push @$ar, "\n" if (scalar(@header_ar));
        push @$ar, @comment_ar;
    }
    if (scalar(@footer_ar)) {
        push @$ar, "\n";
        push @$ar, @footer_ar;
    }
    if ($replace) {
        push @$ar, "\n";
    }
}
# Fetch the next input line.  Lines queued in @$buf are handed out first,
# newest entry first; only when the queue yields nothing defined is a line
# read from $fh.  Returns the line, or whatever getline() returns at the
# end of the file.
#
sub read_next_line($$) {
    my ($fh, $buf) = @_;

    my $queued = pop @$buf;
    return $queued if (defined($queued));

    my $fresh = $fh->getline();
    return $fresh;
}

# Diff reading state machine.
#
#   $line  - the line just read
#   $state - the state returned for the previous line
#
# Returns the new state: "index", "diff", "done" or "comment".  "done"
# means the line is the start of the diff.  Any state other than "comment"
# might be the start of the diff; the only way to know for sure is to
# check the following line.
#
sub process_line($$) {
    my ($line, $state) = @_;

    # bk uses this: ===== fs/reiserfs/inode.c 1.49 vs 1.50 ===== 
    return "index" if ($line =~ m/(^Index:)|(^=====.*vs.*=====$)/);
    # a row of '=' ends the header only directly after an index line
    return "done" if ($state eq "index" && $line =~ m/^=================/);
    return "diff" if ($line =~ m/^diff/);
    return "done" if ($line =~ m/(^---)|(^\+\+\+)/);
    return "comment";
}

# Parse the command line.  All tag options collect into hashes; a hash
# reference as destination makes every one of them a hash option, which is
# spelled out with '%' for --print as well.
$ret = GetOptions("add:s%" => \%add_tags,
                  "Add:s%" => \%always_add_tags,
                  "delete:s%" => \%delete_tags,
                  "edit" => \$edit,
                  "guard=s" => \$guard,
                  "tag:s%" => \%tags,
                  "print:s%" => \%print_tags,
                  "Print-comment" => \$print_comment_only,
                  "comment" => \$new_comment,
                  "Comment=s" => \$replace_empty_comment,
                  "multiline" => \$multiline,
                  "summary" => \$summary,
                  ) || print_usage();

@files = @ARGV;

if (scalar(@files) < 1) {
    print_usage();
}

if ($new_comment && scalar(@files) > 1) {
    print STDERR "error: --comment can only be used on one file at a time\n";
    print_usage();
}

# -a file and -A file read the tags from a template file
fill_hash_from_file(\%add_tags);
fill_hash_from_file(\%always_add_tags);

if ($summary && !%print_tags) {
    $print_tags{'subject'} = 1;
    $print_tags{'references'} = 1;
    $print_tags{'suse-bugzilla'} = 1;
}

# if we're in edit mode and no tags are provided, check for a default
# template file.
if ($edit && !%tags && !%add_tags && !%always_add_tags &&
    defined($ENV{'HOME'})) {
    my $template = "$ENV{'HOME'}/.patchtag";
    if (-r $template) {
        $add_tags{$template} = "";
        # when the path is not a plain file it is not consumed; take it
        # out again so it does not end up as a tag name
        if (!fill_hash_from_file(\%add_tags)) {
            delete($add_tags{$template});
        }
    }
}

# never overwrite the original when --print is used
#
if ((%add_tags || %always_add_tags || %tags ||
     $new_comment || $edit || %delete_tags ||
     defined($replace_empty_comment)) &&
    !(%print_tags || $print_comment_only)) {
    $replace = 1;
}

%tags = lc_hash(%tags);
%print_tags = lc_hash(%print_tags);
%add_tags = lc_hash(%add_tags);
%always_add_tags = lc_hash(%always_add_tags);
%delete_tags = lc_hash(%delete_tags);

# if we're editing setup the default tags
if ($edit && !%add_tags) {
    foreach my $w (split(/:/, $default_comment)) {
        # The hashes were normalized above, so the defaults have to be
        # lower case too.  Otherwise "Git-commit" never matches a tag that
        # is already in the patch and an empty duplicate is added.
        $add_tags{lc($w)} = "";
    }
}

# Process every patch named on the command line: read the header up to the
# start of the diff, apply the requested tag changes and write the result
# either to stdout or, in replace mode, to a temp file that then takes the
# place of the original.  With --edit the editor is started afterwards.
foreach my $guarded_input (@files) {
    my $scan_state = "comment";
    my @input_buffer = ();
    my $input;

    # in summary mode the name may carry a guard prefix that is printed
    # but is not part of the real file name
    if ($summary && $guard && $guarded_input =~ m/($guard)(.+)/) {
        $input = $2;
    } else {
        $input = $guarded_input;
    }
    $infh = IO::File->new();
    # explicit mode: the file name is never interpreted as part of a mode
    $infh->open($input, "<") || die "Unable to open $input for reading: $!";
    %replaced_tags = (%tags, %add_tags);
    @all_tags = ();
    @output_array = ();
    @bk_footer_tags = ();
    my %tmp_always_add = %always_add_tags;

    if ($replace) {
        $outfh = File::Temp->new(TEMPLATE => "$input.XXXXXX", UNLINK => 0)
            or die "Unable to create temp file";
    } else {
        $outfh = IO::File->new();
        $outfh->open(">-") || die "Unable to open stdout";
    }

    # loop through until the start of the diff.
    while (defined(my $line = read_next_line($infh, \@input_buffer))) {
        $scan_state = process_line($line, $scan_state);
        if ($scan_state eq "done") {
            push @input_buffer, $line;
            last;
        }
        if ($scan_state ne "comment") {
            # a possible diff start is only certain after the next line
            my $next = read_next_line($infh, \@input_buffer);
            $scan_state = process_line($next, $scan_state);
            push @input_buffer, $next if (defined($next));
            if ($scan_state ne "comment") {
                push @input_buffer, $line;
                last;
            }
        }
        check_tags($infh, \@input_buffer, $line);
    }
    # pop ws off the end of the output array;
    while (@output_array && $output_array[-1] =~ m/^\s*\n$/) {
        pop @output_array;
    }
    foreach my $h (@bk_footer_tags) {
        process_tag($h->[0], $h->[1]);
    }
    # add any new tags left over, but do From and Subject first
    add_output_tag('from', \%replaced_tags);
    add_output_tag('subject', \%replaced_tags);
    foreach my $h (sort(keys(%replaced_tags))) {
        add_output_tag($h, \%replaced_tags);
    }
    # add any of the tags from -A
    foreach my $h (sort(keys(%tmp_always_add))) {
        add_output_tag($h, \%tmp_always_add);
    }
    # replace the comment entirely for -c
    if ($new_comment) {
        while (defined(my $comment_line = <STDIN>)) {
            print $outfh $comment_line;
        }
    } else {
        cleanup_blank_lines(\@output_array);
        print_output_array($guarded_input, \@output_array);
    }
    # in replace mode, copy the rest of the patch and move our temp file
    # over the original.
    if ($replace) {
        while (defined(my $rest = read_next_line($infh, \@input_buffer))) {
            print $outfh $rest;
        }
        $infh->close();
        my $tmpname = $outfh->filename;
        # Close first so everything is on disk before the file gets its
        # final name and before an editor reads it.
        if (!$outfh->close()) {
            my $err = $!;
            unlink($tmpname);
            die "Unable to write $tmpname: $err";
        }
        # rename replaces the original in one step.  "or"-style checking
        # is needed here: "rename A, B || die" only tests the string B.
        if (!rename($tmpname, $input)) {
            my $err = $!;
            unlink($tmpname);
            die "Unable to rename $tmpname to $input: $err";
        }
    } else {
        $infh->close();
        # make sure the output is visible before an editor takes over
        $outfh->flush();
    }
    if ($edit) {
        my $editor = defined($ENV{'EDITOR'}) ? $ENV{'EDITOR'} : "vi";
        # $EDITOR may carry its own arguments, so it goes through the
        # shell unchanged; the file name is single-quoted so blanks and
        # shell metacharacters in it are harmless.
        (my $quoted_input = $input) =~ s/'/'\\''/g;
        $ret = system("$editor '$quoted_input'");
        if ($ret == -1) {
            print STDERR "warning unable to run $editor: $!\n";
        } elsif ($ret) {
            $ret = $ret >> 8;
            print STDERR "warning $editor exited with $ret\n";
        }
    }
    $outfh->close() if (!$replace);
}