Blob Blame History Raw
#!/usr/bin/perl

#############################################################################
# Copyright (c) 2012 Novell, Inc.
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.   See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, contact Novell, Inc.
#
# To contact Novell about this file by physical or electronic mail,
# you may find current contact information at www.novell.com
#############################################################################

# A very limited tar writer. Creates archives in the ustar format and
# makes sure that identical content results in identical archives.

use strict;
use warnings;

my $USAGE = "Usage: $0 --mtime=<decimal timestamp> [--exclude=<pattern] [-C directory] file|directory ... >archive.tar\n";

use Getopt::Long;
use File::Find;
use File::Copy;
use Fcntl ':mode';

my $mtime;
my $disable_paxheaders;
my $chdir;
my $files_from;
my @exclude = ();
GetOptions(
	"t|mtime=i" => \$mtime,
	"exclude=s" => \@exclude,
	"no-paxheaders" => \$disable_paxheaders,
	"C=s" => \$chdir,
	"T|files-from=s" => \$files_from,
) or die($USAGE);

if (!defined($mtime)) {
	warn "$0: --mtime not specified, using 2000-01-01\n";
	$mtime = 946681200;
}
my @args;
if ($files_from) {
	if ($files_from eq '-') {
		@args = <>;
	} else {
		open(my $fh, '<', $files_from) or die "$files_from: $!\n";
		@args = <$fh>;
		close($fh);
	}
	chomp(@args);
} else {
	@args = @ARGV;
}
if (!@args) {
	print STDERR "No arguments given\n";
	die($USAGE);
}

chdir($chdir) if $chdir;
my @files;

s/\./\\./g for @exclude;
s/\*/.*/g for @exclude;
s/\?/./g for @exclude;
s/.*/^$&\$/ for @exclude;
sub wanted {
	for my $pattern (@exclude) {
		return if $_ =~ $pattern;
	}
	push(@files, $File::Find::name);
}

for my $file (@args) {
	if (-d $file) {
		find(\&wanted, $file);
	} else {
		push(@files, $file);
	}
}
$| = 1;
my $total_size = 0;

# Tar header format description
# filed => <width><type>
# where type is either _s_string or _o_ctal
my @header_layout = (
	name => "100s",
	mode => "8o",
	uid => "8o",
	gid => "8o",
	size => "12o",
	mtime => "12o",
	csum => "8s",
	typeflag => "1s",
	linktarget => "100s",
	magic => "6s",
	version => "2s",
	user => "32s",
	group => "32s",
	devmajor => "8o",
	devminor => "8o",
	prefix => "155s"
);

for my $file (sort(@files)) {
	my %header = ();

	$header{name} = $file;
	my $need_paxheader = 0;
	if (length($file) > 100) {
		($header{prefix} = $file) =~ s:/[^/]*$::;
		($header{name} = $file) =~ s:^.*/::;
		if (length($header{name}) > 100 ||
					length($header{prefix}) > 155) {
			if ($disable_paxheaders) {
				die "Too long filenames are impossible with --no-paxheaders: $file\n";
			}
			$header{name} = substr($header{name}, 0, 100);
			$header{prefix} = substr($header{prefix}, 0, 155);
			$need_paxheader = 1;
		}
	}
	my @stat = lstat($file) or die "$file: $!\n";
	my $mode = $stat[2];
	$header{mode} = ($mode & 0111) ? 0755 : 0644;
	# 65534:65534 is commonly used for nobody:nobody
	$header{uid} = 65534;
	$header{gid} = 65533;
	$header{user} = "nobody";
	$header{group} = "nobody";

	$header{mtime} = $mtime;

	if ($need_paxheader) {
		my $record = "path=$file\n";
		# length means length of the whole record, including the
		# length number
		my $length = length($record) + 2;
		while ($length < length(sprintf("%d %s", $length, $record))) {
			$length++;
		}
		$record = sprintf("%d %s", $length, $record);
		$header{typeflag} = "x";
		$header{size} = length($record);
		print gen_header(\%header);
		$total_size += 512;
		print $record;
		# padding to 512 byte boundary
		my $pad = pad_tail($header{size}, 512);
		print $pad;
		$total_size += length($pad);
	}
	if (S_ISREG($mode)) {
		$header{size} = $stat[7];
		$header{typeflag} = "0";
	} elsif (S_ISLNK($mode)) {
		$header{size} = 0;
		$header{linktarget} = readlink($file);
		$header{typeflag} = "2";
	} elsif (S_ISDIR($mode)) {
		$header{size} = 0;
		$header{typeflag} = "5";
	} else {
		die "Only regular files, symlinks and directories supported: $file\n";
	}
	print gen_header(\%header);
	$total_size += 512;
	next unless S_ISREG($mode);

	# PAYLOAD
	copy($file, \*STDOUT);
	$total_size += $header{size};
	# padding to 512 byte boundary
	my $pad = pad_tail($header{size}, 512);
	print $pad;
	$total_size += length($pad);
}
# end of archive marker
print pad("", 1024);
$total_size += 1024;
# pad to 10240 boundary
print pad_tail($total_size, 10240);
exit;

sub gen_header {
	my $header = shift;

	$header->{magic} = "ustar";
	$header->{version} = "00";

	my $res = "";
	my $csum_pos = 0;
	for (my $i = 0; $i < scalar(@header_layout); $i += 2) {
		my $field = $header_layout[$i];
		my $fmt = $header_layout[$i + 1];
		(my $length = $fmt) =~ s/.$//;
		(my $type = $fmt) =~ s/^\d*//;
		my $value = $header->{$field};

		# special case
		if ($field eq "csum") {
			$csum_pos = length($res);
			$res .= " " x 8;
			next;
		}
		if ($type eq "s") {
			$value = "" unless defined($value);
			$res .= pad($value, $length);
		} elsif ($type eq "o") {
			$value = 0 unless defined($value);
			$res .= pad_octal($value, $length);
		} else {
			die "Invalid format for $field: $fmt";
		}
	}
	# add the checksum, using the "%06o\0 " format like GNU tar
	my $csum = header_checksum($res);
	substr($res, $csum_pos, 7) = pad_octal($csum, 7);

	# padding to 512 byte boundary
	$res .= pad("", 12);
	die "error: header is not 512 bytes long" unless length($res) == 512;

	return $res;
}

sub pad {
	my ($string, $length) = @_;
	
	my $pad = $length - length($string);
	if ($pad < 0) {
		die "Field over $length bytes: $string\n";
	}
	return $string . "\0" x $pad;
}

sub pad_octal {
	my ($num, $length) = @_;

	$length--;
	return sprintf("%0${length}o\0", $num);
}

# after $size bytes written, padd to the neares $boundary
sub pad_tail {
	my ($size, $boundary) = @_;

	return "" unless $size % $boundary;
	my $padding = $boundary - $size % $boundary;
	return pad("", $padding);
}

sub header_checksum {
	my $header = shift;

	my $res = 0;
	for (my $i = 0; $i < length($header); $i++) {
		$res += ord(substr($header, $i, 1))
	}
	return $res;
}