#!/usr/bin/perl -w                                               -*-CPerl-*-
#
# Perl Script to emulate Windows cabarc
# Incomplete, naturally.
# Waider, some time in 1998
#
# Wow. Listing mode actually works on old cabfiles.
# 2003/01/06 Add use strict, fix resulting errors
# 2003/07/14 Started working on cab creation
use strict;
use IO::Seekable;
use Compress::Zlib;
use File::Find;

# Templates
my $cfhdrTemplate = "A4VVVVVCCvvvvv";
my $cfhdrRESERVETemplate = "vCC"; # + array of C, size = v
my $cffolderTemplate = "Vvv"; # + array of C, size = cfhdr.cbCFFolder
my $cffileTemplate   = "VVvvvv"; # + array of C, null terminated
my $cfdataTemplate   = "Vvv"; # + array of C, size = 2nd v

# Constants
# CAB version
sub MAJOR_VER { 0x01 }
sub MINOR_VER { 0x03 }

# cfHDR optional bits
sub cfhdrPREV_CABINET { 0x1 }
sub cfhdrNEXT_CABINET { 0x2 }
sub cfhdrRESERVE_PRESENT { 0x3 }
sub cfhdrALLFLAGS { cfhdrPREV_CABINET | cfhdrNEXT_CABINET | cfhdrRESERVE_PRESENT }

#   Compression
sub COMPRESSION_NONE { 0x0 }
sub COMPRESSION_MSZIP { 0x1 }

#   Folder
sub ifoldCONTINUED_FROM_PREV { 0xFFFD }
sub ifoldCONTINUED_TO_NEXT { 0xFFFE }
sub ifoldCONTINUED_PREV_AND_NEXT { 0xFFFF }

#   Attributes
sub attrRDONLY { 0x1 }
sub attrHIDDEN { 0x2 }
sub attrSYSTEM { 0x4 }
sub attrARCH { 0x20 }
sub attrEXEC { 0x40 } # Run after extraction
sub attrNAME_IS_UTF { 0x8 } # szName contains UTF chars.

# Private data
my ( $signature, $reserverd1, $cbCabinet, $reserved2, $coffFiles, $reserved3,
     $versionMinor, $versionMajor, $cFolders, $cFiles, $flags, $setID,
     $iCabinet, $cbCFhdr, $cbCFFolder, $cbCFData, $abReserveData, $szCabinetPrev,
     $szDiskPrev, $szCabinetNext, $szDiskNext );

my @cffolderList;
my @cffileList;
my @cfdataList;

# Parameters
if ( $#ARGV < 1 ) {
  &usage;
}

my $COMMAND = $ARGV[ 0 ];
my $CABFILE = $ARGV[ 1 ];

$COMMAND = lc( $COMMAND );

&usage if $COMMAND !~ /^[lxn]$/;

&listcab if $COMMAND eq "l";
&createcab if $COMMAND eq "n";

exit;

sub listcab {
# Microsoft Listing Format, spacing dumped as-is (yes, it's crap!)
# Filename -> size looks like "indent to tabstop"; the rest appear to be spacing.

#
#
#Microsoft (R) Cabinet Tool - Version 1.00.0601 (03/18/97)
#Copyright (c) Microsoft Corp 1996-1997. All rights reserved.
#
#Listing of cabinet file 'install.cab' (size 779149):
#   257 file(s), 1 folder(s), set ID 0, cabinet #0
#
#File name                      File size     Date      Time   Attrs
#-----------------------------  ---------- ---------- -------- -----
#   com/cognotec/comms/CommsFactory.class     1680 1998/03/12 10:40:48  ----

  open( CAB, "<$CABFILE" ) || die $!;
  binmode( CAB );

  # Get the header
  # Calculate the structure size
  my $cfhdrLength = length( pack( $cfhdrTemplate, 0 ));
  cfhdrParse( saferead( $cfhdrLength ));

  print <<"EOH";

Microsoft (R) Cabinet Tool - Version 1.00.0601 (03/18/97)
Copyright (c) Microsoft Corp 1996-1997. All rights reserved.

Listing of cabinet file \'$CABFILE\' (size $cbCabinet):
   $cFiles file(s), $cFolders folder(s), set ID $setID, cabinet #$iCabinet

File name                      File size     Date      Time   Attrs
-----------------------------  ---------- ---------- -------- -----
EOH

  # Fix up the templates based on header values - if RESERVE_PRESENT is
  # set and cbCFFolder is non-zero, then each CFFolder structure has a
  # trailing array of reserved data.
  if (( $flags & $cfhdrRESERVE_PRESENT ) && $cbCFFolder > 0 ) {
	$cffolderTemplate .= "C$cbCFFolder";
  }

  for( my $cFoldersDone = 0; $cFoldersDone < $cFolders; $cFoldersDone++ ) {
	my $cffolderLength = length( pack( $cffolderTemplate, 0 ));

	# This stacks it into the storage list
	cffolderParse( saferead( $cffolderLength ));
  }

  # Seek to the start of the file blocks.
  my $tell = sysseek( CAB, 0, SEEK_CUR ); # TELL for sysr/w
  if ( $tell != $coffFiles ) {
	warn "Expected to be at $coffFiles, but we're at $tell. Resyncing...\n";
	die $! if ( !defined( sysseek CAB, $coffFiles, SEEK_SET ));
  }

  for( my $cFilesDone = 0; $cFilesDone < $cFiles; $cFilesDone ++ ) {
	my $cffileLength = length( pack( $cffileTemplate, 0 ));
	my $buffer = saferead( $cffileLength );

	# Add in the filename.
	my $fname = &safereadtonull;

	cffileParse( $buffer, $fname );

	my %foo = %{$cffileList[ $cFilesDone ]};

	printf( "   %- 29s% 9s %8s %6s  %4s\n", $foo{ 'szName'}, $foo{ 'cbFile' }, $foo{ 'date' }, $foo{ 'time' }, $foo{ 'attribs' } );

  }

  print "\n"; # DOS compatibility!
  close( CAB );
}

# ----------------------------------------------------------------------
sub cfhdrParse {
  my $buffer = shift;

  # Unpack various private data
  ( $signature,
	$reserverd1,
	$cbCabinet,
	$reserved2,
	$coffFiles,
	$reserved3,
	$versionMinor,
	$versionMajor,
	$cFolders,
	$cFiles,
	$flags,
	$setID,
	$iCabinet ) = unpack( $cfhdrTemplate, $buffer );

  if ( $flags & $cfhdrRESERVE_PRESENT ) {
	my $cfhdrRESERVELength = length( pack( $cfhdrRESERVETemplate, 0 ));
	my $optional = saferead( $cfhdrRESERVELength );
	( $cbCFhdr, $cbCFFolder, $cbCFData ) =
	  unpack( $cfhdrRESERVETemplate, $optional );
	$abReserveData = saferead( $cbCFhdr );
  } else {
	$cbCFhdr = 0;
	$cbCFFolder = 0;
	$cbCFData = 0;
	$abReserveData = undef;
  }

  if ( $flags & cfhdrPREV_CABINET ) {
	die "Not implemented!";
  } else {
	$szCabinetPrev = undef;
	$szDiskPrev = undef;
  }

  if ( $flags & cfhdrNEXT_CABINET ) {
	die "Not implemented!";
  } else {
	$szCabinetNext = undef;
	$szDiskNext = undef;
  }
}

sub cffolderParse {
  my $buffer = shift;
  my ( $coffCabStart, $cCFData, $typeCompress, $abReserveData ) =
	unpack( $cffolderTemplate, $buffer );
  my ( %data );

  # NB: abReserveData is actually an array.

  # Make a hashtable of the data
  $data{'coffCabStart'} = $coffCabStart;
  $data{'cCFData'} = $cCFData;
  $data{'typeCompress'} = $typeCompress;
  $data{'abReserveData'} = $abReserveData; # undef, probably.

  push @cffolderList, \%data;
}

sub cffileParse {
  my $buffer = shift;
  my $szName = shift;
  my ( $cbFile, $uoffFolderStart, $iFolder, $date, $time, $attribs )
	= unpack $cffileTemplate, $buffer;
  my $attribstring;

  my %data;

  $data{'cbFile'} = $cbFile;
  $data{'uoffFolderStart'} = $uoffFolderStart;
  $data{'iFolder'} = $iFolder;

  # Parse date
  my( $y, $m, $d );
  $y = 1980 + ( $date >> 9);
  $m = ( $date & 0x1ff ) >> 5;
  $d = $date & 0x001F;
  $data{'date'} = sprintf( "%04d/%02d/%02d", $y, $m, $d );

  # Parse time
  my( $h, $mn, $s );
  $h = $time >> 11;
  $mn = ( $time & 0x07ff) >> 5;
  $s = ( $time & 0x001f ) * 2;
  $data{'time'} = sprintf( "%02d:%02d:%02d", $h, $mn, $s );

  # Parse the attributes
  $attribstring = sprintf( "%s%s%s%s",
						   $attribs & attrRDONLY?"r":"-",
						   $attribs & attrARCH  ?"a":"-",
						   $attribs & attrHIDDEN?"h":"-",
						   $attribs & attrSYSTEM?"s":"-"
						 );

  $data{'attribs'} = $attribstring;

  # FIXME parse UTF/locale
  if ( $attribs & attrNAME_IS_UTF ) {
	$data{'szName'} = $szName;
  } else {
	$data{'szName'} = $szName;
  }

  push @cffileList, \%data;
}

sub cfdataParse {
  my $buffer = shift;
  my ($csum, $cbData, $cbUncomp ) = unpack $cfdataTemplate, $buffer;

  if ( $cfhdrRESERVE_PRESENT ) {
	die "Not implemented!";
  }

  # Shlurp up the bytes
  my $dataBlock = saferead( $cbData );
}

# FIXME generalise - safe handle + context text for errors
sub saferead {
  my $bytes = shift;

  my $tot = 0;
  my $buffer = "";
  my $chars = "";

  while( $tot < $bytes ) {
	my $nbytes = sysread( CAB, $chars, $bytes - $tot );
	die $! if !defined( $nbytes );
	die $! if ( $nbytes == -1 );
	die "EOF" if $nbytes == 0;
	$tot += $bytes;
	$buffer .= $chars;
  }

  return $buffer;
}

# FIXME generalise as above
sub safereadtonull {
  my $buffer = "";
  my $chars = "";

  while( 1 ) {
	my $nbytes = sysread( CAB, $chars, 1 );
	die $! if !defined( $nbytes );
	die $! if ( $nbytes == -1 );
	die "EOF" if $nbytes == 0;
	last if ( ord( $chars ) == 0 );
	$buffer .= $chars;
  }

  return $buffer;
}

sub usage {
# NB this is copied exactly from the version of cabarc it mentions in line 1.
  print <<"EOU";

Microsoft (R) Cabinet Tool - Version 1.00.0601 (03/18/97)
Copyright (c) Microsoft Corp 1996-1997. All rights reserved.

Usage: CABARC [options] command cabfile [\@list] [files] [dest_dir]

Commands:
   L   List contents of cabinet (e.g. cabarc l test.cab)
   N   Create new cabinet (e.g. cabarc n test.cab *.c app.mak *.h)
   X   Extract file(s) from cabinet (e.g. cabarc x test.cab foo*.c)

Options:
  -c   Confirm files to be operated on
  -o   When extracting, overwrite without asking for confirmation
  -m   Set compression type [LZX:<15..21>|MSZIP|NONE], (default is MSZIP)
  -p   Preserve path names (absolute paths not allowed)
  -P   Strip specified prefix from files when added
  -r   Recurse into subdirectories when adding files (see -p also)
  -s   Reserve space in cabinet for signing (e.g. -s 6144 reserves 6K bytes)
  -i   Set cabinet set ID when creating cabinets (default is 0)

Notes
-----
When creating a cabinet, the plus sign (+) may be used as a filename
to force a folder boundary; e.g. cabarc n test.cab *.c test.h + *.bmp

When extracting files to disk, the <dest_dir>, if provided, must end in
a backslash; e.g. cabarc x test.cab bar*.cpp *.h d:\\test\\

The -P (strip prefix) option can be used to strip out path information
e.g. cabarc -r -p -P myproj\\ a test.cab myproj\\balloon\\*.*
The -P option can be used multiple times to strip out multiple paths


EOU
  exit( 1 );
}

sub createcab {
  # XXX setting up for whenever I clean this code up
  @_ = @ARGV;
  shift; # first arg is the command which we've already parsed
  my $cabfile = shift or usage();
  my @files = @_ or usage();
  my ( @wanted, $tmp );
  my ( $size, $offset, $folders, $files, $offset2 ) = ( 0, 0, 0, 0 );

  # parse what we've been given into a list of files relative to CWD.
  map { -d $_ ? find( { wanted => sub {push @wanted, $File::Find::name if !-d $File::Find::name},
						follow => 1, no_chdir => 1 },
					  $_ ) : push @wanted, $_ } @files;

  open( CAB, ">$cabfile" ) or die "$cabfile: $!";
  binmode( CAB );

  $files = scalar( @wanted );
  $folders = 1; # XXX for now
  $offset = 0x2c; # XXX also for now
  $offset2 = 0;

  # since we're doing uncompressed cabinets, we can add the size of
  # each file as-is to the total cab size.
  map {
	my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$fsize,
		$atime,$mtime,$ctime,$blksize,$blocks)
	  = lstat($_);
	$size += $fsize;

	# also CFILE overhead
	$size += length( pack( $cffileTemplate, 0 )) +
	  length( $_ ) + 1; # NULL terminated filename

	# in order to figure out offset2 we just need the CFILE bit
	$offset2 += length( pack( $cffileTemplate, 0 )) +
	  length( $_ ) + 1; # NULL terminated filename

  } @wanted;

  # add in CFOLDER overhead
  $size += length( pack( $cffolderTemplate, 0 )); # XXX ignoring cbCFFolder

  # add in CFDATA header overhead (data length is included already)
  $size += length( pack( $cfdataTemplate, 0 ));

  # Cab file header
  $tmp = pack( $cfhdrTemplate,
			   "MSCF", # signature
			   0, # reserved
			   $size, # size
			   0, # reserved,
			   $offset, # offset to first CFFILE
			   0, # reserved,
			   MINOR_VER, # minor version
			   MAJOR_VER, # major version
			   $folders, # number of CFFOLDERS
			   $files, # number of CFFILEs
			   0, # flags - we're not supporting any of 'em.
			   0x622, # set ID - not using, either
			   0, # cabinet index - 0 = first
#			   0, # cbCFHEADER not supported
#			   0, # cbCFFOLDER not supported
#			   0, # cbCFData not supported,
#			   0, # abReserve not present.
			 );
  $size += length( $tmp );
  $tmp = pack( $cfhdrTemplate,
			   "MSCF", # signature
			   0, # reserved
			   $size, # size
			   0, # reserved,
			   $offset, # offset to first CFFILE
			   0, # reserved
			   MINOR_VER, # minor version
			   MAJOR_VER, # major version
			   $folders, # number of CFFOLDERS
			   $files, # number of CFFILEs
			   0, # flags - we're not supporting any of 'em.
			   0x622, # set ID - not using, either
			   0, # cabinet index - 0 = first
#			   0, # cbCFHEADER not supported
#			   0, # cbCFFOLDER not supported
#			   0, # cbCFData not supported,
#			   0, # abReserve not present.
			 );
  print CAB $tmp;

  # Add a folder
  $tmp = pack( $cffolderTemplate, $offset + $offset2,
			   1, # all data in this cabinet, or something
			   COMPRESSION_NONE );

  print CAB $tmp;

  # now add a CFFILE for each file
  $offset = 0; # reuse!
  for my $file ( @wanted) {
	my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$fsize,
		$atime,$mtime,$ctime,$blksize,$blocks)
	  = lstat($file);
	my ( $date, $time ) = unixdatetodos( $mtime );

	$tmp = pack( $cffileTemplate,
				 $fsize, # size in bytes
				 $offset,# offset in CDATA structure
				 0, # iFolder
				 $date,
				 $time,
				 attrARCH,
			   );
	$tmp .= $file . chr(0);
	print CAB $tmp;

	$offset += $fsize;
  }

  # and finally add the CFDATA structure containing the files.
  print CAB pack( $cfdataTemplate,
				  0, # checksum - setting to zero means ignore
				  $offset, # size of block compressed
				  $offset, # size of block uncompressed
				);

  for my $file ( @wanted ) {
	my $contents;
	open( FILE, "<$file" ) or die "$file: $!";
	binmode( FILE );
	{
	  local $/ = undef;
	  $contents = <FILE>;
	}
	close( FILE );
	print CAB $contents;
  }

  close( CAB );
}

# Date/Time conversion
sub unixdatetodos {
  my $indate = shift;
  my ( $date, $time );
  my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) =
	gmtime($indate);
  $date = (( $year - 80 ) << 9 ) + (( $mon + 1 ) << 5) + $mday;
  $time = ( $hour << 11 ) + ( $min << 5 ) + int( $sec / 2 );

  ( $date, $time );
}

# unfinished
sub dosdatetounix {
  my ( $indate, $intime ) = @_;

  my( $y, $m, $d );
  $y = 1980 + ( $indate >> 9);
  $m = ( $indate & 0x1ff ) >> 5;
  $d = $indate & 0x001F;

  my( $h, $mn, $s );
  $h = $intime >> 11;
  $mn = ( $intime & 0x07ff) >> 5;
  $s = ( $intime & 0x001f ) * 2;
}
