#!/usr/bin/perl
#
# $Id$
#
use strict;
use Getopt::Std;
use File::Find;

my $VERSION = '2.8.2';

# Turn warnings on before any option handling so the script body and every
# sub below are compiled with them enabled.
use warnings;

# Command line switches. -a, -b, -e and -m take an argument; the others are
# plain flags.
# NOTE(review): -w is accepted by the option string but is not read anywhere
# in this file.
my %opts;
getopts('Vhdsva:b:e:m:w:',\%opts);

my $archive_dir = $opts{a};   # root of the archive (repo) tree that is walked
my $verbose     = $opts{v};   # print progress messages via verbose()
my $dry_run     = $opts{d};   # parse only, do not write any output file
my $export_dir  = $opts{e};   # optional flat directory for the exported files
my $maxfiles    = $opts{m};   # throttle threshold for files in $export_dir
my $skipdefect  = $opts{s};   # stop reading a mail once it is marked defective

if ($opts{V}) {
    print "benno-bennobox2eml version $VERSION\n";
    exit 0;
}

help_exit() if $opts{h};
help_exit() if not ($archive_dir);

# The -m throttle counts the *.eml files in the export directory. Without -e
# there is no such directory and the count would die inside opendir on the
# first archive file, so reject the combination before doing any work.
if ($maxfiles and not defined $export_dir) {
    print STDERR "Option -m requires an export directory (-e)\n";
    exit 1;
}

my $f_boxstate  = $opts{b} || "$archive_dir/boxstate.xml";

# The boxstate file doubles as a sanity check that -a really points at a repo.
if (! -f "$f_boxstate") {
    print STDERR "Boxstate \"$f_boxstate\" not found! Repopath incorrect?\n";
    exit 1;
}

# Walk the archive tree (following symlinks) and hand every entry to
# process_archive().
find({ wanted => \&process_archive, follow => 1}, ($archive_dir));


### SUBS ####

### process_archive
#
# File::Find "wanted" callback: called once for every entry below
# $archive_dir. Selects the gzipped box files outside of journal directories,
# derives the box prefix from the directory path and passes the file on to
# process_file().
#
# Reads:  $File::Find::name, $File::Find::dir, $_ (the entry name as set by
#         File::Find), and the file-level $archive_dir, $export_dir, $maxfiles.
# Exits the program with status 1 when no prefix can be derived (the file
# lies directly in $archive_dir).
sub process_archive
{
    my $filename = $File::Find::name;
    my $dirname  = $File::Find::dir;

    return if $filename eq $dirname;     # directory
    return if $filename !~ /\.gz$/;      # not a gzipped file
    return if $dirname =~ m!/journal!;   # journal subdir
    my $gzfile = $_;

    # Strip the archive root from the directory. \Q..\E makes the path match
    # literally; unquoted, regex metacharacters in the path ("+", "(", ...)
    # would break or mis-match the pattern.
    (my $boxfilepath = $dirname) =~ s/^\Q$archive_dir\E//;

    # Box prefix = remaining path with every slash removed,
    # e.g. "/2019/01/05" becomes "20190105".
    (my $dirprefix = $boxfilepath) =~ tr{/}{}d;

    # Test the length, not the truth value, so a directory named "0" is
    # still accepted as a prefix.
    if (!length $dirprefix) {
        print STDERR "Dirname: $dirname\n";
        # NOTE(review): the message mentions -p, but the option string of
        # this script does not define such a switch.
        print STDERR "Cannot extract box prefix. Please set with -p\n";
        exit 1;
    }

    # Throttle: when more than $maxfiles *.eml files wait in the export
    # directory, sleep half a second per surplus file before adding one more.
    if ($maxfiles) {
        my $inbox_count = inbox_count($export_dir,'.+\.eml');
        if ($inbox_count > $maxfiles) {
            my $surplus = $inbox_count - $maxfiles;
            # 4-argument select() is used as a sub-second sleep.
            select(undef,undef,undef,$surplus * 0.5);
        }
    }

    process_file($gzfile,$dirprefix,$archive_dir,$export_dir);
    return;
}


### inbox_count
#
# Count the entries of a directory whose complete name matches a pattern.
#
#   $inboxdir     directory to scan (not recursive)
#   $filepattern  regular expression source; it is anchored at both ends,
#                 so it has to match the whole entry name
#
# Returns the number of matching entries.
# Dies when the directory cannot be opened.
sub inbox_count
{
    my ($inboxdir,$filepattern) = @_;

    # Lexical handle: released with closedir below and never shared with
    # other code through a global bareword.
    opendir(my $dh, $inboxdir) or die "Cannot open $inboxdir: $!";

    # Compile the anchored pattern once instead of once per entry.
    my $wanted  = qr/^$filepattern$/;
    my $counter = grep { $_ =~ $wanted } readdir $dh;

    closedir $dh;
    return $counter;
}


### process_file
#
# Decompress one gzipped box file and write the mail it contains as an
# .eml file.
#
#   $gzfile       name of the gzipped file; it is handed to gzip as given,
#                 so it must be reachable from the current directory
#                 (presumably File::Find has changed into it - verify if the
#                 find() options are ever altered)
#   $dirprefix    box prefix that is put in front of the output file name
#   $archive_dir  archive root; accepted for interface compatibility, unused
#   $export_dir   optional target directory; when empty the output is
#                 written into the directory of the box file
#
# Lines before the "===== Hash:" line are scanned for "Sender:",
# "Recipient:" and "Defective: YES". When the hash line is reached the
# collected envelope is written as X-REAL-MAILFROM / X-REAL-RCPTTO lines and
# every following line is copied. A file without a hash line is written with
# the suffix .err instead of .eml.
#
# Reads the file-level $dry_run (write nothing) and $skipdefect (stop
# reading at a defective marker). Exits the program with status 1 when
# $export_dir is set but is no directory. Returns nothing.
sub process_file
{
    my ($gzfile,$dirprefix,$archive_dir,$export_dir) = @_;

    verbose("Read $File::Find::name");
    my $hash_re      = qr/^===== Hash:\s(.+)$/;
    my $sender_re    = qr/^Sender:.+?(\S+\@\S+)/;
    my $recipient_re = qr/^Recipient:.+?(\S+\@\S+)/;
    my $defective_re = qr/Defective: YES/;

    # Target directory, always with a trailing slash. The box directory used
    # to be taken without one, which glued its last component onto the file
    # name and put the result into the parent directory.
    my $outdir = $File::Find::dir.'/';
    if ($export_dir) {
        if (! -d $export_dir) {
            print STDERR "No directory: $export_dir\n";
            exit 1;
        }
        $outdir = $export_dir.'/';
    }
    my ($fileprefix) = $gzfile =~ /^(.+?)\.gz$/;
    my $tmpfile = $outdir.$dirprefix.$fileprefix.'.tmp';
    my $outfile = $outdir.$dirprefix.$fileprefix.'.eml';

    # Uncompress through a pipe. The list form starts gzip without a shell,
    # so blanks or shell metacharacters in the file name are harmless; "--"
    # keeps a name with a leading dash from being read as an option.
    my $gz_fh;
    if (!open($gz_fh, '-|', 'gzip', '-c', '-d', '--', $gzfile)) {
        print STDERR "Cannot unzip file. $!\n";
        return;
    }

    my $tmp_fh;
    if (!$dry_run) {
        if (!open($tmp_fh, '>', $tmpfile)) {
            print STDERR "Cannot open $tmpfile: $!\n";
            close $gz_fh;
            return;
        }
    }

    my $benno_hash;
    my $sender;
    my @recipient_headers;
    my $stopped_early = 0;

    # Read line by line so a large mail is never held in memory as a whole.
    # NOTE(review): as before, the marker patterns are applied to every line,
    # including mail content after the hash line - confirm that this is
    # intended for mails that contain such text themselves.
    while (my $line = <$gz_fh>) {
        if ($line =~ $defective_re) {
            print STDERR "DEFECTIVE: $File::Find::name\n";
            verbose("DEFECTIVE: $File::Find::name");
            if ($skipdefect) {
                $stopped_early = 1;
                last;
            }
        }
        if ($line =~ $sender_re)    { $sender = $1; }
        if ($line =~ $recipient_re) { push @recipient_headers, $1; }

        if ($line =~ $hash_re) {
            $benno_hash = $1;

            # Recipients are written without angle brackets.
            my @recipients;
            foreach my $header (@recipient_headers) {
                (my $address = $header) =~ s/[<>]//g;
                push @recipients, $address;
            }

            $sender = 'NOTSET' unless defined $sender;
            if (!$dry_run) {
                print $tmp_fh "X-REAL-MAILFROM: $sender\n";
                foreach my $recipient (@recipients) {
                    print $tmp_fh "X-REAL-RCPTTO: $recipient\n";
                }
            }

            verbose("Checksum: $benno_hash");
            verbose("Sender: $sender");
            verbose("Recipients: @recipients");
            next;
        }

        next if !$benno_hash;                   # still in the box header
        print $tmp_fh $line if !$dry_run;
    }

    # Closing the pipe reaps gzip and yields its exit status. After an early
    # stop gzip is expected to end abnormally, so only a failure after a
    # complete read is reported.
    if (!close($gz_fh) and !$stopped_early) {
        print STDERR "gzip failed for $File::Find::name (status ".($? >> 8).")\n";
    }

    if (!$benno_hash) {
        $outfile =~ s/\.eml$/.err/;
    }
    if (!$dry_run) {
        close $tmp_fh or warn "Cannot close $tmpfile. $!\n";
        # link() refuses to replace an existing file; in that case the
        # temporary file is kept and the failure is reported.
        if (link($tmpfile, $outfile)) {
            unlink $tmpfile;
        }
        else {
            print STDERR "Cannot link to outfile $outfile. $!\n";
        }
    }
    print "Write $outfile\n";
    verbose("\n");
    return;
}


### verbose
#
# Print a message followed by a newline on STDOUT, but only when the
# file-level $verbose flag is set.
#
#   $text  message to print
sub verbose
{
    my ($text) = @_;
    print $text, "\n" if $verbose;
}


### help_exit()
#
# Print the usage text on STDOUT and end the program with exit status 1.
# Takes no arguments and never returns.
sub help_exit
{
    # The synopsis lists every switch described below; "\$" keeps the
    # variable name in the -b line literal.
    print <<"USAGE";
Usage: $0 [-h] [-V] [-d] [-s] [-v] [-m <num>] [-b <boxstate file>]
       -a <archive directory> [-e <export directory>]

    -a          archive (repo) directory
    -b          boxstate file (default \$archive_dir/boxstate.xml)
    -e          export files to directory (default current archive directory)
    -d          dry run
    -s          skip defective marked mails
    -m <num>    max files in inbox directory
    -v          verbose
    -V          print version
    -h          print this help

USAGE

    exit 1;
}
