package Clair::MEAD::Wrapper;

use strict;
use Clair::Cluster;
use Clair::Config;
use lib "$MEAD_HOME/lib";
use Clair::MEAD::DocsentConverter;
use MEAD::SentFeature;
use MEAD::Extract;
use MEAD::Document;
use Clair::MEAD::Summary;

# Constructor. Converts the given cluster into MEAD's docsent layout on disk
# and returns a wrapper object around the resulting cluster directory.
#
# Named parameters:
#   cluster     - (required) cluster object handed to
#                 Clair::MEAD::DocsentConverter->add_cluster.
#   cluster_dir - directory the converted cluster is written to; defaults to
#                 "temp.mead". Trailing slashes are ignored.
#   mead_home   - MEAD installation directory; defaults to $MEAD_HOME.
#
# Any 'options' value passed in is discarded: the option list always starts
# out empty (use add_option to populate it).
#
# Dies if 'cluster' is not supplied.
sub new {

    my $class = shift;
    my %parameters = @_;

    # Validate up front so that an unusable call does no other work.
    die "'cluster' is a required parameter"
        unless (defined $parameters{cluster});

    $parameters{cluster_dir} = "temp.mead"
        unless (defined $parameters{cluster_dir});

    # Drop trailing slashes (a bare "/" is left alone). Without this, "dir/"
    # would yield an empty cluster name below, and the feature file names
    # that are built from that name could never be found.
    $parameters{cluster_dir} =~ s{(?<=[^/])/+\z}{};

    # The cluster name is the last path component of the cluster directory.
    $parameters{cluster_name} = $parameters{cluster_dir};
    $parameters{cluster_name} =~ s/\/?[^\/]+\///g;

    $parameters{mead_home} = $MEAD_HOME
        unless (defined $parameters{mead_home});

    $parameters{options} = [];

    # Write the cluster out in docsent form under cluster_dir.
    my $converter = Clair::MEAD::DocsentConverter->new(
        dest => $parameters{cluster_dir}
    );
    $converter->add_cluster($parameters{cluster});
    $converter->convert();

    return bless \%parameters, $class;

}

# Discards every option added so far by replacing the option list with a
# fresh empty array reference.
sub clear_options {
    my ($self) = @_;
    return $self->{options} = [];
}

# Appends one option string to the list that is later joined onto the MEAD
# command line.
sub add_option {
    my ($self, $option) = @_;
    return push @{ $self->{options} }, $option;
}

# Runs the MEAD script over the cluster directory with the accumulated
# options and returns the summary as a list of sentence strings.
#
# MEAD's standard error is discarded. The captured standard output is split
# on its "[N]" markers; whatever precedes the first marker is thrown away.
# The sentences are also stored as an array reference in $self->{summary},
# and $self->{ran_mead} is set.
#
# Dies if the command produces no output.
sub run_mead {
    my ($self) = @_;

    my $cluster_dir = $self->{cluster_dir};

    # Assemble "<mead_home>/bin/mead.pl <options...> <cluster_dir>".
    my @parts = ("$self->{mead_home}/bin/mead.pl");
    foreach my $opt (@{ $self->{options} }) {
        push @parts, $opt;
    }
    push @parts, "$cluster_dir 2>/dev/null";
    my $command = join ' ', @parts;

    my $output = `$command`;
    die "Mead command '$command' returned nothing" unless $output;

    my @sents = split /\s*\[\d+\]\s*/, $output;
    shift @sents;    # text in front of the first "[N]" marker
    chomp @sents;

    $self->{summary}  = \@sents;
    $self->{ran_mead} = 1;
    return @sents;
}

# Runs MEAD in extract mode and returns the extracted sentences as a list of
# hash references, in extract order.
#
# The MEAD script is run with the accumulated options plus "-extract"; its
# output is written to <cluster_dir>/extract.xml and read back with
# read_extract. Every feature reported by get_feature_names is loaded, and
# each returned sentence (taken from read_document) gets a FEATURES hash
# mapping feature name to that sentence's score.
#
# Sets $self->{ran_mead}. Dies if the MEAD command exits with a non-zero
# status.
sub get_extract {

    my $self = shift;

    my $dest = $self->{cluster_dir};
    my $mead_home = $self->{mead_home};
    my $mead_script = "$mead_home/bin/mead.pl";

    my $command = $mead_script;
    for (@{ $self->{options} }) {
        $command .= " $_";
    }
    $command .= " -extract $dest > $dest/extract.xml 2> /dev/null";
    if (system($command)) {
        die "Error running mead command: $command";
    }
    $self->{ran_mead} = 1;

    my $extract = read_extract("$dest/extract.xml");

    # Load every available feature once: feature name -> { did -> scores }.
    my @fnames = $self->get_feature_names();
    my %feats;
    foreach my $fname (@fnames) {
        my %feature = $self->get_feature($fname);
        $feats{$fname} = \%feature;
    }

    # Order the extract entries numerically. A plain string sort would put
    # "10" before "2" and scramble any extract of ten or more sentences.
    # NOTE(review): assumes the keys returned by read_extract are the
    # numeric positions of the sentences -- confirm against MEAD::Extract.
    # The string comparison is only a tie-breaker for non-numeric keys.
    my @order = do {
        no warnings 'numeric';
        sort { ($a <=> $b) || ($a cmp $b) } keys %$extract;
    };

    my @list;
    my %docs;    # did -> sentence list, so each document is read only once
    foreach my $key (@order) {
        my $did = $extract->{$key}->{DID};
        my $sno = $extract->{$key}->{SNO};

        my $sents = $docs{$did};
        unless ($sents) {
            $sents = read_document($did, "$dest/docsent/$did.docsent");
            $docs{$did} = $sents;
        }

        # Sentences are looked up by SNO, while the score lists returned by
        # get_feature have had their first slot removed -- hence $sno - 1.
        $sents->[$sno]->{FEATURES} = {};
        foreach my $fname (@fnames) {
            $sents->[$sno]->{FEATURES}->{$fname}
                = $feats{$fname}->{$did}->[$sno - 1];
        }
        push @list, $sents->[$sno];
    }

    return @list;

}

# Runs MEAD in extract mode (through get_extract) and wraps the resulting
# sentence list in a Clair::MEAD::Summary object, which is returned.
sub get_summary {

    my $self = shift;
    my @extract = $self->get_extract();

    return Clair::MEAD::Summary->new(\@extract);

}

# Returns the document IDs of the cluster: the names of the *.docsent files
# in <cluster_dir>/docsent with the directory part and the ".docsent"
# extension removed. Dies if no such file is found.
sub get_dids {
    my ($self) = @_;

    my $pattern = "$self->{cluster_dir}/docsent/*.docsent";
    my @dids = glob($pattern)
        or die "Couldn't read docsent dir: $!";

    foreach my $did (@dids) {
        $did =~ s/\.docsent$//g;      # drop the extension
        $did =~ s/\/?[^\/]+\///g;     # drop the leading directories
    }

    return @dids;
}

# Returns the names of all features for which a
# "<cluster_name>.<feature>.sentfeature" file exists in
# <cluster_dir>/feature. The order of the returned names is unspecified.
#
# Files in that directory that do not follow the naming scheme are reported
# with a warning and skipped.
#
# Dies if $self->{ran_mead} is not set, or if the feature directory contains
# no .sentfeature file.
sub get_feature_names {
    my $self = shift;

    die "Call to get_feature_names without call to run_mead"
        unless ($self->{ran_mead});

    my %features;
    my $cluster_name = $self->{cluster_name};
    my $feature_dir = "$self->{cluster_dir}/feature";
    my @files = glob("$feature_dir/*.sentfeature")
        or die "Couldn't read feature dir: $!\n";

    foreach my $file (@files) {
        # Reduce the path to its last component.
        (my $basename = $file) =~ s/\/?[^\/]+\///g;

        # \Q...\E makes the cluster name match literally; names containing
        # regex metacharacters ("a+b", "c(1)") would otherwise fail to match
        # their own feature files or break the pattern.
        if ($basename =~ /^\Q$cluster_name\E\.(.+?)\.sentfeature$/) {
            $features{$1} = 1;
        } else {
            warn "Unexpected feature file: $basename";
        }
    }
    return keys %features;
}

# Returns the scores of one feature as a hash mapping document ID to an
# array reference of per-sentence scores.
#
# The data is read with read_sentfeature from
# "<cluster_dir>/feature/<cluster_name>.<feature_name>.sentfeature". Each
# entry of the lists it returns is reduced to the value stored under
# $feature_name, and the first slot of every list is then removed, so the
# resulting lists start at index 0.
#
# Dies if $self->{ran_mead} is not set.
sub get_feature {

    my $self = shift;
    my $feature_name = shift;

    die "Call to get_feature without call to run_mead"
        unless ($self->{ran_mead});

    my $cluster_dir = $self->{cluster_dir};
    my $cluster_name = $self->{cluster_name};
    my $path = "$cluster_dir/feature/$cluster_name.$feature_name.sentfeature";
    my %feature = read_sentfeature($path);

    foreach my $did (keys %feature) {
        my $listref = $feature{$did};

        # $entry aliases the array element, so assigning to it replaces the
        # per-sentence record with the bare score in place.
        foreach my $entry (@$listref) {
            my $record = $entry;
            $entry = $record->{$feature_name};
        }

        # NOTE(review): the first slot looks like an unused placeholder for
        # 1-based sentence numbers -- confirm against MEAD::SentFeature.
        shift @$listref;
    }
    return %feature;

}

=head1 NAME

Clair::MEAD::Wrapper - A perl module wrapper for MEAD

=head1 VERSION

Version 0.01

=cut

# Module version; the VERSION section of the POD states the same number.
our $VERSION = '0.01';

=head1 SYNOPSIS

    use Clair::Cluster;
    use Clair::MEAD::Wrapper;
    my $cluster = Clair::Cluster->new();
    # ...
    my $mead = Clair::MEAD::Wrapper->new(
        mead_home => "/path/to/mead",
        cluster => $cluster,
        cluster_dir => "my_cluster"
    );
    $mead->add_option("-s -p 5"); # Generate a 5% summary
    my @sentences = $mead->run_mead();

=head1 METHODS

=cut


=head2 new(%params)
    
    Clair::MEAD::Wrapper->new( mead_home => ..., cluster => ..., cluster_dir => ... )

Creates a new Wrapper instance. The only required parameter is 'cluster', a
Clair::Cluster object containing the documents to summarize; the constructor
dies without it. 'mead_home' is the path to a MEAD installation and is
optional: it defaults to the configured $MEAD_HOME. 'cluster_dir' is an
optional parameter that defines where the MEAD cluster directory will be
stored. This defaults to 'temp.mead'.

=cut


=head2 add_option($option)
    
    $mead->add_option("-s -p 5");

Adds $option to the executed command. These are concatenated together and
added to the MEAD command. The wrapper expects a summary from MEAD, so 
options like "-centroid" shouldn't be used.

=cut


=head2 clear_options()

    $mead->clear_options();

Clears the command line options.

=cut


=head2 run_mead()

    @summary = $mead->run_mead();

Returns a summary in the form of an array of string sentences.

=cut

=head2 get_dids()

    @did_list = $mead->get_dids();

Returns an array of the document IDs in this Mead cluster, taken from the
names of the .docsent files in the cluster's docsent directory. This method
will die if no such files are found.

=cut

=head2 get_feature_names()

    @fnames = $mead->get_feature_names();

Returns an array containing all of the features that have been computed. This 
method will die if called when Mead has not yet been run.

=cut

=head2 get_feature($fname)

    my %feature = $mead->get_feature("Centroid");
    my $did = "fed1.txt";
    my $sno = 2;
    my $score = $feature{$did}->[$sno]; # Returns the score of the 3rd sent.

Returns the scores of a given feature for each document in the cluster. The
result is a hashmap mapping document IDs to array references. Each array 
contains the scores of the sentences in order. This method will die if called 
when Mead has not yet been run.

=head2 get_extract

    my @extract = $mead->get_extract();
    foreach my $i (0 .. $#extract) {

        my $rank = $i + 1;
        my ($did, $sno, $rsnt, $par, $text, $feats) = (
            $extract[$i]->{DID},
            $extract[$i]->{SNO},
            $extract[$i]->{RSNT},
            $extract[$i]->{PAR},
            $extract[$i]->{TEXT},
            $extract[$i]->{FEATURES},
        );

        print "Sentence $rank ($did.$sno): $text\n";
        foreach my $fname (keys %$feats) {
            print "\t$fname => $feats->{$fname}\n";
        }

    }

Returns a list of sentences returned by MEAD. The sentences are ordered by
rank. Each sentence is a hashref. Each hashref will map DID, SNO, RSNT, PAR,
TEXT, and FEATURES to their respective values. FEATURES is itself a hashref
mapping feature names to values. This method is an alternative to run_mead().


=head2 get_summary

    my $summary = $mead->get_summary();
    print $summary->to_string . "\n";

Returns a Clair::MEAD::Summary object.

=cut

# A module file must end with a true value so that "use"/"require" succeed.
1;
