#!/usr/local/bin/perl

###############################################################################
# Program     : GetGenotypes
# Author      : Eric Deutsch <edeutsch@systemsbiology.org>
# $Id: GetGenotypes,v 1.4 2003/09/18 05:51:02 edeutsch Exp $
#
# Description : This CGI program that allows users to
#               browse through genotypes.
#
###############################################################################


###############################################################################
# Set up all needed modules and objects
###############################################################################
use strict;
use Getopt::Long;
use FindBin;

use lib "$FindBin::Bin/../../lib/perl";
use vars qw ($sbeams $sbeamsMOD $q $current_contact_id $current_username
             $PROG_NAME $USAGE %OPTIONS $QUIET $VERBOSE $DEBUG $DATABASE
             $TABLE_NAME $PROGRAM_FILE_NAME $CATEGORY $DB_TABLE_NAME
             @MENU_OPTIONS);

use SBEAMS::Connection;
use SBEAMS::Connection::Settings;
use SBEAMS::Connection::Tables;

use SBEAMS::SNP;
use SBEAMS::SNP::Settings;
use SBEAMS::SNP::Tables;

$sbeams = new SBEAMS::Connection;
$sbeamsMOD = new SBEAMS::SNP;
$sbeamsMOD->setSBEAMS($sbeams);
$sbeams->setSBEAMS_SUBDIR($SBEAMS_SUBDIR);


use CGI;
use CGI::Carp qw(fatalsToBrowser croak);
$q = new CGI;


###############################################################################
# Set program name and usage banner for command like use
###############################################################################
$PROG_NAME = $FindBin::Script;
$USAGE = <<EOU;
Usage: $PROG_NAME [OPTIONS] key=value key=value ...
Options:
  --verbose n         Set verbosity level.  default is 0
  --quiet             Set flag to print nothing at all except errors
  --debug n           Set debug flag

 e.g.:  $PROG_NAME [OPTIONS] [keyword=value],...

EOU

#### Process options
unless (GetOptions(\%OPTIONS,"verbose:s","quiet","debug:s")) {
  print "$USAGE";
  exit;
}

$VERBOSE = $OPTIONS{"verbose"} || 0;
$QUIET = $OPTIONS{"quiet"} || 0;
$DEBUG = $OPTIONS{"debug"} || 0;
if ($DEBUG) {
  print "Options settings:\n";
  print "  VERBOSE = $VERBOSE\n";
  print "  QUIET = $QUIET\n";
  print "  DEBUG = $DEBUG\n";
}


###############################################################################
# Set Global Variables and execute main()
###############################################################################
main();
exit(0);



###############################################################################
# Main Program:
#
# Call $sbeams->Authenticate() and exit if it fails or continue if it works.
###############################################################################
sub main {

  #### Do the SBEAMS authentication and exit if a username is not returned
  exit unless ($current_username = $sbeams->Authenticate(
    permitted_work_groups_ref=>['SNP','Admin'],
    #connect_read_only=>1,
    #allow_anonymous_access=>1,
  ));


  #### Read in the default input parameters
  my %parameters;
  my $n_params_found = $sbeams->parse_input_parameters(
    q=>$q,parameters_ref=>\%parameters);
  #$sbeams->printDebuggingInfo($q);


  #### Decide what action to take based on information so far
  if ($parameters{action} eq "???") {
    # Some action
  } else {
    $sbeamsMOD->display_page_header();
    handle_request(ref_parameters=>\%parameters);
    $sbeamsMOD->display_page_footer();
  }


} # end main



###############################################################################
# Handle Request
###############################################################################
sub handle_request {
  my %args = @_;


  #### Process the arguments list
  my $ref_parameters = $args{'ref_parameters'}
    || die "ref_parameters not passed";
  my %parameters = %{$ref_parameters};


  #### Define some generic varibles
  my ($i,$element,$key,$value,$line,$result,$sql);


  #### Define some variables for a query and resultset
  my %resultset = ();
  my $resultset_ref = \%resultset;
  my (%url_cols,%hidden_cols,%max_widths,$show_sql);


  #### Read in the standard form values
  my $apply_action  = $parameters{'action'} || $parameters{'apply_action'};
  my $TABLE_NAME = $parameters{'QUERY_NAME'};


  #### Set some specific settings for this program
  my $CATEGORY="Get Genotypes";
  $TABLE_NAME="SN_GetGenotypes" unless ($TABLE_NAME);
  ($PROGRAM_FILE_NAME) =
    $sbeamsMOD->returnTableInfo($TABLE_NAME,"PROGRAM_FILE_NAME");
  my $base_url = "$CGI_BASE_DIR/$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME";


  #### Get the columns and input types for this table/query
  my @columns = $sbeamsMOD->returnTableInfo($TABLE_NAME,"ordered_columns");
  my %input_types = 
    $sbeamsMOD->returnTableInfo($TABLE_NAME,"input_types");


  #### Read the input parameters for each column
  my $n_params_found = $sbeams->parse_input_parameters(
    q=>$q,parameters_ref=>\%parameters,
    columns_ref=>\@columns,input_types_ref=>\%input_types);


  #### If the apply action was to recall a previous resultset, do it
  my %rs_params = $sbeams->parseResultSetParams(q=>$q);
  if ($apply_action eq "VIEWRESULTSET") {
    $sbeams->readResultSet(resultset_file=>$rs_params{set_name},
        resultset_ref=>$resultset_ref,query_parameters_ref=>\%parameters);
    $n_params_found = 99;
  }


  #### Set some reasonable defaults if no parameters supplied
  unless ($n_params_found) {
  }


  #### Apply any parameter adjustment logic
  #none


  #### Display the user-interaction input form
  $sbeams->display_input_form(
    TABLE_NAME=>$TABLE_NAME,CATEGORY=>$CATEGORY,apply_action=>$apply_action,
    PROGRAM_FILE_NAME=>$PROGRAM_FILE_NAME,
    parameters_ref=>\%parameters,
    input_types_ref=>\%input_types,
  );


  #### Display the form action buttons
  $sbeams->display_form_buttons(TABLE_NAME=>$TABLE_NAME);


  #### Finish the upper part of the page and go begin the full-width
  #### data portion of the page
  $sbeams->display_page_footer(close_tables=>'YES',
    separator_bar=>'YES',display_footer=>'NO');




  #########################################################################
  #### Process all the constraints

  #### Build PROJECT_ID constraint
  my $project_id_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"ERV.project_id",
    constraint_type=>"text_list",
    constraint_name=>"Project ID",
    constraint_value=>$parameters{project_id} );
  return if ($project_id_clause == -1);


  #### Build SNP_ACCESSION constraint
  my $assay_id_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"ERV.assay_id",
    constraint_type=>"plain_text",
    constraint_name=>"Assay ID",
    constraint_value=>$parameters{assay_id_constraint} );
  return if ($assay_id_clause == -1);


  #### Build CALL QUALITY constraint
  my $call_quality_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"ERV.description",
    constraint_type=>"text_list",
    constraint_name=>"Call Quality",
    constraint_value=>$parameters{description} );
  return if ($call_quality_clause == -1);



  #### Build ROWCOUNT constraint
  $parameters{row_limit} = 50000
    unless ($parameters{row_limit} > 0);
  my $limit_clause = "TOP $parameters{row_limit}";
  #### Disable LIMIT clause
  $limit_clause = "";


  #### Define the desired columns in the query
  #### [friendly name used in url_cols,SQL,displayed column title]
  my @column_array = (
    ["project_id","ERV.project_id","Project ID"],
    ["assay_id","ERV.assay_id","Assay Name"],
    ["sample_id","ERV.sample_id","Sample Name"],
    ["genotype_id","ERV.genotype_id","Genotype"],
  );


  #### Set flag to display SQL statement if user selected
  if ( $parameters{display_options} =~ /ShowSQL/ ) {
    $show_sql = 1;
  }


  #### Build the columns part of the SQL statement
  my %colnameidx = ();
  my @column_titles = ();
  my $columns_clause = $sbeams->build_SQL_columns_list(
    column_array_ref=>\@column_array,
    colnameidx_ref=>\%colnameidx,
    column_titles_ref=>\@column_titles
  );


  #### Define the SQL statement
  $sql = qq~
SELECT $limit_clause $columns_clause
  FROM SNP.dbo.export_results_view ERV
 WHERE 1=1
$project_id_clause
$assay_id_clause
$call_quality_clause
   AND genotype_id IS NOT NULL
 ORDER BY project_id,assay_id,sample_id,call_date
  ~;


  #### Certain types of actions should be passed to links
  my $pass_action = "QUERY";
  $pass_action = $apply_action if ($apply_action =~ /QUERY/i);


  #### Define the hypertext links for columns that need them
  %url_cols = ('set_tag' => "$CGI_BASE_DIR/SNP/ManageTable.cgi?TABLE_NAME=biosequence_set&biosequence_set_id=\%$colnameidx{biosequence_set_id}V",
               'accession' => "\%$colnameidx{uri}V\%$colnameidx{accesssion}V",
  );


  #### Define columns that should be hidden in the output table
  %hidden_cols = ('biosequence_set_id' => 1,
                  'uri' => 1,
  );



  #########################################################################
  #### If QUERY or VIEWRESULTSET was selected, display the data
  if ($apply_action =~ /QUERY/i || $apply_action eq "VIEWRESULTSET") {

    #### If the action contained QUERY, then fetch the results from
    #### the database
    if ($apply_action =~ /QUERY/i) {

      #### Show the SQL that will be or was executed
      $sbeams->display_sql(sql=>$sql) if ($show_sql);

      #### Fetch the results from the database server
      $sbeams->fetchResultSet(sql_query=>$sql,
        resultset_ref=>$resultset_ref);


      #### Post process the resultset
      postProcessResultset(rs_params_ref=>\%rs_params,
        resultset_ref=>$resultset_ref,query_parameters_ref=>\%parameters,
      );

      #### Store the resultset and parameters to disk resultset cache
      $rs_params{set_name} = "SETME";
      $sbeams->writeResultSet(resultset_file_ref=>\$rs_params{set_name},
        resultset_ref=>$resultset_ref,query_parameters_ref=>\%parameters);
    }

    #### Display the resultset
    @column_titles = @{$resultset_ref->{column_list_ref}};
    $sbeams->displayResultSet(rs_params_ref=>\%rs_params,
        url_cols_ref=>\%url_cols,hidden_cols_ref=>\%hidden_cols,
        max_widths=>\%max_widths,resultset_ref=>$resultset_ref,
        column_titles_ref=>\@column_titles,
        base_url=>$base_url,query_parameters_ref=>\%parameters,
    );


    #### Display the resultset controls
    $sbeams->displayResultSetControls(rs_params_ref=>\%rs_params,
        resultset_ref=>$resultset_ref,query_parameters_ref=>\%parameters,
        base_url=>$base_url);


  #### If QUERY was not selected, then tell the user to enter some parameters
  } else {
    if ($sbeams->invocation_mode() eq 'http') {
      print "<H4>Select parameters above and press QUERY</H4>\n";
    } else {
      print "You need to supply some parameters to contrain the query\n";
    }
  }


} # end handle_request



###############################################################################
# evalSQL
#
# Callback for translating Perl variables into their values,
# especially the global table variables to table names
###############################################################################
sub evalSQL {
  my $sql = shift;

  return eval "\"$sql\"";

} # end evalSQL


###############################################################################
# postProcessResultset
#
# Perform some additional processing on the resultset that would otherwise
# be very awkward to do in SQL.
###############################################################################
sub postProcessResultset {
  my %args = @_;

  my ($i,$element,$key,$value,$line,$result,$sql);

  #### Process the arguments list
  my $resultset_ref = $args{'resultset_ref'};
  my $rs_params_ref = $args{'rs_params_ref'};
  my $query_parameters_ref = $args{'query_parameters_ref'};

  my %rs_params = %{$rs_params_ref};
  my %parameters = %{$query_parameters_ref};


  #### Get the predicted locations of the rank list file
  my $order_file = "/users/kdeutsch/examples/oracle/picked-sorted.tsv:chr10zzz";


  #### Read in order file and create hash out of its contents
  my %snphash;
  if (open(ASSAYFILE,"$order_file")) {
    while (<ASSAYFILE>) {
      chomp;
      next if ($_ =~ /pos/);
      my ($snp_id,$snp_pos) = split("\t",$_);
      $snphash{$snp_pos}=$snp_id;
    }
  }


  #### Get the column indices
  my $project_id_index = $resultset_ref->{column_hash_ref}->{project_id};
  my $sample_id_index = $resultset_ref->{column_hash_ref}->{sample_id};
  my $assay_id_index = $resultset_ref->{column_hash_ref}->{assay_id};
  my $genotype_id_index = $resultset_ref->{column_hash_ref}->{genotype_id};

  #### Create a data structure for all values
  my ($project_id,$sample_id,$assay_id,$genotype_id);
  my $data;
  my @query_assays = ();
  my %dup_stats;
  my %samples;

  #### Loop over each row in the resultset
  my $n_rows = scalar(@{$resultset_ref->{data_ref}});
  for (my $row=0;$row<$n_rows-1; $row++) {

    $project_id = $resultset_ref->{data_ref}->[$row]->[$project_id_index];
    $sample_id = $resultset_ref->{data_ref}->[$row]->[$sample_id_index];
    $assay_id = $resultset_ref->{data_ref}->[$row]->[$assay_id_index];
    $genotype_id = $resultset_ref->{data_ref}->[$row]->[$genotype_id_index];
    $samples{$sample_id} = 1;

    $genotype_id =~ s/\./\//;
    $genotype_id="C/C" if ($genotype_id eq "C");
    $genotype_id="A/A" if ($genotype_id eq "A");
    $genotype_id="G/G" if ($genotype_id eq "G");
    $genotype_id="T/T" if ($genotype_id eq "T");

    #### Store the genotype with great care about collisions
    #### Does this one exist already?
    if (exists($data->{$assay_id}->{$sample_id})) {
      #### Is it defined?
      if (defined($data->{$assay_id}->{$sample_id})) {
        #### If is the same?
        if ($data->{$assay_id}->{$sample_id} eq $genotype_id) {
          if ($genotype_id eq '') {
	    $dup_stats{no_replace_empty_with_empty}++;
	  } else {
  	    $dup_stats{no_replace_with_same}++;
	  #print "$sample_id: $genotype_id is same as $data->{$assay_id}->{$sample_id}.... ";
	  }
	} elsif ($data->{$assay_id}->{$sample_id} eq '') {
	  $dup_stats{replace_an_empty}++;
          $data->{$assay_id}->{$sample_id} = $genotype_id;
	} elsif ($genotype_id eq '') {
	  $dup_stats{no_replace_with_empty}++;
	} else {
	  $dup_stats{no_replace_conflict}++;
	  #print "$genotype_id is not $data->{$assay_id}->{$sample_id}.... ";
	}
      } else {
        $dup_stats{replace_an_undef}++;
        $data->{$assay_id}->{$sample_id} = $genotype_id;
      }

    #### Else just add it
    } else {
      $data->{$assay_id}->{$sample_id} = $genotype_id;
    }

    push(@query_assays,$assay_id) unless ($query_assays[-1] eq $assay_id);

  }

  #### Print out some stats
  foreach my $element (keys %dup_stats) {
    print "$element = $dup_stats{$element}<BR>\n";
  }


  #### Write out a gap file
  open(GAPFILE,">/users/kdeutsch/examples/oracle/SBEAMS.gapfile");
  my $ppos = 0;
  my $position;
  my @assays;

  #### If there's a snphash
  if (defined(%snphash)) {
    foreach $position (sort keys %snphash) {
      my $assay = $snphash{$position};
      if (defined($data->{$assay})) {
  	print GAPFILE "\t",($position-$ppos),"\n" if ($ppos);
  	push (@assays,$assay);
  	print GAPFILE "$assay\t$position";
  	$ppos = $position;
      }
    }
    print GAPFILE "\t0\n";

  #### Else write out just the observed snps
  } else {
    foreach my $assay (@query_assays) {
      push(@assays,$assay);
      print GAPFILE "$assay\t0\t0\n";
    }
  }
  close(GAPFILE);



  my $n_rows = scalar(@{$resultset_ref->{data_ref}});
  my @new_data_array;


  #### Create a has to hold allele_frequencies
  my %allele_frequencies;


  #### Create new resultset
  if (0 == 1) {
    foreach my $letter ( 'A','B','C','D','E','F','G' ) {
      for (my $num1 = 1; $num1 <=12; $num1++) {
  	for (my $num2 = 0; $num2 <= 84; $num2+=12) {
  	  my $fac = ($num1 + $num2);
  	  my $sample_name = "$letter$fac";
  	  my @row = ($sample_name);
  	  foreach my $assay (@assays) {
  	    if (defined($data->{$assay}->{$sample_name})) {
  	      push(@row,$data->{$assay}->{$sample_name});

  	      #### Add to the allele counters
  	      my @alleles = split("/",$data->{$assay}->{$sample_name});
  	      foreach my $allele (@alleles) {
  		$allele_frequencies{$assay}->{$allele}++;
  		$allele_frequencies{$assay}->{total}++;
  	      }

  	    } else {
  	      #push(@row,'0/0');
  	      push(@row,'');
  	    }

  	  }

  	  push(@new_data_array,\@row);

  	}

      }

    }

  } else {
    my @samples = sort(keys(%samples));
    foreach my $sample_name (@samples) {
      my @row = ($sample_name);
      foreach my $assay (@assays) {
    	if (defined($data->{$assay}->{$sample_name})) {
    	  push(@row,$data->{$assay}->{$sample_name});

    	  #### Add to the allele counters
    	  my @alleles = split("/",$data->{$assay}->{$sample_name});
    	  foreach my $allele (@alleles) {
    	    $allele_frequencies{$assay}->{$allele}++;
    	    $allele_frequencies{$assay}->{total}++;
    	  }

    	} else {
    	  #push(@row,'0/0');
    	  push(@row,'');
    	}

      }

      push(@new_data_array,\@row);

    }

  }


  #### Add an allele frequency summary at the bottom
  my $n_failed_assays = 0;
  foreach my $allele ('A','T','G','C') {
    my @row = ("$allele frequency");
    foreach my $assay (@assays) {
      if (defined($allele_frequencies{$assay}->{$allele})) {
        my $newval = sprintf("%.2f",$allele_frequencies{$assay}->{$allele} /
                             $allele_frequencies{$assay}->{total});
	if ($newval >= 0.90) {
	  #$failed_assay{$assay} = $newval;
	  print "Assay $assay failed.  Major frequency too high: $newval<BR>\n";
	  $n_failed_assays++;
	}
  	push(@row,$newval);
      } else {
        push(@row,'');
      }
    }
    push(@new_data_array,\@row);
  }
  print "$n_failed_assays total failed assays<BR>\n" if ($n_failed_assays);
  print scalar(@assays)." total assays<BR><BR>\n";


  #### Fudge the assays
  my @fudged_assays;
  foreach my $tmp (@assays) {
    $tmp =~ s/gi//;
    push(@fudged_assays,$tmp);
  }



  $resultset_ref->{data_ref} = \@new_data_array;
  $resultset_ref->{column_list_ref} = ['Sample',@fudged_assays];
  $resultset_ref->{precisions_list_ref} = [ (50) x (scalar(@assays)+1) ];


  #### Print out some debugging information about the returned resultset:
  if (0 == 1) {
    my $HTML = "<br>\n";
    print "<BR><BR>resultset_ref = $resultset_ref$HTML\n";
    while ( ($key,$value) = each %{$resultset_ref} ) {
      printf("%s = %s$HTML\n",$key,$value);
    }
    #print "columnlist = ",
    #  join(" , ",@{$resultset_ref->{column_list_ref}}),"<BR>\n";
    print "nrows = ",scalar(@{$resultset_ref->{data_ref}}),"$HTML\n";
    print "rs_set_name=",$rs_params{set_name},"$HTML\n";
 }

  return 1;



} # end postProcessResult
