#!/usr/local/bin/perl

###############################################################################
# Program     : GetExpression
# Author      : Eric Deutsch <edeutsch@systemsbiology.org>
# $Id: GetExpression 3243 2005-03-21 09:02:00Z edeutsch $
#
# Description : This program allows users to
#               access expression ratios for one or more conditions
#
# SBEAMS is Copyright (C) 2000-2005 Institute for Systems Biology
# This program is governed by the terms of the GNU General Public License (GPL)
# version 2 as published by the Free Software Foundation.  It is provided
# WITHOUT ANY WARRANTY.  See the full description of GPL terms in the
# LICENSE file distributed with this software.
#
###############################################################################


###############################################################################
# Set up all needed modules and objects
###############################################################################
use strict;
use Getopt::Long;
use FindBin;

use lib "$FindBin::Bin/../../lib/perl";
use vars qw ($sbeams $sbeamsMOD $q $current_contact_id $current_username
             $PROG_NAME $USAGE %OPTIONS $QUIET $VERBOSE $DEBUG $DATABASE
             $TABLE_NAME $PROGRAM_FILE_NAME $CATEGORY $DB_TABLE_NAME
             @MENU_OPTIONS);

use SBEAMS::Connection qw($q);
use SBEAMS::Connection::Settings;
use SBEAMS::Connection::Tables;

use SBEAMS::Inkjet;
use SBEAMS::Inkjet::Settings;
use SBEAMS::Inkjet::Tables;

#### Create the core SBEAMS connection object and the Inkjet module object,
#### then tell each about the other.  Explicit Class->new() is used instead
#### of the indirect-object form ("new Class"), whose parse depends on what
#### subroutines happen to be in scope.
$sbeams = SBEAMS::Connection->new();
$sbeamsMOD = SBEAMS::Inkjet->new();
$sbeamsMOD->setSBEAMS($sbeams);
$sbeams->setSBEAMS_SUBDIR($SBEAMS_SUBDIR);


#use CGI;
#$q = new CGI;


###############################################################################
# Set program name and usage banner for command line use
###############################################################################
$PROG_NAME = $FindBin::Script;
$USAGE = <<EOU;
Usage: $PROG_NAME [OPTIONS] key=value key=value ...
Options:
  --verbose n         Set verbosity level.  default is 0
  --quiet             Set flag to print nothing at all except errors
  --debug n           Set debug flag

 e.g.:  $PROG_NAME [OPTIONS] [keyword=value],...

EOU

#### Parse the command-line switches into %OPTIONS; if they cannot be
#### parsed, show the usage banner and stop
if (!GetOptions(\%OPTIONS, "verbose:s", "quiet", "debug:s")) {
  print $USAGE;
  exit;
}

#### Copy the switches into the globals; a missing or false switch becomes 0
$VERBOSE = $OPTIONS{verbose} ? $OPTIONS{verbose} : 0;
$QUIET   = $OPTIONS{quiet}   ? $OPTIONS{quiet}   : 0;
$DEBUG   = $OPTIONS{debug}   ? $OPTIONS{debug}   : 0;

#### When debugging, echo the effective settings
if ($DEBUG) {
  print "Options settings:\n",
        "  VERBOSE = $VERBOSE\n",
        "  QUIET = $QUIET\n",
        "  DEBUG = $DEBUG\n";
}


###############################################################################
# Run the main program and exit with a success status
###############################################################################
main();
exit(0);


###############################################################################
# Main Program:
#
# Authenticate through $sbeams->Authenticate() (exiting when no username
# comes back), read the input parameters, and then print the page header,
# the request output and the page footer.
###############################################################################
sub main {

  #### Authenticate against the permitted work groups; stop right here
  #### if no username is returned
  $current_username = $sbeams->Authenticate(
    permitted_work_groups_ref=>['Inkjet_user','Inkjet_admin','Admin'],
    #connect_read_only=>1,
    #allow_anonymous_access=>1,
  );
  exit unless ($current_username);


  #### Collect the default input parameters into a local hash
  my %parameters;
  my $param_count = $sbeams->parse_input_parameters(
    q=>$q,
    parameters_ref=>\%parameters,
  );
  #$sbeams->printDebuggingInfo($q);


  #### Let SBEAMS act on the generic "state" parameters first
  $sbeams->processStandardParameters(parameters_ref=>\%parameters);


  #### "???" is a placeholder for a future special action; there is nothing
  #### to do for it yet, so every other request gets the normal page
  my $is_placeholder_action =
    (defined($parameters{action}) && $parameters{action} eq "???");

  unless ($is_placeholder_action) {
    $sbeamsMOD->printPageHeader();
    handle_request(ref_parameters=>\%parameters);
    $sbeamsMOD->printPageFooter();
  }


} # end main



###############################################################################
# Handle Request
#
# Display the query input form, turn the submitted constraints into an SQL
# statement and, when the action contains QUERY or is VIEWRESULTSET, fetch
# (or recall) the resultset and display it with its controls and plot.
#
# Arguments (named):
#   ref_parameters   reference to the hash of input parameters.  Required;
#                    the sub dies if it is not passed.  A shallow copy of the
#                    hash is made, so top-level changes made here are not
#                    written back to the caller's hash.
#
# Returns early (after the form has been displayed) when any constraint or
# the sort order cannot be parsed.
###############################################################################
sub handle_request {
  my %args = @_;


  #### Process the arguments list
  my $ref_parameters = $args{'ref_parameters'}
    || die "ref_parameters not passed";
  my %parameters = %{$ref_parameters};


  #### Define some variables for a query and resultset
  my $sql;
  my %resultset = ();
  my $resultset_ref = \%resultset;
  my (%url_cols,%hidden_cols,%max_widths,$show_sql);


  #### Read in the standard form values
  my $apply_action=$parameters{'action'} || $parameters{'apply_action'} || '';
  my $TABLE_NAME = $parameters{'QUERY_NAME'};


  #### Set some specific settings for this program
  my $CATEGORY="Get Expression Values";
  $TABLE_NAME="IJ_GetExpression" unless ($TABLE_NAME);
  ($PROGRAM_FILE_NAME) =
    $sbeamsMOD->returnTableInfo($TABLE_NAME,"PROGRAM_FILE_NAME");
  my $base_url = "$CGI_BASE_DIR/$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME";


  #### Get the columns and input types for this table/query
  my @columns = $sbeamsMOD->returnTableInfo($TABLE_NAME,"ordered_columns");
  my %input_types =
    $sbeamsMOD->returnTableInfo($TABLE_NAME,"input_types");


  #### Read the input parameters for each column
  my $n_params_found = $sbeams->parse_input_parameters(
    q=>$q,parameters_ref=>\%parameters,
    columns_ref=>\@columns,input_types_ref=>\%input_types);


  #### If the apply action was to recall a previous resultset, do it
  my %rs_params = $sbeams->parseResultSetParams(q=>$q);
  if ($apply_action eq "VIEWRESULTSET") {
    $sbeams->readResultSet(
      resultset_file=>$rs_params{set_name},
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters,
      resultset_params_ref=>\%rs_params,
    );
    #### Any true value works here: it only stops the "no parameters"
    #### defaults below from being applied
    $n_params_found = 99;
  }


  #### Set some reasonable defaults if no parameters supplied
  unless ($n_params_found) {
    $parameters{input_form_format} = "minimum_detail";
  }


  #### The display options are tested with several pattern matches below;
  #### use an empty string when none were supplied
  my $display_options = defined($parameters{display_options})
    ? $parameters{display_options} : '';


  #### Display the user-interaction input form
  $sbeams->display_input_form(
    TABLE_NAME=>$TABLE_NAME,CATEGORY=>$CATEGORY,apply_action=>$apply_action,
    PROGRAM_FILE_NAME=>$PROGRAM_FILE_NAME,
    parameters_ref=>\%parameters,
    input_types_ref=>\%input_types,
  );


  #### Display the form action buttons
  $sbeams->display_form_buttons(TABLE_NAME=>$TABLE_NAME);


  #### Finish the upper part of the page and go begin the full-width
  #### data portion of the page
  $sbeams->display_page_footer(close_tables=>'YES',
    separator_bar=>'YES',display_footer=>'NO');



  #########################################################################
  #### Process all the constraints
  #### Each parseConstraint2SQL() result is compared with '-1', which this
  #### code treats as "could not parse" and answers by returning at once.
  #### NOTE(review): presumably the helper has already reported the problem
  #### to the user in that case -- confirm against SBEAMS::Connection.

  #### Build CONDITION constraint
  my $condition_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"C.condition_id",
    constraint_type=>"int_list",
    constraint_name=>"Condition List",
    constraint_value=>$parameters{condition_id} );
  return if ($condition_clause eq '-1');


  #### Build GENE NAME constraint
  my $gene_name_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"GE.gene_name",
    constraint_type=>"plain_text",
    constraint_name=>"Gene Name",
    constraint_value=>$parameters{gene_name_constraint} );
  return if ($gene_name_clause eq '-1');


  #### Build SECOND NAME constraint
  my $second_name_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"GE.second_name",
    constraint_type=>"plain_text",
    constraint_name=>"Second Name",
    constraint_value=>$parameters{second_name_constraint} );
  return if ($second_name_clause eq '-1');


  #### Build BIOSEQUENCE NAME constraint
  my $biosequence_name_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"BS.biosequence_name",
    constraint_type=>"plain_text",
    constraint_name=>"Biosequence Name",
    constraint_value=>$parameters{biosequence_name_constraint} );
  return if ($biosequence_name_clause eq '-1');


  #### Build BIOSEQUENCE DESCRIPTION constraint
  my $description_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"BS.biosequence_desc",
    constraint_type=>"plain_text",
    constraint_name=>"Biosequence Description",
    constraint_value=>$parameters{description_constraint} );
  return if ($description_clause eq '-1');


  #### Build LOG10 RATIO constraint
  my $log10_ratio_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"GE.log10_ratio",
    constraint_type=>"flexible_float",
    constraint_name=>"log10 Ratio",
    constraint_value=>$parameters{log10_ratio_constraint} );
  return if ($log10_ratio_clause eq '-1');


  #### Build P VALUE constraint
  my $p_value_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"GE.p_value",
    constraint_type=>"flexible_float",
    constraint_name=>"P Value",
    constraint_value=>$parameters{p_value_constraint} );
  return if ($p_value_clause eq '-1');


  #### Build Lambda constraint
  my $lambda_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"GE.lambda",
    constraint_type=>"flexible_float",
    constraint_name=>"Lambda",
    constraint_value=>$parameters{lambda_constraint} );
  return if ($lambda_clause eq '-1');


  #### Build SORT ORDER
  #### The value is placed into the SQL text as is, so refuse anything that
  #### contains an SQL keyword, a statement separator or a comment marker
  my $order_by_clause = "";
  if ($parameters{sort_order}) {
    if ($parameters{sort_order} =~
        /SELECT|TRUNCATE|DROP|DELETE|FROM|GRANT|INSERT|UPDATE|EXEC|UNION|;|--|\/\*/i) {
      print "<H4>Cannot parse Sort Order!  Check syntax.</H4>\n\n";
      return;
    } else {
      $order_by_clause = " ORDER BY $parameters{sort_order}";
    }
  }


  #### Build ROWCOUNT constraint
  #### Only a plain integer between 1 and 1000000 is accepted; anything
  #### else (including values with trailing text such as "10; ...", which a
  #### purely numeric comparison would let through) falls back to 10000
  my $requested_row_limit = defined($parameters{row_limit})
    ? $parameters{row_limit} : '';
  if ($requested_row_limit =~ /^\s*(\d+)\s*$/) {
    my $row_limit_digits = $1;
    $parameters{row_limit} =
      ($row_limit_digits > 0 && $row_limit_digits <= 1000000)
        ? $row_limit_digits + 0 : 10000;
  } else {
    $parameters{row_limit} = 10000;
  }
  my $limit_clause = $sbeams->buildLimitClause(
   row_limit=>$parameters{row_limit});


  #### Define some variables needed to build the query
  my $group_by_clause = "";


  #### Get some information about the conditions involved
  my %condition_names;
  %condition_names = getConditionNames($parameters{condition_id})
    if ($parameters{condition_id});


  #### Define the available data columns
  #### key => [friendly name, SQL expression, displayed column title]
  my %available_columns = (
    "GE.log10_ratio"=>["log10_ratio","GE.log10_ratio","log10 Ratio"],
    "GE.log10_uncertainty"=>["log10_uncertainty","GE.log10_uncertainty","log10 Uncertainty"],
    "GE.log10_std_deviation"=>["log10_std_deviation","GE.log10_std_deviation","log10 Std Dev"],
    "GE.p_value"=>["p_value","GE.p_value","P Value"],
    "GE.lambda"=>["lambda","GE.lambda","Lambda"],
    "GE.mu_x"=>["mu_x","GE.mu_x","mu_x"],
    "GE.mu_y"=>["mu_y","GE.mu_y","mu_y"],
    "GE.mean_intensity"=>["mean_intensity","GE.mean_intensity","Mean Level"],
    "GE.quality_flag"=>["quality_flag","GE.quality_flag","Quality Flag"],
  );


  #### If the user does not choose which data columns to show, set defaults
  my @additional_columns = ();
  my $display_columns = $parameters{display_columns};
  unless (defined($parameters{display_columns}) &&
          $parameters{display_columns}) {
    #### If this is a pivoted query, just choose four interesting columns
    if ($display_options =~ /PivotConditions/) {
      $display_columns = "GE.log10_ratio,GE.lambda,GE.mu_x,GE.mu_y";
    #### Else, select them all
    } else {
      $display_columns = "GE.log10_ratio,GE.log10_uncertainty,".
        "GE.log10_std_deviation,GE.p_value,".
        "GE.lambda,GE.mu_x,GE.mu_y,GE.mean_intensity,GE.quality_flag";
    }
  }


  #### Add the desired display columns to the additional_columns array
  my @requested_columns = split(",",$display_columns);

  #### If this is a pivot query, design the aggregate data columns:
  #### one MAX(CASE ...) column per (condition, data column) pair
  if ($display_options =~ /PivotConditions/) {
    my $condition_id_list = defined($parameters{condition_id})
      ? $parameters{condition_id} : '';

    foreach my $raw_id (split(/,/,$condition_id_list)) {

      #### Trim the id so that "1, 2" still finds its condition name, and
      #### skip anything that is not a plain integer because the id is
      #### written directly into the SQL below
      (my $id = $raw_id) =~ s/^\s+|\s+$//g;
      next unless ($id =~ /^\d+$/);

      #### Fall back to a distinct label when the condition name is unknown
      #### so that the generated column names do not collide
      my $condition_name = defined($condition_names{$id})
        ? $condition_names{$id} : "condition_$id";

      foreach my $option (@requested_columns) {
        next unless (defined($available_columns{$option}));

        #### Work on a copy so that %available_columns stays untouched
        my @elements = @{$available_columns{$option}};
        $elements[0] = $condition_name.'__'.$elements[0];
        $elements[1] = "MAX(CASE WHEN GE.condition_id = $id ".
          "THEN $elements[1] ELSE NULL END)";
        $elements[2] = $condition_name.' '.$elements[2];
        push(@additional_columns,\@elements);
      }
    }

    $group_by_clause = "   GROUP BY BS.biosequence_name,".
      "GE.reporter_name,GE.common_name,GE.canonical_name,".
      "GE.external_identifier,".
      "GE.gene_name,GE.second_name,GE.full_name";


  #### Else, no pivot, just use the columns as is
  } else {
    foreach my $option (@requested_columns) {
      if (defined($available_columns{$option})) {
        push(@additional_columns,$available_columns{$option});
      }
    }
  }


  #### Define the desired columns in the query
  #### [friendly name used in url_cols,SQL,displayed column title]
  my @column_array = (
    ["condition_name","C.condition_name","Condition"],
    ["biosequence_name","BS.biosequence_name","Biosequence Name"],
    ["reporter_name","GE.reporter_name","Reporter Name"],
    ["common_name","GE.common_name","Common Name"],
    ["canonical_name","GE.canonical_name","Canonical Name"],
    ["external_identifier","GE.external_identifier","External Identifier"],
    ["gene_name","GE.gene_name","Gene Name"],
    ["second_name","GE.second_name","Other Name"],
    @additional_columns,
    ["full_name","GE.full_name","Full Name"],
  );


  #### Hack to remove first column if GROUPing
  #### (C.condition_name is not part of the GROUP BY list)
  shift(@column_array) if ($display_options =~ /PivotConditions/);


  #### Adjust the columns definition based on user-selected options
  if ( $display_options =~ /BSDesc/ ) {
    push(@column_array,
      ["biosequence_desc","BS.biosequence_desc","Biosequence Description"],
    );
    $group_by_clause .= ",BS.biosequence_desc" if ($group_by_clause);
  }


  #### Set the show_sql flag if the user requested
  if ( $display_options =~ /ShowSQL/ ) {
    $show_sql = 1;
  }


  #### Build the columns part of the SQL statement
  my %colnameidx = ();
  my @column_titles = ();
  my $columns_clause = $sbeams->build_SQL_columns_list(
    column_array_ref=>\@column_array,
    colnameidx_ref=>\%colnameidx,
    column_titles_ref=>\@column_titles
  );


  #### In some cases, we need to have a subselect clause
  my $subselect_clause = '';
  if ( $display_options =~ /AllConditions/ ) {
    $subselect_clause = qq~
          AND GE.gene_name IN (
              SELECT DISTINCT GE.gene_name
 	        FROM $TBIJ_CONDITION C
	       INNER JOIN $TBIJ_GENE_EXPRESSION GE
                     ON ( C.condition_id = GE.condition_id )
	        LEFT JOIN $TBIJ_BIOSEQUENCE BS
                     ON ( GE.biosequence_id = BS.biosequence_id )
	       WHERE 1 = 1
    	      $condition_clause
	      $gene_name_clause
  	      $biosequence_name_clause
              $description_clause
   	      $second_name_clause
	      $log10_ratio_clause
	      $p_value_clause
	      $lambda_clause
             )
    ~;
    #### Remove constraints that might limit conditions: the value
    #### constraints now live only inside the subselect above
    $log10_ratio_clause = '';
    $p_value_clause = '';
    $lambda_clause = '';
  }


  #### Define the SQL statement
  $sql = qq~
	SELECT $limit_clause->{top_clause} $columns_clause
	  FROM $TBIJ_CONDITION C
	 INNER JOIN $TBIJ_GENE_EXPRESSION GE
               ON ( C.condition_id = GE.condition_id )
	  LEFT JOIN $TBIJ_BIOSEQUENCE BS
               ON ( GE.biosequence_id = BS.biosequence_id )
	 WHERE 1 = 1
	$condition_clause
	$gene_name_clause
	$second_name_clause
	$biosequence_name_clause
        $description_clause
	$log10_ratio_clause
        $p_value_clause
	$lambda_clause

        $subselect_clause

	$group_by_clause
	$order_by_clause
	$limit_clause->{trailing_limit_clause}
       ~;


  #### Define the hypertext links for columns that need them
  %url_cols = (
  );


  #### Define columns that should be hidden in the output table
  %hidden_cols = ('xxx' => 1,
  );


  #########################################################################
  #### If QUERY or VIEWRESULTSET was selected, display the data
  if ($apply_action =~ /QUERY/i || $apply_action eq "VIEWRESULTSET") {

    #### If the action contained QUERY, then fetch the results from
    #### the database
    if ($apply_action =~ /QUERY/i) {

      #### Show the SQL that will be or was executed
      $sbeams->display_sql(sql=>$sql) if ($show_sql);

      #### Fetch the results from the database server
      $sbeams->fetchResultSet(
        sql_query=>$sql,
        resultset_ref=>$resultset_ref,
      );

      #### Store the resultset and parameters to disk resultset cache
      #### NOTE(review): set_name is passed by reference, presumably so
      #### that writeResultSet can replace the "SETME" placeholder -- confirm
      $rs_params{set_name} = "SETME";
      $sbeams->writeResultSet(
        resultset_file_ref=>\$rs_params{set_name},
        resultset_ref=>$resultset_ref,
        query_parameters_ref=>\%parameters,
        resultset_params_ref=>\%rs_params,
        query_name=>"$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME",
      );
    }

    #### Display the resultset
    $sbeams->displayResultSet(
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters,
      rs_params_ref=>\%rs_params,
      url_cols_ref=>\%url_cols,
      hidden_cols_ref=>\%hidden_cols,
      max_widths=>\%max_widths,
      column_titles_ref=>\@column_titles,
      base_url=>$base_url,
    );


    #### Display the resultset controls
    $sbeams->displayResultSetControls(
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters,
      rs_params_ref=>\%rs_params,
      base_url=>$base_url,
    );


    #### Display a plot of data from the resultset
    $sbeams->displayResultSetPlot(
      rs_params_ref=>\%rs_params,
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters,
      column_titles_ref=>\@column_titles,
      base_url=>$base_url,
    );


  #### If QUERY was not selected, then tell the user to enter some parameters
  } else {
    if ($sbeams->invocation_mode() eq 'http') {
      print "<H4>Select parameters above and press QUERY</H4>\n";
    } else {
      print "You need to supply some parameters to contrain the query\n";
    }
  }


} # end handle_request



###############################################################################
# evalSQL
#
# Callback for translating Perl variables into their values,
# especially the global table variables to table names
#
# Argument: the SQL text (positional).  It is evaluated as the body of a
#           double-quoted Perl string, so variables and backslash escapes
#           in it are expanded.
# Returns:  the interpolated text; an empty string when the argument is
#           undefined; undef (empty list in list context) when the text
#           cannot be evaluated, in which case a warning is issued.
#
# NOTE(review): this is a string eval.  Text such as "@{[ ... ]}" runs
# arbitrary Perl code, so only SQL written by the program itself should
# ever be passed in -- never text taken from user input.
###############################################################################
sub evalSQL {
  my $sql = shift;

  #### Nothing to interpolate; an empty string is what the eval would yield
  return '' unless (defined($sql));

  #### Escape bare double quotes so they cannot end the string literal
  #### built below.  Existing backslash pairs (including \") are matched
  #### first and passed through unchanged.
  (my $escaped_sql = $sql) =~ s/(\\.)|"/defined($1) ? $1 : '\\"'/gse;

  my $interpolated_sql = eval "\"$escaped_sql\"";
  if ($@) {
    warn "evalSQL: unable to interpolate SQL: $@";
    return;
  }

  return $interpolated_sql;

} # end evalSQL


###############################################################################
# getConditionNames: return a hash of the conditions
#         names of the supplied list of id's.
#         This might need to be more complicated if condition names
#         are duplicated under different projects or such.
#
# Argument: a comma-separated list of condition ids (positional).
#           Whitespace around an id and empty list items are ignored.
# Returns:  a hash built by selectTwoColumnHash() from the
#           condition_id,condition_name rows of the condition table.
# Dies:     when the list is missing or empty, or when an item is not a
#           plain integer (the ids are written directly into the SQL).
###############################################################################
sub getConditionNames {
  my $condition_ids = shift;
  die "getConditionNames: missing condition_ids"
    unless (defined($condition_ids) && length($condition_ids));

  #### Validate every id before it goes into the SQL text
  my @condition_ids;
  foreach my $raw_id (split(/,/,$condition_ids)) {
    (my $id = $raw_id) =~ s/^\s+|\s+$//g;
    next unless (length($id));
    die "getConditionNames: invalid condition_id '$id'"
      unless ($id =~ /^\d+$/);
    push(@condition_ids,$id);
  }
  die "getConditionNames: missing condition_ids" unless (@condition_ids);

  #### Get the data for all the specified condition_ids
  my $id_list = join(",",@condition_ids);
  my $sql = qq~
      SELECT condition_id,condition_name
        FROM $TBIJ_CONDITION
       WHERE condition_id IN ( $id_list )
  ~;
  my %hash = $sbeams->selectTwoColumnHash($sql);

  return %hash;

} # end getConditionNames



