#!/usr/local/bin/perl

###############################################################################
# Program     : GetAnnotations
# Author      : Eric Deutsch <edeutsch@systemsbiology.org>
# $Id: GetAnnotations 3628 2005-06-09 21:09:20Z mjohnson $
#
# Description : This CGI program allows users to
#               browse through annotated proteins very simply
#
# SBEAMS is Copyright (C) 2000-2005 Institute for Systems Biology
# This program is governed by the terms of the GNU General Public License (GPL)
# version 2 as published by the Free Software Foundation.  It is provided
# WITHOUT ANY WARRANTY.  See the full description of GPL terms in the
# LICENSE file distributed with this software.
#
###############################################################################


###############################################################################
# Set up all needed modules and objects
###############################################################################
use strict;
use Getopt::Long;
use FindBin;

use lib "$FindBin::Bin/../../lib/perl";
use vars qw ($sbeams $sbeamsMOD $q $current_contact_id $current_username
             $PROG_NAME $USAGE %OPTIONS $QUIET $VERBOSE $DEBUG $DATABASE
             $TABLE_NAME $PROGRAM_FILE_NAME $CATEGORY $DB_TABLE_NAME
             @MENU_OPTIONS);

use SBEAMS::Connection qw($q);
use SBEAMS::Connection::Settings;
use SBEAMS::Connection::Tables;

use SBEAMS::ProteinStructure;
use SBEAMS::ProteinStructure::Settings;
use SBEAMS::ProteinStructure::Tables;

#### Create the SBEAMS connection object and the ProteinStructure module
#### object, then link them together.  The direct Class->new form is used
#### instead of the indirect-object form ("new Class"), which perlobj
#### discourages because of its parsing ambiguities.
$sbeams = SBEAMS::Connection->new;
$sbeamsMOD = SBEAMS::ProteinStructure->new;
$sbeamsMOD->setSBEAMS($sbeams);
$sbeams->setSBEAMS_SUBDIR($SBEAMS_SUBDIR);


#use CGI;
use CGI::Carp qw(fatalsToBrowser croak);
#$q = new CGI;


###############################################################################
# Set program name and usage banner for command line use
###############################################################################
$PROG_NAME = $FindBin::Script;
$USAGE = <<EOU;
Usage: $PROG_NAME [OPTIONS] key=value key=value ...
Options:
  --verbose n         Set verbosity level.  default is 0
  --quiet             Set flag to print nothing at all except errors
  --debug n           Set debug flag

 e.g.:  $PROG_NAME [OPTIONS] [keyword=value],...

EOU

#### Parse the command-line switches; on a bad switch show the usage and stop
if (!GetOptions(\%OPTIONS,"verbose:s","quiet","debug:s")) {
  print($USAGE);
  exit;
}

#### Every setting falls back to 0 when its switch is absent or false
($VERBOSE,$QUIET,$DEBUG) = map { $OPTIONS{$_} || 0 } qw(verbose quiet debug);

#### In debug mode, echo the effective settings
if ($DEBUG) {
  print "Options settings:\n",
        "  VERBOSE = $VERBOSE\n",
        "  QUIET = $QUIET\n",
        "  DEBUG = $DEBUG\n";
}


###############################################################################
# Run main() and then finish with a zero exit status
###############################################################################
main();
exit 0;


###############################################################################
# Main Program:
#
# Authenticate through $sbeams->Authenticate() (exiting when no username comes
# back), read the request parameters, then print the page header, the request
# output and the page footer.
###############################################################################
sub main {

  #### Authenticate against SBEAMS; stop right here if no username is returned
  $current_username = $sbeams->Authenticate(
    permitted_work_groups_ref=>['ProteinStructure_user',
      'ProteinStructure_admin','ProteinStructure_readonly','Admin'],
    #connect_read_only=>1,
    #allow_anonymous_access=>1,
  );
  exit unless ($current_username);


  #### Collect the default input parameters into a local hash
  my %parameters;
  my $n_params_found = $sbeams->parse_input_parameters(
    q=>$q,parameters_ref=>\%parameters);
  #$sbeams->printDebuggingInfo($q);


  #### Let SBEAMS process the generic "state" parameters first
  $sbeams->processStandardParameters(parameters_ref=>\%parameters);


  #### "xxxx" is a placeholder action for which nothing is displayed
  return if ($parameters{action} eq "xxxx");


  #### Every other action gets the full page: header, request output, footer
  $sbeamsMOD->display_page_header(
    navigation_bar=>$parameters{navigation_bar});
  handle_request(ref_parameters=>\%parameters);
  $sbeamsMOD->display_page_footer();

} # end main



###############################################################################
# Handle Request
#
# Prints the search form (in HTML output mode), checks that the requested
# biosequence sets exist and are accessible, builds the annotation query from
# the search scope and search key, then fetches and displays the resultset.
#
# Arguments (named):
#   ref_parameters  reference to the hash of request parameters; required,
#                   the sub dies with "ref_parameters not passed" without it
#
# Returns early (after reporting the problem through $sbeams) when a
# constraint is rejected or no valid biosequence set remains.
###############################################################################
sub handle_request {
  my %args = @_;


  #### Process the arguments list
  my $ref_parameters = $args{'ref_parameters'}
    || die "ref_parameters not passed";
  my %parameters = %{$ref_parameters};


  #### Define some variables for a query and resultset
  my $sql;
  my %resultset = ();
  my $resultset_ref = \%resultset;
  my (%url_cols,%hidden_cols,%max_widths,$show_sql);


  #### Read in the standard form values
  my $apply_action  = $parameters{'action'} || $parameters{'apply_action'};


  #### Set some specific settings for this program
  my $PROGRAM_FILE_NAME="GetAnnotations";
  my $base_url = "$CGI_BASE_DIR/$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME";


  #### Get the columns and input types for this table/query.  The types are
  #### keyed by column name (the previous flat list produced the single pair
  #### optionlist => text, which named no column at all).
  my @columns = ( 'search_scope','search_key' );
  my %input_types = ( 'search_scope' => 'optionlist',
                      'search_key' => 'text' );


  #### Read the input parameters for each column
  my $n_params_found = $sbeams->parse_input_parameters(
    q=>$q,parameters_ref=>\%parameters,
    columns_ref=>\@columns,input_types_ref=>\%input_types);


  #### If the apply action was to recall a previous resultset, do it
  my %rs_params = $sbeams->parseResultSetParams(q=>$q);
  if ($apply_action eq "VIEWRESULTSET") {
    $sbeams->readResultSet(resultset_file=>$rs_params{set_name},
        resultset_ref=>$resultset_ref,query_parameters_ref=>\%parameters);
    $n_params_found = 99;
  }


  #### Apply any parameter adjustment logic
  #$parameters{display_options} = 'ShowSQL';


  #### Display the user-interaction input form
  if ($sbeams->output_mode() eq 'html') {
    my @options = ( 'All','GeneSymbol','ORFName','FullGeneName','ECNumbers' );
    my %options = ( 'GeneSymbol' => 'Gene Symbol',
                    'ORFName' => 'ORF Name',
                    'FullGeneName' => 'Full Gene Name',
                    'ECNumbers' => 'EC Number',
                    'All' => 'All Attributes',
                  );

    #### Build the option list, pre-selecting the current search scope
    my $optionlist = '';
    foreach my $key ( @options ) {
      my $flag = '';
      $flag = 'SELECTED' if ($parameters{search_scope} eq $key);
      $optionlist .= "<OPTION VALUE=\"$key\" $flag>$options{$key}</OPTION>\n";
    }

    #### The request values are echoed back into HTML attributes, so they
    #### must be escaped; otherwise a crafted search key could break out of
    #### the attribute and inject markup
    my $escape_html = sub {
      my $text = shift;
      return '' unless (defined($text));
      $text =~ s/&/&amp;/g;
      $text =~ s/</&lt;/g;
      $text =~ s/>/&gt;/g;
      $text =~ s/"/&quot;/g;
      $text =~ s/'/&#39;/g;
      return $text;
    };
    my $search_key_html = $escape_html->($parameters{search_key});
    my $biosequence_set_id_html =
      $escape_html->($parameters{biosequence_set_id});

    print qq~
<tr><td><span class="sub_header">BROWSE THE PROTEOME</span></td></tr>

<tr><td height="1"><img src="$HTML_BASE_DIR/images/bg_Nav.gif" width="518" height="1" border="0"></td></tr>
<tr><td height="5"><img src="$HTML_BASE_DIR/images/clear.gif" width="1" height="1" border="0"></td></tr>

<tr><td>
<P><FORM ACTION="$base_url" METHOD="POST">
Search
<SELECT NAME="search_scope">
$optionlist
</SELECT> for
<INPUT NAME="search_key" TYPE="text" SIZE="35" VALUE="$search_key_html">
<INPUT NAME="biosequence_set_id" TYPE="hidden" VALUE="$biosequence_set_id_html">
<INPUT TYPE="submit" NAME="action" VALUE="GO">
</FORM></P>
    ~;
  }


  #### Finish the upper part of the page and go begin the full-width
  #### data portion of the page
#  $sbeams->display_page_footer(close_tables=>'YES',
#    separator_bar=>'YES',display_footer=>'NO')
#    unless ( $parameters{display_options} =~ /SequenceFormat/ &&
#      $apply_action =~ /HIDE/ );



  #########################################################################
  #### Process all the constraints

  #### Check the raw BIOSEQUENCE_SET value first; a return of '-1' from
  #### parseConstraint2SQL is treated as a rejected constraint throughout
  my $form_test = $sbeams->parseConstraint2SQL(
    constraint_column=>"BS.biosequence_set_id",
    constraint_type=>"int_list",
    constraint_name=>"BioSequence Set",
    constraint_value=>$parameters{biosequence_set_id} );
  return if ($form_test eq '-1');

  #### Verify that the selected biosequence_sets are permitted
  if ($parameters{biosequence_set_id}) {
    my $project_sql = qq~
      SELECT biosequence_set_id,project_id
	FROM $TBPS_BIOSEQUENCE_SET
       WHERE biosequence_set_id IN ( $parameters{biosequence_set_id} )
	 AND record_status != 'D'
    ~;
    my %project_ids = $sbeams->selectTwoColumnHash($project_sql);
    my %accessible_project_ids =
      map { $_ => 1 } $sbeams->getAccessibleProjects();

    my @verified_ids;
    foreach my $id ( split(',',$parameters{biosequence_set_id}) ) {

      #### If the requested biosequence_set_id doesn't exist
      if (! defined($project_ids{$id})) {
        $sbeams->reportException(
          state => 'ERROR',
          type => 'BAD CONSTRAINT',
          message => "Non-existent biosequence_set_id = $id specified",
        );

      #### If the project for this biosequence_set is not accessible
      } elsif (! defined($accessible_project_ids{$project_ids{$id}})) {
        $sbeams->reportException(
          state => 'ERROR',
          type => 'PERMISSION DENIED',
          message => "Your current privilege settings do not allow you to access biosequence_set_id = $id.  See project owner to gain permission.",
        );

      #### Else, let it through
      } else {
        push(@verified_ids,$id);
      }

    }

    #### Set the input constraint to only allow that which is valid
    $parameters{biosequence_set_id} = join(',',@verified_ids);

  }

  #### If no valid biosequence_set_id was selected, stop here
  unless ($parameters{biosequence_set_id}) {
    $sbeams->reportException(
      state => 'ERROR',
      type => 'INSUFFICIENT CONSTRAINTS',
      message => "You must select at least one valid Biosequence Set",
    );
    return;
  }

  #### Build BIOSEQUENCE_SET constraint from the verified ids
  my $biosequence_set_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"BS.biosequence_set_id",
    constraint_type=>"int_list",
    constraint_name=>"BioSequence Set",
    constraint_value=>$parameters{biosequence_set_id} );
  return if ($biosequence_set_clause eq '-1');


  #### Build SEARCH SCOPE constraint (only checked for validity; the clause
  #### itself is not placed in the query below)
  my $search_scope_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"BS.search_scope",
    constraint_type=>"plain_text",
    constraint_name=>"Search Scope",
    constraint_value=>$parameters{search_scope},
  );
  return if ($search_scope_clause eq '-1');


  #### Build SEARCH KEY constraint (likewise only checked for validity)
  my $search_key_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"BS.search_key",
    constraint_type=>"plain_text",
    constraint_name=>"Search Key",
    constraint_value=>$parameters{search_key},
  );
  return if ($search_key_clause eq '-1');


  #### The four attribute clauses below are combined inside the
  #### "AND ( 0 = 1 ... )" group of the query, so the first AND of each
  #### clause is rewritten to OR

  #### Build ORF NAME constraint
  my $orf_name_clause = '';
  if ($parameters{search_scope} =~ /(ORFName|All)/) {
    $orf_name_clause = $sbeams->parseConstraint2SQL(
      constraint_column=>"BS.biosequence_name",
      constraint_type=>"plain_text",
      constraint_name=>"ORF Name",
      constraint_value=>$parameters{search_key},
    );
  }
  return if ($orf_name_clause eq '-1');
  $orf_name_clause =~ s/AND/ OR/;


  #### Build GENE SYMBOL constraint; a key without wildcards gets a
  #### trailing % appended
  my $gene_symbol_clause = '';
  if ($parameters{search_scope} =~ /(GeneSymbol|All)/) {
    my $search_key = $parameters{search_key};
    if (defined($search_key) &&
        $search_key gt '' &&
        $search_key !~ /[%_]/) {
      $search_key = "$search_key\%";
    }
    $gene_symbol_clause = $sbeams->parseConstraint2SQL(
      constraint_column=>"BSA.gene_symbol",
      constraint_type=>"plain_text",
      constraint_name=>"Gene Symbol",
      constraint_value=>$search_key,
    );
  }
  return if ($gene_symbol_clause eq '-1');
  $gene_symbol_clause =~ s/AND/ OR/;


  #### Build EC NUMBER constraint; a key without wildcards is wrapped in
  #### % on both sides unless the scope is ECNumbers_exact
  my $ec_number_clause = '';
  if ($parameters{search_scope} =~ /(ECNumbers|ECNumbers_exact|All)/) {
    my $search_key = $parameters{search_key};
    if (defined($search_key) &&
        $search_key gt '' &&
        $search_key !~ /[%_]/ &&
        $parameters{search_scope} !~ /ECNumbers_exact/) {
      $search_key = "\%$search_key\%";
    }
    $ec_number_clause = $sbeams->parseConstraint2SQL(
      constraint_column=>"BSA.EC_numbers",
      constraint_type=>"plain_text",
      constraint_name=>"EC Number",
      constraint_value=>$search_key,
    );
  }
  return if ($ec_number_clause eq '-1');
  $ec_number_clause =~ s/AND/ OR/;


  #### Build FULL GENE NAME constraint; a key without wildcards is wrapped
  #### in % on both sides
  my $full_gene_name_clause = '';
  if ($parameters{search_scope} =~ /(FullGeneName|All)/) {
    my $search_key = $parameters{search_key};
    if (defined($search_key) &&
        $search_key gt '' &&
        $search_key !~ /[%_]/) {
      $search_key = "\%$search_key\%";
    }
    $full_gene_name_clause = $sbeams->parseConstraint2SQL(
      constraint_column=>"BSA.functional_description",
      constraint_type=>"plain_text",
      constraint_name=>"Full Gene Name",
      constraint_value=>$search_key,
    );
  }
  return if ($full_gene_name_clause eq '-1');
  $full_gene_name_clause =~ s/AND/ OR/;


  #### Special handling for scope of 'All': when the external file search
  #### finds matches, constrain on those names alone and drop the other
  #### attribute clauses
  if ($parameters{search_scope} =~ /All/) {
    my $result = searchExternal(
      query_parameters_ref => \%parameters,
    );
    if ($result) {
      $orf_name_clause = $sbeams->parseConstraint2SQL(
        constraint_column=>"BS.biosequence_name",
        constraint_type=>"plain_text",
        constraint_name=>"ORF Name",
        constraint_value=>$result,
      );
      $gene_symbol_clause = '';
      $ec_number_clause = '';
      $full_gene_name_clause = '';
      $orf_name_clause =~ s/AND/ OR/;
    }

  }


  #### No LIMITs
  my $limit_clause = '';


  #### Define the desired columns in the query
  #### [friendly name used in url_cols references,SQL,displayed column title]
  #### Only the "Gene Name/Function" column depends on the biosequence set:
  #### sets 2 and 5 show functional_description, all others full_gene_name
  my @description_column =
    ($parameters{biosequence_set_id} == 2 ||
     $parameters{biosequence_set_id} == 5)
      ? ("functional_description","BSA.functional_description","Gene Name/Function")
      : ("full_gene_name","BSA.full_gene_name","Gene Name/Function");

  my @column_array = (
    ["biosequence_name","BS.biosequence_name","ORF Name"],
    ["gene_symbol","BSA.gene_symbol","Gene Symbol"],
    ["protein_EC_numbers","BSA.EC_numbers","EC Numbers"],
    [@description_column],
    ["domain_hits","' [View Domain Hits] '","Links"],

    ["project_id","BSS.project_id","project_id"],
    ["biosequence_set_id","BS.biosequence_set_id","biosequence_set_id"],
    ["biosequence_id","BS.biosequence_id","biosequence_id"],
    ["biosequence_annotation_id","BSA.biosequence_annotation_id","biosequence_annotation_id"],
    ["accessor","DBX.accessor","accessor"],
    ["biosequence_accession","BS.biosequence_accession","biosequence_accession"],
  );

  #### Adjust the columns definition based on user-selected options
  if ( $parameters{display_options} =~ /ShowSQL/ ) {
    $show_sql = 1;
  }


  #### Build the columns part of the SQL statement
  my %colnameidx = ();
  my @column_titles = ();
  my $columns_clause = $sbeams->build_SQL_columns_list(
    column_array_ref=>\@column_array,
    colnameidx_ref=>\%colnameidx,
    column_titles_ref=>\@column_titles
  );

  #### Define the SQL statement
  $sql = qq~
      SELECT $limit_clause $columns_clause
        FROM $TBPS_BIOSEQUENCE BS
        LEFT JOIN $TBPS_BIOSEQUENCE_SET BSS
             ON ( BS.biosequence_set_id = BSS.biosequence_set_id )
        LEFT JOIN $TB_DBXREF DBX ON ( BS.dbxref_id = DBX.dbxref_id )
        LEFT JOIN $TBPS_BIOSEQUENCE_ANNOTATION BSA
	     ON ( BS.biosequence_id = BSA.biosequence_id )
       WHERE 1 = 1
      $biosequence_set_clause
         AND ( 0 = 1
           $orf_name_clause
           $gene_symbol_clause
           $ec_number_clause
           $full_gene_name_clause
             )
      ORDER BY BS.biosequence_accession
   ~;


  #### Define the hypertext links for columns that need them
  %url_cols = ('ORF Name' => "$CGI_BASE_DIR/$SBEAMS_SUBDIR/BrowseBioSequence.cgi?project_id=\%$colnameidx{project_id}V&biosequence_set_id=\%$colnameidx{biosequence_set_id}V&biosequence_accession_constraint=\%V&action=QUERYHIDE",
               'ORF Name_ATAG' => 'TARGET="Win1"',

               'Gene Name/Function' => "$CGI_BASE_DIR/$SBEAMS_PART/ManageTable.cgi?TABLE_NAME=PS_biosequence_annotation&biosequence_annotation_id=\%$colnameidx{biosequence_annotation_id}V&biosequence_id=\%$colnameidx{biosequence_id}V&ShowEntryForm=1",
               'Gene Name/Function_ATAG' => 'TARGET="Win1"',

               'Gene Symbol' => "$CGI_BASE_DIR/$SBEAMS_PART/ManageTable.cgi?TABLE_NAME=PS_biosequence_annotation&biosequence_annotation_id=\%$colnameidx{biosequence_annotation_id}V",
               'Gene Symbol_ATAG' => 'TARGET="Win1"',

               'Links' => "$CGI_BASE_DIR/$SBEAMS_SUBDIR/GetDomainHits?project_id=\%$colnameidx{project_id}V&biosequence_set_id=\%$colnameidx{biosequence_set_id}V&biosequence_accession_constraint=\%$colnameidx{biosequence_accession}V&action=QUERYHIDE&display_options=ApplyChilliFilter,ShowExtraProteinProps",
               'Links_ATAG' => 'TARGET="Win1"',
               'Links_ISNULL' => ' [View Domain Hits] ',

    );


  #### Define columns that should be hidden in the output table
  %hidden_cols = ('project_id' => 1,
                  'biosequence_set_id' => 1,
                  'biosequence_id' => 1,
                  'biosequence_annotation_id' => 1,
                  'accessor' => 1,
                  'accessor_suffix' => 1,
                  'biosequence_accession' => 1,
   );



  #########################################################################
  #### If QUERY or VIEWRESULTSET was selected, display the data.  With no
  #### action at all, behave as if GO had been pressed.
  $apply_action = 'GO' unless ($apply_action);
  if ($apply_action =~ /(QUERY|GO|VIEWRESULTSET)/) {

    #### Show the SQL that will be or was executed
    $sbeams->display_sql(sql=>$sql) if ($show_sql);

    #### If the action contained QUERY, then fetch the results from
    #### the database
    if ($apply_action =~ /(QUERY|GO)/i) {

      #### Fetch the results from the database server
      $sbeams->fetchResultSet(sql_query=>$sql,
        resultset_ref=>$resultset_ref);

      #### Store the resultset and parameters to disk resultset cache
      $rs_params{set_name} = "SETME";
      $sbeams->writeResultSet(resultset_file_ref=>\$rs_params{set_name},
        resultset_ref=>$resultset_ref,query_parameters_ref=>\%parameters);
    }


    #### Warn that an "All" search can match on fields that are not shown
    if ($sbeams->output_mode() eq 'html') {
      print qq~<TABLE WIDTH="500"><TR><TD>
          <font color="red">NOTE</font>:
          Since you searched "All Attributes",
          some proteins in this resultset may match the search constraint
          in fields not displayed.  To see the complete domain search
          information, click on [View Domain Hits].</TD></TR></TABLE><BR>
        ~ if ($parameters{search_scope} =~ /All/);
    }

    #### Display the result
    $sbeams->displayResultSet(rs_params_ref=>\%rs_params,
      url_cols_ref=>\%url_cols,hidden_cols_ref=>\%hidden_cols,
      max_widths=>\%max_widths,resultset_ref=>$resultset_ref,
      column_titles_ref=>\@column_titles,
      base_url=>$base_url,query_parameters_ref=>\%parameters,
    );

    #### Display the resultset controls
    $sbeams->displayResultSetControls(rs_params_ref=>\%rs_params,
      resultset_ref=>$resultset_ref,query_parameters_ref=>\%parameters,
      base_url=>$base_url
    );


  #### If QUERY was not selected, then tell the user to enter some parameters
  } else {
    if ($sbeams->invocation_mode() eq 'http') {
      print "<H4>Select parameters above and press QUERY</H4>\n";
    } else {
      print "You need to supply some parameters to contrain the query\n";
    }
  }


} # end handle_request



###############################################################################
# evalSQL: Callback for translating global table variables to names
#
# Takes one string argument, wraps it in double quotes and string-evals it,
# so Perl variables named in the text are interpolated.  Returns whatever the
# eval yields.
#
# NOTE(review): a string eval runs any Perl code embedded in the argument
# (for example through @{[ ... ]}), so this must only be given trusted text.
###############################################################################
sub evalSQL {
  my $sql = shift;

  #### The text is evaluated in this sub's scope; an unescaped double quote
  #### inside $sql ends the quoted string early
  return eval "\"$sql\"";

} # end evalSQL



###############################################################################
# searchExternal: A method to search an external file for any matching info
#
# Scans the <abbreviation>_DomainHits.tsv and <abbreviation>_Biosequences.tsv
# files of the selected biosequence set and collects the biosequence_name
# value of every line that contains all words of the search key.
#
# Arguments (named):
#   query_parameters_ref  hash reference; the keys biosequence_set_id and
#                         search_key are read from it
#   data_dir              optional directory holding the .tsv files; defaults
#                         to /net/dblocal/www/html/sbeams/var/$SBEAMS_SUBDIR
#
# Returns the matching names sorted and joined with ";" (an empty string when
# nothing matched), or 0 after printing an error when the biosequence set has
# no known abbreviation.  Dies if one of the files cannot be opened.
#
# The words of the search key are matched literally and case-insensitively;
# an empty search key matches every line.
###############################################################################
sub searchExternal {
  my %args = @_;

  #### Process the arguments list
  my $query_parameters_ref = $args{'query_parameters_ref'} || {};
  my $data_dir = $args{'data_dir'}
    || "/net/dblocal/www/html/sbeams/var/$SBEAMS_SUBDIR";

  #### Determine which external data source to search
  my %abbreviations = (
    '3' => 'Hm', # Hm
    '2' => 'Halo', # Halobac
  );

  my $biosequence_set_id = $query_parameters_ref->{biosequence_set_id};
  $biosequence_set_id = '' unless (defined($biosequence_set_id));
  my $abbrevation = $abbreviations{$biosequence_set_id};
  unless ($abbrevation) {
    print "ERROR: Unable to find a abbrevation for this dataset<BR>\n";
    return 0;
  }


  #### Turn the search key into one literal, case-insensitive pattern per
  #### word.  \Q...\E keeps regex metacharacters in the user's input from
  #### being interpreted (an input such as "(" would otherwise be a fatal
  #### pattern error), and split(' ') drops leading whitespace so that no
  #### empty pattern is produced.
  my $search_spec = $query_parameters_ref->{search_key};
  $search_spec = '' unless (defined($search_spec));
  my @patterns = map { qr/\Q$_\E/i } split(' ',$search_spec);


  #### Search both the DomainHits and Biosequences files
  my %biosequence_accessions;
  foreach my $filetype ( qw (DomainHits Biosequences) ) {

    #### Open the file
    my $file = "${abbrevation}_$filetype.tsv";
    my $fullfile = "$data_dir/$file";
    open(my $infile,'<',$fullfile)
      || die("ERROR: Unable to open $fullfile: $!");

    #### Parse header line (an empty file simply yields no columns)
    my $line = <$infile>;
    $line = '' unless (defined($line));
    $line =~ s/[\r\n]//g;

    my @column_list = split("\t",$line);

    #### Convert the array into a hash of names to column numbers
    my %column_hash;
    @column_hash{@column_list} = (0 .. $#column_list);

    #### Column 0 is a valid position, so test for definedness rather than
    #### truth; fall back to the first column if the header lacks the name
    my $col = $column_hash{'biosequence_name'};
    unless (defined($col)) {
      print "ERROR: Could not find column 'biosequence_name'<BR>";
      $col = 0;
    }

    #### Search through the file looking for lines matching every pattern
    LINE: while (defined($line = <$infile>)) {
      $line =~ s/[\r\n]//g;
      foreach my $pattern (@patterns) {
        next LINE unless ($line =~ $pattern);
      }

      #### There was a match, so save this name (lines too short to have
      #### the column are skipped)
      my @columns = split("\t",$line);
      my $name = $columns[$col];
      next LINE unless (defined($name) && $name ne '');
      $biosequence_accessions{$name}++;
    }

    close($infile);

  }

  #### Sorted so that the result does not depend on hash ordering
  return join(";",sort(keys(%biosequence_accessions)));

} # end searchExternal


