#!/usr/local/bin/perl

###############################################################################
# Program     : GetProtein
# $Id: GetProtein 4168 2005-11-30 01:17:57Z edeutsch $
#
# Description : Prints summary of a given protein given selection
#               atlas build and protein name.
#
# SBEAMS is Copyright (C) 2000-2005 Institute for Systems Biology
# This program is governed by the terms of the GNU General Public License (GPL)
# version 2 as published by the Free Software Foundation.  It is provided
# WITHOUT ANY WARRANTY.  See the full description of GPL terms in the
# LICENSE file distributed with this software.
#
###############################################################################


###############################################################################
# Set up all needed modules and objects
###############################################################################
use strict;
use Getopt::Long;
use FindBin;

use lib "$FindBin::Bin/../../lib/perl";
use vars qw ($sbeams $sbeamsMOD $q $current_contact_id $current_username
             $PROG_NAME $USAGE %OPTIONS $QUIET $VERBOSE $DEBUG $DATABASE
             $TABLE_NAME $PROGRAM_FILE_NAME $CATEGORY $DB_TABLE_NAME
             @MENU_OPTIONS);

use SBEAMS::Connection qw($q);
use SBEAMS::Connection::Settings;
use SBEAMS::Connection::Tables;
use SBEAMS::Connection::TabMenu;

use SBEAMS::PeptideAtlas;
use SBEAMS::PeptideAtlas::Settings;
use SBEAMS::PeptideAtlas::Tables;

$sbeams = new SBEAMS::Connection;
$sbeamsMOD = new SBEAMS::PeptideAtlas;
$sbeamsMOD->setSBEAMS($sbeams);
$sbeams->setSBEAMS_SUBDIR($SBEAMS_SUBDIR);

use SBEAMS::PeptideAtlas::KeySearch;
my $keySearch = new SBEAMS::PeptideAtlas::KeySearch;
$keySearch->setSBEAMS($sbeams);


###############################################################################
# Set program name and usage banner for command like use
###############################################################################
$PROG_NAME = $FindBin::Script;
$USAGE = <<EOU;
Usage: $PROG_NAME [OPTIONS] key=value key=value ...
Options:
  --verbose n         Set verbosity level.  default is 0
  --quiet             Set flag to print nothing at all except errors
  --debug n           Set debug flag

 e.g.:  $PROG_NAME [OPTIONS] [keyword=value],...

EOU

#### Process options
unless (GetOptions(\%OPTIONS,"verbose:s","quiet","debug:s")) {
  print "$USAGE";
  exit;
}

$VERBOSE = $OPTIONS{"verbose"} || 0;
$QUIET = $OPTIONS{"quiet"} || 0;
$DEBUG = $OPTIONS{"debug"} || 0;

if ($DEBUG) {
  print "Options settings:\n";
  print "  VERBOSE = $VERBOSE\n";
  print "  QUIET = $QUIET\n";
  print "  DEBUG = $DEBUG\n";
}



###############################################################################
# Set Global Variables and execute main()
###############################################################################
main();
exit(0);



###############################################################################
# Main Program:
#
# Call $sbeams->Authenticate() and exit if it fails or continue if it works.
###############################################################################
sub main {

  #### Do the SBEAMS authentication and exit if a username is not returned
  exit unless ($current_username = $sbeams->Authenticate(
      permitted_work_groups_ref=>['PeptideAtlas_user','PeptideAtlas_admin',
      'PeptideAtlas_readonly'],
      #connect_read_only=>1,
      allow_anonymous_access=>1,
  ));


  #### Read in the default input parameters
  my %parameters;
  my $n_params_found = $sbeams->parse_input_parameters(
      q=>$q,
      parameters_ref=>\%parameters);
  #$sbeams->printDebuggingInfo($q);

  #### Process generic "state" parameters before we start
  $sbeams->processStandardParameters(parameters_ref=>\%parameters);

  #### Decide what action to take based on information so far
  if ($parameters{action} eq "???") {
      # Some action

  } else {
      my $project_id = $sbeamsMOD->getProjectID(
          atlas_build_id => $parameters{atlas_build_id}
          );
      $sbeamsMOD->display_page_header(project_id => $project_id);
      handle_request(ref_parameters=>\%parameters);
      $sbeamsMOD->display_page_footer();
  }


} # end main



###############################################################################
# Handle Request
###############################################################################
sub handle_request {

  my %args = @_;

  #### Process the arguments list
  my $ref_parameters = $args{'ref_parameters'}
      || die "ref_parameters not passed";

  my %parameters = %{$ref_parameters};


  #### Show current user context information
  print "<BR>\n" if ($sbeams->output_mode() eq 'html');
  #$sbeams->printUserContext();

  #### Get the HTML to display the tabs
  my $tabMenu = $sbeamsMOD->getTabMenu(
    parameters_ref => \%parameters,
    program_name => $PROG_NAME,
  );
  print $tabMenu->asHTML() if ($sbeams->output_mode() eq 'html');


  #### Get the current atlas_build_id based on parameters or session
  my $atlas_build_id = $sbeamsMOD->getCurrentAtlasBuildID(
    parameters_ref => \%parameters,
  );
  if (defined($atlas_build_id) && $atlas_build_id < 0) {
    return;
  }


  #### Get the search keyword
  my $protein_name = $parameters{"protein_name"};

  #### If a new protein_name was supplied, store it
  if ($protein_name) {
    $sbeams->setSessionAttribute(
      key => 'PeptideAtlas_protein_name',
      value => $protein_name,
    );

  #### Else see if we had one stored
  } else {
    $protein_name = $sbeams->getSessionAttribute(
      key => 'PeptideAtlas_protein_name',
    );
    if ($protein_name) {
      $parameters{'apply_action'} = 'GO';
    }
  }


  #### Define some variables for a query and resultset
  my %resultset = ();
  my $resultset_ref = \%resultset;
  my (%url_cols,%hidden_cols,%max_widths,$show_sql);


  #### Read in the standard form values
  my $apply_action  = $parameters{'action'} || $parameters{'apply_action'};
  my $TABLE_NAME = $parameters{'QUERY_NAME'};


  #### Set some specific settings for this program
  my $CATEGORY="Get Protein";
  my $PROGRAM_FILE_NAME = $PROG_NAME;
  my $base_url = "$CGI_BASE_DIR/$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME";
  my $help_url = "$CGI_BASE_DIR/help_popup.cgi";


  #### If the apply action was to recall a previous resultset, do it
  my %rs_params = $sbeams->parseResultSetParams('q' => $q);
  my $n_params_found = 0;

  if ($apply_action eq "VIEWRESULTSET") {
    $sbeams->readResultSet(
      resultset_file=>$rs_params{set_name},
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters,
      resultset_params_ref=>\%rs_params,
    );
    $n_params_found = 99;
  }

  #### Get a list of accessible project_ids
  my @accessible_project_ids = $sbeams->getAccessibleProjects();
  my $accessible_project_ids = join( ",", @accessible_project_ids ) || '0';


  #### Get a hash of available atlas builds
  my $sql = qq~
      SELECT atlas_build_id,atlas_build_name
      FROM $TBAT_ATLAS_BUILD
      WHERE project_id IN ( $accessible_project_ids )
      AND record_status!='D'
  ~;
  my %atlas_build_names = $sbeams->selectTwoColumnHash($sql);


  #### Get a list of id's sorted by name
  my $sql = qq~
      SELECT atlas_build_id,atlas_build_name
      FROM $TBAT_ATLAS_BUILD
      WHERE project_id IN ( $accessible_project_ids )
      AND record_status!='D'
      ORDER BY atlas_build_name
  ~;
  my @ordered_atlas_build_ids = $sbeams->selectOneColumn($sql);


  #### If the output_mode is HTML, then display the form
  if ($sbeams->output_mode() eq 'html') {

      print qq~
        <script LANGUAGE="Javascript">
          function switchAtlasBuild() {
            document.SearchForm.apply_action.value = "GO";
            document.SearchForm.submit();
          }
        </script>
      ~;

      print "<P>";

      print "<nobr>";

      print $q->start_form(-method=>"POST",
                           -action=>"$base_url",
			   -name=>"SearchForm",
                          );

      print "PeptideAtlas Build: ";

      print $q->popup_menu(-name => "atlas_build_id",
                           -values => [ @ordered_atlas_build_ids ],
                           -labels => \%atlas_build_names,
                           -default => $atlas_build_id,
                           -onChange => 'switchAtlasBuild()',
			  );

      #print "&nbsp;&nbsp;&nbsp;";
      print "<BR>";

      print "Protein Name: ";

      print $q->textfield( "protein_name", $protein_name);
      print $q->hidden( "apply_action", '');

      print "&nbsp;&nbsp;&nbsp;";

      print $q->submit(-name => "action",
                       -value => 'QUERY',
                       -label => 'GO');
      print $q->endform;

      print "</nobr>";

      print "</P>";
  }



  #########################################################################
  #### Process all the constraints

  #### If atlas_build_id was not selected, stop here
  unless ($atlas_build_id) {
    if ($sbeams->output_mode() eq 'html') {
      print "Please select an atlas build, type in a protein name ".
        "(e.g. ENSP00000290230 for human Ensembl protein), and ".
	"click GO";
    } else {
      $sbeams->reportException(
          state => 'ERROR',
          type => 'INSUFFICIENT CONSTRAINTS',
          message => 'You must provide an atlas_build_id',
      );
    }
    return;
  }


  #### If biosequence_name was not selected, stop here
  unless ($protein_name) {
    if ($sbeams->output_mode() eq 'html') {
      print "Please type in a protein name ".
        "(e.g. ENSP00000290230 for human Ensembl protein), and ".
	"click GO";
    } else {
      $sbeams->reportException(
          state => 'ERROR',
          type => 'INSUFFICIENT CONSTRAINTS',
          message => 'You must provide a protein_name',
      );
    }
    return;
  }



  #### Build ATLAS_BUILD constraint
  my $atlas_build_clause = $sbeams->parseConstraint2SQL(
      constraint_column=>"AB.atlas_build_id",
      constraint_type=>"int_list",
      constraint_name=>"Atlas Build",
      constraint_value=>$atlas_build_id );
  return if ($atlas_build_clause eq '-1');


  #### Build PROTEIN_NAME constraint
  my $biosequence_name_clause = $sbeams->parseConstraint2SQL(
      constraint_column=>"BS.biosequence_name",
      constraint_type=>"plain_text",
      constraint_name=>"Protein Name",
      constraint_value=>$protein_name );
  return if ($biosequence_name_clause eq '-1');


  #### Define the SQL statement to find the biosequence
  $sql = qq~
       SELECT BS.biosequence_id, BS.biosequence_name
         FROM $TBAT_ATLAS_BUILD AB
        INNER JOIN $TBAT_BIOSEQUENCE_SET BSS
              ON ( AB.biosequence_set_id = BSS.biosequence_set_id )
        INNER JOIN $TBAT_BIOSEQUENCE BS
              ON ( BSS.biosequence_set_id = BS.biosequence_set_id )
       WHERE 1 = 1
          $atlas_build_clause
          $biosequence_name_clause
  ~;


  my %biosequences = $sbeams->selectTwoColumnHash($sql);
  my $n_biosequences = scalar(keys(%biosequences));


  #### If the protein was not found, report the problem
  if ($n_biosequences == 0) {
    if ($sbeams->output_mode() eq 'html') {
      print qq~
        The protein entered '$protein_name' was not found in the
        atlas build '$atlas_build_names{$atlas_build_id}'. Please check
        that the correct build was selected and that the protein name
        was correctly specified.
      ~;
    } else {
      $sbeams->reportException(
          state => 'ERROR',
          type => 'RECORD NOT FOUND',
          message => "The protein entered '$protein_name' was not found ".
            "in the atlas build '$atlas_build_names{$atlas_build_id}'.",
      );
    }
    return;
  }


  #### If more than one protein was found, list them
  if ($n_biosequences > 1) {
    if ($sbeams->output_mode() eq 'html') {
      print qq~
        Several ($n_biosequences) entries were returned from your request
        for '$protein_name' in the
        atlas build '$atlas_build_names{$atlas_build_id}'. Please select
        the appropriate one:<BR><BR>
      ~;
      while (my ($key,$value) = each (%biosequences)) {
	print "$value<BR>";
      }

    } else {
      $sbeams->reportException(
          state => 'ERROR',
          type => 'TOO MANY RECORDS',
          message => qq~More than one ($n_biosequences) was returned from 
             your request
             for '$protein_name' in the
             atlas build '$atlas_build_names{$atlas_build_id}'.~,
      );
    }
    return;
  }


  #### Extract the one biosequence_id
  my ($biosequence_id) = keys(%biosequences);


  #### Return some information about this biosequence
  $sql = qq~
       SELECT BS.biosequence_id, BS.biosequence_name,BS.biosequence_gene_name,
              BS.biosequence_accession, BS.biosequence_desc,
              BS.biosequence_seq,BSS.set_name,
              DBX.accessor,DBX.accessor_suffix
         FROM $TBAT_BIOSEQUENCE_SET BSS
        INNER JOIN $TBAT_BIOSEQUENCE BS
              ON ( BSS.biosequence_set_id = BS.biosequence_set_id )
         LEFT JOIN $TBAT_DBXREF DBX
              ON ( BS.dbxref_id = DBX.dbxref_id )
       WHERE BS.biosequence_id = $biosequence_id
  ~;
  my @rows = $sbeams->selectHashArray($sql);
  my $biosequence = $rows[0];


  #### Print out a summary of the protein
  if ($sbeams->output_mode() eq 'html') {
    my $section_header = $sbeamsMOD->encodeSectionHeader(
      text=>$biosequence->{biosequence_name},
    );

    print qq~
      <TABLE width="600">
        $section_header
    ~;
    print $sbeamsMOD->encodeSectionItem(
      key=>'Protein Name',
      value=>$biosequence->{biosequence_name},
      #url=>"$biosequence->{accessor}$biosequence->{biosequence_accession}$biosequence->{accessor_suffix}",
    );

    print $sbeamsMOD->encodeSectionItem(
      key=>'Gene Name',
      value=>$biosequence->{biosequence_gene_name},
    );

    print $sbeamsMOD->encodeSectionItem(
      key=>'Description',
      value=>$biosequence->{biosequence_desc},
    );

    my @synonyms = $keySearch->getProteinSynonyms(
      resource_name => $biosequence->{biosequence_name}
    );

    foreach my $synonym ( @synonyms ) {
      my $url;
      $url = "$synonym->[2]$synonym->[0]$synonym->[3]" if ($synonym->[2]);
      print $sbeamsMOD->encodeSectionItem(
        key=>$synonym->[1],
        value=>$synonym->[0],
        url=>$url,
      );
    }


    print qq~
      </TABLE>
      <BR>
    ~;

    $section_header = $sbeamsMOD->encodeSectionHeader(
      text=>'SEQUENCE',
    );
    print qq~
      <TABLE width="600">
        $section_header
      </TABLE>
    ~;
  }


  #### Define the desired columns in the query
  #### [friendly name used in url_cols,SQL,displayed column title]
  my @column_array = (
    ["peptide_accession","P.peptide_accession","Peptide Accession"],
    ["peptide_sequence","P.peptide_sequence","Peptide Sequence"],
    ["best_probability","STR(PI.best_probability,7,3)","Best Prob"],
    ["n_observations","PI.n_observations","N Obs"],
    ["sample_ids","PI.sample_ids","Sample IDs"],
    ["n_protein_mappings","PI.n_protein_mappings","N Protein Mappings"],
    ["n_genome_locations","PI.n_genome_locations","N Genome Locations"],
    ["is_subpeptide_of","PI.is_subpeptide_of","Parent Peptides"],
  );

  #### Build the columns part of the SQL statement
  my %colnameidx = ();
  my @column_titles = ();
  ## Sends @column_array_ref to build_SQL_columns_list, which
  ## (1) appends the 2nd element in array to $columns_clause
  ## (2) fills %colnameidx_ref as a hash with key = 1st element
  ## and value = 3rd element, and (3) fills @column_titles_ref
  ## array with the 3rd element
  my $columns_clause = $sbeams->build_SQL_columns_list(
    column_array_ref=>\@column_array,
    colnameidx_ref=>\%colnameidx,
    column_titles_ref=>\@column_titles
  );


  #### Define a query to return peptides for this protein
  $sql = qq~
     SELECT DISTINCT $columns_clause
     FROM $TBAT_PEPTIDE_INSTANCE PI
    INNER JOIN $TBAT_PEPTIDE P
          ON ( PI.peptide_id = P.peptide_id )
     LEFT JOIN $TBAT_PEPTIDE_MAPPING PM
          ON ( PI.peptide_instance_id = PM.peptide_instance_id )
    INNER JOIN $TBAT_ATLAS_BUILD AB
          ON ( PI.atlas_build_id = AB.atlas_build_id )
     LEFT JOIN $TBAT_BIOSEQUENCE_SET BSS
          ON ( AB.biosequence_set_id = BSS.biosequence_set_id )
     LEFT JOIN $TB_ORGANISM O
          ON ( BSS.organism_id = O.organism_id )
     LEFT JOIN $TBAT_BIOSEQUENCE BS
          ON ( PM.matched_biosequence_id = BS.biosequence_id )
     LEFT JOIN $TBAT_DBXREF DBX ON ( BS.dbxref_id = DBX.dbxref_id )
    WHERE 1 = 1
          AND AB.atlas_build_id IN ( $atlas_build_id )
	  AND BS.biosequence_id = $biosequence_id
    ORDER BY P.peptide_accession
  ~;


  #### Define the hypertext links for columns that need them
  %url_cols = (
    	       'Peptide Accession' => "$CGI_BASE_DIR/PeptideAtlas/GetPeptide?_tab=3&atlas_build_id=$atlas_build_id&searchWithinThis=Peptide+Name&searchForThis=%V&action=QUERY",
  );



  #########################################################################
  #### If QUERY or VIEWRESULTSET was selected, display the data
  if ($apply_action =~ /(QUERY|GO|VIEWRESULTSET)/) {

    #### Show the SQL that will be or was executed
    $sbeams->display_sql(sql=>$sql) if ($show_sql);

    #### If the action contained QUERY, then fetch the results from
    #### the database
    if ($apply_action =~ /(QUERY|GO)/i) {

      #### Fetch the results from the database server
      $sbeams->fetchResultSet(
        sql_query=>$sql,
        resultset_ref=>$resultset_ref,
      );

      #### Store the resultset and parameters to disk resultset cache
      $rs_params{set_name} = "SETME";
      $sbeams->writeResultSet(
        resultset_file_ref=>\$rs_params{set_name},
        resultset_ref=>$resultset_ref,
        query_parameters_ref=>\%parameters,
        resultset_params_ref=>\%rs_params,
        query_name=>"$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME",
      );
    }

    #### Display the annotated sequence
    displayAnnotatedSequence(
      biosequence => $biosequence,
      resultset_ref=>$resultset_ref,
    );

    print qq~
      <TABLE width="600">
        <TR><TD colspan="2" background="$HTML_BASE_DIR/images/fade_orange_header_2.png" width="680"><font color="white">Observed Peptides</font></TD></TR>
      </TABLE>
    ~ if ($sbeams->output_mode() eq 'html');


    #### Display the resultset
    $sbeams->displayResultSet(
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters,
      rs_params_ref=>\%rs_params,
      url_cols_ref=>\%url_cols,
      hidden_cols_ref=>\%hidden_cols,
      max_widths=>\%max_widths,
      column_titles_ref=>\@column_titles,
      base_url=>$base_url,
    );


    #### Display the resultset controls
    $sbeams->displayResultSetControls(
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters,
      rs_params_ref=>\%rs_params,
      base_url=>$base_url,
    ) if (0);


    #### Display the samples list
    displaySamples(
      resultset_ref=>$resultset_ref,
    );


  #### If QUERY was not selected, then tell the user to enter some parameters
  } else {
    if ($sbeams->invocation_mode() eq 'http') {
      print "<H4>Select parameters above and press QUERY</H4>\n";
    } else {
      print "You need to supply some parameters to contrain the query\n";
    }
  }


} # end handle_request




###############################################################################
# displayAnnotatedSequence
###############################################################################
sub displayAnnotatedSequence {
  my %args = @_;
  my $SUB_NAME = 'displayAnnotatedSequence';


  #### Decode the argument list
  my $resultset_ref = $args{'resultset_ref'}
   || die "ERROR[$SUB_NAME]: resultset_ref not passed";
  my $biosequence = $args{'biosequence'}
   || die "ERROR[$SUB_NAME]: biosequence not passed";


  #### Get the hash of indices of the columns
  my %col = %{$resultset_ref->{column_hash_ref}};

  #### Loop over all the peptides
  my $data_ref = $resultset_ref->{data_ref};
  my @peptides = ();
  foreach my $row (@{$data_ref}) {
    push(@peptides,$row->[$col{peptide_sequence}]);
  }


  my $sequence = $biosequence->{biosequence_seq};
  my %start_positions;
  my %end_positions;

  foreach my $label_peptide (@peptides) {
    if ($label_peptide) {
      my $pos = -1;
      while (($pos = index($sequence,$label_peptide,$pos)) > -1) {
	$start_positions{$pos}++;
	$end_positions{$pos+length($label_peptide)}++;
	$pos++;
      }
    }
  }

  print "<PRE>\n";


  my $seq_length = length($sequence);
  my $i = 0;
  my $color_level = 0;
  my $observed_residues = 0;

  while ($i < $seq_length) {

    if ($end_positions{$i}) {
      if ($color_level == $end_positions{$i}) {
	print "</B></font>";
      }
      $color_level -= $end_positions{$i} unless ($color_level == 0);
    }


    if ($start_positions{$i}) {
      if ($color_level == 0) {
	print "<font color=\"red\"><B>";
      }
      $color_level += $start_positions{$i};
    }

    if ($color_level) {
      $observed_residues++;
    }

    print substr($sequence,$i,1);
    $i++;
    if ($i %70 == 0) {
      print "\n";
    } elsif ($i % 10 == 0) {
      print " ";
    }

  }


  if ($color_level) {
    print "</B></font>";
  }

  print "<BR><BR>";
  print "Protein Coverage = ",int($observed_residues/$seq_length*1000)/10,
    "%";


  print "<BR>";

} # end displayAnnotatedSequence


###############################################################################
# getSamples
###############################################################################
sub getSamples {
  my %args = @_;
  my $SUB_NAME = 'getSamples';

  my $sql = qq~
    SELECT sample_id,sample_title
      FROM $TBAT_SAMPLE
     WHERE record_status != 'D'
     ORDER BY sample_id
  ~;

  my @samples = $sbeams->selectSeveralColumns($sql);

  return \@samples;

} # end getSamples


###############################################################################
# displaySamples
###############################################################################
sub displaySamples {
  my %args = @_;
  my $SUB_NAME = 'displaySamples';

  #### Decode the argument list
  my $resultset_ref = $args{'resultset_ref'}
   || die "ERROR[$SUB_NAME]: resultset_ref not passed";

  #### Get the hash of indices of the columns
  my %col = %{$resultset_ref->{column_hash_ref}};

  #### Loop over all the peptides
  my $data_ref = $resultset_ref->{data_ref};
  my %observed_samples;
  foreach my $row (@{$data_ref}) {
    my $observed_sample_list = $row->[$col{sample_ids}];
    my @all = split(/[,;]/,$observed_sample_list);
    foreach my $element ( @all ) {
      $observed_samples{$element} = 1;
    }
  }

  my $samples = getSamples();

  my $section_header = $sbeamsMOD->encodeSectionHeader(
    text=>'Observed in Samples:',
  );

  print qq~
      <TABLE width="600">
        $section_header
    ~;

  foreach my $sample (@{$samples}) {
    my ($sample_id,$sample_title) = @{$sample};
    if ($observed_samples{$sample_id}) {
      print $sbeamsMOD->encodeSectionItem(
        key=>$sample_id,
        value=>$sample_title,
        url=>"$CGI_BASE_DIR/$SBEAMS_PART/ManageTable.cgi?TABLE_NAME=AT_SAMPLE&sample_id=$sample_id",
      );
    }

  }

  print qq~
      </TABLE>
      <BR>
  ~;


} # end displaySamples



