#!/usr/local/bin/perl ############################################################################### # Program : GetProtein # $Id$ # # Description : Prints summary of a given protein given selection # atlas build and protein name. # # SBEAMS is Copyright (C) 2000-2005 Institute for Systems Biology # This program is governed by the terms of the GNU General Public License (GPL) # version 2 as published by the Free Software Foundation. It is provided # WITHOUT ANY WARRANTY. See the full description of GPL terms in the # LICENSE file distributed with this software. # ############################################################################### ############################################################################### # Set up all needed modules and objects ############################################################################### use strict; use Getopt::Long; use FindBin; use lib "$FindBin::Bin/../../lib/perl"; use vars qw ($sbeams $sbeamsMOD $q $current_contact_id $current_username $PROG_NAME $USAGE %OPTIONS $QUIET $VERBOSE $DEBUG $DATABASE $TABLE_NAME $PROGRAM_FILE_NAME $CATEGORY $DB_TABLE_NAME @MENU_OPTIONS); use SBEAMS::Connection qw($q $log); use SBEAMS::Connection::Settings; use SBEAMS::Connection::Tables; use SBEAMS::Connection::TabMenu; $sbeams = new SBEAMS::Connection; use SBEAMS::BioLink; my $biolink = SBEAMS::BioLink->new(); $biolink->setSBEAMS($sbeams); use SBEAMS::PeptideAtlas; use SBEAMS::PeptideAtlas::Settings; use SBEAMS::PeptideAtlas::Tables; $sbeamsMOD = new SBEAMS::PeptideAtlas; $sbeamsMOD->setSBEAMS($sbeams); $sbeams->setSBEAMS_SUBDIR($SBEAMS_SUBDIR); use SBEAMS::PeptideAtlas::KeySearch; my $keySearch = new SBEAMS::PeptideAtlas::KeySearch; $keySearch->setSBEAMS($sbeams); ############################################################################### # Set program name and usage banner for command like use ############################################################################### $PROG_NAME = $FindBin::Script; $USAGE = <Authenticate() and exit if it fails or continue if it works. ############################################################################### sub main { #### Do the SBEAMS authentication and exit if a username is not returned exit unless ($current_username = $sbeams->Authenticate( permitted_work_groups_ref=>['PeptideAtlas_user','PeptideAtlas_admin', 'PeptideAtlas_readonly'], #connect_read_only=>1, allow_anonymous_access=>1, )); #### Read in the default input parameters my %parameters; my $n_params_found = $sbeams->parse_input_parameters( q=>$q, parameters_ref=>\%parameters); #$sbeams->printDebuggingInfo($q); #### Process generic "state" parameters before we start $sbeams->processStandardParameters(parameters_ref=>\%parameters); #### Decide what action to take based on information so far if ($parameters{action} eq "???") { # Some action } else { my $project_id = $sbeamsMOD->getProjectID( atlas_build_id => $parameters{atlas_build_id} ); $sbeamsMOD->display_page_header(project_id => $project_id); handle_request(ref_parameters=>\%parameters); $sbeamsMOD->display_page_footer(); } } # end main ############################################################################### # Handle Request ############################################################################### sub handle_request { my %args = @_; #### Process the arguments list my $ref_parameters = $args{'ref_parameters'} || die "ref_parameters not passed"; my %parameters = %{$ref_parameters}; #### Show current user context information print "
\n" if ($sbeams->output_mode() eq 'html'); #$sbeams->printUserContext(); #### Get the HTML to display the tabs my $tabMenu = $sbeamsMOD->getTabMenu( parameters_ref => \%parameters, program_name => $PROG_NAME, ); print $tabMenu->asHTML() if ($sbeams->output_mode() eq 'html'); #### Get the current atlas_build_id based on parameters or session my $atlas_build_id = $sbeamsMOD->getCurrentAtlasBuildID( parameters_ref => \%parameters, ); if (defined($atlas_build_id) && $atlas_build_id < 0) { return; } #### Get the search keyword my $protein_name = $parameters{"protein_name"}; #### If a new protein_name was supplied, store it if ($protein_name) { $sbeams->setSessionAttribute( key => 'PeptideAtlas_protein_name', value => $protein_name, ); #### Else see if we had one stored } else { $protein_name = $sbeams->getSessionAttribute( key => 'PeptideAtlas_protein_name', ); if ($protein_name) { $parameters{'apply_action'} = 'GO'; } } #### Define some variables for a query and resultset my %resultset = (); my $resultset_ref = \%resultset; my (%url_cols,%hidden_cols,%max_widths,$show_sql); #### Read in the standard form values my $apply_action = $parameters{'action'} || $parameters{'apply_action'}; my $TABLE_NAME = $parameters{'QUERY_NAME'}; #### Set some specific settings for this program my $CATEGORY="Get Protein"; my $PROGRAM_FILE_NAME = $PROG_NAME; my $base_url = "$CGI_BASE_DIR/$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME"; my $help_url = "$CGI_BASE_DIR/help_popup.cgi"; #### If the apply action was to recall a previous resultset, do it my %rs_params = $sbeams->parseResultSetParams('q' => $q); my $n_params_found = 0; if ($apply_action eq "VIEWRESULTSET") { $sbeams->readResultSet( resultset_file=>$rs_params{set_name}, resultset_ref=>$resultset_ref, query_parameters_ref=>\%parameters, resultset_params_ref=>\%rs_params, ); $n_params_found = 99; } #### Get a list of accessible project_ids my @accessible_project_ids = $sbeams->getAccessibleProjects(); my $accessible_project_ids = join( ",", @accessible_project_ids ) || '0'; #### Get a hash of available atlas builds my $sql = qq~ SELECT atlas_build_id,atlas_build_name FROM $TBAT_ATLAS_BUILD WHERE project_id IN ( $accessible_project_ids ) AND record_status!='D' ~; my %atlas_build_names = $sbeams->selectTwoColumnHash($sql); #### Get a list of id's sorted by name my $sql = qq~ SELECT atlas_build_id,atlas_build_name FROM $TBAT_ATLAS_BUILD WHERE project_id IN ( $accessible_project_ids ) AND record_status!='D' ORDER BY atlas_build_name ~; my @ordered_atlas_build_ids = $sbeams->selectOneColumn($sql); #### If the output_mode is HTML, then display the form if ($sbeams->output_mode() eq 'html') { print qq~ ~; print "

"; print ""; print $q->start_form(-method=>"POST", -action=>"$base_url", -name=>"SearchForm", ); print "PeptideAtlas Build: "; print $q->popup_menu(-name => "atlas_build_id", -values => [ @ordered_atlas_build_ids ], -labels => \%atlas_build_names, -default => $atlas_build_id, -onChange => 'switchAtlasBuild()', ); #print "   "; print "
"; print "Protein Name: "; print $q->textfield( "protein_name", $protein_name); print $q->hidden( "apply_action", ''); print "   "; print $q->submit(-name => "action", -value => 'QUERY', -label => 'GO'); print $q->endform; print "
"; print "

"; } ######################################################################### #### Process all the constraints #### If atlas_build_id was not selected, stop here unless ($atlas_build_id) { if ($sbeams->output_mode() eq 'html') { print "Please select an atlas build, type in a protein name ". "(e.g. ENSP00000290230 for human Ensembl protein), and ". "click GO"; } else { $sbeams->reportException( state => 'ERROR', type => 'INSUFFICIENT CONSTRAINTS', message => 'You must provide an atlas_build_id', ); } return; } #### If biosequence_name was not selected, stop here unless ($protein_name) { if ($sbeams->output_mode() eq 'html') { print "Please type in a protein name ". "(e.g. ENSP00000290230 for human Ensembl protein), and ". "click GO"; } else { $sbeams->reportException( state => 'ERROR', type => 'INSUFFICIENT CONSTRAINTS', message => 'You must provide a protein_name', ); } return; } #### Build ATLAS_BUILD constraint my $atlas_build_clause = $sbeams->parseConstraint2SQL( constraint_column=>"AB.atlas_build_id", constraint_type=>"int_list", constraint_name=>"Atlas Build", constraint_value=>$atlas_build_id ); return if ($atlas_build_clause eq '-1'); #### Build PROTEIN_NAME constraint my $biosequence_name_clause = $sbeams->parseConstraint2SQL( constraint_column=>"BS.biosequence_name", constraint_type=>"plain_text", constraint_name=>"Protein Name", constraint_value=>$protein_name ); return if ($biosequence_name_clause eq '-1'); #### Define the SQL statement to find the biosequence $sql = qq~ SELECT BS.biosequence_id, BS.biosequence_name FROM $TBAT_ATLAS_BUILD AB INNER JOIN $TBAT_BIOSEQUENCE_SET BSS ON ( AB.biosequence_set_id = BSS.biosequence_set_id ) INNER JOIN $TBAT_BIOSEQUENCE BS ON ( BSS.biosequence_set_id = BS.biosequence_set_id ) WHERE 1 = 1 $atlas_build_clause $biosequence_name_clause ~; my %biosequences = $sbeams->selectTwoColumnHash($sql); my $n_biosequences = scalar(keys(%biosequences)); #### If the protein was not found, report the problem if ($n_biosequences == 0) { if ($sbeams->output_mode() eq 'html') { print qq~ The protein entered '$protein_name' was not found in the atlas build '$atlas_build_names{$atlas_build_id}'. Please check that the correct build was selected and that the protein name was correctly specified. ~; } else { $sbeams->reportException( state => 'ERROR', type => 'RECORD NOT FOUND', message => "The protein entered '$protein_name' was not found ". "in the atlas build '$atlas_build_names{$atlas_build_id}'.", ); } return; } #### If more than one protein was found, list them if ($n_biosequences > 1) { if ($sbeams->output_mode() eq 'html') { print qq~ Several ($n_biosequences) entries were returned from your request for '$protein_name' in the atlas build '$atlas_build_names{$atlas_build_id}'. Please select the appropriate one:

~; while (my ($key,$value) = each (%biosequences)) { print "$value
"; } } else { $sbeams->reportException( state => 'ERROR', type => 'TOO MANY RECORDS', message => qq~More than one ($n_biosequences) was returned from your request for '$protein_name' in the atlas build '$atlas_build_names{$atlas_build_id}'.~, ); } return; } #### Extract the one biosequence_id my ($biosequence_id) = keys(%biosequences); #### Return some information about this biosequence $sql = qq~ SELECT BS.biosequence_id, BS.biosequence_name,BS.biosequence_gene_name, BS.biosequence_accession, BS.biosequence_desc, BS.biosequence_seq,BSS.set_name, DBX.accessor,DBX.accessor_suffix, BAG.annotated_gene_id FROM $TBAT_BIOSEQUENCE_SET BSS INNER JOIN $TBAT_BIOSEQUENCE BS ON ( BSS.biosequence_set_id = BS.biosequence_set_id ) LEFT JOIN $TBAT_DBXREF DBX ON ( BS.dbxref_id = DBX.dbxref_id ) LEFT JOIN $TBAT_BIOSEQUENCE_ANNOTATED_GENE BAG ON ( BAG.biosequence_id = BS.biosequence_id ) LEFT JOIN $TBAT_BIOSEQUENCE_PROPERTY_SET BPS ON ( BS.biosequence_id = BPS.biosequence_id ) WHERE BS.biosequence_id = $biosequence_id ~; my @rows = $sbeams->selectHashArray($sql); my $biosequence = $rows[0]; #### Print out a summary of the protein if ($sbeams->output_mode() eq 'html') { my $section_header = $sbeamsMOD->encodeSectionHeader( text=>$biosequence->{biosequence_name}, ); print qq~ $section_header ~; print $sbeamsMOD->encodeSectionItem( key=>'Protein Name', value=>$biosequence->{biosequence_name}, #url=>"$biosequence->{accessor}$biosequence->{biosequence_accession}$biosequence->{accessor_suffix}", ); print $sbeamsMOD->encodeSectionItem( key=>'Gene Name', value=>$biosequence->{biosequence_gene_name}, ); print $sbeamsMOD->encodeSectionItem( key=>'Description', value=>$biosequence->{biosequence_desc}, ); my @synonyms = $keySearch->getProteinSynonyms( resource_name => $biosequence->{biosequence_name} ); foreach my $synonym ( @synonyms ) { my $url; $url = "$synonym->[2]$synonym->[0]$synonym->[3]" if ($synonym->[2]); print $sbeamsMOD->encodeSectionItem( key=>$synonym->[1], value=>$synonym->[0], url=>$url, ); } # If we have annotations if ( $biosequence->{annotated_gene_id} ){ my $annot = $biolink->get_leaf_annotations( annotated_gene_id => $biosequence->{annotated_gene_id} ); my %fx = ( C => 'Cellular Location', F => 'Molecular Function', P => 'Biological Process' ); for my $type ( qw(F P C) ) { my $key = $type . 'pri'; if ( $annot->{$key} ) { my $display = $sbeams->truncateStringWithMouseover( len => 80, string => $annot->{$key}); print $sbeamsMOD->encodeSectionItem( key => $fx{$type}, value => $display ); } } } print qq~

~; $section_header = $sbeamsMOD->encodeSectionHeader( text=>'Sequence', ); print qq~ $section_header
~; } #### Define the desired columns in the query #### [friendly name used in url_cols,SQL,displayed column title] my @column_array = ( ["peptide_accession","P.peptide_accession","Peptide Accession"], ["peptide_sequence","P.peptide_sequence","Peptide Sequence"], ["best_probability","STR(PI.best_probability,7,3)","Best Prob"], ["n_observations","PI.n_observations","N Obs"], ["empirical_proteotypic_score","STR(PI.empirical_proteotypic_score,7,2)","Empirical Proteotypic Score"], ["SSRCalc_relative_hydrophobicity","STR(P.SSRCalc_relative_hydrophobicity,7,2)","SSRCalc Relative Hydrophob"], ["n_protein_mappings","PI.n_protein_mappings","N Protein Mappings"], ["n_genome_locations","PI.n_genome_locations","N Genome Locations"], ["sample_ids","PI.sample_ids","Sample IDs"], ["is_subpeptide_of","PI.is_subpeptide_of","Parent Peptides"], ); #### Build the columns part of the SQL statement my %colnameidx = (); my @column_titles = (); ## Sends @column_array_ref to build_SQL_columns_list, which ## (1) appends the 2nd element in array to $columns_clause ## (2) fills %colnameidx_ref as a hash with key = 1st element ## and value = 3rd element, and (3) fills @column_titles_ref ## array with the 3rd element my $columns_clause = $sbeams->build_SQL_columns_list( column_array_ref=>\@column_array, colnameidx_ref=>\%colnameidx, column_titles_ref=>\@column_titles ); #### Define a query to return peptides for this protein $sql = qq~ SELECT DISTINCT $columns_clause FROM $TBAT_PEPTIDE_INSTANCE PI INNER JOIN $TBAT_PEPTIDE P ON ( PI.peptide_id = P.peptide_id ) LEFT JOIN $TBAT_PEPTIDE_MAPPING PM ON ( PI.peptide_instance_id = PM.peptide_instance_id ) INNER JOIN $TBAT_ATLAS_BUILD AB ON ( PI.atlas_build_id = AB.atlas_build_id ) LEFT JOIN $TBAT_BIOSEQUENCE_SET BSS ON ( AB.biosequence_set_id = BSS.biosequence_set_id ) LEFT JOIN $TB_ORGANISM O ON ( BSS.organism_id = O.organism_id ) LEFT JOIN $TBAT_BIOSEQUENCE BS ON ( PM.matched_biosequence_id = BS.biosequence_id ) LEFT JOIN $TBAT_DBXREF DBX ON ( BS.dbxref_id = DBX.dbxref_id ) WHERE 1 = 1 AND AB.atlas_build_id IN ( $atlas_build_id ) AND BS.biosequence_id = $biosequence_id ORDER BY P.peptide_accession ~; #### Define the hypertext links for columns that need them %url_cols = ( 'Peptide Accession' => "$CGI_BASE_DIR/PeptideAtlas/GetPeptide?_tab=3&atlas_build_id=$atlas_build_id&searchWithinThis=Peptide+Name&searchForThis=%V&action=QUERY", ); ######################################################################### #### If QUERY or VIEWRESULTSET was selected, display the data if ($apply_action =~ /(QUERY|GO|VIEWRESULTSET)/) { #### Show the SQL that will be or was executed $sbeams->display_sql(sql=>$sql) if ($show_sql); #### If the action contained QUERY, then fetch the results from #### the database if ($apply_action =~ /(QUERY|GO)/i) { #### Fetch the results from the database server $sbeams->fetchResultSet( sql_query=>$sql, resultset_ref=>$resultset_ref, ); #### Store the resultset and parameters to disk resultset cache $rs_params{set_name} = "SETME"; $sbeams->writeResultSet( resultset_file_ref=>\$rs_params{set_name}, resultset_ref=>$resultset_ref, query_parameters_ref=>\%parameters, resultset_params_ref=>\%rs_params, query_name=>"$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME", ); } #### Get protein structure information my $protein_structure = getProteinStructure( biosequence_id => $biosequence_id, ); #### Display the annotated sequence displayAnnotatedSequence( biosequence => $biosequence, resultset_ref=>$resultset_ref, protein_structure => $protein_structure, ); #### Display the annotated sequence #displayAnnotatedSequence( # biosequence => $biosequence, # resultset_ref=>$resultset_ref, # line_length => 700000, # enzyme => 'trypsin', # protein_structure => $protein_structure, #); my $observed_header = $sbeamsMOD->encodeSectionHeader( text => 'Observed Peptides' ); print qq~ $observed_header
~ if ($sbeams->output_mode() eq 'html'); #### Display the resultset $sbeams->displayResultSet( resultset_ref=>$resultset_ref, query_parameters_ref=>\%parameters, rs_params_ref=>\%rs_params, url_cols_ref=>\%url_cols, hidden_cols_ref=>\%hidden_cols, max_widths=>\%max_widths, column_titles_ref=>\@column_titles, base_url=>$base_url, ); #### Display the resultset controls $sbeams->displayResultSetControls( resultset_ref=>$resultset_ref, query_parameters_ref=>\%parameters, rs_params_ref=>\%rs_params, base_url=>$base_url, ) if (0); #### Display the samples list my $sample_ids = getSampleList( resultset_ref=>$resultset_ref,); my $sampleHTML = $sbeamsMOD->getSampleDisplay( sample_ids => $sample_ids ); print "$sampleHTML
"; #### If QUERY was not selected, then tell the user to enter some parameters } else { if ($sbeams->invocation_mode() eq 'http') { print "

Select parameters above and press QUERY

\n"; } else { print "You need to supply some parameters to contrain the query\n"; } } } # end handle_request ############################################################################### # displayAnnotatedSequence ############################################################################### sub displayAnnotatedSequence { my %args = @_; my $SUB_NAME = 'displayAnnotatedSequence'; #### Decode the argument list my $resultset_ref = $args{'resultset_ref'} || die "ERROR[$SUB_NAME]: resultset_ref not passed"; my $biosequence = $args{'biosequence'} || die "ERROR[$SUB_NAME]: biosequence not passed"; my $line_length = $args{'line_length'} || 70; my $word_length = $args{'word_length'} || 10; my $enzyme = $args{'enzyme'} || ''; my $protein_structure = $args{'protein_structure'}; #### Get the hash of indices of the columns my %col = %{$resultset_ref->{column_hash_ref}}; #### Loop over all the peptides my $data_ref = $resultset_ref->{data_ref}; my @peptides = (); foreach my $row (@{$data_ref}) { push(@peptides,$row->[$col{peptide_sequence}]); } my $sequence = $biosequence->{biosequence_seq}; my %start_positions; my %end_positions; foreach my $label_peptide (@peptides) { if ($label_peptide) { my $pos = -1; while (($pos = index($sequence,$label_peptide,$pos)) > -1) { $start_positions{$pos}++; $end_positions{$pos+length($label_peptide)}++; $pos++; } } } #### If transmembrane regions topology has been supplied, find the TMRs my %tmr_start_positions; my %tmr_end_positions; my %tmr_color; my $notes_buffer = ''; if ($protein_structure->{transmembrane_topology}) { my $start_side = substr($protein_structure->{transmembrane_topology},0,1); my $tmp = substr($protein_structure->{transmembrane_topology},1,9999); my @regions = split(/[io]/,$tmp); foreach my $region (@regions) { my ($start,$end) = split(/-/,$region); $tmr_start_positions{$start-1} = $start_side; $tmr_color{$start-1} = 'orange'; if ($start_side eq 'i') { $start_side = 'o'; } elsif ($start_side eq 'o') { $start_side = 'i'; } else { $start_side = '?'; } $tmr_end_positions{$end} = $start_side; $tmr_color{$end} = 'orange'; } $notes_buffer .= "(Used TMR topology string: $protein_structure->{transmembrane_topology})
\n"; #print "[See full TMHMM result]
\n"; } #### If there's a signal peptide, mark it as a blue if ($protein_structure->{has_signal_peptide} eq 'Y') { $tmr_start_positions{0} = ''; $tmr_color{0} = 'blue'; $tmr_end_positions{$protein_structure->{signal_peptide_length}} = ''; $tmr_end_positions{$protein_structure->{signal_peptide_length}} = '/' if ($protein_structure->{signal_peptide_is_cleaved} eq 'Y'); $tmr_color{$protein_structure->{signal_peptide_length}} = 'orange'; $notes_buffer = "(signal peptide: Y, length: $protein_structure->{signal_peptide_length}, cleaved: $protein_structure->{signal_peptide_is_cleaved}, probability: $protein_structure->{has_signal_peptide_probability})\n".$notes_buffer; } print "
\n";


  my $seq_length = length($sequence);
  my $i = 0;
  my $color_level = 0;
  my $observed_residues = 0;

  my @annotation_lines;


  while ($i < $seq_length) {

    if ($end_positions{$i}) {
      if ($color_level == $end_positions{$i}) {
	print "";
      }
      $color_level -= $end_positions{$i} unless ($color_level == 0);
    }


    if ($start_positions{$i}) {
      if ($color_level == 0) {
	print "";
      }
      $color_level += $start_positions{$i};
    }

    if ($color_level) {
      $observed_residues++;
    }

    print substr($sequence,$i,1);
    $i++;
    if ($i %$line_length == 0) {
      print "\n";
    } elsif ($enzyme && $enzyme eq 'trypsin') {
      if (substr($sequence,$i-1,2) =~ /[RK][A-O,Q-Z]/) {
	print " ";
      }
    } elsif ($i % $word_length == 0) {
      print " ";
    }


  }



  if ($color_level) {
    print "";
  }

  print "

"; print "Protein Coverage = ",int($observed_residues/$seq_length*1000)/10, "%"; #print "

Notes:
\n$notes_buffer" if ($notes_buffer); print "
"; } # end displayAnnotatedSequence ############################################################################### # getSamples ############################################################################### sub getSamples { my %args = @_; my $SUB_NAME = 'getSamples'; my $sql = qq~ SELECT sample_id,sample_title FROM $TBAT_SAMPLE WHERE record_status != 'D' ORDER BY sample_id ~; my @samples = $sbeams->selectSeveralColumns($sql); return \@samples; } # end getSamples sub getSampleList { my %args = @_; my $SUB_NAME = 'getSampleList'; #### Decode the argument list my $resultset_ref = $args{'resultset_ref'} || die "ERROR[$SUB_NAME]: resultset_ref not passed"; #### Get the hash of indices of the columns my %col = %{$resultset_ref->{column_hash_ref}}; #### Loop over all the peptides my $data_ref = $resultset_ref->{data_ref}; my %observed_samples; foreach my $row (@{$data_ref}) { my $observed_sample_list = $row->[$col{sample_ids}]; my @all = split(/[,;]/,$observed_sample_list); foreach my $element ( @all ) { $observed_samples{$element}++; } } my @keys = keys( %observed_samples ); return \@keys; } ############################################################################### # getProteinStructure ############################################################################### sub getProteinStructure { my %args = @_; my $SUB_NAME = 'getProteinStructure'; #### Decode the argument list my $biosequence_id = $args{'biosequence_id'} || die "ERROR[$SUB_NAME]: biosequence_id not passed"; #### Define query to get information my $sql = qq~ SELECT n_transmembrane_regions,transmembrane_class,transmembrane_topology, has_signal_peptide,has_signal_peptide_probability, signal_peptide_length,signal_peptide_is_cleaved FROM $TBAT_BIOSEQUENCE_PROPERTY_SET WHERE biosequence_id = $biosequence_id ~; my @rows = $sbeams->selectHashArray($sql); if (scalar(@rows) != 1) { my %tmp = (); return(\%tmp); } return($rows[0]); }