#!/usr/local/bin/perl -w ############################################################################### # Program treatment.cgi # $Id: $ # # Description : Form and processing logic for applying laboratory # manipulation or treatment to a set of samples. # # SBEAMS is Copyright (C) 2000-2007 Institute for Systems Biology # This program is governed by the terms of the GNU General Public License (GPL) # version 2 as published by the Free Software Foundation. It is provided # WITHOUT ANY WARRANTY. See the full description of GPL terms in the # LICENSE file distributed with this software. # ############################################################################### use strict; use lib qw (../../lib/perl); use File::Basename; use Benchmark; use SBEAMS::Connection qw($q $log); use SBEAMS::Connection::Settings; use SBEAMS::BioLink::KeggMaps; use SBEAMS::Glycopeptide; use SBEAMS::Glycopeptide::Tables; ## Globals ## my $sbeams = new SBEAMS::Connection; my $glyco = new SBEAMS::Glycopeptide; $glyco->setSBEAMS($sbeams); my $keggmap = SBEAMS::BioLink::KeggMaps->new(); my $kegg_organism; my $params = {}; my $base_url = $q->url( -absolute => 1 ); my $program = basename( $0 ); # Don't buffer output $|++; { # Main # Authenticate user. my $current_username = $sbeams->Authenticate( allow_anonymous_access => 1) || die "Authentication failed"; # Process cgi parameters $params = process_params(); my $spc = $sbeams->getGifSpacer(800); my $content = "$spc
\n"; $kegg_organism = $glyco->getKeggOrganism( ); # for my $p ( keys( %$params ) ) { $log->debug( "hey, $p => $params->{$p}"); } if ( $sbeams->output_mode() !~ /cytoscape/ ) { my $sql = ''; if ( $params->{apply_action} eq 'kegg_pathway' ) { # Check for params if ( $params->{path_id} ) { $sql = get_pathway_sql(); } # create SQL } elsif ( $params->{apply_action} eq 'gene_list' ) { # Check for params if ( $params->{accession} ) { # create SQL $sql = get_genelist_sql( gene_list => $params->{accession} ); } } else { $content .= $sbeams->makeErrorText( "Missing or erroneous action" ); } # Cache sql, pass key to link below if ( !$sql ) { $content .= $sbeams->makeErrorText( "Missing required information" ); } else { my $key = $sbeams->getRandomString( num_chars => 20 ); # $sbeams->getSessionCookie(); $sbeams->setSessionAttribute( key => $key, value => $sql ); my $sp = ' ' x 2; my @buttons = $sbeams->getFormButtons( types => [ 'submit' ], name => 'Launch Cytoscape', value => 'Submit' ); my $info_text = get_info_text(); # A few fly-specific options, currently unimplemented for other orgs. my $primary_identifier = ''; my $interactions_option =<<" END"; Show known interactions as network edges:
$sp END my $fb_interaction_network = ''; if ( $params->{path_id} && $kegg_organism eq 'dme' ) { $primary_identifier =<<" END"; Primary identifier:
$sp Locus tag (CGxxxxx)
$sp Flybase ID (FBgnxxxxxxx)
END $interactions_option =<<" END"; Show known interactions as network edges (Locus tag only):
$sp END $fb_interaction_network =<<" END"; If desired, launch the Interaction Network from Flybase. (works best with FBgn accessions)
END } $content .=<<" END";

Click links below to launch a cytoscape network to explore the proteins you've selected

First, launch the Gaggle Boss (facilitates communication between 'geese').

Second, launch Cytoscape with specified list.
Include peptides in network:
$sp Phosphopeptides
$sp Non-phosphopeptides
$primary_identifier $interactions_option

$fb_interaction_network

$info_text END } } else { # Check for params if ( !$params->{key} ) { $content .= $sbeams->makeErrorText( "Missing required parameter 'key'" ); $sbeams->output_mode( 'html' ); } else { # fetch SQL my $sql = $sbeams->getSessionAttribute( key => $params->{key} ); $sbeams->deleteSessionAttribute( key => $params->{key} ); if ( $sbeams->isTaintedSQL( $sql ) ) { $content .= $sbeams->makeErrorText( "Missing required parameter 'key'" ); $sbeams->output_mode( 'html' ); } else { # create & display resultset launch_cytoscape( $sql ); exit; } } } $glyco->printPageHeader(); print $content; $glyco->printPageFooter( close_tables=>'NO'); } # end Main sub get_info_text { my $content =<<" END";
   
This cytoscape view depicts the relationship between the chosen proteins and any peptides that they may have. Proteins are shown as blue oval-shaped nodes. Peptides are shown as rectangular nodes, and their color is determined by the 'type' of peptide they are: - Green for high confidence (dCn) phosphopeptides - Yellow for lower dCn phosphopeptides - Red for non-phosphopeptides A line (edge) between a protein and a peptide indicates that that peptide is contained within the protein. You can choose whether to display either type of peptide, and which protein identifier to use. To use Flybase interactions goose, it is better to use FBgn accessions. Peptides are identified by their sequences, where & and * are observed phosphorylation sites.
END my @toggle = $sbeams->make_toggle_section( textlink => 1, hidetext => "Hide Explanation", showtext => "Show Explanation", content => $content ); return "$toggle[1]

$toggle[0]"; } sub process_params { # Standard SBEAMS processing $sbeams->parse_input_parameters( parameters_ref => $params, q => $q ); # for ( keys( %$params ) ){ print "$_ = $params->{$_}
" } # Process "state" parameters $sbeams->processStandardParameters( parameters_ref => $params ); $log->debug( $q->param() ); return $params; } sub get_pathway_sql { # Fetch pathway genes $keggmap->setPathway( pathway => $params->{path_id} ); # Look up in database my $genes = $keggmap->getPathwayGenes(); # pathway => $params->{path_id} ); my $gene_list; my $delim = ''; for my $gene ( @$genes ) { $gene_list .= $delim . $gene; $delim = ','; } return get_genelist_sql( gene_list => $gene_list ); } sub launch_cytoscape { my $sql = shift; # Make resultset my $rs_ref = {}; my %rs_params = $sbeams->parseResultSetParams('q' => $q); $log->debug( "$sql" ); #### Fetch the results from the database server $sbeams->fetchResultSet( sql_query=>$sql, resultset_ref=>$rs_ref, ); #### Store the resultset and parameters to disk resultset cache $rs_params{set_name} = "SETME"; $sbeams->writeResultSet( resultset_file_ref=>\$rs_params{set_name}, resultset_ref=>$rs_ref, query_parameters_ref=>$params, resultset_params_ref=>\%rs_params, query_name=>"$SBEAMS_SUBDIR/getCytoscapeWebstart", ); # prepareJavaWebStartFiles(top 10 # rs_params_ref=>\%rs_params, # resultset_ref=>$resultset_ref, # query_parameters_ref=>\%parameters, # url_cols_ref=>\%url_cols, # column_titles_ref=>\@column_titles, # hidden_cols_ref=>\%hidden_cols, # java_web_start=> { template => 'phosphopeptides' } # ); # $rs_params{page_size} = 500; my %cytoscape = ( template => 'PhosphoPep' ); my %webstart_files = ( 'commonName.noa' => 'Synonym', 'bioseq_type.noa' => 'bioseq_type', 'phospho_status.noa' => 'phospho_status', 'deltaCn.noa' => 'dCn', 'network.sif' => undef, 'm2z.noa' => 'Parent ion m/z', 'sequence.noa' => 'Peptide Sequence', 'prophet.noa' => 'Peptide Prophet Score', ); for my $k ( keys ( %webstart_files ) ) { $cytoscape{files}->{$k} = [ $webstart_files{$k} ]; } # SELECT ipi_accession_number, synonyms, ID.ipi_data_id, delta_cn, # experimental_mass, peptide_prophet_score, matching_sequence my $data = $rs_ref->{data_ref}; my %proteins; my %peptides; my %edges; my $relationships; if ( $params->{network_edges} && $params->{path_id} && $params->{identifier} ne 'flybase' ) { $relationships = $keggmap->getPathwayRelationships( pathway => $params->{path_id} ); for my $r ( @$relationships ) { my $edge = "$r->[0] $r->[3] $r->[1]"; $edge =~ s/dme://g; $edge =~ s/Dmel_//g; $log->debug( $edge ); if ( !$edges{$edge} ) { push ( @{$cytoscape{files}->{'network.sif'}}, $edge ); $edges{$edge}++; } } } for my $row ( @$data ) { # my $canon = ( $row->[0] =~ /^CG/ ) ? $row->[0] : $row->[1]; # my $common = ( $row->[0] eq $canon ) ? $row->[1] : $row->[0]; # $canon =~ s/^Dmel_//g; # User defines which identified to use, default to accession my $canon = $row->[0]; my $common = $row->[1]; if ( $params->{identifier} ne 'flybase' ) { $canon = $row->[1]; $common = $row->[0]; # Required to use with interactions $canon =~ s/-P[A-Z]$//; } if ( !$proteins{$canon} ) { # Only once per protein # New protein push ( @{$cytoscape{files}->{'network.sif'}}, $canon ); push ( @{$cytoscape{files}->{'commonName.noa'}}, "$canon = $common" ); push ( @{$cytoscape{files}->{'bioseq_type.noa'}}, "$canon = protein" ); push ( @{$cytoscape{files}->{'phospho_status.noa'}}, "$canon = 99" ); } my $pstatus = ( $row->[6] =~ /\&|\*/ ) ? int($row->[3]*10) : -99; if ( $pstatus == -99 && !$params->{non_phospho} ) { next; } elsif ( $pstatus > -99 && !$params->{phospho} ) { next; } my $edge = "$canon contains $row->[6]"; if ( !$edges{$edge} ) { push ( @{$cytoscape{files}->{'network.sif'}}, $edge ); } $edges{$edge}++; next if $peptides{$row->[6]}; $peptides{$row->[6]}++; #my $pname = $sbeams->truncateString( string => $row->[6], 'length' => 12 ); push ( @{$cytoscape{files}->{'phospho_status.noa'}}, "$row->[6] = $pstatus" ); push ( @{$cytoscape{files}->{'network.sif'}}, $row->[6] ); push ( @{$cytoscape{files}->{'bioseq_type.noa'}}, "$row->[6] = peptide" ); push ( @{$cytoscape{files}->{'deltaCn.noa'}}, "$row->[6] = " . sprintf("%.3f", $row->[3]) ); push ( @{$cytoscape{files}->{'m2z.noa'}}, "$row->[6] = " . sprintf("%.4f", $row->[4]) ); push ( @{$cytoscape{files}->{'prophet.noa'}}, "$row->[6] = " . sprintf("%.2f", $row->[5]) ); push ( @{$cytoscape{files}->{'sequence.noa'}}, "$row->[6] = $row->[6]" ); } #### Display the resultset $sbeams->displayResultSet( resultset_ref => $rs_ref, query_parameters_ref => $params, rs_params_ref => \%rs_params, base_url => $base_url, cytoscape => \%cytoscape ); # return $rs_ref; # make webstart exit; } sub get_genelist_sql { my %args = @_; my $gene_list = $args{gene_list} || ''; my $cutoff = $glyco->get_current_prophet_cutoff(); my $sql = ''; $log->debug( "genelist sql" ); if ( $kegg_organism eq 'dme' ) { $log->debug( "Ain't no DME baby" ); my $like_clause = ''; my $in_clause = ''; if ( $gene_list =~ /FBgn/ ) { my $delim = ''; my %redundant; $in_clause = "WHERE ipi_accession_number IN ( "; for my $gene ( split ",", $gene_list ) { next if $redundant{$gene}; $redundant{$gene}++; $gene =~ s/\'//g; $in_clause .= $delim . "'$gene'"; $delim = ','; } $in_clause .= ' )'; } else { my $delim = ' '; for my $gene ( split ",", $gene_list ) { $gene =~ s/Dmel_//g; $gene =~ s/\'//g; $like_clause .= $delim . " synonyms like '$gene%' "; $delim = "\n OR "; } $like_clause = "WHERE ( $like_clause )\n"; } $sql =<<" END"; SELECT ipi_accession_number, synonyms, ID.ipi_data_id, delta_cn, experimental_mass, peptide_prophet_score, observed_peptide_sequence FROM $TBGP_IPI_DATA ID LEFT JOIN $TBGP_OBSERVED_TO_IPI ITI ON ITI.ipi_data_id = ID.ipi_data_id LEFT JOIN $TBGP_OBSERVED_PEPTIDE IP ON ITI.OBSERVED_peptide_id = IP.OBSERVED_peptide_id $in_clause $like_clause AND ( peptide_prophet_score >= $cutoff OR peptide_prophet_score IS NULL ) END } elsif ( 0 ) { $log->debug( "No freaking way" ); my $delim = ''; my $gene_string = ''; for my $gene ( split ",", $gene_list ) { $gene_string .= $delim . "'" . $gene . "'"; $delim = ','; } $sql =<<" END"; SELECT entrez_id, ipi_accessions FROM dcampbel.dbo.ipi_xrefs IX JOIN $TBGP_IPI_DATA ID ON IX.ipi_accessions = ID.ipi_accession_number JOIN $TBGP_OBSERVED_TO_IPI ITI ON ITI.ipi_data_id = ID.ipi_data_id JOIN $TBGP_OBSERVED_PEPTIDE IP ON ITI.observed_peptide_id = IP.observed_peptide_id WHERE entrez_id IN ($gene_string) AND peptide_prophet_score >= $cutoff END } else { $log->debug( "In the elsewhere" ); my $delim = ''; my $gene_string = ''; for my $gene ( split ",", $gene_list ) { if ( $gene =~ /\'/ ) { $gene_string .= $delim . $gene; } else { $gene_string .= $delim . "'" . $gene . "'"; } $delim = ','; } $sql =<<" END"; SELECT ipi_accession_number, synonyms, ID.ipi_data_id, delta_cn, experimental_mass, peptide_prophet_score, observed_peptide_sequence FROM $TBGP_IPI_DATA ID LEFT JOIN $TBGP_OBSERVED_TO_IPI ITI ON ITI.ipi_data_id = ID.ipi_data_id LEFT JOIN $TBGP_OBSERVED_PEPTIDE IP ON ITI.OBSERVED_peptide_id = IP.OBSERVED_peptide_id WHERE ipi_accession_number IN ( $gene_string ) AND ( peptide_prophet_score >= $cutoff OR peptide_prophet_score IS NULL ) END } return ( $sql ); } __DATA__ #+ # Routine to list all available pathways for current organism #- sub list_pathways { my $url = $q->url( -full=>1 ); my $page = $sbeams->getGifSpacer(900); my $t0 = new Benchmark; my $pathways = $keggmap->getKeggPathways( organism => $kegg_organism, source => 'db' ); $page .= "
\n"; for my $path ( @{$pathways} ) { my $desc = $q->escape( $path->{definition} ); $page .=<<" END"; END } $page .= "
$path->{definition} {path_id};path_def=$desc TARGET=_pathdetails>$path->{entry_id}

\n"; my $t1 = new Benchmark; $log->debug( parseTime( $t1, $t0, "seconds to fetch pathways for $kegg_organism" ) ); return $page; } #+ # Main functionality of page. Print image mapped, colored pathway #- sub pathway_details { my $params = shift; my $url = $q->self_url(); my @url = split( /\?/, $url ); $url = $url[0]; # Due to performance issues, we will print this out as we go along until # feature is better implemented. print $sbeams->getGifSpacer(1200) . "
\n"; # Some page-specific javascript/css. Allows loading info to get hidden, # draws box around legend print <<" END"; END # Turned off # my $addpath = ( $params->{path_def} =~ /pathway/i ) ? '' : 'Pathway '; print "
\n"; print " * Looking up genes in $params->{path_def} ($params->{path_id}) at KEGG
\n"; my $t0 = new Benchmark; $keggmap->setPathway( pathway => $params->{path_id} ); my $gene_list = $keggmap->getPathwayGenes(); # pathway => $params->{path_id} ); my $tot = scalar( @{$gene_list} ); my $t1 = new Benchmark; $log->debug( parseTime( $t1, $t0, "seconds to fetch genes for $params->{path_def}" ) ); print "* Found $tot genes in pathway, looking up data in Unipep
\n"; # Fetches expression info from db, returns arrays of bg/fg colors my ( $bg, $fg, $glyco_hits, $cnt ) = getExpressionValues($gene_list); my $t2 = new Benchmark; $log->debug( parseTime( $t2, $t1, "seconds to get glyco data for $params->{path_def}" ) ); print "* Found data for $cnt of $tot, plotting on KEGG map
\n"; # Send gene/color info to kegg for a map my $url = $keggmap->getColoredPathway( bg => $bg, fg => $fg, genes => $gene_list, ); my $t3 = new Benchmark; $log->debug( parseTime( $t3, $t2, "seconds to get color pathway" ) ); # Get info from pathway XML my $processed = $keggmap->parsePathwayXML(); unless ( $processed ) { # Fetching XML must have failed, simply print mapped image and exit $log->warn( "Missing XML for pathway $params->{path_id}" ); print <<" END";
END print ""; exit; } my $organism = keggOrg2Std( $kegg_organism ); my @links; my @text; my %entry2genes = %{$processed->{entry2genes}}; for my $en ( @{$processed->{entries}} ) { if ( $entry2genes{$en} ) { my @genes = map( /$kegg_organism:(.*)/, @{$entry2genes{$en}} ); # @genes = map { $glyco_hits->{$_}->[2] } @genes; my $gene = join( '%3B', @genes ); # @{$entry2genes{$en}} ); if ( $kegg_organism eq 'dme' ) { push @links, "peptideSearch.cgi?search_type=gene_name;search_term=$gene;action=Show_hits_form;autorun=1"; } else { push @links, "peptideSearch.cgi?search_type=GeneID;search_term=$gene;action=Show_hits_form"; } push @text, "Gene ID(s) " . join( ", ", @genes ); } else { log->warn( "No entry for $en" ); } } my $t4 = new Benchmark; $log->debug( parseTime( $t4, $t3, "seconds to get KGML" ) ); print <<" END"; END # Get hashref of pathway info (mostly for image URL); # Get image map for links to peptide glyco my $image_map = $keggmap->get_image_map( coords => $processed->{coords}, name => 'kegg_map', links => \@links, text => \@text, colors => $bg, img_src => $url ); my $legend = getExpressionLegend(); print "$legend
\n"; print "$image_map
\n"; my @coordinates = @{$processed->{coords}}; my $t5 = new Benchmark; $log->debug( parseTime( $t5, $t4, "seconds to get finish" ) ); # return $page; return ''; } sub getExpressionValues { my $gene_list = shift; my $cutoff = $glyco->get_current_prophet_cutoff(); my $gene_string = join( ", ", @{$gene_list} ); my $sql = ''; if ( $kegg_organism eq 'dme' ) { $sql =<<" END"; SELECT CASE WHEN ipi_accession_number LIKE 'CG%' THEN ipi_accession_number ELSE synonyms END AS accession, ID.ipi_data_id, 'nada', 'nada', 1 FROM $TBGP_IPI_DATA ID JOIN $TBGP_OBSERVED_TO_IPI ITI ON ITI.ipi_data_id = ID.ipi_data_id JOIN $TBGP_OBSERVED_PEPTIDE IP ON ITI.OBSERVED_peptide_id = IP.OBSERVED_peptide_id WHERE ( ipi_accession_number IN ($gene_string) OR synonyms IN ($gene_string) ) AND peptide_prophet_score >= $cutoff END } elsif ( 0 ) { $sql =<<" END"; SELECT entrez_id, ipi_accessions, 'nada', 'nada', 1 FROM dcampbel.dbo.ipi_xrefs IX JOIN $TBGP_IPI_DATA ID ON IX.ipi_accessions = ID.ipi_accession_number JOIN $TBGP_OBSERVED_TO_IPI ITI ON ITI.ipi_data_id = ID.ipi_data_id JOIN $TBGP_OBSERVED_PEPTIDE IP ON ITI.observed_peptide_id = IP.observed_peptide_id WHERE entrez_id IN ($gene_string) AND peptide_prophet_score >= $cutoff END } else { $sql =<<" END"; SELECT ipi_accession_number accession, ID.ipi_data_id, 'nada', 'nada', 1 FROM $TBGP_IPI_DATA ID JOIN $TBGP_OBSERVED_TO_IPI ITI ON ITI.ipi_data_id = ID.ipi_data_id JOIN $TBGP_OBSERVED_PEPTIDE IP ON ITI.OBSERVED_peptide_id = IP.OBSERVED_peptide_id WHERE ( ipi_accession_number IN ($gene_string) OR synonyms IN ($gene_string) ) AND peptide_prophet_score >= $cutoff END } $log->debug( "gonna run $sql" ); # ref to hash keyed by gene_id, points to arrayref of one or more peptides my %glyco_hits; my @rows = $sbeams->selectSeveralColumns( $sql ); for my $row ( @rows ) { # just need to look at the first one next if $glyco_hits{$row->[0]}; $glyco_hits{$row->[0]} = $row; # push @{$glyco_hits{$row->[0]}}, $row; } # define colors my $seen = 'yellow'; my $uniq = 'green'; my @bg; my @fg; my $gene_cnt = 0; for my $gene ( @{$gene_list} ) { # Has quotes if it is a string $gene =~ s/\'//g; # Assume it's neutral my $color = '#E0FFFF'; if ( $glyco_hits{$gene} ) { $gene_cnt++; $color = ( $glyco_hits{$gene}->[4] > 1 ) ? $seen : $uniq; } push @bg, $color; push @fg, 'black'; } return ( \@bg, \@fg, \%glyco_hits, $gene_cnt ); } sub getExpressionLegend { my $cell = $sbeams->getGifSpacer(20); # . "
\n"; my $table =<<" END";
$cell Peptides observed
$cell No peptides observed
$cell N/A
END return $table; } #+ # Read/process CGI parameters #- #+ # Extract wallclock seconds from time diff, append message, and return #- sub parseTime { my @args = @_; my $time = timestr(timediff( $args[0], $args[1] )); $time =~ /(\d+) wallclock.*/; return "$1 $args[2]"; } #+ # Translate kegg organism to Standard glyco #- sub keggOrg2Std { my $korg = shift; return '' unless $korg; my %k2s = ( hsa => 'Human', dme => 'Drosophila', sce => 'Yeast', mmu => 'Mouse' ); return $k2s{$korg} || ''; } __DATA__ sub error_redirect { my $msg = shift || ''; my $type = shift || 'Error'; $sbeams->set_page_message( msg => $msg, type => $type ); print $q->redirct( "treatmentList.cgi" ); exit; }