#!/tools32/bin/perl ############################################################################### # Program showPathways # $Id: $ # # Description : Form and processing logic for showing protein expression # data on a KEGG map. # # SBEAMS is Copyright (C) 2000-2006 Institute for Systems Biology # This program is governed by the terms of the GNU General Public License (GPL) # version 2 as published by the Free Software Foundation. It is provided # WITHOUT ANY WARRANTY. See the full description of GPL terms in the # LICENSE file distributed with this software. # ############################################################################### BEGIN { push @INC, qw( /net/db/src/SSRCalc/ssrcalc . /tools32/lib/perl5/5.8.0/i386-linux-thread-multi /tools32/lib/perl5/5.8.0 /tools32/lib/perl5/site_perl/5.8.0/i386-linux-thread-multi /tools32/lib/perl5/site_perl/5.8.0 /tools32/lib/perl5/site_perl ); } use strict; use lib qw (../../lib/perl); use File::Basename; use Benchmark; use SBEAMS::Connection qw($q $log); use SBEAMS::Connection::Settings; use SBEAMS::BioLink::KeggMaps; use SBEAMS::Glycopeptide; #use SBEAMS::PeptideAtlas::Settings; use SBEAMS::Glycopeptide::Tables; ## Globals ## my $sbeams = new SBEAMS::Connection; my $glyco = new SBEAMS::Glycopeptide; $glyco->setSBEAMS($sbeams); my $program = basename( $0 ); my $keggmap = SBEAMS::BioLink::KeggMaps->new(); my $kegg_organism; # Species my $sprefix; my $verbose = 1; # Don't buffer output $|++; { # Main $log->debug( localtime(time()) ); my $t0 = new Benchmark; # Authenticate user. my $current_username = $sbeams->Authenticate(allow_anonymous_access=>1) || die "Authentication failed"; # Process cgi parameters my $params = process_params(); if ( $params->{unipep_build_id} ) { my $build_id = $glyco->get_current_build( build_id => $params->{unipep_build_id} ); if ( $build_id != $params->{unipep_build_id} ) { $sbeams->set_page_message( type => 'Error', msg => 'You must log in to access specified build' ); } } $kegg_organism = $glyco->getKeggOrganism( ); $sprefix = ( $kegg_organism eq 'dme' ) ? 'Dmel_' : ''; my $content = ''; $params->{apply_action} ||= 'list_pathways'; # Decision block, what type of page are we going to display? if ( $params->{apply_action} eq 'list_pathways' ) { $content = list_pathways( $params ); # Print cgi headers $glyco->printPageHeader(); # $sbeams->printUserContext(); print $content; $glyco->printPageFooter( close_tables=>'NO'); } elsif ( $params->{apply_action} eq 'pathway_details' ) { # Until caching is available, will print out as we go. $glyco->printPageHeader( onload => 'hideTimerInfo()' ); $content = pathway_details( $params ); print $content; $glyco->printPageFooter( close_tables=>'NO'); } else { $content = list_pathways( $params ); } my $t1 = new Benchmark; $log->debug( localtime(time()) ); $log->debug( "$params->{apply_action} took " . timestr(timediff($t1, $t0)) ); } # end Main #+ # Routine to list all available pathways for current organism #- sub list_pathways { my $params = shift; my $url = $q->url( -full=>1 ); my $cyto_url = $url; $cyto_url =~ s/showPathways/getCytoscapeWebstart/; my $page = $sbeams->getGifSpacer(900); my $cyto_png = "$HTML_BASE_DIR/images/cyto_tiny.png"; my $t0 = new Benchmark; my %genes; if ( $params->{ga} ) { # list of accessions passed in my @genes = split ",", $params->{ga}; $genes{genes} = \@genes; } $log->debug( "Fetching pathways with $kegg_organism\n" ); my $pathways = $keggmap->getKeggPathways( organism => $kegg_organism, source => 'db', %genes ); $page .= "
\n"; if ( !scalar( @{$pathways} ) ) { $page .= " '; } for my $path ( @{$pathways} ) { my $desc = $q->escape( $path->{definition} ); my $pathway_id = $path->{entry_id}; # $pathway_id =~ s/path://; $page .=<<" END"; END } $page .= "
" . $sbeams->makeInactiveText( "No KEGG pathways found for the selected protein(s)" ) . '
$path->{definition} {entry_id};path_def=$desc TARGET=_pathdetails>$path->{entry_id}

\n"; my $t1 = new Benchmark; $log->debug( parseTime( $t1, $t0, "seconds to fetch pathways for $kegg_organism" ) ); return $page; } #+ # Main functionality of page. Print image mapped, colored pathway #- sub pathway_details { my $params = shift; my $url = $q->self_url(); my @url = split( /\?/, $url ); $url = $url[0]; # Due to performance issues, we will print this out as we go along until # feature is better implemented. print $sbeams->getGifSpacer(1200) . "
\n"; # Some page-specific javascript/css. Allows loading info to get hidden, # draws box around legend print <<" END"; END # Turned off # my $addpath = ( $params->{path_def} =~ /pathway/i ) ? '' : 'Pathway '; print <<" END";

END print " * Looking up genes in $params->{path_def} ($params->{path_id}) at KEGG
\n"; my $t0 = new Benchmark; $keggmap->setPathway( pathway => $params->{path_id} ); my $gene_list = $keggmap->getPathwayGenes(); # pathway => $params->{path_id} ); my $tot = scalar( @{$gene_list} ); my $t1 = new Benchmark; $log->debug( parseTime( $t1, $t0, "seconds to fetch genes for $params->{path_def}" ) ); if ( $tot ) { print "* Found $tot genes in pathway, looking up peptide data
\n"; } else { print "* Found $tot genes in pathway - quitting \n"; return; } # Fetches expression info from db, returns arrays of bg/fg colors my ( $bg, $fg, $glyco_hits, $cnt ) = getExpressionValues($gene_list); my $t2 = new Benchmark; $log->debug( parseTime( $t2, $t1, "seconds to get glyco data for $params->{path_def}" ) ); print "* Found data for $cnt of $tot, plotting on KEGG map
\n"; my $table = SBEAMS::Connection::DataTable->new( BORDER => 0 ); $table->addRow( [ ' ', 'Protein', 'Peptides Observed' ] ); $table->setHeaderAttr( BOLD => 1 ); my $acc_string =''; my $sep = ''; for my $gene ( @{$gene_list} ) { my $cnt = 0; if ( $glyco_hits->{$gene} ) { $cnt = scalar( @{$glyco_hits->{$gene}} ); } my $chk = ( $cnt ) ? 'checked' : ''; my $chk_box = ""; $table->addRow( [ $chk_box, $gene, $cnt ] ); $acc_string .= $sep . $gene; $sep ||= ','; } $table->alternateColors( FIRSTROW => 2, PERIOD => 1, BGCOLOR => '#DDDDDD', DEF_BGCOLOR => '#FFFFFF' ); $table->setColAttr( ALIGN => 'RIGHT', COLS => [3], ROWS => [2..$table->getRowNum()] ); my $cytolink = 'Open network with protein list'; print <<" END";
$cytolink


$table


END # Send gene/color info to kegg for a map my $url = $keggmap->getColoredPathway( bg => $bg, fg => $fg, genes => $gene_list, ); my $t3 = new Benchmark; $log->debug( parseTime( $t3, $t2, "seconds to get color pathway" ) ); # Get info from pathway XML my $processed = $keggmap->parsePathwayXML(); unless ( $processed ) { # Fetching XML must have failed, simply print mapped image and exit $log->warn( "Missing XML for pathway $params->{path_id}" ); print <<" END";
END print ""; exit; } my $organism = keggOrg2Std( $kegg_organism ); my @links; my @text; my %entry2genes = %{$processed->{entry2genes}}; for my $en ( @{$processed->{entries}} ) { if ( $entry2genes{$en} ) { my @genes = map( /$kegg_organism:(.*)/, @{$entry2genes{$en}} ); # @genes = map { $glyco_hits->{$_}->[2] } @genes; if ( $kegg_organism eq 'dme' ) { my $gene = ''; my $delim = ''; for my $gn ( @genes ) { # @{$entry2genes{$en}} ); $gn =~ s/$sprefix//g; $gn .= '%25'; $gene .= $delim . $gn; $delim = '%2C'; } push @links, "peptideSearch.cgi?search_type=gene_name;search_term=$gene;action=Show_hits_form;autorun=1;organism=$kegg_organism"; } elsif ( grep /$kegg_organism/, ( qw( hsa mmu ) ) ) { my $gene = join( '%2C', @genes ); # @{$entry2genes{$en}} ); push @links, "peptideSearch.cgi?search_type=GeneID;search_term=$gene;action=Show_hits_form;organism=$kegg_organism"; } else { my $gene = join( '%2C', @genes ); # @{$entry2genes{$en}} ); push @links, "peptideSearch.cgi?search_type=accession;search_term=$gene;action=Show_hits_form;autorun=1;organism=$kegg_organism"; } push @text, "Gene ID(s) " . join( ", ", @genes ); } else { log->warn( "No entry for $en" ); } } my $t4 = new Benchmark; $log->debug( parseTime( $t4, $t3, "seconds to get KGML" ) ); print <<" END"; END # Get hashref of pathway info (mostly for image URL); # Get image map for links to peptide glyco my $image_map = $keggmap->get_image_map( coords => $processed->{coords}, name => 'kegg_map', links => \@links, text => \@text, colors => $bg, img_src => $url ); my $legend = getExpressionLegend(); print "$legend
\n"; print "$image_map
\n"; my @coordinates = @{$processed->{coords}}; my $t5 = new Benchmark; $log->debug( parseTime( $t5, $t4, "seconds to get finish" ) ); # return $page; return ''; } sub getExpressionValues { my $gene_list = shift; my $cutoff = $glyco->get_current_prophet_cutoff(); my $sql = ''; my $gene_string = join( ", ", @{$gene_list} ); if ( $kegg_organism eq 'dme' ) { $gene_string = ''; my $delim = ' '; for my $gene ( @{$gene_list} ) { my $tmp_gene = $gene; $tmp_gene =~ s/Dmel_//; $tmp_gene =~ s/\'//g; $gene_string = $gene_string . $delim . " synonyms like '$tmp_gene%' "; $delim = "\n OR "; } $sql =<<" END"; SELECT CASE WHEN ipi_accession_number LIKE 'CG%' THEN ipi_accession_number ELSE synonyms END AS accession, ID.ipi_data_id, 'nada', 'nada', 1 FROM $TBGP_IPI_DATA ID JOIN $TBGP_OBSERVED_TO_IPI ITI ON ITI.ipi_data_id = ID.ipi_data_id JOIN $TBGP_OBSERVED_PEPTIDE IP ON ITI.OBSERVED_peptide_id = IP.OBSERVED_peptide_id WHERE ( $gene_string ) AND peptide_prophet_score >= $cutoff END } elsif ( $kegg_organism eq 'hsa' || $kegg_organism eq 'mmu' ) { $sql =<<" END"; SELECT entrez_id, ipi_accessions, 'nada', 'nada', 1 FROM dcampbel.dbo.ipi_xrefs IX JOIN $TBGP_IPI_DATA ID ON IX.ipi_accessions = ID.ipi_accession_number JOIN $TBGP_OBSERVED_TO_IPI ITI ON ITI.ipi_data_id = ID.ipi_data_id JOIN $TBGP_OBSERVED_PEPTIDE IP ON ITI.observed_peptide_id = IP.observed_peptide_id WHERE entrez_id IN ($gene_string) AND peptide_prophet_score >= $cutoff END } else { $sql =<<" END"; SELECT ipi_accession_number, synonyms, 'nada', 'nada', 1 FROM $TBGP_IPI_DATA ID JOIN $TBGP_OBSERVED_TO_IPI ITI ON ITI.ipi_data_id = ID.ipi_data_id JOIN $TBGP_OBSERVED_PEPTIDE IP ON ITI.observed_peptide_id = IP.observed_peptide_id WHERE ipi_accession_number IN ($gene_string) AND peptide_prophet_score >= $cutoff END } $log->debug( <<" END" ); ORG: $kegg_organism STR: $gene_string SQL: $sql END # ref to hash keyed by gene_id, points to arrayref of one or more peptides my %glyco_hits; my @rows = $sbeams->selectSeveralColumns( $sql ); for my $row ( @rows ) { # just need to look at the first one # next if $glyco_hits{$row->[0]}; # $glyco_hits{$row->[0]} = $row; my $acc = $row->[0]; $acc =~ s/-P.$//; $acc = $sprefix . $acc; push @{$glyco_hits{$acc}}, $row; } # define colors my $seen = 'yellow'; my $uniq = 'green'; my @bg; my @fg; my $gene_cnt = 0; for my $gene ( @{$gene_list} ) { # Has quotes if it is a string $gene =~ s/\'//g; # Assume it's neutral my $color = '#E0FFFF'; if ( $glyco_hits{$gene} ) { $gene_cnt++; $color = ( $glyco_hits{$gene}->[4] > 1 ) ? $seen : $uniq; } push @bg, $color; push @fg, 'black'; } return ( \@bg, \@fg, \%glyco_hits, $gene_cnt ); } sub getExpressionLegend { my $cell = $sbeams->getGifSpacer(20); # . "
\n"; my $table =<<" END";
$cell Unique Observed Peptide(s)
$cell Observed Peptide(s)
$cell No Peptides Observed
$cell N/A
END return $table; } #+ # Read/process CGI parameters #- sub process_params { my $params = { null => 'filler' }; # Standard SBEAMS processing $sbeams->parse_input_parameters( parameters_ref => $params, q => $q ); #for ( keys( %$params ) ){ print "$_ = $params->{$_}
" } # Process "state" parameters $sbeams->processStandardParameters( parameters_ref => $params ); return $params; } #+ # Extract wallclock seconds from time diff, append message, and return #- sub parseTime { my @args = @_; my $time = timestr(timediff( $args[0], $args[1] )); $time =~ /(\d+) wallclock.*/; return "$1 $args[2]"; } #+ # Translate kegg organism to Standard glyco #- sub keggOrg2Std { my $korg = shift; return '' unless $korg; my %k2s = ( hsa => 'Human', dme => 'Drosophila', 'C elegans' => 'Yeast', cel => 'C elegans', mmu => 'Mouse' ); return $k2s{$korg} || ''; } __DATA__ sub error_redirect { my $msg = shift || ''; my $type = shift || 'Error'; $sbeams->set_page_message( msg => $msg, type => $type ); print $q->redirct( "treatmentList.cgi" ); exit; }