#!/usr/local/bin/perl -w
###############################################################################
# Program treatment.cgi
# $Id: $
#
# Description : Form and processing logic for applying laboratory
# manipulation or treatment to a set of samples.
#
# SBEAMS is Copyright (C) 2000-2007 Institute for Systems Biology
# This program is governed by the terms of the GNU General Public License (GPL)
# version 2 as published by the Free Software Foundation. It is provided
# WITHOUT ANY WARRANTY. See the full description of GPL terms in the
# LICENSE file distributed with this software.
#
###############################################################################
use strict;
use lib qw (../../lib/perl);
use File::Basename;
use Benchmark;
use SBEAMS::Connection qw($q $log);
use SBEAMS::Connection::Settings;
use SBEAMS::BioLink::KeggMaps;
use SBEAMS::Glycopeptide;
use SBEAMS::Glycopeptide::Tables;
## Globals ##
my $sbeams = new SBEAMS::Connection;
my $glyco = new SBEAMS::Glycopeptide;
$glyco->setSBEAMS($sbeams);
my $keggmap = SBEAMS::BioLink::KeggMaps->new();
my $kegg_organism;
my $params = {};
my $base_url = $q->url( -absolute => 1 );
my $program = basename( $0 );
# Don't buffer output
$|++;
{ # Main
# Authenticate user.
my $current_username = $sbeams->Authenticate( allow_anonymous_access => 1) || die "Authentication failed";
# Process cgi parameters
$params = process_params();
my $spc = $sbeams->getGifSpacer(800);
my $content = "$spc \n";
$kegg_organism = $glyco->getKeggOrganism( );
# for my $p ( keys( %$params ) ) { $log->debug( "hey, $p => $params->{$p}"); }
if ( $sbeams->output_mode() !~ /cytoscape/ ) {
my $sql = '';
if ( $params->{apply_action} eq 'kegg_pathway' ) {
# Check for params
if ( $params->{path_id} ) {
$sql = get_pathway_sql();
}
# create SQL
} elsif ( $params->{apply_action} eq 'gene_list' ) {
# Check for params
if ( $params->{accession} ) {
# create SQL
$sql = get_genelist_sql( gene_list => $params->{accession} );
}
} else {
$content .= $sbeams->makeErrorText( "Missing or erroneous action" );
}
# Cache sql, pass key to link below
if ( !$sql ) {
$content .= $sbeams->makeErrorText( "Missing required information" );
} else {
my $key = $sbeams->getRandomString( num_chars => 20 );
# $sbeams->getSessionCookie();
$sbeams->setSessionAttribute( key => $key, value => $sql );
my $sp = ' ' x 2;
my @buttons = $sbeams->getFormButtons( types => [ 'submit' ],
name => 'Launch Cytoscape',
value => 'Submit' );
my $info_text = get_info_text();
# A few fly-specific options, currently unimplemented for other orgs.
my $primary_identifier = '';
my $interactions_option =<<" END";
Show known interactions as network edges:
$sp
END
my $fb_interaction_network = '';
if ( $params->{path_id} && $kegg_organism eq 'dme' ) {
$primary_identifier =<<" END";
Primary identifier:
$sp Locus tag (CGxxxxx)
$sp Flybase ID (FBgnxxxxxxx)
END
$interactions_option =<<" END";
Show known interactions as network edges (Locus tag only):
$sp
END
$fb_interaction_network =<<" END";
If desired, launch the
Interaction Network from Flybase. (works best with FBgn accessions)
END
}
$content .=<<" END";
Click links below to launch a cytoscape network to explore the proteins you've selected
First, launch the Gaggle Boss (facilitates communication between 'geese').
This cytoscape view depicts the relationship between the chosen proteins and any
peptides that they may have. Proteins are shown as blue oval-shaped nodes. Peptides
are shown as rectangular nodes, and their color is determined by the 'type' of peptide
they are:
- Green for high confidence (dCn) phosphopeptides
- Yellow for lower dCn phosphopeptides
- Red for non-phosphopeptides
A line (edge) between a protein and a peptide indicates that that peptide is contained
within the protein. You can choose whether to display either type of peptide, and
which protein identifier to use. To use Flybase interactions goose, it is better to
use FBgn accessions. Peptides are identified by their sequences, where & and * are
observed phosphorylation sites.
\n";
my $t1 = new Benchmark;
$log->debug( parseTime( $t1, $t0, "seconds to fetch pathways for $kegg_organism" ) );
return $page;
}
#+
# Main functionality of page. Print image mapped, colored pathway
#-
sub pathway_details {
my $params = shift;
my $url = $q->self_url();
my @url = split( /\?/, $url );
$url = $url[0];
# Due to performance issues, we will print this out as we go along until
# feature is better implemented.
print $sbeams->getGifSpacer(1200) . " \n";
# Some page-specific javascript/css. Allows loading info to get hidden,
# draws box around legend
print <<" END";
END
# Turned off
# my $addpath = ( $params->{path_def} =~ /pathway/i ) ? '' : 'Pathway ';
print "
\n";
print " * Looking up genes in $params->{path_def} ($params->{path_id}) at KEGG \n";
my $t0 = new Benchmark;
$keggmap->setPathway( pathway => $params->{path_id} );
my $gene_list = $keggmap->getPathwayGenes(); # pathway => $params->{path_id} );
my $tot = scalar( @{$gene_list} );
my $t1 = new Benchmark;
$log->debug( parseTime( $t1, $t0, "seconds to fetch genes for $params->{path_def}" ) );
print "* Found $tot genes in pathway, looking up data in Unipep \n";
# Fetches expression info from db, returns arrays of bg/fg colors
my ( $bg, $fg, $glyco_hits, $cnt ) = getExpressionValues($gene_list);
my $t2 = new Benchmark;
$log->debug( parseTime( $t2, $t1, "seconds to get glyco data for $params->{path_def}" ) );
print "* Found data for $cnt of $tot, plotting on KEGG map \n";
# Send gene/color info to kegg for a map
my $url = $keggmap->getColoredPathway( bg => $bg,
fg => $fg,
genes => $gene_list,
);
my $t3 = new Benchmark;
$log->debug( parseTime( $t3, $t2, "seconds to get color pathway" ) );
# Get info from pathway XML
my $processed = $keggmap->parsePathwayXML();
unless ( $processed ) {
# Fetching XML must have failed, simply print mapped image and exit
$log->warn( "Missing XML for pathway $params->{path_id}" );
print <<" END";
END
print "";
exit;
}
my $organism = keggOrg2Std( $kegg_organism );
my @links;
my @text;
my %entry2genes = %{$processed->{entry2genes}};
for my $en ( @{$processed->{entries}} ) {
if ( $entry2genes{$en} ) {
my @genes = map( /$kegg_organism:(.*)/, @{$entry2genes{$en}} );
# @genes = map { $glyco_hits->{$_}->[2] } @genes;
my $gene = join( '%3B', @genes ); # @{$entry2genes{$en}} );
if ( $kegg_organism eq 'dme' ) {
push @links, "peptideSearch.cgi?search_type=gene_name;search_term=$gene;action=Show_hits_form;autorun=1";
} else {
push @links, "peptideSearch.cgi?search_type=GeneID;search_term=$gene;action=Show_hits_form";
}
push @text, "Gene ID(s) " . join( ", ", @genes );
} else {
log->warn( "No entry for $en" );
}
}
my $t4 = new Benchmark;
$log->debug( parseTime( $t4, $t3, "seconds to get KGML" ) );
print <<" END";
END
# Get hashref of pathway info (mostly for image URL);
# Get image map for links to peptide glyco
my $image_map = $keggmap->get_image_map( coords => $processed->{coords},
name => 'kegg_map',
links => \@links,
text => \@text,
colors => $bg,
img_src => $url );
my $legend = getExpressionLegend();
print "$legend \n";
print "$image_map \n";
my @coordinates = @{$processed->{coords}};
my $t5 = new Benchmark;
$log->debug( parseTime( $t5, $t4, "seconds to get finish" ) );
# return $page;
return '';
}
sub getExpressionValues {
my $gene_list = shift;
my $cutoff = $glyco->get_current_prophet_cutoff();
my $gene_string = join( ", ", @{$gene_list} );
my $sql = '';
if ( $kegg_organism eq 'dme' ) {
$sql =<<" END";
SELECT CASE WHEN ipi_accession_number LIKE 'CG%' THEN ipi_accession_number
ELSE synonyms END AS accession, ID.ipi_data_id, 'nada', 'nada', 1
FROM $TBGP_IPI_DATA ID
JOIN $TBGP_OBSERVED_TO_IPI ITI
ON ITI.ipi_data_id = ID.ipi_data_id
JOIN $TBGP_OBSERVED_PEPTIDE IP
ON ITI.OBSERVED_peptide_id = IP.OBSERVED_peptide_id
WHERE ( ipi_accession_number IN ($gene_string)
OR synonyms IN ($gene_string) )
AND peptide_prophet_score >= $cutoff
END
} elsif ( 0 ) {
$sql =<<" END";
SELECT entrez_id, ipi_accessions, 'nada', 'nada', 1
FROM dcampbel.dbo.ipi_xrefs IX JOIN $TBGP_IPI_DATA ID
ON IX.ipi_accessions = ID.ipi_accession_number
JOIN $TBGP_OBSERVED_TO_IPI ITI
ON ITI.ipi_data_id = ID.ipi_data_id
JOIN $TBGP_OBSERVED_PEPTIDE IP
ON ITI.observed_peptide_id = IP.observed_peptide_id
WHERE entrez_id IN ($gene_string)
AND peptide_prophet_score >= $cutoff
END
} else {
$sql =<<" END";
SELECT ipi_accession_number accession, ID.ipi_data_id, 'nada', 'nada', 1
FROM $TBGP_IPI_DATA ID
JOIN $TBGP_OBSERVED_TO_IPI ITI
ON ITI.ipi_data_id = ID.ipi_data_id
JOIN $TBGP_OBSERVED_PEPTIDE IP
ON ITI.OBSERVED_peptide_id = IP.OBSERVED_peptide_id
WHERE ( ipi_accession_number IN ($gene_string)
OR synonyms IN ($gene_string) )
AND peptide_prophet_score >= $cutoff
END
}
$log->debug( "gonna run $sql" );
# ref to hash keyed by gene_id, points to arrayref of one or more peptides
my %glyco_hits;
my @rows = $sbeams->selectSeveralColumns( $sql );
for my $row ( @rows ) {
# just need to look at the first one
next if $glyco_hits{$row->[0]};
$glyco_hits{$row->[0]} = $row;
# push @{$glyco_hits{$row->[0]}}, $row;
}
# define colors
my $seen = 'yellow';
my $uniq = 'green';
my @bg;
my @fg;
my $gene_cnt = 0;
for my $gene ( @{$gene_list} ) {
# Has quotes if it is a string
$gene =~ s/\'//g;
# Assume it's neutral
my $color = '#E0FFFF';
if ( $glyco_hits{$gene} ) {
$gene_cnt++;
$color = ( $glyco_hits{$gene}->[4] > 1 ) ? $seen : $uniq;
}
push @bg, $color;
push @fg, 'black';
}
return ( \@bg, \@fg, \%glyco_hits, $gene_cnt );
}
sub getExpressionLegend {
my $cell = $sbeams->getGifSpacer(20); # . " \n";
my $table =<<" END";
$cell
Peptides observed
$cell
No peptides observed
$cell
N/A
END
return $table;
}
#+
# Read/process CGI parameters
#-
#+
# Extract wallclock seconds from time diff, append message, and return
#-
sub parseTime {
my @args = @_;
my $time = timestr(timediff( $args[0], $args[1] ));
$time =~ /(\d+) wallclock.*/;
return "$1 $args[2]";
}
#+
# Translate kegg organism to Standard glyco
#-
sub keggOrg2Std {
my $korg = shift;
return '' unless $korg;
my %k2s = ( hsa => 'Human',
dme => 'Drosophila',
sce => 'Yeast',
mmu => 'Mouse'
);
return $k2s{$korg} || '';
}
__DATA__
sub error_redirect {
my $msg = shift || '';
my $type = shift || 'Error';
$sbeams->set_page_message( msg => $msg, type => $type );
print $q->redirct( "treatmentList.cgi" );
exit;
}