#!/usr/local/bin/perl -w ############################################################################### # Program : calcPeptideOrder.pl # Author : Eric Deutsch # $Id$ # # Description : This script reads a peptide list created by # createPipelineInput.pl and calculates relative peptide order # # SBEAMS is Copyright (C) 2000-2005 Institute for Systems Biology # This program is governed by the terms of the GNU General Public License (GPL) # version 2 as published by the Free Software Foundation. It is provided # WITHOUT ANY WARRANTY. See the full description of GPL terms in the # LICENSE file distributed with this software. # ############################################################################### use strict; use Getopt::Long; use FindBin; use vars qw ( $PROG_NAME $USAGE %OPTIONS $QUIET $VERBOSE $DEBUG $TESTONLY $peptide_hash ); ############################################################################### # Read and validate command line args ############################################################################### my $VERSION = q[$Id$ ]; $PROG_NAME = $FindBin::Script; my $USAGE = <; #### Read in all the peptides for the first experiment my @columns; my $line; my $not_done = 1; while ($not_done) { #### Try to read in the next line; if ($line = ) { chomp($line); @columns = split(/\t/,$line); next if ($columns[2] < $P_threshold); if ($process_search_batch_id) { next unless ($columns[0] == $process_search_batch_id); } #### If it fails, we're done, but still process the last batch } else { $not_done = 0; @columns = (-998899,'xx',-1,'zz'); } #### If the this search_batch_id is not, known, learn from first record unless ($this_search_batch_id) { $this_search_batch_id = $columns[0]; print "Processing search_batch_id=$this_search_batch_id\n"; $n_experiments++; } #### If the search_batch_id of this peptide is not the same as the last #### then finish processing the last peptides of previous search_batch_id if ($this_search_batch_id != $columns[0]) { #### Process this search_batch foreach my $msrun (keys(%peptides)) { print " Processing msrun $msrun..."; my @scan_numbers = sort numerically keys(%{$peptides{$msrun}}); my $n_spectrum_ids = scalar(@scan_numbers); print " Found $n_spectrum_ids...\n"; foreach my $scan_number (@scan_numbers) { my $sequence = $peptides{$msrun}->{$scan_number}; foreach my $other_scan_number (@scan_numbers) { my $other_sequence = $peptides{$msrun}->{$other_scan_number}; my $offset = $other_peptides{$other_sequence}; unless ($offset) { $other_peptides{$other_sequence} = $offset_counter; $offset = $offset_counter; my @tmp = ( 0,0 ); $matrix{$sequence}->[$offset] = \@tmp; $offset_counter++; } my $difference = abs($scan_number - $other_scan_number); if ($scan_number < $other_scan_number) { $matrix{$sequence}->[$offset]->[0]++; $matrix{$sequence}->[$offset]->[2] += $difference; #$matrix{$sequence}->[0] += $difference; # produces non-linear $matrix{$sequence}->[0] += 1; } elsif ($scan_number > $other_scan_number) { $matrix{$sequence}->[$offset]->[1]++; $matrix{$sequence}->[$offset]->[3] += $difference; #$matrix{$sequence}->[0] += $difference; # produces non-linear $matrix{$sequence}->[0] += 1; } } } } #### Prepare for next search_batch_id $this_search_batch_id = $columns[0]; %peptides = (); unless ($this_search_batch_id == -998899) { print "Processing search_batch_id=$this_search_batch_id\n"; $n_experiments++; } } #### Put this peptide entry to the arrays if ($not_done) { my $sequence = $columns[1]; my $spectrum = $columns[4]; my ($msrun,$scan_number); if ($spectrum =~ /(.+)\.(\d+)\.(\d+)\.\d/) { $msrun = $1; $scan_number = $2; } else { print "ERROR: Unable to parse $spectrum\n"; } $peptides{$msrun}->{$scan_number} = $sequence; } } close(INFILE); print "Done reading.\n"; print "Total experiments: $n_experiments\n"; print "Total peptides: $offset_counter\n"; #### Calculate relative order for peptides print "Caculating peptide order...\n"; my %peptide_scores; my @peptides; foreach my $peptide (keys(%matrix)) { my $before_sum = 0; my $after_sum = 0; my $total = 0; my $confused = 0; foreach my $other_peptide (keys(%other_peptides)) { my $offset = $other_peptides{$other_peptide}; my $before = $matrix{$peptide}->[$offset]->[2] || 0; my $after = $matrix{$peptide}->[$offset]->[3] || 0; $before_sum += $before; $after_sum += $after; if ($before && $after) { $confused++; } } $total = $matrix{$peptide}->[0]; print "$before_sum $after_sum $total $confused\n"; my $score = ($after_sum-$before_sum)/$total; $peptide_scores{$peptide} = $score; my %tmp = ( sequence => $peptide, score => $score, confusion => $confused, ); push(@peptides,\%tmp); } my @sorted_peptides = sort byScore @peptides; print "Writing file...\n"; open(OUTFILE,">out.peporder"); foreach my $peptide (@sorted_peptides) { print OUTFILE "$peptide->{sequence}\t$peptide->{score}\t". "$peptide->{confusion}\n"; } close(OUTFILE); print "Freeing memory...\n"; return(1); } ############################################################################### # by_Probability ############################################################################### sub by_Probability { return $b->[2] <=> $a->[2]; } # end by_Probability ############################################################################### # numerically ############################################################################### sub numerically { return $a <=> $b; } # end numerically ############################################################################### # byScore ############################################################################### sub byScore { return $a->{score} <=> $b->{score}; } # end byScore ############################################################################### # round ############################################################################### sub round { my $value = shift; my $digits = shift; return sprintf("%.${digits}f",$value); } # end round