#!/usr/local/bin/perl -w
use CGI qw/:standard/;
use CGI::Pretty;
$CGI::Pretty::INDENT = "";
use FileManager;
use Batch;
use BioC;
use Site;
use strict;
use XML::LibXML;
use FindBin;
use lib "$FindBin::Bin/../../../lib/perl";
use SBEAMS::Connection qw($log $q);
use SBEAMS::Connection::Settings;
use SBEAMS::Connection::DataTable;
use SBEAMS::Microarray;
use SBEAMS::Microarray::Settings;
use SBEAMS::Microarray::Tables;
use SBEAMS::Microarray::Affy_Analysis;
use vars qw ($sbeams $affy_o $sbeamsMOD $cgi $current_username $USER_ID
$PROG_NAME $USAGE %OPTIONS $QUIET $VERBOSE $DEBUG $DATABASE
$TABLE_NAME $PROGRAM_FILE_NAME $CATEGORY $DB_TABLE_NAME
@MENU_OPTIONS %CONVERSION_H);
$sbeams = new SBEAMS::Connection;
$affy_o = new SBEAMS::Microarray::Affy_Analysis;
$affy_o->setSBEAMS($sbeams);
$sbeamsMOD = new SBEAMS::Microarray;
$sbeamsMOD->setSBEAMS($sbeams);
#### Do the SBEAMS authentication and exit if a username is not returned
exit
unless (
$current_username = $sbeams->Authenticate(
permitted_work_groups_ref =>
[ 'Microarray_user', 'Microarray_admin', 'Admin' ],
#connect_read_only=>1,
#allow_anonymous_access=>1,
)
);
# Create the global CGI instance
#our $cgi = new CGI;
#using a single cgi in instance created during authentication
$cgi = $q;
#### Read in the default input parameters
my %parameters;
my $n_params_found = $sbeams->parse_input_parameters(
q => $cgi,
parameters_ref => \%parameters
);
#### Process generic "state" parameters before we start
$sbeams->processStandardParameters( parameters_ref => \%parameters );
# Create the global FileManager instance
our $fm = new FileManager;
# Handle initializing the FileManager session
if ($cgi->param('files_token') ) {
my $token = $cgi->param('files_token') ;
if ($fm->init_with_token($BC_UPLOAD_DIR, $token)) {
error('Upload session has no files') if !($fm->filenames > 0);
} else {
error("Couldn't load session from token: ". $cgi->param('files_token')) if
$cgi->param('files_token');
}
}else{
error("Cannot start session no param 'files_token'");
}
if (! $cgi->param('step')) {
$sbeamsMOD->printPageHeader();
step1();
$sbeamsMOD->printPageFooter();
} elsif ($cgi->param('step') == 1) {
$sbeamsMOD->printPageHeader();
step2();
$sbeamsMOD->printPageFooter();
} elsif ($cgi->param('step') == 2) {
step3();
} else {
error('There was an error in form input.');
}
#### Subroutine: step1
# Make step 1 form
####
sub step1 {
#print $cgi->header;
#site_header('Affymetrix Expression Analysis: affy');
print h1('Affymetrix Expression Analysis: affy'),
h2('Step 1:'),
start_form,
hidden('step', 1),
p('Enter upload manager token:', textfield('token', $fm->token, 30)),
p('Enter number of CEL files:', textfield('numfiles', '', 10)),
submit("Next Step"),
end_form,
p(a({-href=>"upload.cgi"}, "Go to Upload Manager"));
print <<'END';
Quick Help
affy performs low-level analysis of Affymetrix data and calculates
expression summaries for each of the probe sets. It processes the
data in three main stages: background correction, normalization,
and summarization. There are a number of different methods that
can be used for each of those stages. affy can also be configured
to handle PM-MM correction in several different ways.
To get started with affy, first upload all of your CEL files with
the upload manager. Make sure to note your upload manager token.
Next you must specify the number of CEL files to process. Generally,
you should process all the CEL files for an experiment at once, to
take maximum advantage of cross-chip normalization.
END
site_footer();
}
#### Subroutine: step2
# Handle step 1, make step 2 form
####
sub step2 {
my $numfiles = $cgi->param('numfiles');
my %labels;
if (grep(/[^0-9]|^$/, $numfiles) || !($numfiles > 0)) {
error('Please enter an integer greater than 0.');
}
site_header('Affymetrix Expression Analysis: affy');
my $normalization_token = $cgi->param('normalization_token');
print h1('Affymetrix Expression Analysis: affy'),
h2('Step 2:'),
start_form,
hidden('files_token', $fm->token),
hidden(-name=>'step', -default=>2, -override=>1),
hidden(-name=>'normalization_token', -value=>$normalization_token),
hidden(-name=>'analysis_id', -value=>$cgi->param('analysis_id')),
hidden('numfiles', $cgi->param('numfiles')),
p("Select files for expression analysis:");
print '',
Tr(th('#'), th('File'), th('Sample Name'));
my $file_info = parse_sample_groups_file( data_type => 'file_info',
folder => $normalization_token );
my @file_names = @{$file_info->{file_names}};
unless ( scalar @file_names == $numfiles ) {
$log->error( "Mismatch! $numfiles reported, but found " . scalar( @file_names) );
}
my $i = 0;
for my $sample_name ( @file_names ) {
# Cache original name
my $file_name = $sample_name;
# Remove the .CEL suffix to make a nice sample name
$sample_name =~ s/\.CEL$//;
my $default_names = $cgi->param( 'default_sample_names' ) || 'file_root';
if ( $default_names eq 'sample_tag' ) {
my $stagSQL =<<" END";
SELECT sample_tag
FROM $TBMA_AFFY_ARRAY_SAMPLE AAS JOIN $TBMA_AFFY_ARRAY AA
ON AAS.affy_array_sample_id = AA.affy_array_sample_id
WHERE file_root = '$sample_name'
END
my ( $tag ) = $sbeams->selectOneColumn( $stagSQL );
$sample_name = $tag if $tag;
}
print Tr(td($i+1),
td({-bgcolor=>"#CCCCCC"},
$cgi->textfield(-name=>"file$i",
-default=>$file_name,
-override=>1,
-size=>40,
-onFocus=>"this.blur()" )),
td(textfield('sampleNames', $sample_name, 40)));
$i++;
}
my $email = $sbeams->getEmailAddress();
print '
',
p("Choose the processing method:"),
p($cgi->radio_group('process', ['RMA','GCRMA'], 'RMA','true')),
"---- or ----",
p($cgi->radio_group('process', ['Custom'], '-')),
'',
Tr(td({-style=>"text-align: right"}, "Background Correction:"),
td(popup_menu('custom', ['none', 'rma', 'rma2', 'mas' ], 'rma'))),
Tr(td({-style=>"text-align: right"}, "Normalization:"),
td(popup_menu('custom', ['quantiles', 'quantiles.robust', 'loess', 'contrasts', 'constant', 'invariantset', 'qspline', 'vsn'], 'quantiles'))),
Tr(td({-style=>"text-align: right"}, "PM Correction:"),
td(popup_menu('custom', ['mas', 'pmonly', 'subtractmm'], 'pmonly'))),
Tr(td({-style=>"text-align: right"}, "Summarization:"),
td(popup_menu('custom', ['avgdiff', 'liwong', 'mas', 'medianpolish', 'playerout', 'rlm'], 'medianpolish'))),
'
',
p($cgi->checkbox('log2trans','checked','YES','Log base 2 transform the results (required for multtest)')),
p($cgi->checkbox('MVAplot','checked','YES','Produce MVA scatter plot among members of each sample group?')),
p($cgi->checkbox('corrMat','checked','YES','Produce correlation matrix for this normalization set?')),
p('Enter description for analysis set (optional)
', $cgi->textfield('user_description', '', 40)),
p("E-mail address where you would like your job status sent: (optional)", br(), textfield('email', $email, 40)),
p(submit("Start Normalization")),
end_form;
print <<'END';
Quick Help
For information about each of the processing methods, see Ben
Bolstad's PDF vignette, affy:
Built-in Processing Methods. Not all of the methods work with
one another so consult that document first.
Variance Stabilization (vsn) is a background correction and
normalization method written as an add-on to affy. If you use it,
make sure to set background correction to "none" as vsn already
does this. For more information, see Wolfgang Huber's PDF vignette,
Robust
calibration and variance stabilization with VSN.
GCRMA is an expression measure developed by Zhijin Wu and Rafael
A. Irizarry. It pools MM probes with similar GC content to form
a pseudo-MM suitable for background correction of those probe pairs.
To use GCRMA, select either gcrma-eb or gcrma-mle for Background
Correction and rlm for Summarization. For further information,
please see their paper currently under preparation, A
Model Based Background Adjustement for Oligonucleotide Expression
Arrays. Also, please note that the gcrma R package is currenly
a developmental version.
END
# $sbeams->printCGIParams( $cgi );
site_footer();
}
#### Subroutine: step3
# Handle step 2, redirect web browser to results
####
sub step3 {
my $jobname = '';
# Modified to allow user to specify description on step_2 form.
my $udesc = $cgi->param( 'user_description' );
my $analysis_id = $cgi->param( 'analysis_id' );
if ( $udesc && $analysis_id ) {
$sbeams->updateOrInsertRow( table_name => $TBMA_AFFY_ANALYSIS ,
rowdata_ref => { user_description => $udesc },
PK_name => 'affy_analysis_id',
PK_value => $analysis_id,
update => 1 );
my ( $parent_id ) = $sbeams->selectOneColumn( <<" END" );
SELECT parent_analysis_id FROM $TBMA_AFFY_ANALYSIS
WHERE affy_analysis_id = $analysis_id
END
if( $parent_id ) {
$sbeams->updateOrInsertRow( table_name => $TBMA_AFFY_ANALYSIS ,
rowdata_ref => { user_description => $udesc },
PK_name => 'affy_analysis_id',
PK_value => $parent_id,
update => 1 );
}
}
if ($cgi->param('normalization_token') ){
$jobname = $cgi->param('normalization_token');
}else{
$jobname = "affy-norm" . rand_token();
}
my (@filenames, $script, $output, $jobsummary, $custom, $error, $args, $job);
my @custom = $cgi->param('custom');
for (my $i = 0; $i < $cgi->param('numfiles'); $i++) {
my $debug = $fm->path();
error("File does not exist.") if !$fm->file_exists($cgi->param("file$i"));
$filenames[$i] = $cgi->param("file$i");
}
if ($cgi->param('email') && !check_email($cgi->param('email'))) {
error("Invalid e-mail address.");
}
if ($cgi->param('process') eq "Custom" && !expresso_safe(@custom)) {
error("Invalid custom processing method combination");
}
$output = <Show analysis Data:
Show Files
Output Files:
${jobname}_annotated.txt
$jobname.exprSet
END
$args = "";
if ($cgi->param('process') eq "Custom") {
$args = ": " . join(' -> ', $cgi->param('custom'));
}
if ($cgi->param('process') eq "GCRMA") {
$args = "";
}
my $log2trans = ($cgi->param('log2trans') ) ? "TRUE" : "FALSE";
my $MVAplot = ($cgi->param('MVAplot') ) ? "TRUE" : "FALSE";
my $corrMat = ($cgi->param('corrMat') ) ? "TRUE" : "FALSE";
$jobsummary = jobsummary('Files', join(', ', @filenames),
'Sample Names', join(', ', $cgi->param('sampleNames')),
'Processing', scalar($cgi->param('process')) . $args,
'Log 2 Transform', $log2trans,
'MVAplot', $MVAplot,
'corrMat', $corrMat,
# 'Copy back', $cgi->param('fmcopy') ? "Yes" : "No",
'E-Mail', scalar($cgi->param('email')));
# update db with analysis run info, add user defined sample names to XML file
my @db_jobsummary = ('File Names =>' . join(', ', @filenames),
'Log 2 Transform' . $cgi->param('log2trans')?"Yes":"No",
'Sample Names =>' . join(', ', $cgi->param('sampleNames')),
'Processing =>'. $cgi->param('process')
);
update_analysis_table(@db_jobsummary);
# Moved this up so as we can fetch the current names
my @all_samples = $cgi->param('sampleNames');
update_xml_file(token => $jobname,
sample_names=>\@all_samples,
file_name =>\@filenames );
if ( $cgi->param( 'MVAplot' ) || $cgi->param( 'corrMat' ) ) {
$output .=<<" END";
Image files:
View all image files inline
END
}
# Table to hold images
my $img_table = SBEAMS::Connection::DataTable->new(BORDER=>1);
if ( $cgi->param('corrMat') ) {
my $raw_url = "$RESULT_URL?action=view_image&analysis_folder=$jobname&analysis_file=raw_correlation_matrix&file_ext=png";
my $norm_url = "$RESULT_URL?action=view_image&analysis_folder=$jobname&analysis_file=normalized_correlation_matrix&file_ext=png";
# Add links to main results page
$output .=<<" END";
Raw correlation matrix
Normalized correlation matrix
END
# Add links to image page
$img_table->addRow( [ "
" ] );
$img_table->addRow( [ "Raw Correlation Matrix
" ] );
$img_table->addRow( [ "
" ] );
$img_table->addRow( [ "Normalized Correlation Matrix
" ] );
}
if ( $cgi->param('MVAplot') ) {
# Need to see which ones we'll have plots for
my $info = BioC::parse_sample_groups_file( folder => $jobname,
data_type => 'file_info' );
# Hash keyed by group name, will hold number of files in group.
my %group_files;
foreach ( @{$info->{sample_groups}} ) {
$group_files{$_}++;
}
my %seenit;
for my $grp ( @{$info->{sample_groups}} ) {
# Only want to do this once
next if $seenit{$grp};
$seenit{$grp}++;
# No plot if there is only one file in the group
next if $group_files{$grp} < 2;
my $mva_url = "$RESULT_URL?action=view_image&analysis_folder=$jobname&analysis_file=${grp}_MVA_matrix&file_ext=png";
# Add links to main results page
$output .= "MVA comparison for file group $grp
";
# Add links to image page
$img_table->addRow( [ "
" ] );
$img_table->addRow( [ "MVA scatter plot for file group $grp
" ] );
}
}
$img_table->setColAttr( ROWS => [1..$img_table->getRowNum()], COLS => [1], ALIGN=>'CENTER' );
# This should be wrapped into create_files call, but not yet...
createImageIndexPage( content => $img_table, path => "$RESULT_DIR/$jobname/" );
$script = generate_r( jobname => $jobname,
log2trans => $log2trans,
MVAplot => $MVAplot,
corrMat => $corrMat
);
$error = create_files($jobname, $script, $output, $jobsummary, 15,
"Affymetrix Expression Analysis", $cgi->param('email'));
error($error) if $error;
$job = new Batch;
$job->type($BATCH_SYSTEM);
$job->script("$RESULT_DIR/$jobname/$jobname.sh");
$job->name($jobname);
$job->out("$RESULT_DIR/$jobname/$jobname.out");
$job->submit ||
error("Couldn't start job in $RESULT_DIR");
open(ID, ">$RESULT_DIR/$jobname/id") || error("Couldn't write job id file");
print ID $job->id;
close(ID);
log_job($jobname, "Affymetrix Expression Analysis", $fm);
print $cgi->redirect("job.cgi?name=$jobname");
}
#+
#
#-
sub createImageIndexPage {
my %args = @_;
my $page =<<" END_PAGE";
Click on any image to see a larger version
$args{content}
END_PAGE
open( IDX, ">$args{path}image_index.html" ) || die "Unable to open image index page";
print IDX $page;
close IDX;
}
#### Subroutine: generate_r
# Generate an R script to process the data
####
sub generate_r {
my %args = @_;
my $jobname = $args{jobname};
my $info = BioC::parse_sample_groups_file( folder => $jobname,
data_type => 'file_info' );
my @files = @{$info->{file_names}};
my @sampleNames = @{$info->{sample_names}};
my $celfiles = \@files;
$log->debug( join( ":::", @files) );
$log->debug( join( ":::", @sampleNames) );
my $celpath = $fm->path;
my $process = $cgi->param('process');
my @custom = $cgi->param('custom');
my $fmcopy = $cgi->param('fmcopy');
my $script;
my $slide_type_name = $affy_o->find_slide_type_name(file_names=>$celfiles);
# Escape double quotes to prevent nasty hacking
for (@$celfiles) { s/\"/\\\"/g }
for (@sampleNames) { s/\"/\\\"/g }
$process =~ s/\"/\\\"/g;
for (@custom) { s/\"/\\\"/g }
# Make R variables out of the perl variables
$script = <{sample_groups}} ) {
$group_files{$_}++;
}
# Generate plot code/dimensions for MVA and correlation plots.
my %mva_dim;
my $mva_code = '';
my $idx = 1;
my $tot = 0;
my %seenit;
foreach my $grp ( @{$info->{sample_groups}} ) {
# Will do this only once per group
next if $seenit{$grp};
$seenit{$grp}++;
my $first = $idx;
my $last = $first + $group_files{$grp} - 1;
$idx = $last + 1;
# If we have only one member in the group, don't try to do an MVA plot
next unless ( $group_files{$grp} > 1 );
$tot++;
my $height = 3 + .12* $group_files{$grp};
my $width = 4 + .16* $group_files{$grp};
$mva_code .=<<" END_CODE";
# Do MVA (scatterplot) matrix for $grp
bitmap( "$RESULT_DIR/$jobname/${grp}_MVA_matrix.png", height=$height, width=$width, res = 72*4, pointsize = 10)
mva.pairs.custom(Matrix[,$first:$last])
dev.off()
END_CODE
}
my %corr_dims = ( height => 6, # + .04*$tot,
width => 4 + .04*$tot,
);
# Main data processing, entirely R
$script .= <<"END";
.libPaths(lib_path)
library(affy)
library(gcrma)
library(vsn)
bgcorrect.methods <- c(bgcorrect.methods, "gcrma")
normalize.AffyBatch.methods <- c(normalize.AffyBatch.methods, "vsn")
express.summary.stat.methods <- c(express.summary.stat.methods, "rlm")
#read in the annotation file
annot <- read.table(paste (path.to.annotation ,chip.name,"_annot.csv",sep=""),sep=",")
annot.orders <- order(annot[-1,1])
annot.header <- as.matrix(annot[1,])
annot.noheader <- annot[-1,]
annot.grab.columns <- c( grep("Representative Public ID",annot.header),grep("Gene Symbol",annot.header),grep("Gene Title",annot.header),grep("LocusLink",annot.header) )
# Set working directory, and read CEL files
setwd(filepath)
# Various circumstances dictate running affybatch
if (process == "Custom" || MVAplot || corrMat) {
affybatch <- ReadAffy(filenames = filenames)
}
# Output a correlation matrix for pre-normalization data if requested.
if (corrMat) {
pm <- pm(affybatch)# grab perfect match intensities
affybatch.cor <- cor(pm)
gray.colors <- gray(1:100/100) # define a grayscale color set
numChips <- dim(exprs(affybatch))[2]
# bitmap("$RESULT_DIR/$jobname/raw_correlation_matrix.png", height=$corr_dims{height}, width=$corr_dims{width}, res = 72*4, pointsize = 10)
bitmap("$RESULT_DIR/$jobname/raw_correlation_matrix.png", height=$corr_dims{height}, res = 72*4, pointsize = 10)
par(mar=c(3,3,3,9))
image(x=1:numChips,y=1:numChips,affybatch.cor,col=gray.colors,zlim=c(min(affybatch.cor),1))
title(paste("Correlation Matrix(Raw), black:R=",trunc(min(affybatch.cor)*100)/100,"white:R=1"),cex.main=numChips/15)
for(i in 1:numChips) {
name <- row.names(pData(affybatch))[i]
text(numChips+1,i,name,xpd=TRUE,adj=c(0,0.5),cex=0.666)
}
dev.off() # dev.off() causes image file to be written
}
if (process == "RMA") {
# exprset <- rma(ReadAffy(filenames = filenames))
# This is causing an error on Linux but not Mac OS X
# More investigation needed
exprset <- justRMA(filenames = filenames) #changed 10.21.04 Bruz
}
if (process == "GCRMA"){
exprset <- justGCRMA(filenames = filenames)
}
if (process == "Custom") {
bgcorrect.param <- list()
if (custom[1] == "gcrma-eb") {
custom[1] <- "gcrma"
gcgroup <- getGroupInfo(affybatch)
bgcorrect.param <- list(gcgroup, estimate = "eb", rho = 0.8, step = 60,
lower.bound = 1, triple.goal = TRUE)
}
if (custom[1] == "gcrma-mle") {
custom[1] <- "gcrma"
gcgroup <- getGroupInfo(affybatch)
bgcorrect.param <- list(gcgroup, estimate = "mle", rho = 0.8,
baseline = 0.25, triple.goal = TRUE)
}
exprset <- expresso(affybatch, bgcorrect.method = custom[1],
bgcorrect.param = bgcorrect.param,
normalize.method = custom[2],
pmcorrect.method = custom[3],
summary.method = custom[4])
}
colnames(exprset\@exprs) <- samples
log2methods <- c("medianpolish", "mas", "rlm")
if (log2trans) {
if (process == "Custom" && !(custom[4] %in% log2methods)) {
exprset\@exprs <- log2(exprset\@exprs)
exprset\@se.exprs <- log2(exprset\@se.exprs)
}
} else {
if (process == "RMA" || process == "Custom" && custom[4] %in% log2methods) {
exprset\@exprs <- 2^exprset\@exprs
exprset\@se.exprs <- 2^exprset\@se.exprs
}
}
END
# Output results
$script .= <header;
site_header("Affymetrix Expression Analysis: affy");
print h1("Affymetrix Expression Analysis: affy"),
h2("Error:"),
p($error);
print "DEBUG INFO
";
my @param_names = $cgi->param;
foreach my $p (@param_names){
print $p, " => ", $cgi->param($p),br;
}
site_footer();
exit(1);
}
#### Subroutine: Update Analysis record
# Once an analysis run has start update the db record with the switches that were used
# Give a hash from the cgi->parms;
# Return 1 for succesful update 0 for a failure
####
sub update_analysis_table {
my @db_jobsummary = @_;
my $analysis_data = join ('// ', @db_jobsummary);
# my %cgi_params = @_;
my $analysis_id = $cgi->param('analysis_id');
# my @R_switches = qw(process custom log2trans path sampleNames);
# my $analysis_data = '';
error("Analysis ID not FOUND '$analysis_id'") unless ($analysis_id =~ /^\d/);
# foreach my $key (@R_switches){
# if (exists $cgi_params{$key}){
# my $val = $cgi_params{$key} ? $cgi_params{$key} : "Not Set";
# $analysis_data .= "$key=>$val //";
# }
# }
# $analysis_data =~ s/[^a-zA-Z0-9\/_\.=> ]/__/g; #remove any junk from the analsysis annotation note
# $log->debug("ANALYSIS ANNO '$analysis_data'");
my $rowdata_ref = { analysis_description => $analysis_data,
};
my $result = $sbeams->updateOrInsertRow(
update=>1,
table_name=>$TBMA_AFFY_ANALYSIS,
rowdata_ref=>$rowdata_ref,
PK=>'affy_analysis_id',
PK_value=>$analysis_id,
return_PK=>1,
verbose=>0,
testonly=>0,
add_audit_parameters=>1,
);
}
#### Subroutine: update_xml_file
# Once an analysis run has started update the xml sample group file with the sample names used
# return 1 for success 0 for failure
####
sub update_xml_file {
my %args = @_;
my $folder_name = $args{token};
my $all_sample_names_aref = $args{sample_names};
my @all_filenames = @{ $args{file_name} };
my $xml_file = "$BC_UPLOAD_DIR/$folder_name/$SAMPLE_GROUP_XML";
my $parser = XML::LibXML->new();
my $doc = $parser->parse_file( $xml_file);
my $root = $doc->getDocumentElement;
my %class_numbers = ();
my $class_count = 0;
##need to convert the sample name to a number since R only wants two classes for t-testing O and 1
##First frind the reference sample node to use as the class 0
#$sample_groups{SAMPLE_NAMES}{$sample_name} = "$class";
my $reference_sample_group_node = $root->findnodes('//reference_sample_group');
my $reference_sample_group = $reference_sample_group_node->to_literal;
if ($reference_sample_group){
$class_numbers{$reference_sample_group} = $class_count;
$class_count ++;
}else{
error("Cannot find the reference sample group node");
}
for(my $i=0; $i <= $#all_filenames; $i++){
my $file_name = $all_filenames[$i];
my $sample_name = $all_sample_names_aref->[$i];
my $found_flag = 'F';
my $x_path = "//file_name[.='$file_name']";
my $count = 0;
foreach my $c ($root->findnodes($x_path)) {
$found_flag = 'T';
my $sample_group = $c->findnodes('./@sample_group_name')->string_value();
$c->setAttribute("sample_name", $sample_name);
my $node_val = $c->to_literal;
### Need to add a class number that will be used by R in the t-test and anova test. It cannot use
### a sample_group name for determing the different classes....
my $class = '';
if (exists $class_numbers{$sample_group}){
$class = $class_numbers{$sample_group};
}else{
$class_numbers{$sample_group} = $class_count;
$class = $class_numbers{$sample_group};
$class_count ++;
}
$c->setAttribute("class_number", $class);
# print STDERR "NODE VAL '$sample_group' CLASS '$class'\n";
if ($count > 1){
error("Updating XML Error: More then two files with the same name $file_name");
}
$count ++;
}
print STDERR "COULD NOT FIND NODE FOR FILE '$file_name'" unless $found_flag eq 'T';
}
my $state = $doc->toFile($xml_file, 1); #1 is the xml format to produce, it will make a nice looking indented pattern
}