#!/usr/local/bin/perl ############################################################################### # Program : sbeamsbot # Author : Eric Deutsch # $Id$ # # Description : This program watches for new data coming from the instruments # and processes it accordingly # # SBEAMS is Copyright (C) 2000-2005 Institute for Systems Biology # This program is governed by the terms of the GNU General Public License (GPL) # version 2 as published by the Free Software Foundation. It is provided # WITHOUT ANY WARRANTY. See the full description of GPL terms in the # LICENSE file distributed with this software. # ############################################################################### ############################################################################### # Generic SBEAMS setup for all the needed modules and objects ############################################################################### use strict; use Getopt::Long; use FindBin; use DirHandle; use POSIX; use lib qw (../perl ../../perl); use vars qw ($sbeams $sbeamsPROT $q $PROG_NAME $USAGE %OPTIONS $QUIET $VERBOSE $DEBUG $DATABASE $TESTONLY $current_contact_id $current_username ); #### Set up SBEAMS core module use SBEAMS::Connection; use SBEAMS::Connection::Settings; use SBEAMS::Connection::Tables; use SBEAMS::Proteomics; use SBEAMS::Proteomics::Settings; use SBEAMS::Proteomics::Tables; $sbeams = SBEAMS::Connection->new(); $sbeamsPROT = SBEAMS::Proteomics->new(); $sbeamsPROT->setSBEAMS($sbeams); #### Set program name and usage banner $PROG_NAME = $FindBin::Script; $USAGE = <Authenticate() and exit if it fails or continue if it works. ############################################################################### sub main { #### Do the SBEAMS authentication and exit if a username is not returned exit unless ($current_username = $sbeams->Authenticate( work_group=>'Proteomics_admin', )); $sbeams->printPageHeader() unless ($QUIET); handleRequest(); $sbeams->printPageFooter() unless ($QUIET); } # end main ############################################################################### # handleRequest ############################################################################### sub handleRequest { my %args = @_; #### Define standard variables my ($i,$element,$key,$value,$line,$result,$sql); #### Get $BASE_DIR from the environment my $BASE_DIR = $ENV{BASE_DIR}; unless ($BASE_DIR) { print "ERROR: This program needs to be started by the special starter\n". "script. If you are trying to run this by hand, don't!\n"; return; } #### Define some settings my $staging_dir = "$BASE_DIR/staging"; my $archive_dir = "$BASE_DIR/archive"; my $user_dir = "$BASE_DIR/users"; my $BIN_DIR = "$BASE_DIR/../.bin"; #### Set the command-line options my $daemon_mode = $OPTIONS{"daemon"} || ''; my $once_mode = $OPTIONS{"once"} || ''; $TESTONLY = $OPTIONS{'testonly'} || 0; $once_mode = 1 unless ($daemon_mode || $once_mode); #### If there are any parameters left, complain and print usage if ($ARGV[0]){ print "ERROR: Unresolved parameter '$ARGV[0]'.\n"; print "$USAGE"; return; } #### Set looping parameters my $sleeptime = 5; my $heartbeat_interval = 2; my $heartbeat = 0; #### Print a message and get started unless ($QUIET) { $sbeams->printUserContext(); print "\n"; print scalar localtime," [$PROG_NAME] Starting...\n"; } #### Define some additional variables my $stopflag = 0; my $file; my @files; my %parameters; #### Loop forever until we're told to stop or get killed until ($stopflag) { #### See if there are new files in the staging area and register processNewDataFiles( staging_dir => $staging_dir, archive_dir => $archive_dir, ); #### Process files that need attention processDataFiles( staging_dir => $staging_dir, archive_dir => $archive_dir, user_dir => $user_dir, ); #### Sleep for a while and then check again unless ($once_mode) { print scalar localtime," [$PROG_NAME] Sleeping $sleeptime seconds.\n" if ($VERBOSE > 2); sleep($sleeptime); } else { $stopflag = 1; } #### Update heartbeat counter and squawk when appropriate $heartbeat++; if ($heartbeat >= $heartbeat_interval) { $heartbeat = 0; print scalar localtime," [$PROG_NAME] Alive and waiting for new jobs\n" if ($VERBOSE); } } # enduntil unless ($QUIET) { print scalar localtime," [$PROG_NAME] Finished.\n"; } return; } ############################################################################### # processNewDataFiles ############################################################################### sub processNewDataFiles { my %args = @_; my $SUB_NAME = 'processNewDataFiles'; #### Decode the argument list my $staging_dir = $args{'staging_dir'} || die "ERROR[$SUB_NAME]: staging_dir not passed"; #### Get a directory listing of what's in the staging area my @files = getDirListing(directory=>$staging_dir,exclude_dot_files=>1); #### Define standard variables my ($i,$element,$key,$value,$line,$result,$sql); my $file_name; #### If there are any, process them if (@files) { #### Get a list of the available processing_status's $sql = qq~ SELECT status_tag,processing_status_id FROM $TBPR_PROCESSING_STATUS WHERE record_status != 'D' ~; my %processing_status_ids = $sbeams->selectTwoColumnHash($sql); #### Get information about current user my $current_contact_id = $sbeams->getCurrent_contact_id(); my $current_work_group_id = $sbeams->getCurrent_work_group_id(); #### Loop over each file and decide what to do foreach $file_name (@files) { #### Query to see if this file is already known my $sql = qq~ SELECT raw_data_file_id,file_path FROM $TBPR_RAW_DATA_FILE WHERE file_name = '$file_name' AND file_path = '$staging_dir' AND record_status != 'D' ~; my @rows = $sbeams->selectSeveralColumns($sql); #### If there are multiple rows, barf if (scalar @rows > 1) { print "ERROR[$SUB_NAME]: There are mulitple records for this ". "filename!! This should never happen.\n"; return; } #### Extract data my $raw_data_file_id = $rows[0]->[0]; #### If it is known, then we don't do anything here if ($raw_data_file_id) { print " File '$file_name' is already known and in the queue\n" if ($VERBOSE > 1); #### Else, put it in the queue as WaitingToAge } else { print " Found new file '$file_name'.\n" if ($VERBOSE); my %rowdata = ( file_name => $file_name, file_path => $staging_dir, processing_status_id => $processing_status_ids{WaitingToAge}, file_status_message => 'Waiting to make sure file is complete', date_created => 'CURRENT_TIMESTAMP', created_by_id => $current_contact_id, date_modified => 'CURRENT_TIMESTAMP', modified_by_id => $current_contact_id, owner_group_id => $current_work_group_id, ); $sbeams->insert_update_row( insert=>1, table_name=>$TBPR_RAW_DATA_FILE, rowdata_ref=>\%rowdata, verbose=>$VERBOSE, testonly=>$TESTONLY, ); } } #### If there are no files } else { print scalar localtime," [$PROG_NAME] No new files in staging area.\n" if ($VERBOSE > 1); } return(1); } ############################################################################### # processDataFiles ############################################################################### sub processDataFiles { my %args = @_; my $SUB_NAME = 'processDataFiles'; #### Decode the argument list my $staging_dir = $args{'staging_dir'} || die "ERROR[$SUB_NAME]: staging_dir not passed"; my $archive_dir = $args{'archive_dir'} || die "ERROR[$SUB_NAME]: archive_dir not passed"; my $user_dir = $args{'user_dir'} || die "ERROR[$SUB_NAME]: user_dir not passed"; #### Define standard variables my ($i,$element,$key,$value,$line,$result,$sql); #### Query to see what files need attention my $sql = qq~ SELECT raw_data_file_id,status_tag FROM $TBPR_RAW_DATA_FILE RDF INNER JOIN $TBPR_PROCESSING_STATUS PS ON ( RDF.processing_status_id = PS.processing_status_id ) WHERE RDF.record_status != 'D' AND PS.status_tag != 'Finished' ~; my @rows = $sbeams->selectSeveralColumns($sql); #### If there's nothing to do, return unless (@rows) { print " No unfinished files in the queue.\n" if ($VERBOSE); return; } #### Loop over all files foreach my $row (@rows) { my $raw_data_file_id = $row->[0]; my $status_tag = $row->[1]; #### If we're WaitingToAge, move the data file if it's ready if ($status_tag eq 'WaitingToAge') { moveDataFile( raw_data_file_id=>$raw_data_file_id, staging_dir => $staging_dir, archive_dir => $archive_dir, user_dir => $user_dir, ); #### If the current status is an error state, try to climb out } elsif ($status_tag eq 'UnableToParseName' || $status_tag eq 'ProcessError' || $status_tag eq 'UnableToFindExperiment') { recoverFromErrorState( raw_data_file_id=>$raw_data_file_id, staging_dir => $staging_dir, ); #### Otherwise, confess we don't know what to do yet } else { print " Don't know what to do with status '$status_tag' yet.\n"; } } return(1); } ############################################################################### # moveDataFile ############################################################################### sub moveDataFile { my %args = @_; my $SUB_NAME = 'moveDataFile'; #### Decode the argument list my $raw_data_file_id = $args{'raw_data_file_id'} || die "ERROR[$SUB_NAME]: raw_data_file_id not passed"; my $staging_dir = $args{'staging_dir'} || die "ERROR[$SUB_NAME]: staging_dir not passed"; my $archive_dir = $args{'archive_dir'} || die "ERROR[$SUB_NAME]: archive_dir not passed"; my $user_dir = $args{'user_dir'} || die "ERROR[$SUB_NAME]: user_dir not passed"; #### Define standard variables my ($i,$element,$key,$value,$line,$result,$sql); #### Get a list of the available processing_status's $sql = qq~ SELECT status_tag,processing_status_id FROM $TBPR_PROCESSING_STATUS WHERE record_status != 'D' ~; my %processing_status_ids = $sbeams->selectTwoColumnHash($sql); my $current_contact_id = $sbeams->getCurrent_contact_id(); #### Get some information about this data file my $sql = qq~ SELECT raw_data_file_id,status_tag,file_name,file_path FROM $TBPR_RAW_DATA_FILE RDF INNER JOIN $TBPR_PROCESSING_STATUS PS ON ( RDF.processing_status_id = PS.processing_status_id ) WHERE raw_data_file_id = '$raw_data_file_id' ~; my @rows = $sbeams->selectSeveralColumns($sql); #### If there's nothing to do, print error and return unless (@rows) { print "ERROR[$SUB_NAME]: Cannot find raw_data_file_id=". "'$raw_data_file_id'. This should never happen.\n"; return; } #### If more than one row returned, print error and return if (scalar @rows > 1) { print "ERROR[$SUB_NAME]: More than one row returned from $sql ". "This should never happen.\n"; return; } #### Extract data from result my ($raw_data_file_id,$status_tag,$file_name,$file_path) = @{$rows[0]}; #### If the file_name doesn't exist, delete it in the queue my $full_file_name = "$file_path/$file_name"; unless ( -e $full_file_name ) { print " Cannot find file '$full_file_name'. This is unfortunate. ". "Perhaps it wasn't meant to be.\n"; my %rowdata = ( record_status => 'D', date_modified => 'CURRENT_TIMESTAMP', modified_by_id => $current_contact_id, ); $sbeams->insert_update_row( update=>1, table_name=>$TBPR_RAW_DATA_FILE, rowdata_ref=>\%rowdata, PK=>'raw_data_file_id', PK_value=>$raw_data_file_id, verbose=>$VERBOSE, testonly=>$TESTONLY, ); return; } #### Verify that this file isn't already somewhere in the system my $sql = qq~ SELECT raw_data_file_id,file_path FROM $TBPR_RAW_DATA_FILE WHERE file_name = '$file_name' AND file_path != '$staging_dir' AND record_status != 'D' ~; my @rows = $sbeams->selectSeveralColumns($sql); #### If there are multiple rows, barf if (scalar @rows > 1) { print "ERROR[$SUB_NAME]: There are multiple records for this ". "filename!! This should never happen.\n"; return; } #### Extract data my $other_raw_data_file_id = $rows[0]->[0]; my $current_dir = $rows[0]->[1]; #### If there is another one, complain and stop if ($other_raw_data_file_id) { updateRawDataFileRecord( raw_data_file_id=>$raw_data_file_id, processing_status=>'ProcessError', file_status_message=>"There is already a file by this name in ". "the system in '$current_dir'. Cannot continue. Delete old or ". "new duplicate.", ); return; } #### Get the size of the file my $file_size = ( -s $full_file_name ); #### Get the age of the file in minutes $^T = time; my $age = ( -M $full_file_name ) * 24 * 60; if ($age < 2.0) { print "File $file_name is still too young. Needs to age 2 minutes.\n" if ($VERBOSE); return; } #### Print some information about the file if verbose if ($VERBOSE) { print " File name: $full_file_name\n"; print " File size: $file_size\n"; print " File age: $age minutes\n"; } #### Parse the data file name my %file_name_components = parseDataFileName( file_name => $file_name, ); #### If the parsing didn't go well, set this to UnableToParse if ($file_name_components{parse_error}) { my %rowdata = ( processing_status_id => $processing_status_ids{UnableToParseName}, file_status_message => $file_name_components{parse_error}, date_modified => 'CURRENT_TIMESTAMP', modified_by_id => $current_contact_id, ); $sbeams->insert_update_row( update=>1, table_name=>$TBPR_RAW_DATA_FILE, rowdata_ref=>\%rowdata, PK=>'raw_data_file_id', PK_value=>$raw_data_file_id, verbose=>$VERBOSE, testonly=>$TESTONLY, ); return; } #### Show the results of the parsing if ($VERBOSE > 1) { print " Decomposed components:\n"; while (($key,$value) = each (%file_name_components)) { print " $key = $value\n"; } } #### Try to identify the experiment this belongs to $sql = qq~ SELECT experiment_id,experiment_tag,username,project_tag FROM $TBPR_PROTEOMICS_EXPERIMENT PE INNER JOIN $TB_USER_LOGIN UL ON ( PE.contact_id = UL.contact_id ) INNER JOIN $TB_PROJECT P ON ( PE.project_id = P.project_id ) WHERE PE.record_status != 'D' AND UL.record_status != 'D' AND P.record_status != 'D' AND username = '$file_name_components{client}' AND experiment_tag = '$file_name_components{experiment_tag}' ~; @rows = $sbeams->selectSeveralColumns($sql); #### If there's no match unless (@rows) { print "ERROR[$SUB_NAME]: Cannot find an already registered experiment ". "that matches this file\n"; my %rowdata = ( processing_status_id => $processing_status_ids{UnableToFindExperiment}, file_status_message => "An experiment called ". "'$file_name_components{experiment_tag}' does exist for user ". "'$file_name_components{client}'. It must before we can continue. ", "Perhaps you need to register the experiment or perhaps the file ", "name is misspelled."; date_modified => 'CURRENT_TIMESTAMP', modified_by_id => $current_contact_id, ); $sbeams->insert_update_row( update=>1, table_name=>$TBPR_RAW_DATA_FILE, rowdata_ref=>\%rowdata, PK=>'raw_data_file_id', PK_value=>$raw_data_file_id, verbose=>$VERBOSE, testonly=>$TESTONLY, ); return; } #### If more than one row returned, print error and return if (scalar @rows > 1) { print "ERROR[$SUB_NAME]: More than one row returned from $sql ". "This should never happen.\n"; return; } #### Extract data from result my ($experiment_id,$experiment_tag,$username,$project_tag) = @{$rows[0]}; #### Create the final archive and user directories foreach my $area ( $archive_dir,$user_dir ) { $result = createArchiveDir( archive_dir => $area, username=>$username, project_tag=>$project_tag, experiment_tag=>$experiment_tag, ); #### If it didn't work unless ($result eq '1') { my %rowdata = ( processing_status_id => $processing_status_ids{ProcessError}, file_status_message => $result, date_modified => 'CURRENT_TIMESTAMP', modified_by_id => $current_contact_id, ); $sbeams->insert_update_row( update=>1, table_name=>$TBPR_RAW_DATA_FILE, rowdata_ref=>\%rowdata, PK=>'raw_data_file_id', PK_value=>$raw_data_file_id, verbose=>$VERBOSE, testonly=>$TESTONLY, ); return; } } #### Create the new file name, without the client and exptag my $new_file_name = $file_name; #$new_file_name =~ s/^.+?_//; #$new_file_name =~ s/^.+?_//; my $new_file_path = "$archive_dir/$username/$project_tag/$experiment_tag"; my $new_full_file_name = "$new_file_path/$new_file_name"; #### Now move the file to its final place and rename it in the process print " Move: $full_file_name\n to: $new_full_file_name\n"; unless (rename($full_file_name,$new_full_file_name)) { my $message = "Move to archive area failed: $!"; print "ERROR[$SUB_NAME]: $message\n"; updateRawDataFileRecord( raw_data_file_id=>$raw_data_file_id, processing_status=>'ProcessError', file_status_message=>$message, ); } #### Add some details about the file size and date my %rowdata; $rowdata{file_size} = $file_size; my @fs = stat($new_full_file_name); my $mtime = $fs[9]; my ($sec,$min,$hour,$mday,$mon,$year) = localtime($mtime); my $timestr = strftime("%Y-%m-%d %H:%M:%S",$sec,$min,$hour,$mday,$mon,$year); $rowdata{file_written_date} = $timestr; $rowdata{file_path} = $new_file_path; #For now, just leave the file names the same in the database #$rowdata{file_name} = $new_file_name; #### Now update the record status updateRawDataFileRecord( raw_data_file_id=>$raw_data_file_id, processing_status=>'ReadyToExtract', file_status_message=>"File has been moved to archive area and is ". "ready for extractms", rowdata_ref=>\%rowdata, ); return(1); } ############################################################################### # recoverFromErrorState ############################################################################### sub recoverFromErrorState { my %args = @_; my $SUB_NAME = 'recoverFromErrorState'; #### Decode the argument list my $raw_data_file_id = $args{'raw_data_file_id'} || die "ERROR[$SUB_NAME]: raw_data_file_id not passed"; my $staging_dir = $args{'staging_dir'} || die "ERROR[$SUB_NAME]: staging_dir not passed"; #### Define standard variables my ($i,$element,$key,$value,$line,$result,$sql); #### Get some information about this data file my $sql = qq~ SELECT raw_data_file_id,status_tag,file_name,file_path FROM $TBPR_RAW_DATA_FILE RDF INNER JOIN $TBPR_PROCESSING_STATUS PS ON ( RDF.processing_status_id = PS.processing_status_id ) WHERE raw_data_file_id = '$raw_data_file_id' ~; my @rows = $sbeams->selectSeveralColumns($sql); #### If there's nothing to do, print error and return unless (@rows) { print "ERROR[$SUB_NAME]: Cannot find raw_data_file_id=". "'$raw_data_file_id'. This should never happen.\n"; return; } #### If more than one row returned, print error and return if (scalar @rows > 1) { print "ERROR[$SUB_NAME]: More than one row returned from $sql ". "This should never happen.\n"; return; } #### Extract data from result my ($raw_data_file_id,$status_tag,$file_name,$file_path) = @{$rows[0]}; #### If the file_name doesn't exist, delete it in the queue my $full_file_name = "$file_path/$file_name"; unless ( -e $full_file_name ) { print " Cannot find file '$full_file_name'. This is curious. ". "Perhaps it has been removed or renamed to fix the error.\n"; my %rowdata = ( record_status => 'D', ); updateRawDataFileRecord( raw_data_file_id=>$raw_data_file_id, processing_status=>'ProcessError', file_status_message=>"File has disappeared from staging area", rowdata_ref=>\%rowdata, ); return; } if ($status_tag eq 'UnableToFindExperiment') { #### Parse the data file name my %file_name_components = parseDataFileName( file_name => $file_name, ); #### Try to identify the experiment this belongs to $sql = qq~ SELECT experiment_id,experiment_tag,username,project_tag FROM $TBPR_PROTEOMICS_EXPERIMENT PE INNER JOIN $TB_USER_LOGIN UL ON ( PE.contact_id = UL.contact_id ) INNER JOIN $TB_PROJECT P ON ( PE.project_id = P.project_id ) WHERE PE.record_status != 'D' AND UL.record_status != 'D' AND P.record_status != 'D' AND username = '$file_name_components{client}' AND experiment_tag = '$file_name_components{experiment_tag}' ~; @rows = $sbeams->selectSeveralColumns($sql); #### If there's now a match, push back to WaitingToAge if (scalar @rows == 1) { updateRawDataFileRecord( raw_data_file_id=>$raw_data_file_id, processing_status=>'WaitingToAge', file_status_message=>"Retrying... Experiment definition now appears ". "to exist.", ); } } } # end recoverFromErrorState ############################################################################### # updateRawDataFileRecord ############################################################################### sub updateRawDataFileRecord { my %args = @_; my $SUB_NAME = 'updateRawDataFileRecord'; #### Decode the argument list my $raw_data_file_id = $args{'raw_data_file_id'} || die "ERROR[$SUB_NAME]: raw_data_file_id not passed"; my $processing_status = $args{'processing_status'} || die "ERROR[$SUB_NAME]: processing_status not passed"; my $file_status_message = $args{'file_status_message'} || die "ERROR[$SUB_NAME]: file_status_message not passed"; my $rowdata_ref = $args{'rowdata_ref'}; #### If no rowdata hash was passed, create it unless (defined($rowdata_ref)) { unless (ref($rowdata_ref)) { my %rowdata; $rowdata_ref = \%rowdata; } } #### Get a list of the available processing_status's my $sql = qq~ SELECT status_tag,processing_status_id FROM $TBPR_PROCESSING_STATUS WHERE record_status != 'D' ~; my %processing_status_ids = $sbeams->selectTwoColumnHash($sql); my $current_contact_id = $sbeams->getCurrent_contact_id(); #### Add the processing_status to the rowdata hash if ($processing_status_ids{$processing_status}) { $rowdata_ref->{processing_status_id} = $processing_status_ids{$processing_status}; } else { $rowdata_ref->{processing_status_id} = 3; $rowdata_ref->{file_status_message} = "INTERNAL ERROR: Unrecognized ". "processing_status '$processing_status'"; } #### Add the file_status_message to the rowdata hash $rowdata_ref->{file_status_message} = $file_status_message; #### Add audit information to the rowdata hash $rowdata_ref->{date_modified} = 'CURRENT_TIMESTAMP'; $rowdata_ref->{modified_by_id} = $current_contact_id; #### UPDATE the record $sbeams->insert_update_row( update=>1, table_name=>$TBPR_RAW_DATA_FILE, rowdata_ref=>$rowdata_ref, PK=>'raw_data_file_id', PK_value=>$raw_data_file_id, verbose=>$VERBOSE, testonly=>$TESTONLY, ); } ############################################################################### # parseDataFileName ############################################################################### sub parseDataFileName { my %args = @_; my $SUB_NAME = 'parseDataFileName'; #### Decode the argument list my $file_name = $args{'file_name'} || die "ERROR[$SUB_NAME]: file_name not passed"; #### Define a hash to hold the various properties my %components; $components{parse_error} = 'UNKNOWN'; #### Make sure there's a .dat at the end unless ( $file_name =~ /\.dat$/i ) { $components{parse_error} = 'File must end in .dat'; return %components; } #### Get the file_root without the extension my $file_root = $file_name; $file_root =~ s/\.dat$//i; #### Split file_root by underscores my @parts = split(/_/,$file_root); if (scalar @parts < 6) { $components{parse_error} = "filename must have at least 6 fields separated by an underscore"; return %components; } #### Extract the parts into the hash $components{client} = $parts[0]; $components{experiment_tag} = $parts[1]; $components{fraction_type} = $parts[2]; $components{fraction_number} = $parts[3]; $components{window} = $parts[4]; $components{fraction_repeat} = $parts[5]; $components{other} = $parts[6]; $components{other} = '' unless (defined($components{other})); #### Look for errors unless ($components{client} && $components{client} =~ /^[A-Z,a-z,0-9\-]+$/) { $components{parse_error} = "client '$components{client}' is not valid: /^[A-Z,a-z,0-9\-]+\$/"; return %components; } unless ($components{experiment_tag} && $components{experiment_tag} =~ /^[A-Z,a-z,0-9\-]+$/) { $components{parse_error} = "experiment_tag '$components{experiment_tag}' is not valid: ". "/^[A-Z,a-z,0-9\-]+\$/"; return %components; } unless ($components{fraction_type} && $components{fraction_type} =~ /^[A-Z,a-z,0-9\-]+$/) { $components{parse_error} = "fraction_type '$components{fraction_type}' is not valid: ". "/^[A-Z,a-z,0-9\-]+\$/"; return %components; } unless ($components{fraction_number} && $components{fraction_number} =~ /^[A-Z,a-z,0-9\-]+$/) { $components{parse_error} = "fraction_number '$components{fraction_number}' is not valid: ". "/^[A-Z,a-z,0-9\-]+\$/"; return %components; } unless ($components{window} && $components{window} =~ /^[A-Z,a-z,0-9\-]+$/) { $components{parse_error} = "window '$components{window}' is not valid: /^[A-Z,a-z,0-9\-]+\$/"; return %components; } unless ($components{fraction_repeat} && $components{fraction_repeat} =~ /^[A-Z,a-z,0-9\-]+$/) { $components{parse_error} = "fraction_repeat '$components{fraction_repeat}' is not valid: ". "/^[A-Z,a-z,0-9\-]+\$/"; return %components; } #### Success $components{parse_error} = ''; return %components; } ############################################################################### # createArchiveDir ############################################################################### sub createArchiveDir { my %args = @_; my $SUB_NAME = 'createArchiveDir'; my $error; #### Decode the argument list my $archive_dir = $args{'archive_dir'} || die "ERROR[$SUB_NAME]: archive_dir not passed"; my $username = $args{'username'} || die "ERROR[$SUB_NAME]: username not passed"; my $project_tag = $args{'project_tag'} || ''; my $experiment_tag = $args{'experiment_tag'} || die "ERROR[$SUB_NAME]: experiment_tag not passed"; my $search_batch_subdir = $args{'search_batch_subdir'} || ''; #### Verify that the $archive_dir exists unless ( -d $archive_dir ) { $error = "ERROR[$SUB_NAME]: archive_dir '$archive_dir' does not exist ". "and it must! This should never happen.\n"; return $error; } #### Look for and create each subdirectory foreach my $newdir ( "$archive_dir/$username", "$archive_dir/$username/$project_tag", "$archive_dir/$username/$project_tag/$experiment_tag", "$archive_dir/$username/$project_tag/$experiment_tag/". "$search_batch_subdir" ) { if ( -d $newdir ) { print " Already exists: $newdir\n" if ($VERBOSE); } else { print " Creating $newdir\n" if ($VERBOSE); mkdir($newdir); unless ( -d $newdir) { $error = "ERROR[$SUB_NAME]: Failed to create directory '$newdir'\n"; return $error; } } } return 1; } ############################################################################### # getDirListing ############################################################################### sub getDirListing { my %args = @_; my $SUB_NAME = 'getDirListing'; #### Decode the argument list my $dir = $args{'directory'} || die "ERROR[$SUB_NAME]: directory not passed"; my $exclude_dot_files = $args{'exclude_dot_files'} || 0; #### Open the directory and get the files (except . and ..) opendir(DIR, $dir) || die "[$PROG_NAME::getDirListing] Cannot open $dir: $!"; my @files = grep (!/^\.{1,2}$/, readdir(DIR)); closedir(DIR); #### Remove the dot files if we don't want them if ($exclude_dot_files) { @files = grep (!/^\./,@files); } #### Always sort the files @files = sort(@files); return @files; } ############################################################################### # read Control File ############################################################################### sub readControlFile { my $file = shift; my %parameters; my ($key,$value,$line); unless (open(INFILE,"$file")) { print scalar localtime," [$PROG_NAME::getControlStatus] cannot ". "find file $file\n"; return ("ERROR","ERROR"); } while ($line=) { chomp $line; ($key,$value) = split("=",$line); $parameters{$key} = $value if ($key); } close(INFILE); return %parameters; } ############################################################################### # update Control File ############################################################################### sub updateControlFile { my $file = shift; my %newparameters = @_; my ($key,$value,$line); my %parameters = readControlFile($file); return if ($parameters{'ERROR'}); while ( ($key,$value) = each %newparameters ) { $parameters{$key}=$newparameters{$key}; } unless ( unlink("$file") ) { print scalar localtime," [$PROG_NAME::updateControlStatus] cannot ". "delete file $file\n"; return 0; } unless (open(OUTFILE,">$file")) { print scalar localtime," [$PROG_NAME::updateControlStatus] cannot ". "open for writing file $file\n"; return 0; } while ( ($key,$value) = each %parameters ) { print OUTFILE "$key=$value\n"; } close(OUTFILE); return 1; }