#! /usr/bin/perl -w

# The top level script that runs a pmark benchmark.
#
# Usage:
#   ./pmark-master.pl <top_builddir> <top_srcdir> <resultdir> <ncpu> <benchmark_pfx> <pmark_script>
#
# <top_builddir>: Top level directory for finding executables to
#   be benchmarked. This is passed on to the <pmark_script>
#   without modification. How the script uses it will depend
#   on where the executables are expected to be found, relative
#   to this top level directory. For example, for HMMER3 benchmarks,
#   we might pass ~/releases/hmmer-release/build-icc or
#   ~/releases/hmmer-3.0xx/build-icc for testing a release candidate
#   or an existing release.
#
# <top_srcdir>: Top level directory for finding scripts or data files.
#   This too will simply be passed on to the <pmark_script>
#   without modification. For example, for HMMER benchmarks, we
#   might pass ~/releases/hmmer-release/ or ~/releases/hmmer-3.0xx.
#   For installed packages like BLAST, it's likely that <top_srcdir>
#   would be the same as <top_builddir>, because we will not have
#   multiple build directories.
#
# <resultdir>: A directory for holding all temporary files
#   created by the benchmark. This name should be short and unique;
#   it will also be used to construct job names on the cluster, as
#   <resultdir>.<i>.
#
# <ncpu>: how many processes to parallelize over in our cluster.
#
# <benchmark_pfx>: The script will look for files <benchmark_pfx>.tbl,
#   <benchmark_pfx>.msa, and <benchmark_pfx>.fa, defining a PMARK
#   benchmark set.
#
# <pmark_script>: This script is executed on each of <ncpu>
#   processes, on an appropriately constructed subset of the
#   benchmark queries.
#
#   It must take the following arguments:
#     <top_builddir> <top_srcdir> <resultdir> <tblfile> <msafile> <fafile> <outfile>
#
# Examples of HMMER3 benchmark:
#   ./pmark-master.pl ~/releases/hmmer-release/build-icc ~/releases/hmmer-release h3-results     100 pmark ./pmark-h3
#   ./pmark-master.pl ~/releases/hmmer-release/build-icc ~/releases/hmmer-release h2-results-ls  100 pmark ./pmark-h2-ls
#   ./pmark-master.pl ~/releases/hmmer-release/build-icc ~/releases/hmmer-release h2-results-fs  100 pmark ./pmark-h2-fs
#

use strict;
use warnings;

# --- Command line -----------------------------------------------------------
# Six positional arguments are required; any extra arguments are ignored.
if (@ARGV < 6) {
    die "Usage: $0 <top_builddir> <top_srcdir> <resultdir> <ncpu> <benchmark_pfx> <pmark_script>\n";
}

my $top_builddir  = shift;
my $top_srcdir    = shift;
my $resultdir     = shift;
my $ncpu          = shift;
my $benchmark_pfx = shift;
my $pmark_script  = shift;

# $ncpu is used as a modulus and as a loop bound below, so it must be a
# positive integer (0 would be a division by zero).
if ($ncpu !~ /^\d+$/ || $ncpu < 1) {
    die "ncpu must be a positive integer, got '$ncpu'\n";
}

my $tbl     = "$benchmark_pfx.tbl";
my $msafile = "$benchmark_pfx.msa";
my $fafile  = "$benchmark_pfx.fa";

# Refuse to reuse an existing result directory.
if (-e $resultdir) { die("$resultdir exists"); }

# --- Suck in the master table -----------------------------------------------
# Each line is whitespace-delimited; the first field is the MSA name and the
# third field is the length used for load balancing. The second and fourth
# fields are not used by this script. The table is read before the result
# directory is created, so an unreadable table does not leave an empty
# directory behind that would block the next attempt.
my @msaname;
my %alen;

open(my $tbl_fh, '<', $tbl) or die "Failed to open $tbl: $!";
while (my $line = <$tbl_fh>) {
    my ($name, undef, $len) = split ' ', $line;
    next unless defined $name;          # skip blank lines
    push @msaname, $name;
    $alen{$name} = $len;
}
close $tbl_fh;

my $n = scalar @msaname;

# Sort it by alen, longest first - this helps load balance.
sub by_alen { $alen{$b} <=> $alen{$a} }
my @sorted_msaname = sort by_alen @msaname;

# --- Create subtables -------------------------------------------------------
# Deal the sorted names round-robin across $ncpu subtables. Every subtable
# starts as an empty string so that a process with no queries (when
# $ncpu > $n) still gets a well-defined, empty table file.
my @subtbl = ('') x $ncpu;
for my $i (0 .. $n - 1) {
    $subtbl[$i % $ncpu] .= "$sorted_msaname[$i]\n";
}

# --- Output the subtables ---------------------------------------------------
mkdir($resultdir) or die "Failed to create $resultdir: $!";

for my $i (0 .. $ncpu - 1) {
    my $subtbl_file = "$resultdir/tbl.$i";
    open(my $sub_fh, '>', $subtbl_file)
        or die("Failed to create $subtbl_file: $!");
    print {$sub_fh} $subtbl[$i];
    # Buffered write errors only surface at close time.
    close($sub_fh) or die("Failed to write $subtbl_file: $!");
}

# --- Submit all the individual profmark jobs --------------------------------
# The list form of system() bypasses the shell, so directory and file names
# are passed to qsub verbatim. The benchmark command line is handed to qsub
# as a single argument, matching the single-quoted string used previously.
for my $i (0 .. $ncpu - 1) {
    my $job_cmd = "$pmark_script $top_builddir $top_srcdir $resultdir "
                . "$resultdir/tbl.$i $msafile $fafile $resultdir/tbl$i.out";

    my @qsub = ('qsub', '-V', '-cwd', '-b', 'y',
                '-N', "$resultdir.$i",
                '-j', 'y',
                '-o', "$resultdir/tbl$i.sge",
                $job_cmd);

    # Keep submitting the remaining jobs even if one submission fails.
    system(@qsub) == 0
        or warn "qsub failed for job $resultdir.$i (status $?)\n";
}