Merge branch 'master' of ssh://gsa4.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2012-08-22 11:36:41 -04:00
commit 7df0abf49b
11 changed files with 253 additions and 124 deletions

View File

@ -64,12 +64,35 @@ public class GATKArgumentCollection {
@Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false)
public Integer readBufferSize = null;
// --------------------------------------------------------------------------------------------------------------
//
// GATKRunReport options
//
// --------------------------------------------------------------------------------------------------------------
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + " for details.", required = false)
public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD;
@Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + " for details.", required = false)
public File gatkKeyFile = null;
/**
* The GATKRunReport supports (as of GATK 2.2) tagging GATK runs with an arbitrary String tag that can be
* used to group together runs during later analysis. One use of this capability is to tag runs as GATK
* performance tests, so that the performance of the GATK over time can be assessed from the logs directly.
*
* Note that the tags do not conform to any ontology, so you are free to use any tags that you might find
* meaningful.
*/
@Argument(fullName = "tag", shortName = "tag", doc="Arbitrary tag string to identify this GATK run as part of a group of runs, for later analysis", required = false)
public String tag = "NA";
// --------------------------------------------------------------------------------------------------------------
//
// Read filtering options
//
// --------------------------------------------------------------------------------------------------------------
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false)
public List<String> readFilters = new ArrayList<String>();

View File

@ -304,7 +304,7 @@ public class LocusIteratorByState extends LocusIterator {
final boolean isSingleElementCigar = nextElement == lastElement;
final CigarOperator nextOp = nextElement.getOperator(); // next cigar operator
final CigarOperator lastOp = lastElement.getOperator(); // last cigar operator
final int readOffset = state.getReadOffset(); // the base offset on this read
int readOffset = state.getReadOffset(); // the base offset on this read
final boolean isBeforeDeletion = nextOp == CigarOperator.DELETION;
final boolean isAfterDeletion = lastOp == CigarOperator.DELETION;
@ -331,6 +331,9 @@ public class LocusIteratorByState extends LocusIterator {
String insertedBaseString = null;
if (nextOp == CigarOperator.I) {
final int insertionOffset = isSingleElementCigar ? 0 : 1;
// TODO -- someone please implement a better fix for the single element insertion CIGAR!
if (isSingleElementCigar)
readOffset -= (nextElement.getLength() - 1); // LIBS has passed over the insertion bases!
insertedBaseString = new String(Arrays.copyOfRange(read.getReadBases(), readOffset + insertionOffset, readOffset + insertionOffset + nextElement.getLength()));
}

View File

@ -138,6 +138,9 @@ public class GATKRunReport {
@Element(required = true, name = "iterations")
private long nIterations;
@Element(required = true, name = "tag")
private String tag;
public enum PhoneHomeOption {
/** Disable phone home */
NO_ET,
@ -186,6 +189,8 @@ public class GATKRunReport {
nIterations = engine.getCumulativeMetrics().getNumIterations();
}
tag = engine.getArguments().tag;
// user and hostname -- information about the runner of the GATK
userName = System.getProperty("user.name");
hostName = Utils.resolveHostname();

View File

@ -286,6 +286,7 @@ public class VariantDataManager {
case INDEL:
case MIXED:
case SYMBOLIC:
case STRUCTURAL_INDEL:
return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.INDEL );
default:
return false;

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.utils;
import net.sf.samtools.util.StringUtil;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import java.util.Arrays;
@ -444,29 +445,8 @@ public class BaseUtils {
* @param bases the bases
* @return the upper cased version
*/
static public byte[] convertToUpperCase(final byte[] bases) {
for ( int i = 0; i < bases.length; i++ ) {
if ( (char)bases[i] >= 'a' )
bases[i] = toUpperCaseBase(bases[i]);
}
return bases;
}
static public byte toUpperCaseBase(final byte base) {
switch (base) {
case 'a':
return 'A';
case 'c':
return 'C';
case 'g':
return 'G';
case 't':
return 'T';
case 'n':
return 'N';
default:
return base;
}
// Upper-cases the base array in place by delegating to samtools' StringUtil.toUpperCase.
// NOTE(review): unlike the removed hand-rolled version this returns nothing -- callers that
// relied on the returned array must now use the mutated input; presumably StringUtil
// mutates the byte[] directly (its statement-style use here suggests so) -- confirm.
static public void convertToUpperCase(final byte[] bases) {
StringUtil.toUpperCase(bases);
}
/**

View File

@ -105,7 +105,7 @@ public class Allele implements Comparable<Allele> {
if ( isRef ) throw new IllegalArgumentException("Cannot tag a symbolic allele as the reference allele");
}
else {
bases = BaseUtils.convertToUpperCase(bases);
BaseUtils.convertToUpperCase(bases);
}
this.isRef = isRef;

View File

@ -208,19 +208,18 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
}
/**
* Test to make sure that reads supporting only an indel (example cigar string: 76I) do
* not negatively influence the ordering of the pileup.
* Test to make sure that reads supporting only an indel (example cigar string: 76I) are represented properly
*/
@Test
public void testWholeIndelReadTest2() {
public void testWholeIndelReadRepresentedTest() {
final int firstLocus = 44367788, secondLocus = firstLocus + 1;
SAMRecord read = ArtificialSAMUtils.createArtificialRead(header,"read",0,secondLocus,1);
read.setReadBases(Utils.dupBytes((byte) 'A', 1));
read.setBaseQualities(Utils.dupBytes((byte) '@', 1));
read.setCigarString("1I");
SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,secondLocus,1);
read1.setReadBases(Utils.dupBytes((byte) 'A', 1));
read1.setBaseQualities(Utils.dupBytes((byte) '@', 1));
read1.setCigarString("1I");
List<SAMRecord> reads = Arrays.asList(read);
List<SAMRecord> reads = Arrays.asList(read1);
// create the iterator by state with the fake reads and fake records
li = makeLTBS(reads, createTestReadProperties());
@ -234,6 +233,26 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
Assert.assertFalse(pe.isAfterInsertion());
Assert.assertEquals(pe.getEventBases(), "A");
}
SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,secondLocus,10);
read2.setReadBases(Utils.dupBytes((byte) 'A', 10));
read2.setBaseQualities(Utils.dupBytes((byte) '@', 10));
read2.setCigarString("10I");
reads = Arrays.asList(read2);
// create the iterator by state with the fake reads and fake records
li = makeLTBS(reads, createTestReadProperties());
while(li.hasNext()) {
AlignmentContext alignmentContext = li.next();
ReadBackedPileup p = alignmentContext.getBasePileup();
Assert.assertTrue(p.getNumberOfElements() == 1);
PileupElement pe = p.iterator().next();
Assert.assertTrue(pe.isBeforeInsertion());
Assert.assertFalse(pe.isAfterInsertion());
Assert.assertEquals(pe.getEventBases(), "AAAAAAAAAA");
}
}
private static ReadProperties createTestReadProperties() {

View File

@ -105,8 +105,7 @@ class QCommandLine extends CommandLineProgram with Logging {
def execute = {
if (settings.qSettings.runName == null)
settings.qSettings.runName = FilenameUtils.removeExtension(scripts.head.getName)
qGraph.settings = settings
qGraph.initializeWithSettings(settings)
val allQScripts = pluginManager.createAllTypes();
for (script <- allQScripts) {
@ -137,26 +136,9 @@ class QCommandLine extends CommandLineProgram with Logging {
logger.info("Script %s with %d total jobs".format(if (success) "completed successfully" else "failed", functionsAndStatus.size))
if (!settings.disableJobReport) {
val jobStringName = {
if (settings.jobReportFile != null)
settings.jobReportFile
else
settings.qSettings.runName + ".jobreport.txt"
}
if (!shuttingDown) {
val reportFile = IOUtils.absolute(settings.qSettings.runDirectory, jobStringName)
logger.info("Writing JobLogging GATKReport to file " + reportFile)
QJobReport.printReport(functionsAndStatus, reportFile)
if (settings.run) {
val pdfFile = IOUtils.absolute(settings.qSettings.runDirectory, FilenameUtils.removeExtension(jobStringName) + ".pdf")
logger.info("Plotting JobLogging GATKReport to file " + pdfFile)
QJobReport.plotReport(reportFile, pdfFile)
}
}
}
// write the final complete job report
logger.info("Writing final jobs report...")
qGraph.writeJobsReport()
if (!qGraph.success) {
logger.info("Done with errors")

View File

@ -39,7 +39,7 @@ import collection.immutable.{TreeSet, TreeMap}
import org.broadinstitute.sting.queue.function.scattergather.{ScatterFunction, CloneFunction, GatherFunction, ScatterGatherableFunction}
import java.util.Date
import org.broadinstitute.sting.utils.Utils
import org.apache.commons.io.{FileUtils, IOUtils}
import org.apache.commons.io.{FilenameUtils, FileUtils, IOUtils}
import java.io.{OutputStreamWriter, File}
/**
@ -71,6 +71,16 @@ class QGraph extends Logging {
private val inProcessManager = new InProcessJobManager
private def managers = Seq[Any](inProcessManager, commandLineManager)
/**
* If true, we will write out incremental job reports
*/
private val INCREMENTAL_JOBS_REPORT = true
/**
* Holds the optional jobInfoReporter structure
*/
private var jobInfoReporter: QJobsReporter = null
private class StatusCounts {
var pending = 0
var running = 0
@ -79,6 +89,19 @@ class QGraph extends Logging {
}
private val statusCounts = new StatusCounts
/**
* Final initialization step of this QGraph -- tell it runtime setting information
*
* The settings aren't necessarily available until after this QGraph object has been constructed, so
* this function must be called once the QGraphSettings have been filled in.
*
* @param settings the fully-populated runtime settings; stored on this graph and then used
*                 to construct the jobs reporter via createJobsReporter()
*/
def initializeWithSettings(settings: QGraphSettings) {
this.settings = settings
this.jobInfoReporter = createJobsReporter()
}
/**
* Adds a QScript created CommandLineFunction to the graph.
* @param command Function to add to the graph.
@ -467,6 +490,12 @@ class QGraph extends Logging {
checkRetryJobs(failedJobs)
}
// incremental
if ( logNextStatusCounts && INCREMENTAL_JOBS_REPORT ) {
logger.info("Writing incremental jobs reports...")
writeJobsReport(false)
}
readyJobs ++= getReadyJobs
}
@ -1084,6 +1113,39 @@ class QGraph extends Logging {
}
}
/**
* Create the jobsReporter for this QGraph, based on the settings data.
*
* Must be called after settings has been initialized properly
*
* @return a QJobsReporter targeting the user-specified jobReportFile, or
*         &lt;runName&gt;.jobreport.txt by default; a companion PDF target is
*         configured only when settings.run is set
*/
private def createJobsReporter(): QJobsReporter = {
// user-specified report file wins; otherwise derive the file name from the run name
val jobStringName = if (settings.jobReportFile != null)
settings.jobReportFile
else
settings.qSettings.runName + ".jobreport.txt"
// fully qualified because org.apache.commons.io.IOUtils is already imported in this file
val reportFile = org.broadinstitute.sting.utils.io.IOUtils.absolute(settings.qSettings.runDirectory, jobStringName)
// no PDF target unless settings.run is set (plotting only makes sense for actual runs)
val pdfFile = if ( settings.run )
Some(org.broadinstitute.sting.utils.io.IOUtils.absolute(settings.qSettings.runDirectory, FilenameUtils.removeExtension(jobStringName) + ".pdf"))
else
None
new QJobsReporter(settings.disableJobReport, reportFile, pdfFile)
}
/**
* Write, if possible, the jobs report
*
* A no-op when the reporter hasn't been created yet (i.e. before initializeWithSettings runs).
*
* @param plot if true, ask the reporter to also render the report as a PDF
*             (only happens when the reporter was built with a PDF target)
*/
def writeJobsReport(plot: Boolean = true) {
// note: the previous logic didn't write the job report if the system was shutting down, but I don't
// see any reason not to write the job report
if ( jobInfoReporter != null )
jobInfoReporter.write(this, plot)
}
/**
* Returns true if the graph was shutdown instead of exiting on its own.
*/

View File

@ -25,13 +25,8 @@
package org.broadinstitute.sting.queue.util
import org.broadinstitute.sting.queue.function.QFunction
import org.broadinstitute.sting.gatk.report.{GATKReportTable, GATKReport}
import org.broadinstitute.sting.utils.exceptions.UserException
import org.broadinstitute.sting.gatk.report.GATKReportTable
import org.broadinstitute.sting.queue.engine.JobRunInfo
import java.io.{PrintStream, File}
import org.broadinstitute.sting.utils.R.{RScriptLibrary, RScriptExecutor}
import org.broadinstitute.sting.utils.io.Resource
import org.apache.commons.io.{IOUtils, FileUtils}
/**
* A mixin to add Job info to the class
@ -98,31 +93,10 @@ trait QJobReport extends Logging {
}
object QJobReport {
val JOB_REPORT_QUEUE_SCRIPT = "queueJobReport.R"
// todo -- fixme to have a unique name for Scatter/gather jobs as well
var seenCounter = 1
var seenNames = Set[String]()
def printReport(jobsRaw: Map[QFunction, JobRunInfo], dest: File) {
val jobs = jobsRaw.filter(_._2.isFilledIn).filter(_._1.includeInReport)
jobs foreach {case (qf, info) => qf.setRunInfo(info)}
val stream = new PrintStream(FileUtils.openOutputStream(dest))
try {
printJobLogging(jobs.keys.toSeq, stream)
} finally {
IOUtils.closeQuietly(stream)
}
}
def plotReport(reportFile: File, pdfFile: File) {
val executor = new RScriptExecutor
executor.addLibrary(RScriptLibrary.GSALIB)
executor.addScript(new Resource(JOB_REPORT_QUEUE_SCRIPT, classOf[QJobReport]))
executor.addArgs(reportFile.getAbsolutePath, pdfFile.getAbsolutePath)
executor.exec()
}
def workAroundSameJobNames(func: QFunction):String = {
if ( seenNames.apply(func.jobName) ) {
seenCounter += 1
@ -132,45 +106,4 @@ object QJobReport {
func.jobName
}
}
/**
* Prints the JobLogging logs to a GATKReport. First splits up the
* logs by group, and for each group generates a GATKReportTable
*/
private def printJobLogging(logs: Seq[QFunction], stream: PrintStream) {
// create the report
val report: GATKReport = new GATKReport
// create a table for each group of logs
for ( (group, groupLogs) <- groupLogs(logs) ) {
val keys = logKeys(groupLogs)
report.addTable(group, "Job logs for " + group, keys.size)
val table: GATKReportTable = report.getTable(group)
// add the columns
keys.foreach(table.addColumn(_))
for (log <- groupLogs) {
for ( key <- keys )
table.set(log.getReportName, key, log.getReportFeature(key))
}
}
report.print(stream)
}
private def groupLogs(logs: Seq[QFunction]): Map[String, Seq[QFunction]] = {
logs.groupBy(_.getReportGroup)
}
private def logKeys(logs: Seq[QFunction]): Set[String] = {
// the keys should be the same for each log, but we will check that
val keys = Set[String](logs(0).getReportFeatureNames : _*)
for ( log <- logs )
if ( keys.sameElements(Set(log.getReportFeatureNames)) )
throw new UserException(("All JobLogging jobs in the same group must have the same set of features. " +
"We found one with %s and another with %s").format(keys, log.getReportFeatureNames))
keys
}
}

View File

@ -0,0 +1,121 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.queue.util
import java.io.{PrintStream, File}
import org.broadinstitute.sting.utils.io.{Resource}
import org.broadinstitute.sting.queue.engine.{JobRunInfo, QGraph}
import org.broadinstitute.sting.queue.function.QFunction
import org.broadinstitute.sting.utils.R.{RScriptLibrary, RScriptExecutor}
import org.broadinstitute.sting.gatk.report.{GATKReportTable, GATKReport}
import org.broadinstitute.sting.utils.exceptions.UserException
import org.apache.commons.io.{FileUtils, IOUtils}
/**
 * Writes out RunInfo to a GATKReport
 *
 * @param disabled when true, write() is a no-op
 * @param reportFile the GATKReport text file to write
 * @param pdfFile optional PDF target; when present and plotting is enabled, the report
 *                is also rendered to PDF via the bundled R script
 */
class QJobsReporter(val disabled: Boolean, val reportFile: File, val pdfFile: Option[File]) extends Logging {
  // R script (bundled as a classpath resource) that renders the report to PDF
  private val JOB_REPORT_QUEUE_SCRIPT = "queueJobReport.R"

  /**
   * Write out a job report based on the finished jobs graph
   *
   * @param jobGraph the QGraph whose functions and run status should be reported
   * @param enabledPlotting if true, we will plot the report as well with the JOB_REPORT_QUEUE_SCRIPT
   */
  def write(jobGraph: QGraph, enabledPlotting: Boolean) {
    if ( ! disabled ) {
      logger.info("Writing JobLogging GATKReport to file " + reportFile)
      printReport(jobGraph.getFunctionsAndStatus, reportFile)

      if ( enabledPlotting )
        pdfFile match {
          case Some(file) =>
            logger.info("Plotting JobLogging GATKReport to file " + file)
            plotReport(reportFile, file)
          case None => // no PDF target was configured; skip plotting
        }
    }
  }

  /**
   * Writes the run info of every job that actually ran (isFilledIn) and opted into
   * reporting (includeInReport) to dest as a GATKReport.
   */
  private def printReport(jobsRaw: Map[QFunction, JobRunInfo], dest: File) {
    val jobs = jobsRaw.filter(_._2.isFilledIn).filter(_._1.includeInReport)
    jobs foreach {case (qf, info) => qf.setRunInfo(info)}

    // FileUtils.openOutputStream creates the file (and parent dirs) if needed
    val stream = new PrintStream(FileUtils.openOutputStream(dest))
    try {
      printJobLogging(jobs.keys.toSeq, stream)
    } finally {
      IOUtils.closeQuietly(stream)
    }
  }

  /**
   * Renders reportFile into pdfFile by running the bundled R job report script.
   */
  private def plotReport(reportFile: File, pdfFile: File) {
    val executor = new RScriptExecutor
    executor.addLibrary(RScriptLibrary.GSALIB)
    // NOTE(review): the script resource is looked up relative to QJobReport, not this class --
    // presumably because the .R file lives in that class's resource directory; confirm
    executor.addScript(new Resource(JOB_REPORT_QUEUE_SCRIPT, classOf[QJobReport]))
    executor.addArgs(reportFile.getAbsolutePath, pdfFile.getAbsolutePath)
    executor.exec()
  }

  /**
   * Prints the JobLogging logs to a GATKReport. First splits up the
   * logs by group, and for each group generates a GATKReportTable
   */
  private def printJobLogging(logs: Seq[QFunction], stream: PrintStream) {
    // create the report
    val report: GATKReport = new GATKReport

    // create a table for each group of logs
    for ( (group, groupLogs) <- groupLogs(logs) ) {
      val keys = logKeys(groupLogs)

      report.addTable(group, "Job logs for " + group, keys.size)
      val table: GATKReportTable = report.getTable(group)

      // add the columns
      keys.foreach(table.addColumn(_))
      for (log <- groupLogs) {
        for ( key <- keys )
          table.set(log.getReportName, key, log.getReportFeature(key))
      }
    }

    report.print(stream)
  }

  /** Groups the logs by their report group name; each group becomes one report table. */
  private def groupLogs(logs: Seq[QFunction]): Map[String, Seq[QFunction]] = {
    logs.groupBy(_.getReportGroup)
  }

  /**
   * Returns the set of report feature names shared by all logs in a group, verifying
   * that every log really does carry the same feature set.
   *
   * @throws UserException if any log's feature names differ from the first log's
   */
  private def logKeys(logs: Seq[QFunction]): Set[String] = {
    // the keys should be the same for each log, but we will check that
    val keys = Set[String](logs(0).getReportFeatureNames : _*)

    for ( log <- logs ) {
      // BUGFIX: the original condition was inverted (it threw when the key sets DID match)
      // and compared keys against Set(log.getReportFeatureNames) -- a one-element set
      // holding the whole collection -- so the consistency check could never fire.
      val logFeatures = Set[String](log.getReportFeatureNames : _*)
      if ( keys != logFeatures )
        throw new UserException(("All JobLogging jobs in the same group must have the same set of features. " +
          "We found one with %s and another with %s").format(keys, log.getReportFeatureNames))
    }

    keys
  }
}