diff --git a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java index 830fd111b..59357e1c4 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java @@ -37,12 +37,6 @@ import java.util.*; public class AlleleBiasedDownsamplingUtils { - private static final ArrayList[] alleleStratifiedElements = new ArrayList[4]; - static { - for ( int i = 0; i < 4; i++ ) - alleleStratifiedElements[i] = new ArrayList(); - } - /** * Computes an allele biased version of the given pileup * @@ -51,13 +45,17 @@ public class AlleleBiasedDownsamplingUtils { * @param log logging output * @return allele biased pileup */ - public synchronized static ReadBackedPileup createAlleleBiasedBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction, final PrintStream log) { + public static ReadBackedPileup createAlleleBiasedBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction, final PrintStream log) { // special case removal of all or no reads if ( downsamplingFraction <= 0.0 ) return pileup; if ( downsamplingFraction >= 1.0 ) return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList()); + final ArrayList[] alleleStratifiedElements = new ArrayList[4]; + for ( int i = 0; i < 4; i++ ) + alleleStratifiedElements[i] = new ArrayList(); + // start by stratifying the reads by the alleles they represent at this position for( final PileupElement pe : pileup ) { // abort if we have a reduced read - we do not want to remove it! diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index a50219f48..780ef9e0b 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -122,12 +122,12 @@ public class BQSRIntegrationTest extends WalkerTest { public Object[][] createPRTestData() { List tests = new ArrayList(); - tests.add(new Object[]{1, new PRTest(" -qq -1", "a1d87da5dcbde35170d6ba6bc3ee2812")}); - tests.add(new Object[]{1, new PRTest(" -qq 6", "a0fecae6d0e5ab9917862fa306186d10")}); + tests.add(new Object[]{1, new PRTest(" -qq -1", "5226c06237b213b9e9b25a32ed92d09a")}); + tests.add(new Object[]{1, new PRTest(" -qq 6", "b592a5c62b952a012e18adb898ea9c33")}); tests.add(new Object[]{1, new PRTest(" -DIQ", "8977bea0c57b808e65e9505eb648cdf7")}); for ( final int nct : Arrays.asList(1, 2, 4) ) { - tests.add(new Object[]{nct, new PRTest("", "d1bbb4ce6aa93e866f106f8b11d888ed")}); + tests.add(new Object[]{nct, new PRTest("", "ab2f209ab98ad3432e208cbd524a4c4a")}); } return tests.toArray(new Object[][]{}); diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 54ade61f6..5d7eba16c 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -406,7 +406,7 @@ public abstract class ArgumentTypeDescriptor { else throw new UserException.CommandLineException( String.format("Failed to parse value %s for argument %s. Message: %s", - value.asString(), fieldName, e.getMessage())); + value, fieldName, e.getMessage())); } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 67e5ad95b..b7000e0ee 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -64,6 +64,7 @@ import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor; import java.io.File; import java.util.*; +import java.util.concurrent.TimeUnit; /** * A GenomeAnalysisEngine that runs a specified walker. @@ -73,6 +74,7 @@ public class GenomeAnalysisEngine { * our log, which we want to capture anything from this class */ private static Logger logger = Logger.getLogger(GenomeAnalysisEngine.class); + public static final long NO_RUNTIME_LIMIT = -1; /** * The GATK command-line argument parsing code. @@ -1090,6 +1092,33 @@ public class GenomeAnalysisEngine { public String createApproximateCommandLineArgumentString(Object... argumentProviders) { return CommandLineUtils.createApproximateCommandLineArgumentString(parsingEngine,argumentProviders); } - + /** + * Does the current runtime in unit exceed the runtime limit, if one has been provided? + * + * @param runtime the runtime of this GATK instance in minutes + * @param unit the time unit of runtime + * @return false if not limit was requested or if runtime <= the limit, true otherwise + */ + public boolean exceedsRuntimeLimit(final long runtime, final TimeUnit unit) { + if ( runtime < 0 ) throw new IllegalArgumentException("runtime must be >= 0 but got " + runtime); + + if ( getArguments().maxRuntime == NO_RUNTIME_LIMIT ) + return false; + else { + final long actualRuntimeNano = TimeUnit.NANOSECONDS.convert(runtime, unit); + final long maxRuntimeNano = getRuntimeLimitInNanoseconds(); + return actualRuntimeNano > maxRuntimeNano; + } + } + + /** + * @return the runtime limit in nanoseconds, or -1 if no limit was specified + */ + public long getRuntimeLimitInNanoseconds() { + if ( getArguments().maxRuntime == NO_RUNTIME_LIMIT ) + return -1; + else + return TimeUnit.NANOSECONDS.convert(getArguments().maxRuntime, getArguments().maxRuntimeUnits); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 7875ced5a..e2b943582 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -31,6 +31,7 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.IntervalBinding; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.downsampling.DownsampleType; import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; @@ -44,6 +45,7 @@ import java.io.File; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.concurrent.TimeUnit; /** * @author aaron @@ -91,7 +93,7 @@ public class GATKArgumentCollection { // -------------------------------------------------------------------------------------------------------------- // - // XXX + // General features // // -------------------------------------------------------------------------------------------------------------- @@ -143,6 +145,12 @@ public class GATKArgumentCollection { @Argument(fullName = "disableRandomization",doc="Completely eliminates randomization from nondeterministic methods. To be used mostly in the testing framework where dynamic parallelism can result in differing numbers of calls to the generator.") public boolean disableRandomization = false; + @Argument(fullName = "maxRuntime", shortName = "maxRuntime", doc="If provided, that GATK will stop execution cleanly as soon after maxRuntime has been exceeded, truncating the run but not exiting with a failure. By default the value is interpreted in minutes, but this can be changed by maxRuntimeUnits", required = false) + public long maxRuntime = GenomeAnalysisEngine.NO_RUNTIME_LIMIT; + + @Argument(fullName = "maxRuntimeUnits", shortName = "maxRuntimeUnits", doc="The TimeUnit for maxRuntime", required = false) + public TimeUnit maxRuntimeUnits = TimeUnit.MINUTES; + // -------------------------------------------------------------------------------------------------------------- // // Downsampling Arguments diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index 31f2a469c..cc0161b2d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -123,7 +123,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar final ReduceTree reduceTree = new ReduceTree(this); initializeWalker(walker); - while (isShardTraversePending() || isTreeReducePending()) { + while (! abortExecution() && (isShardTraversePending() || isTreeReducePending())) { // Check for errors during execution. errorTracker.throwErrorIfPending(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index 5b94e0767..f3c1ae91c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -63,7 +63,7 @@ public class LinearMicroScheduler extends MicroScheduler { final TraversalEngine traversalEngine = borrowTraversalEngine(this); for (Shard shard : shardStrategy ) { - if ( done || shard == null ) // we ran out of shards that aren't owned + if ( abortExecution() || done || shard == null ) // we ran out of shards that aren't owned break; if(shard.getShardType() == Shard.ShardType.LOCUS) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index df4ed9ef8..38170040a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; import org.broadinstitute.sting.gatk.traversals.*; import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.utils.AutoFormattingTime; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -52,6 +53,7 @@ import javax.management.ObjectName; import java.io.File; import java.lang.management.ManagementFactory; import java.util.*; +import java.util.concurrent.TimeUnit; /** @@ -269,6 +271,26 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { this.threadEfficiencyMonitor = threadEfficiencyMonitor; } + /** + * Should we stop all execution work and exit gracefully? + * + * Returns true in the case where some external signal or time limit has been received, indicating + * that this GATK shouldn't continue executing. This isn't a kill signal, it is really a "shutdown + * gracefully at the next opportunity" signal. Concrete implementations of the MicroScheduler + * examine this value as often as reasonable and, if it returns true, stop what they are doing + * at the next available opportunity, shutdown their resources, call notify done, and return. + * + * @return true if we should abort execution, or false otherwise + */ + protected boolean abortExecution() { + final boolean abort = engine.exceedsRuntimeLimit(progressMeter.getRuntimeInNanoseconds(), TimeUnit.NANOSECONDS); + if ( abort ) { + final AutoFormattingTime aft = new AutoFormattingTime(TimeUnit.SECONDS.convert(engine.getRuntimeLimitInNanoseconds(), TimeUnit.NANOSECONDS), 1, 4); + logger.info("Aborting execution (cleanly) because the runtime has exceeded the requested maximum " + aft); + } + return abort; + } + /** * Walks a walker over the given list of intervals. * diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 2b46414a8..5c7da82d0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -364,7 +364,7 @@ public class VariantContextAdaptors { long end = hapmap.getEnd(); if ( deletionLength > 0 ) - end += deletionLength; + end += (deletionLength - 1); VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).make(); return vc; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java index 7c73f59e9..0165c6cf3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java @@ -438,8 +438,6 @@ public class SomaticIndelDetector extends ReadWalker { location = getToolkit().getGenomeLocParser().createGenomeLoc(getToolkit().getSAMFileHeader().getSequence(0).getSequenceName(),1); - normalSamples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeaders().get(0)); - try { // we already checked that bedOutput and output_file are not set simultaneously if ( bedOutput != null ) bedWriter = new FileWriter(bedOutput); @@ -1152,8 +1150,9 @@ public class SomaticIndelDetector extends ReadWalker { GenotypesContext genotypes = GenotypesContext.create(); for ( String sample : normalSamples ) { - final GenotypeBuilder gb = new GenotypeBuilder(sample); - gb.attributes(call.makeStatsAttributes(null)); + GenotypeBuilder gb = new GenotypeBuilder(sample); + + gb=call.addStatsAttributes(gb); gb.alleles(! discard_event ? alleles // we made a call - put actual het genotype here: : homref_alleles); // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all) @@ -1200,8 +1199,11 @@ public class SomaticIndelDetector extends ReadWalker { if ( start == 0 ) return; - Map attrsNormal = nCall.makeStatsAttributes(null); - Map attrsTumor = tCall.makeStatsAttributes(null); + GenotypeBuilder nGB = new GenotypeBuilder(); + GenotypeBuilder tGB = new GenotypeBuilder(); + + nCall.addStatsAttributes(nGB); + tCall.addStatsAttributes(tGB); Map attrs = new HashMap(); @@ -1242,11 +1244,11 @@ public class SomaticIndelDetector extends ReadWalker { GenotypesContext genotypes = GenotypesContext.create(); for ( String sample : normalSamples ) { - genotypes.add(GenotypeBuilder.create(sample, homRefN ? homRefAlleles : alleles, attrsNormal)); + genotypes.add(nGB.name(sample).alleles(homRefN ? homRefAlleles : alleles).make()); } for ( String sample : tumorSamples ) { - genotypes.add(GenotypeBuilder.create(sample, homRefT ? homRefAlleles : alleles, attrsTumor)); + genotypes.add(tGB.name(sample).alleles(homRefT ? homRefAlleles : alleles).make()); } Set filters = null; @@ -1254,14 +1256,6 @@ public class SomaticIndelDetector extends ReadWalker { filters = new HashSet(); filters.add("NoCall"); } - if ( nCall.getCoverage() < minNormalCoverage ) { - if ( filters == null ) filters = new HashSet(); - filters.add("NCov"); - } - if ( tCall.getCoverage() < minCoverage ) { - if ( filters == null ) filters = new HashSet(); - filters.add("TCov"); - } VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles) .genotypes(genotypes).filters(filters).attributes(attrs).make(); @@ -1844,6 +1838,38 @@ public class SomaticIndelDetector extends ReadWalker { VCFIndelAttributes.recordStrandCounts(strand_cons.first,strand_cons.second,strand_ref.first,strand_ref.second,attr); return attr; } + + + /** + * Adds alignment statistics directly into the genotype builder object. + * + * @param gb + * @return + */ + public GenotypeBuilder addStatsAttributes(GenotypeBuilder gb) { + if ( gb == null ) gb = new GenotypeBuilder(); + + gb = VCFIndelAttributes.recordDepth(getConsensusVariantCount(),getAllVariantCount(),getCoverage(),gb); + + gb = VCFIndelAttributes.recordAvMM(getAvConsensusMismatches(),getAvRefMismatches(),gb); + + gb = VCFIndelAttributes.recordAvMapQ(getAvConsensusMapq(),getAvRefMapq(),gb); + + gb = VCFIndelAttributes.recordNQSMMRate(getNQSConsensusMMRate(),getNQSRefMMRate(),gb); + + gb = VCFIndelAttributes.recordNQSAvQ(getNQSConsensusAvQual(),getNQSRefAvQual(),gb); + + gb = VCFIndelAttributes.recordOffsetFromStart(from_start_median,from_start_mad,gb); + + gb = VCFIndelAttributes.recordOffsetFromEnd(from_end_median,from_end_mad,gb); + + PrimitivePair.Int strand_cons = getConsensusStrandCounts(); + PrimitivePair.Int strand_ref = getRefStrandCounts(); + + gb = VCFIndelAttributes.recordStrandCounts(strand_cons.first,strand_cons.second,strand_ref.first,strand_ref.second,gb); + return gb; + } + } interface IndelListener { @@ -2170,18 +2196,18 @@ class VCFIndelAttributes { public static Set getAttributeHeaderLines() { Set lines = new HashSet(); - lines.add(new VCFFormatHeaderLine(ALLELIC_DEPTH_KEY, 2, VCFHeaderLineType.Integer, "# of reads supporting consensus indel/reference at the site")); + lines.add(new VCFFormatHeaderLine(ALLELIC_DEPTH_KEY, 2, VCFHeaderLineType.Integer, "# of reads supporting consensus reference/indel at the site")); lines.add(new VCFFormatHeaderLine(DEPTH_TOTAL_KEY, 1, VCFHeaderLineType.Integer, "Total coverage at the site")); - lines.add(new VCFFormatHeaderLine(MAPQ_KEY, 2, VCFHeaderLineType.Float, "Average mapping qualities of consensus indel-supporting reads/reference-supporting reads")); + lines.add(new VCFFormatHeaderLine(MAPQ_KEY, 2, VCFHeaderLineType.Float, "Average mapping qualities of ref-/consensus indel-supporting reads")); - lines.add(new VCFFormatHeaderLine(MM_KEY, 2, VCFHeaderLineType.Float, "Average # of mismatches per consensus indel-supporting read/per reference-supporting read")); + lines.add(new VCFFormatHeaderLine(MM_KEY, 2, VCFHeaderLineType.Float, "Average # of mismatches per ref-/consensus indel-supporting read")); - lines.add(new VCFFormatHeaderLine(NQS_MMRATE_KEY, 2, VCFHeaderLineType.Float, "Within NQS window: fraction of mismatching bases in consensus indel-supporting reads/in reference-supporting reads")); + lines.add(new VCFFormatHeaderLine(NQS_MMRATE_KEY, 2, VCFHeaderLineType.Float, "Within NQS window: fraction of mismatching bases in ref/consensus indel-supporting reads")); - lines.add(new VCFFormatHeaderLine(NQS_AVQ_KEY, 2, VCFHeaderLineType.Float, "Within NQS window: average quality of bases from consensus indel-supporting reads/from reference-supporting reads")); + lines.add(new VCFFormatHeaderLine(NQS_AVQ_KEY, 2, VCFHeaderLineType.Float, "Within NQS window: average quality of bases in ref-/consensus indel-supporting reads")); - lines.add(new VCFFormatHeaderLine(STRAND_COUNT_KEY, 4, VCFHeaderLineType.Integer, "Strandness: counts of forward-/reverse-aligned indel-supporting reads / forward-/reverse-aligned reference supporting reads")); + lines.add(new VCFFormatHeaderLine(STRAND_COUNT_KEY, 4, VCFHeaderLineType.Integer, "Strandness: counts of forward-/reverse-aligned reference and indel-supporting reads (FwdRef,RevRef,FwdIndel,RevIndel)")); lines.add(new VCFFormatHeaderLine(RSTART_OFFSET_KEY, 2, VCFHeaderLineType.Integer, "Median/mad of indel offsets from the starts of the reads")); lines.add(new VCFFormatHeaderLine(REND_OFFSET_KEY, 2, VCFHeaderLineType.Integer, "Median/mad of indel offsets from the ends of the reads")); @@ -2194,39 +2220,72 @@ class VCFIndelAttributes { return attrs; } + public static GenotypeBuilder recordStrandCounts(int cnt_cons_fwd, int cnt_cons_rev, int cnt_ref_fwd, int cnt_ref_rev, + GenotypeBuilder gb) { + return gb.attribute(STRAND_COUNT_KEY, new Integer[] {cnt_ref_fwd, cnt_ref_rev,cnt_cons_fwd, cnt_cons_rev } ); + } + public static Map recordDepth(int cnt_cons, int cnt_indel, int cnt_total, Map attrs) { - attrs.put(ALLELIC_DEPTH_KEY, new Integer[] {cnt_cons, cnt_indel} ); + attrs.put(ALLELIC_DEPTH_KEY, new Integer[] {cnt_total-cnt_indel, cnt_cons} ); attrs.put(DEPTH_TOTAL_KEY, cnt_total); return attrs; } + public static GenotypeBuilder recordDepth(int cnt_cons, int cnt_indel, int cnt_total, GenotypeBuilder gb) { + return gb.AD(new int[] {cnt_total-cnt_indel,cnt_cons} ).DP(cnt_total); + } + public static Map recordAvMapQ(double cons, double ref, Map attrs) { - attrs.put(MAPQ_KEY, new Float[] {(float)cons, (float)ref} ); + attrs.put(MAPQ_KEY, new Float[] {(float)ref, (float)cons} ); return attrs; } + public static GenotypeBuilder recordAvMapQ(double cons, double ref, GenotypeBuilder gb) { + return gb.attribute(MAPQ_KEY,new float[] {(float)ref, (float)cons} ); + } + public static Map recordAvMM(double cons, double ref, Map attrs) { - attrs.put(MM_KEY, new Float[] {(float)cons, (float)ref} ); + attrs.put(MM_KEY, new Float[] {(float)ref, (float)cons} ); return attrs; } + public static GenotypeBuilder recordAvMM(double cons, double ref, GenotypeBuilder gb) { + return gb.attribute(MM_KEY, new float[] {(float)ref, (float)cons} ); + } + public static Map recordNQSMMRate(double cons, double ref, Map attrs) { - attrs.put(NQS_MMRATE_KEY, new Float[] {(float)cons, (float)ref} ); + attrs.put(NQS_MMRATE_KEY, new Float[] {(float)ref, (float)cons} ); return attrs; } + public static GenotypeBuilder recordNQSMMRate(double cons, double ref, GenotypeBuilder gb) { + return gb.attribute(NQS_MMRATE_KEY, new float[] {(float)ref, (float)cons} ); + } + public static Map recordNQSAvQ(double cons, double ref, Map attrs) { - attrs.put(NQS_AVQ_KEY, new Float[] {(float)cons, (float)ref} ); + attrs.put(NQS_AVQ_KEY, new float[] {(float)ref, (float)cons} ); return attrs; } + public static GenotypeBuilder recordNQSAvQ(double cons, double ref, GenotypeBuilder gb) { + return gb.attribute(NQS_AVQ_KEY, new float[] {(float)ref, (float)cons} ); + } + public static Map recordOffsetFromStart(int median, int mad, Map attrs) { attrs.put(RSTART_OFFSET_KEY, new Integer[] {median, mad} ); return attrs; } + public static GenotypeBuilder recordOffsetFromStart(int median, int mad, GenotypeBuilder gb) { + return gb.attribute(RSTART_OFFSET_KEY, new int[] {median, mad} ); + } + public static Map recordOffsetFromEnd(int median, int mad, Map attrs) { attrs.put(REND_OFFSET_KEY, new Integer[] {median, mad} ); return attrs; } + + public static GenotypeBuilder recordOffsetFromEnd(int median, int mad, GenotypeBuilder gb) { + return gb.attribute(REND_OFFSET_KEY, new int[] {median, mad} ); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java index e74684c53..78bcf1228 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java @@ -55,8 +55,8 @@ public class AssessReducedQuals extends LocusWalker implem @Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > epsilon*Quals_original_bam we output this interval", required = false) public int qual_epsilon = 0; - @Argument(fullName = "debugLevel", shortName = "debug", doc = "debug mode on") - public int debugLevel = 0; + @Argument(fullName = "debugLevel", shortName = "debug", doc = "debug mode on") // TODO -- best to make this optional + public int debugLevel = 0; // TODO -- best to make this an enum or boolean @Output protected PrintStream out; @@ -114,7 +114,11 @@ public class AssessReducedQuals extends LocusWalker implem return quals; } + // TODO -- arguments/variables should be final, not method declaration private final boolean isGoodRead(PileupElement p, List tags){ + // TODO -- this isn't quite right. You don't need the tags here. Instead, you want to check whether the read itself (which + // TODO -- you can get from the PileupElement) is a reduced read (not all reads from the reduced bam are reduced) and only + // TODO -- for them do you want to ignore that min mapping quality cutoff (but you *do* still want the min base cutoff). return !p.isDeletion() && (tags.contains(reduced) || (tags.contains(original) && (int)p.getQual() >= 20 && p.getMappingQual() >= 20)); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 5f80f77a4..059e9c5fb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -160,7 +160,7 @@ public class VariantsToVCF extends RodWalker { Map alleleMap = new HashMap(2); alleleMap.put(RawHapMapFeature.DELETION, Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion())); - alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(ref.getBase() + ((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion())); + alleleMap.put(RawHapMapFeature.INSERTION, Allele.create((char)ref.getBase() + ((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion())); hapmap.setActualAlleles(alleleMap); // also, use the correct positioning for insertions diff --git a/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java b/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java index 8964c16cb..4455666e8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java +++ b/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java @@ -4,12 +4,21 @@ package org.broadinstitute.sting.utils; * Simple utility class that makes it convenient to print unit adjusted times */ public class AutoFormattingTime { - double timeInSeconds; // in Seconds - int precision; // for format + private final int width; // for format + private final int precision; // for format - public AutoFormattingTime(double timeInSeconds, int precision) { + double timeInSeconds; // in Seconds + private final String formatString; + + public AutoFormattingTime(double timeInSeconds, final int width, int precision) { + this.width = width; this.timeInSeconds = timeInSeconds; this.precision = precision; + this.formatString = "%" + width + "." + precision + "f %s"; + } + + public AutoFormattingTime(double timeInSeconds, int precision) { + this(timeInSeconds, 6, precision); } public AutoFormattingTime(double timeInSeconds) { @@ -20,6 +29,20 @@ public class AutoFormattingTime { return timeInSeconds; } + /** + * @return the precision (a la format's %WIDTH.PERCISIONf) + */ + public int getWidth() { + return width; + } + + /** + * @return the precision (a la format's %WIDTH.PERCISIONf) + */ + public int getPrecision() { + return precision; + } + /** * Instead of 10000 s, returns 2.8 hours * @return @@ -48,6 +71,6 @@ public class AutoFormattingTime { } } - return String.format("%6."+precision+"f %s", unitTime, unit); + return String.format(formatString, unitTime, unit); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java index 2663e848f..44a3e9af3 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java @@ -159,6 +159,7 @@ public class VCFHeader { */ public void addMetaDataLine(VCFHeaderLine headerLine) { mMetaData.add(headerLine); + loadMetaDataMaps(); } /** @@ -236,7 +237,6 @@ public class VCFHeader { + VCFConstants.GENOTYPE_PL_KEY + " field. As the GATK now only manages PL fields internally" + " automatically adding a corresponding PL field to your VCF header"); addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification")); - loadMetaDataMaps(); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java index 69cf52fc2..a8715e242 100755 --- a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java +++ b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.utils.progressmeter; +import com.google.java.contract.Ensures; import com.google.java.contract.Invariant; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.*; @@ -200,6 +201,14 @@ public class ProgressMeter { "Location", processingUnitName, processingUnitName)); } + /** + * @return the current runtime in nanoseconds + */ + @Ensures("result >= 0") + public long getRuntimeInNanoseconds() { + return timer.getElapsedTimeNano(); + } + /** * Utility routine that prints out process information (including timing) every N records or * every M seconds, for N and M set in global variables. diff --git a/public/java/test/org/broadinstitute/sting/gatk/MaxRuntimeIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/MaxRuntimeIntegrationTest.java new file mode 100644 index 000000000..6cfd7bf46 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/MaxRuntimeIntegrationTest.java @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk; + +import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.utils.SimpleTimer; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.Collections; +import java.util.concurrent.TimeUnit; + +/** + * + */ +public class MaxRuntimeIntegrationTest extends WalkerTest { + private static final long STARTUP_TIME = TimeUnit.NANOSECONDS.convert(20, TimeUnit.SECONDS); + + private class MaxRuntimeTestProvider extends TestDataProvider { + final long maxRuntime; + final TimeUnit unit; + + public MaxRuntimeTestProvider(final long maxRuntime, final TimeUnit unit) { + super(MaxRuntimeTestProvider.class); + this.maxRuntime = maxRuntime; + this.unit = unit; + setName(String.format("Max runtime test : %d of %s", maxRuntime, unit)); + } + + public long expectedMaxRuntimeNano() { + return TimeUnit.NANOSECONDS.convert(maxRuntime, unit) + STARTUP_TIME; + } + } + + @DataProvider(name = "MaxRuntimeProvider") + public Object[][] makeMaxRuntimeProvider() { + for ( final TimeUnit requestedUnits : Arrays.asList(TimeUnit.NANOSECONDS, TimeUnit.MILLISECONDS, TimeUnit.SECONDS, TimeUnit.MINUTES) ) + new MaxRuntimeTestProvider(requestedUnits.convert(30, TimeUnit.SECONDS), requestedUnits); + + return MaxRuntimeTestProvider.getTests(MaxRuntimeTestProvider.class); + } + + // + // Loop over errors to throw, make sure they are the errors we get back from the engine, regardless of NT type + // + @Test(enabled = true, dataProvider = "MaxRuntimeProvider", timeOut = 60 * 1000) + public void testMaxRuntime(final MaxRuntimeTestProvider cfg) { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T UnifiedGenotyper -R " + hg18Reference + + " -I " + validationDataLocation + "NA12878.WEx.downsampled20x.bam -o /dev/null" + + " -maxRuntime " + cfg.maxRuntime + " -maxRuntimeUnits " + cfg.unit, 0, + Collections.emptyList()); + final SimpleTimer timer = new SimpleTimer().start(); + executeTest("Max runtime " + cfg, spec); + final long actualRuntimeNano = timer.getElapsedTimeNano(); + + Assert.assertTrue(actualRuntimeNano < cfg.expectedMaxRuntimeNano(), + "Actual runtime " + TimeUnit.SECONDS.convert(actualRuntimeNano, TimeUnit.NANOSECONDS) + + " exceeded max. tolerated runtime " + TimeUnit.SECONDS.convert(cfg.expectedMaxRuntimeNano(), TimeUnit.NANOSECONDS) + + " given requested runtime " + cfg.maxRuntime + " " + cfg.unit); + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 1b2fbb848..9212d0e53 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -117,24 +117,24 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // Note that we need to turn off any randomization for this to work, so no downsampling and no annotations - String md5 = "3d25ea660275a455ca443a786bff3d32"; + String md5 = "d408b4661b820ed86272415b8ea08780"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( - baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, + baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, Arrays.asList(md5)); executeTest("test parallelization (single thread)", spec1); GenomeAnalysisEngine.resetRandomGenerator(); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( - baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 2", 1, + baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 2", 1, Arrays.asList(md5)); executeTest("test parallelization (2 threads)", spec2); GenomeAnalysisEngine.resetRandomGenerator(); WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( - baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 4", 1, + baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 4", 1, Arrays.asList(md5)); executeTest("test parallelization (4 threads)", spec3); } @@ -157,7 +157,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSLOD() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("9f95cfe14d53a697c58247833bfd72a6")); + Arrays.asList("6ccb9bd88934e4272d0ce362dd35e603")); executeTest("test SLOD", spec); } @@ -451,13 +451,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("1b711c0966a2f76eb21801e73b76b758")); + Arrays.asList("9a7cd58b9e3d5b72608c0d529321deba")); executeTest("test calling on a ReducedRead BAM", spec); } @Test public void testReducedBamSNPs() { - testReducedCalling("SNP", "9ba4867cadb366746ee63e7a4afdb95e"); + testReducedCalling("SNP", "e7fc11baf208a1bca7b462d3148c936e"); } @Test diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java index 37614f15f..80b0b4ee2 100755 --- a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java @@ -32,11 +32,12 @@ public class NanoSchedulerIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( "-T UnifiedGenotyper -R " + b37KGReference, - "-nosl --no_cmdline_in_header -G", + "--no_cmdline_in_header -G", //"--dbsnp " + b37dbSNP132, "-I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", "-L 20:10,000,000-10,100,000", "-glm " + glm, + "--contamination_fraction_to_filter 0.0", "-nt " + nt, "-nct " + nct, "-o %s"