From 22bf0528280630b1ae16e04a465d85a0b6e2922d Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 1 Aug 2012 13:17:16 -0400 Subject: [PATCH 01/18] Fixing BQSR GATK docs --- .../sting/gatk/walkers/bqsr/BaseRecalibrator.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index b2400a49d..06f42cbad 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -58,7 +58,7 @@ import java.util.ArrayList; * This walker is designed to work as the first pass in a two-pass processing step. It does a by-locus traversal operating * only at sites that are not in dbSNP. We assume that all reference mismatches we see are therefore errors and indicative * of poor base quality. This walker generates tables based on various user-specified covariates (such as read group, - * reported quality score, cycle, and dinucleotide). Since there is a large amount of data one can then calculate an empirical + * reported quality score, cycle, and context). Since there is a large amount of data one can then calculate an empirical * probability of error given the particular covariates seen at this site, where p(error) = num mismatches / num observations. * The output file is a table (of the several covariate values, num observations, num mismatches, empirical quality score). *

@@ -90,7 +90,7 @@ import java.util.ArrayList; *

Examples

*
  * java -Xmx4g -jar GenomeAnalysisTK.jar \
- *   -T BaseQualityScoreRecalibrator \
+ *   -T BaseRecalibrator \
  *   -I my_reads.bam \
  *   -R resources/Homo_sapiens_assembly18.fasta \
  *   -knownSites bundle/hg18/dbsnp_132.hg18.vcf \

From 4093909a56ad3366308da40656cf2317b594e7ab Mon Sep 17 00:00:00 2001
From: Ryan Poplin 
Date: Wed, 1 Aug 2012 14:30:24 -0400
Subject: [PATCH 02/18] Updating VQSR docs. Removing references to old best
 practices pages.

---
 .../sting/gatk/walkers/bqsr/BQSR.R            |  1 +
 .../ApplyRecalibration.java                   |  6 ++---
 .../VariantRecalibrator.java                  | 22 ++++---------------
 3 files changed, 7 insertions(+), 22 deletions(-)

diff --git a/public/R/scripts/org/broadinstitute/sting/gatk/walkers/bqsr/BQSR.R b/public/R/scripts/org/broadinstitute/sting/gatk/walkers/bqsr/BQSR.R
index fd301cd97..6c4dace1d 100644
--- a/public/R/scripts/org/broadinstitute/sting/gatk/walkers/bqsr/BQSR.R
+++ b/public/R/scripts/org/broadinstitute/sting/gatk/walkers/bqsr/BQSR.R
@@ -1,4 +1,5 @@
 library("ggplot2")
+library("tools") #For compactPDF in R 2.13+
 
 args <- commandArgs(TRUE)
 data <- read.csv(args[1])
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
index 744a094c6..073f56707 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
@@ -58,14 +58,11 @@ import java.util.*;
  * to the desired level but also has the information necessary to pull out more variants for a higher sensitivity but a
  * slightly lower quality level.
  *
- * 

- * See the GATK wiki for a tutorial and example recalibration accuracy plots. - * *

Input

*

* The input raw variants to be recalibrated. *

- * The recalibration table file in CSV format that was generated by the VariantRecalibrator walker. + * The recalibration table file in VCF format that was generated by the VariantRecalibrator walker. *

* The tranches file that was generated by the VariantRecalibrator walker. * @@ -82,6 +79,7 @@ import java.util.*; * --ts_filter_level 99.0 \ * -tranchesFile path/to/output.tranches \ * -recalFile path/to/output.recal \ + * -mode SNP \ * -o path/to/output.recalibrated.filtered.vcf *

* diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 2a424e15b..a2909d4ca 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -38,7 +38,6 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.R.RScriptExecutor; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; @@ -48,7 +47,6 @@ import org.broadinstitute.sting.utils.io.Resource; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; import java.io.File; import java.io.FileNotFoundException; @@ -73,15 +71,9 @@ import java.util.*; * the log odds ratio of being a true variant versus being false under the trained Gaussian mixture model. * *

- * NOTE: Please see our best practices wiki page for our recommendations on which annotations to use for specific project designs. - * - *

* NOTE: In order to create the model reporting plots Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version). * See http://www.r-project.org for more info on how to download and install R. * - *

- * See the GATK wiki for a tutorial and example recalibration accuracy plots. - * *

Input

*

* The input raw variants to be recalibrated. @@ -90,7 +82,7 @@ import java.util.*; * *

Output

*

- * A recalibration table file in CSV format that is used by the ApplyRecalibration walker. + * A recalibration table file in VCF format that is used by the ApplyRecalibration walker. *

* A tranches file which shows various metrics of the recalibration callset as a function of making several slices through the data. * @@ -102,8 +94,9 @@ import java.util.*; * -input NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf \ * -resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.b37.sites.vcf \ * -resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.b37.sites.vcf \ - * -resource:dbsnp,known=true,training=false,truth=false,prior=8.0 dbsnp_132.b37.vcf \ - * -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an MQ \ + * -resource:dbsnp,known=true,training=false,truth=false,prior=6.0 dbsnp_135.b37.vcf \ + * -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an FS -an MQ -an InbreedingCoeff \ + * -mode SNP \ * -recalFile path/to/output.recal \ * -tranchesFile path/to/output.tranches \ * -rscriptFile path/to/output.plots.R @@ -187,9 +180,6 @@ public class VariantRecalibrator extends RodWalker Date: Wed, 1 Aug 2012 14:52:49 -0400 Subject: [PATCH 03/18] Another fix to VQSR docs --- .../gatk/walkers/variantrecalibration/VariantRecalibrator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index a2909d4ca..5e2125fb2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -63,7 +63,7 @@ import java.util.*; * The purpose of the variant recalibrator is to assign a well-calibrated probability to each variant call in a call set. * One can then create highly accurate call sets by filtering based on this single estimate for the accuracy of each call. * The approach taken by variant quality score recalibration is to develop a continuous, covarying estimate of the relationship - * between SNP call annotations (QD, SB, HaplotypeScore, HRun, for example) and the the probability that a SNP is a true genetic + * between SNP call annotations (QD, MQ, HaplotypeScore, and ReadPosRankSum, for example) and the the probability that a SNP is a true genetic * variant versus a sequencing or data processing artifact. This model is determined adaptively based on "true sites" provided * as input, typically HapMap 3 sites and those sites found to be polymorphic on the Omni 2.5M SNP chip array. This adaptive * error model can then be applied to both known and novel variation discovered in the call set of interest to evaluate the From 2b25df3d538ca4e1197786a500642c35a1b367b1 Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Mon, 30 Jul 2012 10:19:57 -0400 Subject: [PATCH 04/18] Add removeProgramRecords argument * Add unit test for the removeProgramRecords --- .../sting/gatk/GenomeAnalysisEngine.java | 3 +- .../arguments/GATKArgumentCollection.java | 3 + .../gatk/datasources/reads/SAMDataSource.java | 21 +++++- .../reads/SAMDataSourceUnitTest.java | 73 +++++++++++++++++++ 4 files changed, 95 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 5d6fb75ed..228f8351a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -813,7 +813,8 @@ public class GenomeAnalysisEngine { getWalkerBAQQualityMode(), refReader, getBaseRecalibration(), - argCollection.defaultBaseQualities); + argCollection.defaultBaseQualities, + argCollection.removeProgramRecords); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 3fd3857c5..91a008cbc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -249,6 +249,9 @@ public class GATKArgumentCollection { @Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false) public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT; + @Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we remove program records from the SAM header", required = false) + public boolean removeProgramRecords = false; + @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false) public ValidationExclusion.TYPE unsafe; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 0fa4234b3..7f0a0c4c0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -89,6 +89,11 @@ public class SAMDataSource { */ private final SAMFileReader.ValidationStringency validationStringency; + /** + * Do we want to remove the program records from this data source? + */ + private final boolean removeProgramRecords; + /** * Store BAM indices for each reader present. */ @@ -200,7 +205,8 @@ public class SAMDataSource { BAQ.QualityMode.DONT_MODIFY, null, // no BAQ null, // no BQSR - (byte) -1); + (byte) -1, + false); } /** @@ -233,7 +239,8 @@ public class SAMDataSource { BAQ.QualityMode qmode, IndexedFastaSequenceFile refReader, BaseRecalibration bqsrApplier, - byte defaultBaseQualities) { + byte defaultBaseQualities, + boolean removeProgramRecords) { this.readMetrics = new ReadMetrics(); this.genomeLocParser = genomeLocParser; @@ -249,6 +256,7 @@ public class SAMDataSource { dispatcher = null; validationStringency = strictness; + this.removeProgramRecords = removeProgramRecords; if(readBufferSize != null) ReadShard.setReadBufferSize(readBufferSize); else { @@ -748,7 +756,7 @@ public class SAMDataSource { private synchronized void createNewResource() { if(allResources.size() > maxEntries) throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use."); - SAMReaders readers = new SAMReaders(readerIDs, validationStringency); + SAMReaders readers = new SAMReaders(readerIDs, validationStringency, removeProgramRecords); allResources.add(readers); availableResources.add(readers); } @@ -777,9 +785,11 @@ public class SAMDataSource { /** * Derive a new set of readers from the Reads metadata. * @param readerIDs reads to load. + * TODO: validationStringency is not used here * @param validationStringency validation stringency. + * @param removeProgramRecords indicate whether to clear program records from the readers */ - public SAMReaders(Collection readerIDs, SAMFileReader.ValidationStringency validationStringency) { + public SAMReaders(Collection readerIDs, SAMFileReader.ValidationStringency validationStringency, boolean removeProgramRecords) { final int totalNumberOfFiles = readerIDs.size(); int readerNumber = 1; final SimpleTimer timer = new SimpleTimer().start(); @@ -790,6 +800,9 @@ public class SAMDataSource { long lastTick = timer.currentTime(); for(final SAMReaderID readerID: readerIDs) { final ReaderInitializer init = new ReaderInitializer(readerID).call(); + if (removeProgramRecords) { + init.reader.getFileHeader().setProgramRecords(new ArrayList()); + } if (threadAllocation.getNumIOThreads() > 0) { inputStreams.put(init.readerID, init.blockInputStream); // get from initializer } diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java index 1c5dab254..f2c546317 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java @@ -24,9 +24,12 @@ package org.broadinstitute.sting.gatk.datasources.reads; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMProgramRecord; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.commandline.Tags; @@ -36,6 +39,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.annotations.AfterMethod; @@ -143,4 +147,73 @@ public class SAMDataSourceUnitTest extends BaseTest { fail("testLinearBreakIterateAll: We Should get a UserException.CouldNotReadInputFile exception"); } } + + /** Test that we clear program records when requested */ + @Test + public void testRemoveProgramRecords() { + logger.warn("Executing testRemoveProgramRecords"); + + // setup the data + readers.add(new SAMReaderID(new File(b37GoodBAM),new Tags())); + + // use defaults + SAMDataSource data = new SAMDataSource(readers, + new ThreadAllocation(), + null, + genomeLocParser, + false, + SAMFileReader.ValidationStringency.SILENT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + false); + + List defaultProgramRecords = data.getHeader().getProgramRecords(); + assertTrue(defaultProgramRecords.size() != 0, "testRemoveProgramRecords: No program records found when using default constructor"); + + boolean removeProgramRecords = false; + data = new SAMDataSource(readers, + new ThreadAllocation(), + null, + genomeLocParser, + false, + SAMFileReader.ValidationStringency.SILENT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + false, + BAQ.CalculationMode.OFF, + BAQ.QualityMode.DONT_MODIFY, + null, // no BAQ + null, // no BQSR + (byte) -1, + removeProgramRecords); + + List dontRemoveProgramRecords = data.getHeader().getProgramRecords(); + assertEquals(dontRemoveProgramRecords, defaultProgramRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false"); + + removeProgramRecords = true; + data = new SAMDataSource(readers, + new ThreadAllocation(), + null, + genomeLocParser, + false, + SAMFileReader.ValidationStringency.SILENT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + false, + BAQ.CalculationMode.OFF, + BAQ.QualityMode.DONT_MODIFY, + null, // no BAQ + null, // no BQSR + (byte) -1, + removeProgramRecords); + + List doRemoveProgramRecords = data.getHeader().getProgramRecords(); + assertTrue(doRemoveProgramRecords.isEmpty(), "testRemoveProgramRecords: program records not cleared when removeProgramRecords = true"); + } } From ccac77d888f3a0c2cd5e3d8d2c0e7c13c21596af Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 1 Aug 2012 11:57:16 -0400 Subject: [PATCH 05/18] Bugfix for incorrect allele counting in IndelSummary -- Previous version would count all alt alleles as present in a sample, even if only 1 were present, because of the way VariantEval subsetted VCs -- Updated code for subsetting VCs by sample to be clearer about how it handles rederiving alleles -- Update a few pieces of code to get previous correct behavior -- Updated a few MD5s as now ref calls at sites in dbSNP are counted as having a comp sites, and therefore show up in known sites when Novelty strat is on (which I think is correct) -- Walkers that used old subsetting function with true are now using clearer version that does rederive alleles by default --- .../walkers/phasing/ReadBackedPhasing.java | 2 +- .../GLBasedSampleSelector.java | 2 +- .../GTBasedSampleSelector.java | 2 +- .../gatk/walkers/varianteval/VariantEval.java | 5 ++++- .../evaluators/TiTvVariantEvaluator.java | 9 ++++++--- .../varianteval/stratifications/Novelty.java | 2 +- .../varianteval/util/VariantEvalUtils.java | 12 +++++------- .../utils/variantcontext/VariantContext.java | 18 +++++++++++++++--- .../VariantEvalIntegrationTest.java | 6 +++--- .../VariantContextBenchmark.java | 2 +- 10 files changed, 38 insertions(+), 22 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java index f16deb701..f49e8f8c0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java @@ -288,7 +288,7 @@ public class ReadBackedPhasing extends RodWalker samplesToPhase) { // for ( String sample : samplesToPhase ) // logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() )); - VariantContext subvc = vc.subContextFromSamples(samplesToPhase, true); + VariantContext subvc = vc.subContextFromSamples(samplesToPhase); // logger.debug("original VC = " + vc); // logger.debug("sub VC = " + subvc); return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java index e54dc6388..3e48520a7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java @@ -43,7 +43,7 @@ public class GLBasedSampleSelector extends SampleSelector { return true; // want to include a site in the given samples if it is *likely* to be variant (via the EXACT model) // first subset to the samples - VariantContext subContext = vc.subContextFromSamples(samples, true); + VariantContext subContext = vc.subContextFromSamples(samples); // now check to see (using EXACT model) whether this should be variant // do we want to apply a prior? maybe user-spec? diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java index 0f55524a6..de832b108 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java @@ -45,7 +45,7 @@ public class GTBasedSampleSelector extends SampleSelector{ if ( samples == null || samples.isEmpty() ) return true; - VariantContext subContext = vc.subContextFromSamples(samples, false); + VariantContext subContext = vc.subContextFromSamples(samples); if ( subContext.isPolymorphicInSamples() ) { return true; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java index 0b395bc62..58cd14737 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java @@ -500,7 +500,10 @@ public class VariantEval extends RodWalker implements TreeRedu @Requires({"eval != null", "comp != null"}) private EvalCompMatchType doEvalAndCompMatch(final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) { - // find all of the matching comps + if ( comp.getType() == VariantContext.Type.NO_VARIATION || eval.getType() == VariantContext.Type.NO_VARIATION ) + // if either of these are NO_VARIATION they are LENIENT matches + return EvalCompMatchType.LENIENT; + if ( comp.getType() != eval.getType() ) return EvalCompMatchType.NO_MATCH; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java index 6c4fcd26d..fe2437976 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java @@ -57,9 +57,12 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv } } - public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if (vc1 != null) updateTiTv(vc1, false); - if (vc2 != null) updateTiTv(vc2, true); + @Override + public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if (eval != null) + updateTiTv(eval, false); + if (comp != null) + updateTiTv(comp, true); } @Override diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java index 693bdf198..2ad08d806 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java @@ -28,7 +28,7 @@ public class Novelty extends VariantStratifier implements StandardStratification final Collection knownComps = tracker.getValues(knowns, ref.getLocus()); for ( final VariantContext c : knownComps ) { // loop over sites, looking for something that matches the type eval - if ( eval.getType() == c.getType() ) { + if ( eval.getType() == c.getType() || eval.getType() == VariantContext.Type.NO_VARIATION ) { return KNOWN_STATES; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index 3dcc1f85f..e84b0b10e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -197,7 +197,9 @@ public class VariantEvalUtils { * @return a new VariantContext with just the requested samples */ public VariantContext getSubsetOfVariantContext(VariantContext vc, Set sampleNames) { - return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, false)); + // if we want to preserve AC0 sites as polymorphic we need to not rederive alleles + final boolean deriveAlleles = variantEvalWalker.ignoreAC0Sites(); + return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, deriveAlleles)); } public VariantContext ensureAnnotations(final VariantContext vc, final VariantContext vcsub) { @@ -262,12 +264,8 @@ public class VariantEvalUtils { // First, filter the VariantContext to represent only the samples for evaluation VariantContext vcsub = vc; - if (subsetBySample && vc.hasGenotypes()) { - if ( variantEvalWalker.isSubsettingToSpecificSamples() ) - vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation()); - else - vcsub = ensureAnnotations(vc, vc); - } + if (subsetBySample && vc.hasGenotypes()) + vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation()); if ((byFilter || !vcsub.isFiltered())) { addMapping(mapping, VariantEval.getAllSampleName(), vcsub); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 979400350..2211cfe5e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -334,12 +334,14 @@ public class VariantContext implements Feature { // to enable tribble integratio * in this VC is returned as the set of alleles in the subContext, even if * some of those alleles aren't in the samples * + * WARNING: BE CAREFUL WITH rederiveAllelesFromGenotypes UNLESS YOU KNOW WHAT YOU ARE DOING? + * * @param sampleNames the sample names - * @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples + * @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples, true should be default * @return new VariantContext subsetting to just the given samples */ public VariantContext subContextFromSamples(Set sampleNames, final boolean rederiveAllelesFromGenotypes ) { - if ( sampleNames.containsAll(getSampleNames()) ) { + if ( sampleNames.containsAll(getSampleNames()) && ! rederiveAllelesFromGenotypes ) { return this; // fast path when you don't have any work to do } else { VariantContextBuilder builder = new VariantContextBuilder(this); @@ -355,8 +357,18 @@ public class VariantContext implements Feature { // to enable tribble integratio } } + /** + * @see #subContextFromSamples(java.util.Set, boolean) with rederiveAllelesFromGenotypes = true + * + * @param sampleNames + * @return + */ + public VariantContext subContextFromSamples(final Set sampleNames) { + return subContextFromSamples(sampleNames, true); + } + public VariantContext subContextFromSample(String sampleName) { - return subContextFromSamples(Collections.singleton(sampleName), true); + return subContextFromSamples(Collections.singleton(sampleName)); } /** diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index d9a91c4c2..94e52c2b9 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -34,7 +34,7 @@ import java.util.Arrays; import java.util.List; public class VariantEvalIntegrationTest extends WalkerTest { - private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval/"; + private static String variantEvalTestDataRoot = privateTestDir + "VariantEval/"; private static String fundamentalTestVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf"; private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.vcf"; private static String fundamentalTestSNPsWithMLEVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.withMLE.vcf"; @@ -122,7 +122,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("e62a3bd9914d48e2bb2fb4f5dfc5ebc0") + Arrays.asList("40abbc9be663aed8ee1158f832463ca8") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); } @@ -144,7 +144,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("087a2d9943c53e7f49663667c3305c7e") + Arrays.asList("106a0e8753e839c0a2c030eb4b165fa9") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java index 7c522eadf..0e5522e3a 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java @@ -152,7 +152,7 @@ public class VariantContextBenchmark extends SimpleBenchmark { public void run(final VariantContext vc) { if ( samples == null ) samples = new HashSet(new ArrayList(vc.getSampleNames()).subList(0, nSamplesToTake)); - VariantContext sub = vc.subContextFromSamples(samples, true); + VariantContext sub = vc.subContextFromSamples(samples); sub.getNSamples(); } }; From c3c3d18611f06e2831d3ed39263b44ed106f0df1 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 1 Aug 2012 17:09:22 -0400 Subject: [PATCH 07/18] Update BCF2 to put PASS as offset 0 not at the end -- Unfortunately this commit breaks backward compatibility with all existing BCF2 files... --- .../sting/utils/codecs/bcf2/BCF2Decoder.java | 2 +- .../sting/utils/codecs/bcf2/BCF2Utils.java | 10 +++------- .../utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java | 2 +- .../sting/utils/codecs/vcf/VCFIntegrationTest.java | 2 +- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java index a13be21c5..2619a4dae 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java @@ -202,7 +202,7 @@ public final class BCF2Decoder { return null; else { final String s = new String(bytes, 0, goodLength); - return BCF2Utils.isCollapsedString(s) ? BCF2Utils.exploreStringList(s) : s; + return BCF2Utils.isCollapsedString(s) ? BCF2Utils.explodeStringList(s) : s; } } catch ( IOException e ) { throw new ReviewedStingException("readByte failure", e); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java index 43e933948..1beacd070 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java @@ -79,23 +79,19 @@ public final class BCF2Utils { final Set seen = new HashSet(); final ArrayList dict = new ArrayList(); - boolean sawPASS = false; + dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field + // set up the strings dictionary for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) { if ( line instanceof VCFIDHeaderLine && ! (line instanceof VCFContigHeaderLine) ) { final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line; if ( ! seen.contains(idLine.getID())) { - sawPASS = sawPASS || idLine.getID().equals(VCFConstants.PASSES_FILTERS_v4); dict.add(idLine.getID()); seen.add(idLine.getID()); } } } - - if ( ! sawPASS ) - dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field - return dict; } @@ -177,7 +173,7 @@ public final class BCF2Utils { */ @Requires({"collapsed != null", "isCollapsedString(collapsed)"}) @Ensures("result != null") - public static final List exploreStringList(final String collapsed) { + public static final List explodeStringList(final String collapsed) { assert isCollapsedString(collapsed); final String[] exploded = collapsed.substring(1).split(","); return Arrays.asList(exploded); diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java index a0feef186..7569ce90d 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java @@ -351,7 +351,7 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest { public void testEncodingListOfString(List strings, String expected) throws IOException { final String collapsed = BCF2Utils.collapseStringList(strings); Assert.assertEquals(collapsed, expected); - Assert.assertEquals(BCF2Utils.exploreStringList(collapsed), strings); + Assert.assertEquals(BCF2Utils.explodeStringList(collapsed), strings); } // ----------------------------------------------------------------- diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index 3948ba971..86c8c968d 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -77,7 +77,7 @@ public class VCFIntegrationTest extends WalkerTest { String testVCF = privateTestDir + "ex2.vcf"; String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s "; String test1 = baseCommand + "-T SelectVariants -V " + testVCF; - WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("a04a0fc22fedb516c663e56e51fc1e27")); + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("e8f721ce81e4fdadba13c5291027057f")); executeTest("Test writing samtools WEx BCF example", spec1); } From 0381fd7c83a076471f571f012f792fff7adfcd20 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 2 Aug 2012 11:25:10 -0400 Subject: [PATCH 09/18] Hmm, I thought I used the right md5s last time. Let's try again. --- .../sting/queue/pipeline/DataProcessingPipelineTest.scala | 4 ++-- .../sting/queue/pipeline/PacbioProcessingPipelineTest.scala | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala index 778e6c28a..3fb9e0efa 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala @@ -41,7 +41,7 @@ class DataProcessingPipelineTest { " -D " + BaseTest.publicTestDir + "exampleDBSNP.vcf", " -test ", " -p " + projectName).mkString - spec.fileMD5s += testOut -> "021db3a52ee40d9193895213aaa05dd5" + spec.fileMD5s += testOut -> "60d39ae909fdd049920b54e0965b6d3c" PipelineTest.executeTest(spec) } @@ -60,7 +60,7 @@ class DataProcessingPipelineTest { " -bwa /home/unix/carneiro/bin/bwa", " -bwape ", " -p " + projectName).mkString - spec.fileMD5s += testOut -> "94e32ee974aa15d05ee7d3e99f2f8c8f" + spec.fileMD5s += testOut -> "61ca3237afdfabf78ee27a5bb80dae59" PipelineTest.executeTest(spec) } diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala index 62ee7ce90..74e947377 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala @@ -40,7 +40,7 @@ class PacbioProcessingPipelineTest { " -blasr ", " -test ", " -D " + BaseTest.publicTestDir + "exampleDBSNP.vcf").mkString - spec.fileMD5s += testOut -> "8b5f9e8e1ba5bd686e3084eaeea23889" + spec.fileMD5s += testOut -> "61b06e8b78a93e6644657e6d38851084" PipelineTest.executeTest(spec) } } From e3f89fb0549c8f2c751338ffeeda76678c4d94c4 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 2 Aug 2012 11:33:21 -0400 Subject: [PATCH 10/18] Missing/malformed GATK report files are user errors --- .../src/org/broadinstitute/sting/gatk/report/GATKReport.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index bec1ea543..47bc48f81 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -89,9 +89,9 @@ public class GATKReport { reader = new BufferedReader(new FileReader(file)); reportHeader = reader.readLine(); } catch (FileNotFoundException e) { - throw new ReviewedStingException("Could not open file : " + file); + throw new UserException.CouldNotReadInputFile(file, "it does not exist"); } catch (IOException e) { - throw new ReviewedStingException("Could not read file : " + file); + throw new UserException.CouldNotReadInputFile(file, e); } From cb8bc18aeb7780af067f90351566a538fe2f7df8 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Thu, 2 Aug 2012 11:41:06 -0400 Subject: [PATCH 11/18] Fix for error in HaplotypeCaller. HC has a UG argument collection for the UG engine but some of those arguments aren't appropriate to set. --- .../sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java | 1 + 1 file changed, 1 insertion(+) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index f5f707690..6bb894323 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -239,6 +239,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem Set samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); samplesList.addAll( samples ); // initialize the UnifiedGenotyper Engine which is used to call into the exact model + UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; // the GLmodel isn't used by the HaplotypeCaller but it is dangerous to let the user change this argument UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling From fb5dabce18bbcd06ce60392ff1380faec50d8c08 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 2 Aug 2012 16:44:39 -0400 Subject: [PATCH 12/18] Update BCF2 to include a minor version number so we can rev (and report errors) with BCF2 -- We are no likely to fail with an error when reading old BCF files, rather than just giving bad results -- Added new class BCFVersion that consolidates all of the version management of BCF --- .../sting/utils/codecs/bcf2/BCF2Codec.java | 22 ++++- .../sting/utils/codecs/bcf2/BCF2Utils.java | 44 +++++----- .../sting/utils/codecs/bcf2/BCFVersion.java | 80 +++++++++++++++++++ .../variantcontext/writer/BCF2Writer.java | 6 +- .../utils/codecs/vcf/VCFIntegrationTest.java | 2 +- 5 files changed, 122 insertions(+), 32 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCFVersion.java diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index 0f9cc34e7..0776b3aa8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -53,6 +53,11 @@ public final class BCF2Codec implements FeatureCodec { final protected static Logger logger = Logger.getLogger(BCF2Codec.class); private final static boolean FORBID_SYMBOLICS = false; + private final static int ALLOWED_MAJOR_VERSION = 2; + private final static int MIN_MINOR_VERSION = 1; + + private BCFVersion bcfVersion = null; + private VCFHeader header = null; /** @@ -131,8 +136,16 @@ public final class BCF2Codec implements FeatureCodec { public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream ) { try { // note that this reads the magic as well, and so does double duty - if ( ! BCF2Utils.startsWithBCF2Magic(inputStream) ) - error("Input stream does not begin with BCF2 magic"); + bcfVersion = BCFVersion.readBCFVersion(inputStream); + if ( bcfVersion == null ) + error("Input stream does not contain a BCF encoded file; BCF magic header info not found"); + + if ( bcfVersion.getMajorVersion() != ALLOWED_MAJOR_VERSION ) + error("BCF2Codec can only process BCF2 files, this file has major version " + bcfVersion.getMajorVersion()); + if ( bcfVersion.getMinorVersion() < MIN_MINOR_VERSION ) + error("BCF2Codec can only process BCF2 files with minor version >= " + MIN_MINOR_VERSION + " but this file has minor version " + bcfVersion.getMinorVersion()); + + logger.info("BCF version " + bcfVersion); final int headerSizeInBytes = BCF2Utils.readInt(BCF2Type.INT32.getSizeInBytes(), inputStream); @@ -187,7 +200,8 @@ public final class BCF2Codec implements FeatureCodec { FileInputStream fis = null; try { fis = new FileInputStream(path); - return BCF2Utils.startsWithBCF2Magic(fis); + final BCFVersion version = BCFVersion.readBCFVersion(fis); + return version != null && version.getMajorVersion() == ALLOWED_MAJOR_VERSION; } catch ( FileNotFoundException e ) { return false; } catch ( IOException e ) { @@ -196,7 +210,7 @@ public final class BCF2Codec implements FeatureCodec { try { if ( fis != null ) fis.close(); } catch ( IOException e ) { - ; // do nothing + // do nothing } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java index 1beacd070..d3f3fc102 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java @@ -41,8 +41,6 @@ import java.util.*; * @since 5/12 */ public final class BCF2Utils { - public static final byte[] MAGIC_HEADER_LINE = "BCF\2".getBytes(); - public static final int MAX_ALLELES_IN_GENOTYPES = 127; public static final int OVERFLOW_ELEMENT_MARKER = 15; @@ -75,7 +73,7 @@ public final class BCF2Utils { */ @Requires("header != null") @Ensures({"result != null", "new HashSet(result).size() == result.size()"}) - public final static ArrayList makeDictionary(final VCFHeader header) { + public static ArrayList makeDictionary(final VCFHeader header) { final Set seen = new HashSet(); final ArrayList dict = new ArrayList(); @@ -96,43 +94,37 @@ public final class BCF2Utils { } @Requires({"nElements >= 0", "type != null"}) - public final static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) { + public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) { int encodeSize = Math.min(nElements, OVERFLOW_ELEMENT_MARKER); byte typeByte = (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F)); return typeByte; } @Ensures("result >= 0") - public final static int decodeSize(final byte typeDescriptor) { + public static int decodeSize(final byte typeDescriptor) { return (0xF0 & typeDescriptor) >> 4; } @Ensures("result >= 0") - public final static int decodeTypeID(final byte typeDescriptor) { + public static int decodeTypeID(final byte typeDescriptor) { return typeDescriptor & 0x0F; } @Ensures("result != null") - public final static BCF2Type decodeType(final byte typeDescriptor) { + public static BCF2Type decodeType(final byte typeDescriptor) { return ID_TO_ENUM[decodeTypeID(typeDescriptor)]; } - public final static boolean sizeIsOverflow(final byte typeDescriptor) { + public static boolean sizeIsOverflow(final byte typeDescriptor) { return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER; } @Requires("nElements >= 0") - public final static boolean willOverflow(final long nElements) { + public static boolean willOverflow(final long nElements) { return nElements > MAX_INLINE_ELEMENTS; } - public final static boolean startsWithBCF2Magic(final InputStream stream) throws IOException { - final byte[] magicBytes = new byte[BCF2Utils.MAGIC_HEADER_LINE.length]; - stream.read(magicBytes); - return Arrays.equals(magicBytes, BCF2Utils.MAGIC_HEADER_LINE); - } - - public final static byte readByte(final InputStream stream) { + public static byte readByte(final InputStream stream) { // TODO -- shouldn't be capturing error here try { return (byte)(stream.read() & 0xFF); @@ -151,7 +143,7 @@ public final class BCF2Utils { */ @Requires({"strings != null", "strings.size() > 1"}) @Ensures("result != null") - public static final String collapseStringList(final List strings) { + public static String collapseStringList(final List strings) { final StringBuilder b = new StringBuilder(); for ( final String s : strings ) { if ( s != null ) { @@ -173,14 +165,14 @@ public final class BCF2Utils { */ @Requires({"collapsed != null", "isCollapsedString(collapsed)"}) @Ensures("result != null") - public static final List explodeStringList(final String collapsed) { + public static List explodeStringList(final String collapsed) { assert isCollapsedString(collapsed); final String[] exploded = collapsed.substring(1).split(","); return Arrays.asList(exploded); } @Requires("s != null") - public static final boolean isCollapsedString(final String s) { + public static boolean isCollapsedString(final String s) { return s.charAt(0) == ','; } @@ -222,7 +214,7 @@ public final class BCF2Utils { } @Ensures("result.isIntegerType()") - public final static BCF2Type determineIntegerType(final int value) { + public static BCF2Type determineIntegerType(final int value) { for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) { if ( potentialType.withinRange(value) ) return potentialType; @@ -232,7 +224,7 @@ public final class BCF2Utils { } @Ensures("result.isIntegerType()") - public final static BCF2Type determineIntegerType(final int[] values) { + public static BCF2Type determineIntegerType(final int[] values) { // literally a copy of the code below, but there's no general way to unify lists and arrays in java BCF2Type maxType = BCF2Type.INT8; for ( final int value : values ) { @@ -258,7 +250,7 @@ public final class BCF2Utils { */ @Requires({"t1.isIntegerType()","t2.isIntegerType()"}) @Ensures("result.isIntegerType()") - public final static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) { + public static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) { switch ( t1 ) { case INT8: return t2; case INT16: return t2 == BCF2Type.INT32 ? t2 : t1; @@ -268,7 +260,7 @@ public final class BCF2Utils { } @Ensures("result.isIntegerType()") - public final static BCF2Type determineIntegerType(final List values) { + public static BCF2Type determineIntegerType(final List values) { BCF2Type maxType = BCF2Type.INT8; for ( final int value : values ) { final BCF2Type type1 = determineIntegerType(value); @@ -293,7 +285,7 @@ public final class BCF2Utils { * @param o * @return */ - public final static List toList(final Object o) { + public static List toList(final Object o) { if ( o == null ) return Collections.emptyList(); else if ( o instanceof List ) return (List)o; else return Collections.singletonList(o); @@ -301,7 +293,7 @@ public final class BCF2Utils { @Requires({"stream != null", "bytesForEachInt > 0"}) - public final static int readInt(int bytesForEachInt, final InputStream stream) { + public static int readInt(int bytesForEachInt, final InputStream stream) { switch ( bytesForEachInt ) { case 1: { return (byte)(readByte(stream)); @@ -319,7 +311,7 @@ public final class BCF2Utils { } } - public final static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException { + public static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException { switch ( type.getSizeInBytes() ) { case 1: encodeStream.write(0xFF & value); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCFVersion.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCFVersion.java new file mode 100644 index 000000000..742da7c0c --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCFVersion.java @@ -0,0 +1,80 @@ +package org.broadinstitute.sting.utils.codecs.bcf2; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Arrays; + +/** + * Simple holder for BCF version information + * + * User: depristo + * Date: 8/2/12 + * Time: 2:16 PM + */ +public class BCFVersion { + /** + * BCF2 begins with the MAGIC info BCF_M_m where M is the major version (currently 2) + * and m is the minor version, currently 1 + */ + public static final byte[] MAGIC_HEADER_START = "BCF".getBytes(); + + final int majorVersion; + final int minorVersion; + + public BCFVersion(int majorVersion, int minorVersion) { + this.majorVersion = majorVersion; + this.minorVersion = minorVersion; + } + + /** + * @return the major version number of this BCF file + */ + public int getMajorVersion() { + return majorVersion; + } + + /** + * @return the minor version number of this BCF file + */ + public int getMinorVersion() { + return minorVersion; + } + + /** + * Return a new BCFVersion object describing the major and minor version of the BCF file in stream + * + * Note that stream must be at the very start of the file. + * + * @param stream + * @return a BCFVersion object, or null if stream doesn't contain a BCF file + * @throws IOException + */ + public static BCFVersion readBCFVersion(final InputStream stream) throws IOException { + final byte[] magicBytes = new byte[MAGIC_HEADER_START.length]; + stream.read(magicBytes); + if ( Arrays.equals(magicBytes, MAGIC_HEADER_START) ) { + // we're a BCF file + final int majorByte = stream.read(); + final int minorByte = stream.read(); + return new BCFVersion( majorByte, minorByte ); + } else + return null; + } + + /** + * Write out the BCF magic information indicating this is a BCF file with corresponding major and minor versions + * @param out + * @throws IOException + */ + public void write(final OutputStream out) throws IOException { + out.write(MAGIC_HEADER_START); + out.write(getMajorVersion() & 0xFF); + out.write(getMinorVersion() & 0xFF); + } + + @Override + public String toString() { + return String.format("BCF%d.%d", getMajorVersion(), getMinorVersion()); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java index b5da206ad..32377d09e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java @@ -31,6 +31,7 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils; +import org.broadinstitute.sting.utils.codecs.bcf2.BCFVersion; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -83,6 +84,9 @@ import java.util.*; * @since 06/12 */ class BCF2Writer extends IndexingVariantContextWriter { + public static final int MAJOR_VERSION = 2; + public static final int MINOR_VERSION = 1; + /** * If true, we will write out the undecoded raw bytes for a genotypes block, if it * is found in the input VC. This can be very dangerous as the genotype encoding @@ -153,7 +157,7 @@ class BCF2Writer extends IndexingVariantContextWriter { writer.close(); final byte[] headerBytes = capture.toByteArray(); - outputStream.write(BCF2Utils.MAGIC_HEADER_LINE); + new BCFVersion(MAJOR_VERSION, MINOR_VERSION).write(outputStream); BCF2Utils.encodeRawBytes(headerBytes.length, BCF2Type.INT32, outputStream); outputStream.write(headerBytes); } catch (IOException e) { diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index 86c8c968d..71fc1d464 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -57,7 +57,7 @@ public class VCFIntegrationTest extends WalkerTest { String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s "; String test1 = baseCommand + "-T SelectVariants -V " + testVCF; - WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("")); + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("bdab26dd7648a806dbab01f64db2bdab")); executeTest("Test reading and writing 1000G Phase I SVs", spec1); } From e04989f76d3b2771f83cb33900c31a5518503e36 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 3 Aug 2012 09:42:21 -0400 Subject: [PATCH 14/18] Bugfix for new PASS position in dictionary in BCF2 --- .../org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java index d3f3fc102..c79abe2ae 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java @@ -77,7 +77,9 @@ public final class BCF2Utils { final Set seen = new HashSet(); final ArrayList dict = new ArrayList(); - dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field + // special case the special PASS field which doesn't show up in the FILTER field definitions + seen.add(VCFConstants.PASSES_FILTERS_v4); + dict.add(VCFConstants.PASSES_FILTERS_v4); // set up the strings dictionary for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) { From 524d7ea306d18a687d9dd3ba2454746bab63a936 Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Tue, 31 Jul 2012 15:09:36 -0400 Subject: [PATCH 15/18] Choose whether to keep program records based on Walker * Add keepProgramRecords argument * Make removeProgramRecords / keepProgramRecords override default --- .../sting/gatk/GenomeAnalysisEngine.java | 11 ++++++++++- .../sting/gatk/arguments/GATKArgumentCollection.java | 5 ++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 228f8351a..967fd3236 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -797,6 +797,15 @@ public class GenomeAnalysisEngine { if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF) throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested."); + if (argCollection.removeProgramRecords && argCollection.keepProgramRecords) + throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options"); + + // LocusWalkers don't use program records, so remove them by default to save memory + boolean removeProgramRecords = (this.walker instanceof LocusWalker) || argCollection.removeProgramRecords; + + if (argCollection.keepProgramRecords) + removeProgramRecords = false; + return new SAMDataSource( samReaderIDs, threadAllocation, @@ -814,7 +823,7 @@ public class GenomeAnalysisEngine { refReader, getBaseRecalibration(), argCollection.defaultBaseQualities, - argCollection.removeProgramRecords); + removeProgramRecords); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 91a008cbc..972116952 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -249,9 +249,12 @@ public class GATKArgumentCollection { @Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false) public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT; - @Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we remove program records from the SAM header", required = false) + @Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we override the Walker's default and remove program records from the SAM header", required = false) public boolean removeProgramRecords = false; + @Argument(fullName = "keep_program_records", shortName = "kpr", doc = "Should we override the Walker's default and keep program records from the SAM header", required = false) + public boolean keepProgramRecords = false; + @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false) public ValidationExclusion.TYPE unsafe; From addbfd6437a47c1f3b9f9c6ffd51184561ed08dc Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Thu, 2 Aug 2012 14:21:30 -0400 Subject: [PATCH 16/18] Add a RemoveProgramRecords annotation * Add the RemoveProgramRecords annotation to LocusWalker --- .../sting/gatk/GenomeAnalysisEngine.java | 3 +-- .../sting/gatk/walkers/LocusWalker.java | 1 + .../gatk/walkers/RemoveProgramRecords.java | 21 +++++++++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/RemoveProgramRecords.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 967fd3236..56fcf0652 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -800,8 +800,7 @@ public class GenomeAnalysisEngine { if (argCollection.removeProgramRecords && argCollection.keepProgramRecords) throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options"); - // LocusWalkers don't use program records, so remove them by default to save memory - boolean removeProgramRecords = (this.walker instanceof LocusWalker) || argCollection.removeProgramRecords; + boolean removeProgramRecords = argCollection.removeProgramRecords || walker.getClass().isAnnotationPresent(RemoveProgramRecords.class); if (argCollection.keepProgramRecords) removeProgramRecords = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java index e94d01d5a..2a92d8831 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java @@ -19,6 +19,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES}) @PartitionBy(PartitionType.LOCUS) @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class}) +@RemoveProgramRecords public abstract class LocusWalker extends Walker { // Do we actually want to operate on the context? public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/RemoveProgramRecords.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/RemoveProgramRecords.java new file mode 100644 index 000000000..d9abc7925 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/RemoveProgramRecords.java @@ -0,0 +1,21 @@ +package org.broadinstitute.sting.gatk.walkers; + +/** + * Created with IntelliJ IDEA. + * User: thibault + * Date: 8/2/12 + * Time: 1:58 PM + * To change this template use File | Settings | File Templates. + */ + +import java.lang.annotation.*; + +/** + * Indicates that program records should be removed from SAM headers by default for this walker + */ +@Documented +@Inherited +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.TYPE) +public @interface RemoveProgramRecords { +} From 51bd03cc36cd709b0a5a070246729858bba5bc54 Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Thu, 2 Aug 2012 14:46:00 -0400 Subject: [PATCH 17/18] Add RemoveProgramRecords annotation to ActiveRegionWalker --- .../broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java | 1 + 1 file changed, 1 insertion(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index b2975cbbf..aba508b3e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -32,6 +32,7 @@ import java.util.List; @PartitionBy(PartitionType.READ) @ActiveRegionExtension(extension=50,maxRegion=1500) @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class}) +@RemoveProgramRecords public abstract class ActiveRegionWalker extends Walker { @Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false) From f40d0a0a28935382296d26e3de577f81fe458198 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Fri, 3 Aug 2012 10:44:36 -0400 Subject: [PATCH 18/18] Updating VQSR to work with the MNP and symbolic variants that are coming out of the HaplotypeCaller. Integration tests change because of the MNPs in dbSNP. --- .../ApplyRecalibration.java | 2 +- .../VariantDataManager.java | 31 +++++++++++++++++-- .../VariantRecalibrator.java | 8 +---- ...ntRecalibrationWalkersIntegrationTest.java | 6 ++-- 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index 073f56707..011f3471c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -196,7 +196,7 @@ public class ApplyRecalibration extends RodWalker implements T for( final VariantContext vc : VCs ) { - if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) { + if( VariantDataManager.checkVariationClass( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) { final VariantContext recalDatum = getMatchingRecalVC(vc, recals); if( recalDatum == null ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index d6df4ff1b..45fdad4f8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -273,11 +274,37 @@ public class VariantDataManager { } private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) { - return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && - ((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) && + return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && checkVariationClass( evalVC, trainVC ) && (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphicInSamples()); } + protected static boolean checkVariationClass( final VariantContext evalVC, final VariantContext trainVC ) { + switch( trainVC.getType() ) { + case SNP: + case MNP: + return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.SNP ); + case INDEL: + case MIXED: + case SYMBOLIC: + return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.INDEL ); + default: + return false; + } + } + + protected static boolean checkVariationClass( final VariantContext evalVC, final VariantRecalibratorArgumentCollection.Mode mode ) { + switch( mode ) { + case SNP: + return evalVC.isSNP() || evalVC.isMNP(); + case INDEL: + return evalVC.isIndel() || evalVC.isMixed() || evalVC.isSymbolic(); + case BOTH: + return true; + default: + throw new ReviewedStingException( "Encountered unknown recal mode: " + mode ); + } + } + public void writeOutRecalibrationTable( final VariantContextWriter recalWriter ) { // we need to sort in coordinate order in order to produce a valid VCF Collections.sort( data, new Comparator() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 5e2125fb2..ab2ff6176 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -245,7 +245,7 @@ public class VariantRecalibrator extends RodWalker