From e86da63d049d5c5a30f9e6228e4cdd488e580f2f Mon Sep 17 00:00:00 2001
From: Geraldine Van der Auwera
Date: Mon, 12 Dec 2016 08:53:23 -0500
Subject: [PATCH 01/58] Update pom versions to mark the start of GATK 3.8
development
---
pom.xml | 2 +-
protected/gatk-package-distribution/pom.xml | 2 +-
protected/gatk-queue-extensions-distribution/pom.xml | 2 +-
protected/gatk-queue-package-distribution/pom.xml | 2 +-
protected/gatk-tools-protected/pom.xml | 2 +-
protected/pom.xml | 2 +-
public/VectorPairHMM/pom.xml | 2 +-
public/external-example/pom.xml | 2 +-
public/gatk-engine/pom.xml | 2 +-
public/gatk-queue-extensions-generator/pom.xml | 2 +-
public/gatk-queue-extensions-public/pom.xml | 2 +-
public/gatk-queue/pom.xml | 2 +-
public/gatk-root/pom.xml | 2 +-
public/gatk-tools-public/pom.xml | 2 +-
public/gatk-utils/pom.xml | 2 +-
public/gsalib/pom.xml | 2 +-
public/package-tests/pom.xml | 2 +-
public/pom.xml | 2 +-
18 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/pom.xml b/pom.xml
index 900a862b8..2e3c07bb4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -13,7 +13,7 @@
org.broadinstitute.gatkgatk-root
- 3.7
+ 3.8-SNAPSHOTpublic/gatk-root
diff --git a/protected/gatk-package-distribution/pom.xml b/protected/gatk-package-distribution/pom.xml
index c97741bf2..1b2f536b9 100644
--- a/protected/gatk-package-distribution/pom.xml
+++ b/protected/gatk-package-distribution/pom.xml
@@ -5,7 +5,7 @@
org.broadinstitute.gatkgatk-aggregator
- 3.7
+ 3.8-SNAPSHOT../..
diff --git a/protected/gatk-queue-extensions-distribution/pom.xml b/protected/gatk-queue-extensions-distribution/pom.xml
index 3e08662b5..4642aeb68 100644
--- a/protected/gatk-queue-extensions-distribution/pom.xml
+++ b/protected/gatk-queue-extensions-distribution/pom.xml
@@ -5,7 +5,7 @@
org.broadinstitute.gatkgatk-aggregator
- 3.7
+ 3.8-SNAPSHOT../..
diff --git a/protected/gatk-queue-package-distribution/pom.xml b/protected/gatk-queue-package-distribution/pom.xml
index 90c1da59d..20ec5fc6f 100644
--- a/protected/gatk-queue-package-distribution/pom.xml
+++ b/protected/gatk-queue-package-distribution/pom.xml
@@ -5,7 +5,7 @@
org.broadinstitute.gatkgatk-aggregator
- 3.7
+ 3.8-SNAPSHOT../..
diff --git a/protected/gatk-tools-protected/pom.xml b/protected/gatk-tools-protected/pom.xml
index 551a9b4b1..eb1bfbdca 100644
--- a/protected/gatk-tools-protected/pom.xml
+++ b/protected/gatk-tools-protected/pom.xml
@@ -5,7 +5,7 @@
org.broadinstitute.gatkgatk-aggregator
- 3.7
+ 3.8-SNAPSHOT../..
diff --git a/protected/pom.xml b/protected/pom.xml
index f7784df3b..38677c7c6 100644
--- a/protected/pom.xml
+++ b/protected/pom.xml
@@ -5,7 +5,7 @@
org.broadinstitute.gatkgatk-root
- 3.7
+ 3.8-SNAPSHOT../public/gatk-root
diff --git a/public/VectorPairHMM/pom.xml b/public/VectorPairHMM/pom.xml
index e065b3b69..9ae0b3260 100644
--- a/public/VectorPairHMM/pom.xml
+++ b/public/VectorPairHMM/pom.xml
@@ -5,7 +5,7 @@
org.broadinstitute.gatkgatk-root
- 3.7
+ 3.8-SNAPSHOT../../public/gatk-root
diff --git a/public/external-example/pom.xml b/public/external-example/pom.xml
index 545a11b84..f7fc49613 100644
--- a/public/external-example/pom.xml
+++ b/public/external-example/pom.xml
@@ -9,7 +9,7 @@
External Example
- 3.7
+ 3.8-SNAPSHOT
- 2.8.1
- 2.7.2
+ 2.9.0
+ 2.8.3
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/VariantContextAdaptors.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/VariantContextAdaptors.java
index 868106bc6..d697682ab 100644
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/VariantContextAdaptors.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/VariantContextAdaptors.java
@@ -25,15 +25,10 @@
package org.broadinstitute.gatk.utils.refdata;
-import htsjdk.samtools.util.SequenceUtil;
import htsjdk.tribble.Feature;
-import htsjdk.tribble.annotation.Strand;
-import htsjdk.tribble.gelitext.GeliTextFeature;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
-import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.classloader.PluginManager;
import org.broadinstitute.gatk.utils.codecs.hapmap.RawHapMapFeature;
-import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.variantcontext.*;
import java.util.*;
@@ -59,10 +54,10 @@ public class VariantContextAdaptors {
//
// --------------------------------------------------------------------------------------------------------------
- private static Map,VCAdaptor> adaptors = new HashMap,VCAdaptor>();
+ private static Map,VCAdaptor> adaptors = new HashMap<>();
static {
- PluginManager vcAdaptorManager = new PluginManager(VCAdaptor.class);
+ PluginManager vcAdaptorManager = new PluginManager<>(VCAdaptor.class);
List adaptorInstances = vcAdaptorManager.createAllTypes();
for(VCAdaptor adaptor: adaptorInstances)
adaptors.put(adaptor.getAdaptableFeatureType(),adaptor);
@@ -110,67 +105,6 @@ public class VariantContextAdaptors {
}
}
-
- // --------------------------------------------------------------------------------------------------------------
- //
- // GELI to VariantContext
- //
- // --------------------------------------------------------------------------------------------------------------
-
- private static class GeliTextAdaptor implements VCAdaptor {
- /**
- * Converts Geli text records to VariantContext.
- * @return GeliTextFeature.
- */
- @Override
- public Class extends Feature> getAdaptableFeatureType() { return GeliTextFeature.class; }
-
- /**
- * convert to a Variant Context, given:
- * @param name the name of the ROD
- * @param input the Rod object, in this case a RodGeliText
- * @param ref the reference context
- * @return a VariantContext object
- */
- @Override
- public VariantContext convert(String name, Object input, ReferenceContext ref) {
- GeliTextFeature geli = (GeliTextFeature)input;
- if ( ! Allele.acceptableAlleleBases(String.valueOf(geli.getRefBase())) )
- return null;
- Allele refAllele = Allele.create(String.valueOf(geli.getRefBase()), true);
-
- // make sure we can convert it
- if ( geli.getGenotype().isHet() || !geli.getGenotype().containsBase(geli.getRefBase())) {
- // add the reference allele
- List alleles = new ArrayList();
- List genotypeAlleles = new ArrayList();
- // add all of the alt alleles
- for ( char alt : geli.getGenotype().toString().toCharArray() ) {
- if ( ! Allele.acceptableAlleleBases(String.valueOf(alt)) ) {
- return null;
- }
- Allele allele = Allele.create(String.valueOf(alt), false);
- if (!alleles.contains(allele) && !refAllele.basesMatch(allele.getBases())) alleles.add(allele);
-
- // add the allele, first checking if it's reference or not
- if (!refAllele.basesMatch(allele.getBases())) genotypeAlleles.add(allele);
- else genotypeAlleles.add(refAllele);
- }
-
- Map attributes = new HashMap();
- Collection genotypes = new ArrayList();
- Genotype call = GenotypeBuilder.create(name, genotypeAlleles);
-
- // add the call to the genotype list, and then use this list to create a VariantContext
- genotypes.add(call);
- alleles.add(refAllele);
- GenomeLoc loc = ref.getGenomeLocParser().createGenomeLoc(geli.getChr(),geli.getStart());
- return new VariantContextBuilder(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles).genotypes(genotypes).log10PError(-1 * geli.getLODBestToReference()).attributes(attributes).make();
- } else
- return null; // can't handle anything else
- }
- }
-
// --------------------------------------------------------------------------------------------------------------
//
// HapMap to VariantContext
@@ -203,7 +137,7 @@ public class VariantContextAdaptors {
if ( index < 0 )
return null; // we weren't given enough reference context to create the VariantContext
- HashSet alleles = new HashSet();
+ HashSet alleles = new HashSet<>();
Allele refSNPAllele = Allele.create(ref.getBase(), true);
int deletionLength = -1;
@@ -231,7 +165,7 @@ public class VariantContextAdaptors {
String a1 = genotypeStrings[i].substring(0,1);
String a2 = genotypeStrings[i].substring(1);
- ArrayList myAlleles = new ArrayList(2);
+ ArrayList myAlleles = new ArrayList<>(2);
// use the mapping to actual alleles, if available
if ( alleleMap != null ) {
diff --git a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/codecs/hapmap/HapMapUnitTest.java b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/codecs/hapmap/HapMapUnitTest.java
index f41f728ec..e79dfc22e 100644
--- a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/codecs/hapmap/HapMapUnitTest.java
+++ b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/codecs/hapmap/HapMapUnitTest.java
@@ -28,7 +28,7 @@ package org.broadinstitute.gatk.utils.codecs.hapmap;
import htsjdk.tribble.annotation.Strand;
import htsjdk.tribble.readers.LineIterator;
import htsjdk.tribble.readers.LineIteratorImpl;
-import htsjdk.tribble.readers.LineReaderUtil;
+import htsjdk.tribble.readers.SynchronousLineReader;
import htsjdk.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.gatk.utils.BaseTest;
import org.testng.Assert;
@@ -160,11 +160,11 @@ public class HapMapUnitTest extends BaseTest {
Assert.assertFalse(codec.canDecode("filename." + RawHapMapCodec.FILE_EXT + EXTRA_CHAR));
Assert.assertFalse(codec.canDecode("filename" + RawHapMapCodec.FILE_EXT));
}
-
-
+
public LineIterator getLineIterator() {
try {
- return new LineIteratorImpl(LineReaderUtil.fromBufferedStream(new PositionalBufferedStream(new FileInputStream(hapMapFile))));
+
+ return new LineIteratorImpl(new SynchronousLineReader(new PositionalBufferedStream(new FileInputStream(hapMapFile))));
} catch (FileNotFoundException e) {
Assert.fail("Unable to open hapmap file : " + hapMapFile);
}
From ab75b08ea7f9353e2b0fec0e52ff8dbbbee7db8c Mon Sep 17 00:00:00 2001
From: Ron Levine
Date: Fri, 17 Feb 2017 12:21:51 -0500
Subject: [PATCH 09/58] Remove unused compressed_block_size and close
BlockCompressedInputStream
---
.../engine/datasources/reads/utilities/UnzipSingleBlock.java | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/UnzipSingleBlock.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/UnzipSingleBlock.java
index c65a7e4ce..662babdf2 100644
--- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/UnzipSingleBlock.java
+++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/UnzipSingleBlock.java
@@ -39,15 +39,13 @@ public class UnzipSingleBlock extends CommandLineProgram {
@Input(fullName = "block_file", shortName = "b", doc = "block file over which to test unzipping", required = true)
private File blockFile;
- @Input(fullName = "compressed_block_size", shortName = "cbs", doc = "size of compressed block", required = true)
- private int compressedBufferSize;
-
public int execute() throws IOException {
final byte[] uncompressedBuffer = new byte[65536];
final BlockCompressedInputStream gunzipper = new BlockCompressedInputStream(blockFile);
gunzipper.setCheckCrcs(true);
gunzipper.read(uncompressedBuffer);
+ gunzipper.close();
System.out.printf("SUCCESS!%n");
From 3c347c138587c98c344bd303112e8ef970da7367 Mon Sep 17 00:00:00 2001
From: Ron Levine
Date: Mon, 20 Feb 2017 20:28:41 -0500
Subject: [PATCH 10/58] Total ploidy does not have to equal to the number of
priors
---
.../tools/walkers/genotyper/CustomAFPriorProvider.java | 2 --
.../variantutils/GenotypeGVCFsIntegrationTest.java | 10 ++++++++++
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/CustomAFPriorProvider.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/CustomAFPriorProvider.java
index c43106eec..90bb95b5b 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/CustomAFPriorProvider.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/CustomAFPriorProvider.java
@@ -86,8 +86,6 @@ public class CustomAFPriorProvider extends AFPriorProvider {
@Override
protected double[] buildPriors(final int totalPloidy) {
- if (totalPloidy != priors.length - 1)
- throw new IllegalStateException("requesting an invalid prior total ploidy " + totalPloidy + " != " + (priors.length - 1));
return priors;
}
}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java
index 8ddfe3123..00f08851c 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java
@@ -700,4 +700,14 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
spec.disableShadowBCF();
executeTest("testHomRefHighMQ", spec);
}
+
+ @Test
+ public void testInputPrior() {
+ final WalkerTestSpec spec = new WalkerTestSpec(
+ baseTestString(" -V " + privateTestDir + "gvcfExample1.vcf -V " + privateTestDir + "gvcfExample2.vcf " +
+ "-L 1:69485-69791 -inputPrior 0.2 -inputPrior 0.2 -inputPrior 0.2 -inputPrior 0.2", b37KGReference),
+ Collections.singletonList("860d133262160bbc75ce5849e5fa491f"));
+ spec.disableShadowBCF();
+ executeTest("testInputPrior", spec);
+ }
}
\ No newline at end of file
From 61875e4dfa4e2b6c251ccb264fbb03a0af0a7c35 Mon Sep 17 00:00:00 2001
From: Ron Levine
Date: Tue, 14 Feb 2017 18:01:48 -0500
Subject: [PATCH 11/58] Fix LeftAlignAndTrimVariants -split to not change
no-call genotypes to hom-ref
---
...eftAlignAndTrimVariantsIntegrationTest.java | 18 ++++++++++++++++++
.../utils/variant/GATKVariantContextUtils.java | 8 ++++++--
2 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/LeftAlignAndTrimVariantsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/LeftAlignAndTrimVariantsIntegrationTest.java
index 1bcbc50e3..9cb294497 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/LeftAlignAndTrimVariantsIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/LeftAlignAndTrimVariantsIntegrationTest.java
@@ -175,4 +175,22 @@ public class LeftAlignAndTrimVariantsIntegrationTest extends WalkerTest {
Arrays.asList("67657ee509665fd0d7a2c9024981ba92"));
executeTest("test left alignment of multiple alleles with genoptypes, keep original AC", spec);
}
+
+ @Test
+ public void testSplitLeftAlignmentWithMultiallelicNoCallGenotypes() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T LeftAlignAndTrimVariants -o %s -R " + hg19ReferenceWithChrPrefixInChromosomeNames + " --variant:vcf " + privateTestDir + "multiallelic-nocall.vcf -L chr12:104350950-104350960 --no_cmdline_in_header -split",
+ 1,
+ Arrays.asList("c7ce4310117f993593ce35f586451c53"));
+ executeTest("test splitting left alignment of multiple alleles with no-call genoptypes", spec);
+ }
+
+ @Test
+ public void testSplitLeftAlignmentWithMultiallelicBadAD() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T LeftAlignAndTrimVariants -o %s -R " + hg19ReferenceWithChrPrefixInChromosomeNames + " --variant:vcf " + privateTestDir + "multiallelic-nocall-badAD.vcf -L chr12:104350950-104350960 --no_cmdline_in_header -split",
+ 1,
+ IllegalStateException.class);
+ executeTest("test splitting left alignment of multiple alleles with bad AD", spec);
+ }
}
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java
index d67dd6c01..ce40acebe 100644
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java
@@ -927,7 +927,7 @@ public class GATKVariantContextUtils {
final List best = new LinkedList<>();
final Allele ref = allelesToUse.get(0);
for ( final Allele originalAllele : originalGT ) {
- best.add(allelesToUse.contains(originalAllele) ? originalAllele : ref);
+ best.add((allelesToUse.contains(originalAllele) || originalAllele.isNoCall()) ? originalAllele : ref);
}
gb.alleles(best);
break;
@@ -1045,7 +1045,7 @@ public class GATKVariantContextUtils {
else {
final List biallelics = new LinkedList<>();
- // if any of the genotypes ar ehet-not-ref (i.e. 1/2), set all of them to no-call
+ // if any of the genotypes are het-not-ref (i.e. 1/2), set all of them to no-call
final GenotypeAssignmentMethod genotypeAssignmentMethodUsed = hasHetNonRef(vc.getGenotypes()) ? GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL_NO_ANNOTATIONS : genotypeAssignmentMethod;
for ( final Allele alt : vc.getAlternateAlleles() ) {
@@ -1466,6 +1466,10 @@ public class GATKVariantContextUtils {
int currentIndex = 0;
for ( int i = alleleIndexesToUse.nextSetBit(0); i >= 0; i = alleleIndexesToUse.nextSetBit(i+1) ) {
+ if ( i >= oldAD.length ) {
+ throw new IllegalStateException("AD has " + oldAD.length + " items. It should have at least " + (i+1) + ".");
+ }
+
newAD[currentIndex++] = oldAD[i];
}
From 91339c12c9ae9553fa94bce61d26af6b586180f1 Mon Sep 17 00:00:00 2001
From: Ron Levine
Date: Wed, 22 Feb 2017 19:01:26 -0500
Subject: [PATCH 12/58] Move htsjdk to ver 2.9.1 and picard to ver 2.9.0
---
public/gatk-root/pom.xml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/public/gatk-root/pom.xml b/public/gatk-root/pom.xml
index 6d6365c83..5937b95ff 100644
--- a/public/gatk-root/pom.xml
+++ b/public/gatk-root/pom.xml
@@ -44,8 +44,8 @@
org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter
- 2.9.0
- 2.8.3
+ 2.9.1
+ 2.9.0
From 8316fa92f3e1e552e7a30865e8f8c8a82ae47496 Mon Sep 17 00:00:00 2001
From: Ron Levine
Date: Thu, 26 Jan 2017 08:42:25 -0500
Subject: [PATCH 13/58] Set AD and DP to zero if no read coverage
---
.../walkers/annotator/DepthPerAlleleBySample.java | 7 +++++--
.../gatk/tools/walkers/annotator/DepthPerSampleHC.java | 4 ++--
.../gatk/tools/walkers/annotator/QualByDepth.java | 10 ++++++----
...iedGenotyperGeneralPloidySuite1IntegrationTest.java | 6 +++---
.../genotyper/UnifiedGenotyperIntegrationTest.java | 4 ++--
...allerComplexAndSymbolicVariantsIntegrationTest.java | 2 +-
6 files changed, 19 insertions(+), 14 deletions(-)
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/DepthPerAlleleBySample.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/DepthPerAlleleBySample.java
index eac2c05dc..dd5c2b8d7 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/DepthPerAlleleBySample.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/DepthPerAlleleBySample.java
@@ -108,10 +108,13 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
if ( g == null || !g.isCalled() || ( stratifiedContext == null && alleleLikelihoodMap == null) )
return;
- if (alleleLikelihoodMap != null && !alleleLikelihoodMap.isEmpty())
+ if (alleleLikelihoodMap != null && !alleleLikelihoodMap.isEmpty()) {
annotateWithLikelihoods(alleleLikelihoodMap, vc, gb);
- else if ( stratifiedContext != null && (vc.isSNP()))
+ } else if ( stratifiedContext != null && (vc.isSNP())) {
annotateWithPileup(stratifiedContext, vc, gb);
+ } else {
+ gb.AD(new int[vc.getNAlleles()]);
+ }
}
private void annotateWithPileup(final AlignmentContext stratifiedContext, final VariantContext vc, final GenotypeBuilder gb) {
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/DepthPerSampleHC.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/DepthPerSampleHC.java
index 3b0fb7338..25b9ba1e3 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/DepthPerSampleHC.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/DepthPerSampleHC.java
@@ -137,9 +137,9 @@ public class DepthPerSampleHC extends GenotypeAnnotation implements StandardHCAn
dp++;
}
}
-
- gb.DP(dp);
}
+
+ gb.DP(dp);
}
@Override
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/QualByDepth.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/QualByDepth.java
index 032067352..3ad46c8d2 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/QualByDepth.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/QualByDepth.java
@@ -146,10 +146,12 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
if ( genotype.hasAD() ) {
final int[] AD = genotype.getAD();
final int totalADdepth = (int)MathUtils.sum(AD);
- if ( totalADdepth - AD[0] > 1 )
- ADrestrictedDepth += totalADdepth;
- standardDepth += totalADdepth;
- continue;
+ if ( totalADdepth != 0 ) {
+ if (totalADdepth - AD[0] > 1)
+ ADrestrictedDepth += totalADdepth;
+ standardDepth += totalADdepth;
+ continue;
+ }
}
if (stratifiedContexts!= null && !stratifiedContexts.isEmpty()) {
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java
index 9382cbe2a..5fa61312d 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java
@@ -69,17 +69,17 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testSNP_ACS_Pools() {
- executor.PC_LSV_Test_short("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES", "LSV_SNP_ACS", "SNP", "90ed6f1c268b9c57ecb52b35a88b9368");
+ executor.PC_LSV_Test_short("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES", "LSV_SNP_ACS", "SNP", "963e128314aceaab06c240850b836b10");
}
@Test(enabled = true)
public void testBOTH_GGA_Pools() {
- executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "5ad4dd6b0c3c170ba44fdad6d4fa58cf");
+ executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "f3bf8b59a04db9fb52dcdea76664606d");
}
@Test(enabled = true)
public void testINDEL_GGA_Pools() {
- executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "d26b0ba07e056b73fe4cfe873636d0d6");
+ executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "f8bae1491695d02ccb929205b4458759");
}
@Test(enabled = true)
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index d76a83128..1ff30ee47 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -129,12 +129,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOutputParameterAllConfident() {
- testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "182af9490667cb6ce1415305de4f3fdd");
+ testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "96567418000ec94abee98d70199a700a");
}
@Test
public void testOutputParameterAllSites() {
- testOutputParameters("--output_mode EMIT_ALL_SITES", "524e85c225ce330fd094de93f078fa56");
+ testOutputParameters("--output_mode EMIT_ALL_SITES", "5ccff12b34ace5e882e465047a286c5a");
}
private void testOutputParameters(final String args, final String md5) {
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java
index 755e4af49..e7c2c4030 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java
@@ -72,7 +72,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleComplex1() {
- HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "4f30d9c9f1eb4529071b7060e497235d");
+ HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "590428bdfe466159cb8e1637aaa4f47c");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
From 1e88d849e3e3c2a462a207b5cd530b7169bdd734 Mon Sep 17 00:00:00 2001
From: Ron Levine
Date: Tue, 13 Dec 2016 13:01:00 -0500
Subject: [PATCH 14/58] Fixed ClippingOP.getNewAlignmentStartOffset() to handle
N cases properly and added unit test
---
.../gatk/utils/clipping/ClippingOp.java | 51 +++++++-----
.../utils/clipping/ClippingOpUnitTest.java | 81 +++++++++++++++++++
2 files changed, 111 insertions(+), 21 deletions(-)
create mode 100644 public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/clipping/ClippingOpUnitTest.java
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/clipping/ClippingOp.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/clipping/ClippingOp.java
index 26147e2e9..1873bc1cb 100644
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/clipping/ClippingOp.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/clipping/ClippingOp.java
@@ -26,6 +26,7 @@
package org.broadinstitute.gatk.utils.clipping;
import com.google.java.contract.Requires;
+import com.google.common.annotations.VisibleForTesting;
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
@@ -196,35 +197,43 @@ public class ClippingOp {
}
/**
- * Given a cigar string, get the number of bases hard or soft clipped at the start
+ * Given two cigar strings corresponding to read before and after soft-clipping, returns an integer
+ * corresponding to the number of reference bases that the new string must be offset by in order to have the
+ * correct start according to the reference.
+ *
+ * @param clippedCigar the new cigar string after clipping
+ * @param oldCigar the cigar string of the read before it was soft clipped
*/
- private int getNewAlignmentStartOffset(final Cigar __cigar, final Cigar __oldCigar) {
- int num = 0;
- for (CigarElement e : __cigar.getCigarElements()) {
+ @VisibleForTesting
+ static int getNewAlignmentStartOffset(final Cigar clippedCigar, final Cigar oldCigar) {
+ int readBasesBeforeReference = 0; // The number of read bases consumed on the new cigar before reference bases are consumed
+
+ int basesBeforeReferenceOld = 0; // The number of read bases consumed on the old cigar before reference bases are consumed
+ int curReadCounter = 0; // A measure of the reference offset between the oldCigar and the clippedCigar
+
+ for (final CigarElement e : clippedCigar.getCigarElements()) {
if (!e.getOperator().consumesReferenceBases()) {
if (e.getOperator().consumesReadBases()) {
- num += e.getLength();
+ readBasesBeforeReference += e.getLength();
}
} else {
- break;
+ if (!e.getOperator().consumesReadBases()) {
+ basesBeforeReferenceOld -= e.getLength(); // Accounting for any D or N cigar operators at the front of the string
+ } else {
+ break;
+ }
}
}
- int oldNum = 0;
- int curReadCounter = 0;
- for (CigarElement e : __oldCigar.getCigarElements()) {
+ for (final CigarElement e : oldCigar.getCigarElements()) {
int curRefLength = e.getLength();
- int curReadLength = e.getLength();
- if (!e.getOperator().consumesReadBases()) {
- curReadLength = 0;
- }
+ int curReadLength = e.getOperator().consumesReadBases() ? e.getLength() : 0;
- boolean truncated = false;
- if (curReadCounter + curReadLength > num) {
- curReadLength = num - curReadCounter;
- curRefLength = num - curReadCounter;
- truncated = true;
+ final boolean truncated = curReadCounter + curReadLength > readBasesBeforeReference;
+ if (truncated) {
+ curReadLength = readBasesBeforeReference - curReadCounter;
+ curRefLength = readBasesBeforeReference - curReadCounter;
}
if (!e.getOperator().consumesReferenceBases()) {
@@ -232,14 +241,14 @@ public class ClippingOp {
}
curReadCounter += curReadLength;
- oldNum += curRefLength;
+ basesBeforeReferenceOld += curRefLength;
- if (curReadCounter > num || truncated) {
+ if (curReadCounter > readBasesBeforeReference || truncated) {
break;
}
}
- return oldNum;
+ return Math.abs(basesBeforeReferenceOld); // if oldNum is negative it means some of the preceding N/Ds were trimmed but not all so we take absolute value
}
/**
diff --git a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/clipping/ClippingOpUnitTest.java b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/clipping/ClippingOpUnitTest.java
new file mode 100644
index 000000000..f0610429d
--- /dev/null
+++ b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/clipping/ClippingOpUnitTest.java
@@ -0,0 +1,81 @@
+/*
+* Copyright 2012-2016 Broad Institute, Inc.
+*
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation
+* files (the "Software"), to deal in the Software without
+* restriction, including without limitation the rights to use,
+* copy, modify, merge, publish, distribute, sublicense, and/or sell
+* copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following
+* conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+package org.broadinstitute.gatk.utils.clipping;
+
+import htsjdk.samtools.Cigar;
+import htsjdk.samtools.TextCigarCodec;
+import org.broadinstitute.gatk.utils.BaseTest;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+public final class ClippingOpUnitTest extends BaseTest {
+
+ @Test (dataProvider = "SoftClipedReadsNewStart")
+ public void testGetNewAlignmentStartOffset(final String preClip, final String postClip, final int expectedResult) {
+ Cigar cpreClip = TextCigarCodec.decode(preClip);
+ Cigar cpostClip = TextCigarCodec.decode(postClip);
+ Assert.assertEquals(ClippingOp.getNewAlignmentStartOffset(cpostClip, cpreClip), expectedResult,
+ "getNewAlignmentStartOffset returned "+ClippingOp.getNewAlignmentStartOffset(cpostClip, cpreClip)+
+ " when "+expectedResult+" was expected for "+preClip.toString()+" which was clipped to "+postClip.toString());
+ }
+
+ // provider fields: cigar string before clip, cigar string after clip, expected result for getNewAlignmentStartOffset() method
+ @DataProvider(name = "SoftClipedReadsNewStart")
+ public Object[][] makeRevertSoftClipsBeforeContig() {
+ return new Object[][] {
+ {"70M", "10S60M", 10},
+ {"70M", "60M10S", 0},
+
+ {"30M10N30M", "30S10N30M", 30},
+ {"30M10N30M", "30S5N30M", 35},
+ {"30M10N30M", "30M10N30S", 0},
+ {"30M10N30M", "30S30M", 40},
+ {"30M10N30M", "30M30S", 0},
+ {"30M10N30M", "15S15M10N30M", 15},
+ {"30M10N30M", "30M10N15M15S", 0},
+ // Testing multiple sequential reference but not sequence consuming base types
+ {"10N10D40M", "40M", 20},
+ {"10N10D40M", "20S20M", 40},
+ {"10N10D40M", "5N10D40M", 5},
+ {"10N10D40M", "5D40M", 15},
+ {"10N10D40M", "10N10D20M20S", 0},
+
+ {"10S10N20M10N10M", "10S20M10N10M", 10},
+ {"10S10N20M10N10M", "10S10N20M10S", 0},
+
+ {"10S10I20M10I10M", "20S20M10I10M", 0},
+ {"10S10I20M10I10M", "10S10I20M20S", 0},
+ {"10S10I20M10I10M", "15S5I20M10I10M", 0},
+
+ {"10H60M", "10H10S50M", 10},
+ {"10H60M", "10H50M10S", 0},
+ {"10H10S50M", "10H20S40M", 10},
+ {"10X60M", "20S50M", 20},
+ {"10I40N20M","10S20M",40}
+ };
+ }
+}
From 49083fc7042c9f3d3a2aaa9bf391b7b84f9f130b Mon Sep 17 00:00:00 2001
From: Ron Levine
Date: Thu, 2 Mar 2017 12:20:36 -0500
Subject: [PATCH 15/58] Fix FindCoveredIntervals so -minBQ, -minMQ and -cov
work as expected
---
.../diagnostics/FindCoveredIntervals.java | 21 ++--
.../FindCoveredIntervalsIntegrationTest.java | 106 ++++++++++++++++++
2 files changed, 114 insertions(+), 13 deletions(-)
create mode 100644 protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/diagnostics/FindCoveredIntervalsIntegrationTest.java
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/diagnostics/FindCoveredIntervals.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/diagnostics/FindCoveredIntervals.java
index 245370d8e..a6b2d7ae4 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/diagnostics/FindCoveredIntervals.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/diagnostics/FindCoveredIntervals.java
@@ -94,6 +94,8 @@ import java.io.PrintStream;
* -R reference.fasta \
* -I my_file.bam \
* [-cov 10 \]
+ * [-minBQ 20 \]
+ * [-minMQ 20 \]
* [-uncovered \]
* -o output.list
*
@@ -110,7 +112,7 @@ public class FindCoveredIntervals extends ActiveRegionWalker {
private boolean outputUncovered = false;
@Argument(fullName = "coverage_threshold", shortName = "cov", doc = "The minimum allowable coverage to be considered covered", required = false)
- private int coverageThreshold = 20;
+ private int coverageThreshold = 0;
@Argument(fullName = "minBaseQuality", shortName = "minBQ", doc = "The minimum allowable base quality score to be counted for coverage",required = false)
private int minBaseQuality = 0;
@@ -118,22 +120,15 @@ public class FindCoveredIntervals extends ActiveRegionWalker {
@Argument(fullName = "minMappingQuality", shortName = "minMQ", doc = "The minimum allowable mapping quality score to be counted for coverage",required = false)
private int minMappingQuality = 0;
-
-
-
@Override
// Look to see if the region has sufficient coverage
public ActivityProfileState isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
+ final int filteredByQualityDepth = (minBaseQuality == 0 && minMappingQuality == 0) ? context.getBasePileup().depthOfCoverage() :
+ context.getBasePileup().getBaseAndMappingFilteredPileup(minBaseQuality,minMappingQuality).depthOfCoverage();
+ // The region is active if passes the base quality, mapping quality and coverage threshold.
+ final double isActiveProb = filteredByQualityDepth == 0 ? 0.0 : filteredByQualityDepth >= coverageThreshold ? 1.0 : 0.0;
- int depth;
- if(minBaseQuality == 0 && minMappingQuality == 0)
- depth = context.getBasePileup().getBaseFilteredPileup(coverageThreshold).depthOfCoverage();
- else
- depth = context.getBasePileup().getBaseAndMappingFilteredPileup(minBaseQuality,minMappingQuality).depthOfCoverage();
-
- // note the linear probability scale
- return new ActivityProfileState(ref.getLocus(), Math.min(depth / coverageThreshold, 1));
-
+ return new ActivityProfileState(ref.getLocus(), isActiveProb);
}
@Override
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/diagnostics/FindCoveredIntervalsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/diagnostics/FindCoveredIntervalsIntegrationTest.java
new file mode 100644
index 000000000..610677168
--- /dev/null
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/diagnostics/FindCoveredIntervalsIntegrationTest.java
@@ -0,0 +1,106 @@
+/*
+* By downloading the PROGRAM you agree to the following terms of use:
+*
+* BROAD INSTITUTE
+* SOFTWARE LICENSE AGREEMENT
+* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
+*
+* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE").
+*
+* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
+* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
+* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
+*
+* 1. DEFINITIONS
+* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
+*
+* 2. LICENSE
+* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
+* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
+* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
+* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
+*
+* 3. PHONE-HOME FEATURE
+* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
+*
+* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
+* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
+* Copyright 2012-2016 Broad Institute, Inc.
+* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
+* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
+*
+* 5. INDEMNIFICATION
+* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
+*
+* 6. NO REPRESENTATIONS OR WARRANTIES
+* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
+*
+* 7. ASSIGNMENT
+* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
+*
+* 8. MISCELLANEOUS
+* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
+* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
+* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
+* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
+* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
+* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
+* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
+*/
+
+package org.broadinstitute.gatk.tools.walkers.diagnostics;
+
+import org.broadinstitute.gatk.engine.walkers.WalkerTest;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.Arrays;
+
+public class FindCoveredIntervalsIntegrationTest extends WalkerTest {
+
+ private static String EMPTY_FILE_MD5 = "d41d8cd98f00b204e9800998ecf8427e";
+ private static String FULL_INTERVAL_MD5 = "68ac56c935428cce1e4bd621ea8b2f36"; // depends upon interval used
+
+ @DataProvider(name = "ThresholdOptions")
+ public Object[][] createThresholdOptionsData() {
+ return new Object[][]{
+ {0, 0, 0, FULL_INTERVAL_MD5},
+ {1, 0, 0, FULL_INTERVAL_MD5},
+ {2, 0, 0, "8bd0537367921ff1addcf4e59c49c2b8"},
+ {3, 0, 0, "39dc6acd174d5abeedbea118b78776bf"},
+ {4, 0, 0, "c6de100a90070e44cc6edc08c8410888"},
+ {5, 0, 0, "4ba47914151f1445a8756a85e0a0473f"},
+ {10, 0, 0, "0d1bce17890a30b0f8bdf46566a21f0f"},
+ {20, 0, 0, EMPTY_FILE_MD5},
+ {1, 30, 0, FULL_INTERVAL_MD5},
+ {1, 35, 0, "bdfb78fe674c7ce8fbd2e36ea36bbcc6"},
+ {1, 40, 0, EMPTY_FILE_MD5},
+ {1, 0, 10, FULL_INTERVAL_MD5},
+ {1, 0, 20, FULL_INTERVAL_MD5},
+ {1, 0, 30, "4cb41c110d62c35c92b50a9bff799fa6"},
+ {1, 0, 40, "175597ce332c561e9d1bfba77d661520"},
+ {1, 0, 50, "175597ce332c561e9d1bfba77d661520"},
+ {1, 0, 60, "175597ce332c561e9d1bfba77d661520"},
+ {1, 0, 70, EMPTY_FILE_MD5},
+ {3, 30, 40, "3c3eebcd72c2254169b984379cbf0b0c"},
+ {3, 35, 40, "899f6e97a6c25f09d806ea5551e14142"},
+ {3, 35, 50, "899f6e97a6c25f09d806ea5551e14142"}
+ };
+ }
+
+ @Test(dataProvider = "ThresholdOptions")
+ public void testFindCoveredIntervals(final int coverageThreshold, final int minBaseQuality, final int minMappingQuality, final String md5) {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T FindCoveredIntervals" +
+ " -R " + hg18Reference +
+ " -I " + privateTestDir + "HiSeq.1mb.1RG.bam" +
+ " -L chr1:10,000,000-10,010,000" +
+ " -cov " + coverageThreshold +
+ " -minBQ " + minBaseQuality +
+ " -minMQ " + minMappingQuality +
+ " -o %s",
+ Arrays.asList(md5));
+ executeTest("testFindCoveredIntervals", spec);
+ }
+}
From 687dee22acc9877f6c6222e64e2c399eb52ebbf6 Mon Sep 17 00:00:00 2001
From: Ron Levine
Date: Fri, 10 Mar 2017 10:44:42 -0500
Subject: [PATCH 16/58] Fix CombineVariants merge for different types
---
.../CombineVariantsIntegrationTest.java | 23 +++++++++++++++----
.../variant/GATKVariantContextUtils.java | 21 +++++++++++++----
2 files changed, 35 insertions(+), 9 deletions(-)
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariantsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariantsIntegrationTest.java
index eddf113ac..acdee6644 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariantsIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariantsIntegrationTest.java
@@ -144,18 +144,18 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "9bdda937754e1407183406808f560723"); } // official project VCF files in tabix format
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "6344953a82a422115bd647ec1d696b94"); } // official project VCF files in tabix format
- @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "51cf4543e46c8434e32c426f59507d1e"); }
+ @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "9a62e340fe17c8659786f56cc992975a"); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f9d1d7e6246f0ce9e493357d5b320323"); }
@Test public void uniqueSNPs() {
// parallelism must be disabled because the input VCF is malformed (DB=0) and parallelism actually fixes this which breaks the md5s
//both of these files have the YRI trio and merging of duplicate samples without priority must be specified with UNSORTED merge type
- combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", " -genotypeMergeOptions UNSORTED", "5aece78046bfb7d6ee8dc4d551542e3a", true);
+ combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", " -genotypeMergeOptions UNSORTED", "698f591bdc8dca1c9a3473ebc447c787", true);
}
- @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "0897efcc0046bd94760315838d4d0fa5"); }
- @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "8b12b09a6ec4e3fde2352bbf82637f1e"); }
+ @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "7750b77316b8f02df5c9a60cc64c156d"); }
+ @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "3af8a310fca5a4ace3172528d0bc1d38"); }
@Test public void threeWayWithRefs() {
WalkerTestSpec spec = new WalkerTestSpec(
@@ -169,7 +169,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
" -genotypeMergeOptions UNIQUIFY -L 1"),
1,
- Arrays.asList("8b75e835ed19c06c358a2185cd0e14db"));
+ Arrays.asList("085258a3c507e30860ec87b6b65f5f11"));
cvExecuteTest("threeWayWithRefs", spec, true);
}
@@ -251,4 +251,17 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
Arrays.asList(""));
executeTest("combineSpanningDels: ", spec);
}
+
+ @Test
+ public void combineDifferentTypes() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T CombineVariants --no_cmdline_in_header -o %s "
+ + " -R " + b37KGReference
+ + " -V " + privateTestDir + "same.variant.indel.vcf"
+ + " -V " + privateTestDir + "same.variant.mixed.vcf"
+ + " -multipleAllelesMergeType MIX_TYPES",
+ 1,
+ Arrays.asList("629bb763e9af8edbec0a77cc91eea617"));
+ executeTest("combineDifferentTypes: ", spec);
+ }
}
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java
index ce40acebe..8df0185c8 100644
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java
@@ -99,7 +99,7 @@ public class GATKVariantContextUtils {
return Collections.nCopies(ploidy,allele);
}
- private static boolean hasPLIncompatibleAlleles(final Collection alleleSet1, final Collection alleleSet2) {
+ private static boolean hasIncompatibleAlleles(final Collection alleleSet1, final Collection alleleSet2) {
final Iterator it1 = alleleSet1.iterator();
final Iterator it2 = alleleSet2.iterator();
@@ -1135,8 +1135,6 @@ public class GATKVariantContextUtils {
* simpleMerge does not verify any more unique sample names EVEN if genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE. One should use
* SampleUtils.verifyUniqueSamplesNames to check that before using simpleMerge.
*
- * For more information on this method see: http://www.thedistractionnetwork.com/programmer-problem/
- *
* @param unsortedVCs collection of unsorted VCs
* @param priorityListOfVCs priority list detailing the order in which we should grab the VCs
* @param filteredRecordMergeType merge type for filtered records
@@ -1190,6 +1188,7 @@ public class GATKVariantContextUtils {
final Set inconsistentAttributes = new HashSet<>();
final Set variantSources = new HashSet<>(); // contains the set of sources we found in our set of VCs that are variant
final Set rsIDs = new LinkedHashSet<>(1); // most of the time there's one id
+ final Map nonBooleanAttributeOccurrences = new HashMap<>();
VariantContext longestVC = first;
int depth = 0;
@@ -1262,6 +1261,13 @@ public class GATKVariantContextUtils {
for (final Map.Entry p : vc.getAttributes().entrySet()) {
final String key = p.getKey();
final Object value = p.getValue();
+ if ( !(value instanceof Boolean) ) {
+ if ( nonBooleanAttributeOccurrences.containsKey(key) ) {
+ nonBooleanAttributeOccurrences.put(key, nonBooleanAttributeOccurrences.get(key) + 1);
+ } else {
+ nonBooleanAttributeOccurrences.put(key, 1);
+ }
+ }
// only output annotations that have the same value in every input VC
// if we don't like the key already, don't go anywhere
if ( ! inconsistentAttributes.contains(key) ) {
@@ -1280,12 +1286,15 @@ public class GATKVariantContextUtils {
}
}
+ // if the non-boolean attribute is not in all of the VCs, remove it.
+ nonBooleanAttributeOccurrences.entrySet().stream().filter(a -> a.getValue() < VCs.size()).map(a -> a.getKey()).forEach(attributes::remove);
+
// if we have more alternate alleles in the merged VC than in one or more of the
// original VCs, we need to strip out the GL/PLs (because they are no longer accurate), as well as allele-dependent attributes like AC,AF, and AD
for ( final VariantContext vc : VCs ) {
if (vc.getAlleles().size() == 1)
continue;
- if ( hasPLIncompatibleAlleles(alleles, vc.getAlleles())) {
+ if ( hasIncompatibleAlleles(alleles, vc.getAlleles())) {
if ( ! genotypes.isEmpty() ) {
logger.debug(String.format("Stripping PLs at %s:%d-%d due to incompatible alleles merged=%s vs. single=%s",
vc.getChr(), vc.getStart(), vc.getEnd(), alleles, vc.getAlleles()));
@@ -1348,6 +1357,10 @@ public class GATKVariantContextUtils {
// Trim the padded bases of all alleles if necessary
final VariantContext merged = builder.make();
+
+ // Recalculate chromosome count annotations or remove them if no genotypes
+ VariantContextUtils.calculateChromosomeCounts(merged, attributes, true);
+
if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged);
return merged;
}
From c20e682a979506ad2d74c1df155257d42718a805 Mon Sep 17 00:00:00 2001
From: Ron Levine
Date: Mon, 20 Mar 2017 22:41:01 -0400
Subject: [PATCH 17/58] Update MuTect2 documentation about not computing
obvious germline variants
---
.../gatk/tools/walkers/cancer/m2/MuTect2.java | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java
index 5e04f39d1..2f5ad5a38 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java
@@ -109,11 +109,13 @@ import static java.lang.Math.pow;
/**
* Call somatic SNPs and indels via local re-assembly of haplotypes
*
- *
MuTect2 is a somatic SNP and indel caller that combines the DREAM challenge-winning somatic genotyping engine of the original MuTect (Cibulskis et al., 2013) with the assembly-based machinery of HaplotypeCaller. The basic operation of MuTect2 proceeds similarly to that of the HaplotypeCaller.
+ *
MuTect2 is a somatic SNP and indel caller that combines the DREAM challenge-winning somatic genotyping engine of the original MuTect (Cibulskis et al., 2013) with the assembly-based machinery of HaplotypeCaller.
*
- *
Differences from HaplotypeCaller
- *
While the HaplotypeCaller relies on a ploidy assumption (diploid by default) to inform its genotype likelihood and variant quality calculations, MuTect2 allows for a varying allelic fraction for each variant, as is often seen in tumors with purity less than 100%, multiple subclones, and/or copy number variation (either local or aneuploidy). MuTect2 also differs from the HaplotypeCaller in that it does apply some hard filters to variants before producing output.
- *
Note that the GVCF generation capabilities of HaplotypeCaller are NOT available in MuTect2, even though some of the relevant arguments are listed below. There are currently no plans to make GVCF calling available in MuTect2.
+ *
How MuTect2 works
+ *
The basic operation of MuTect2 proceeds similarly to that of the HaplotypeCaller, with a few key differences.
+ *
While the HaplotypeCaller relies on a ploidy assumption (diploid by default) to inform its genotype likelihood and variant quality calculations, MuTect2 allows for a varying allelic fraction for each variant, as is often seen in tumors with purity less than 100%, multiple subclones, and/or copy number variation (either local or aneuploidy). MuTect2 also differs from the HaplotypeCaller in that it does apply some hard filters to variants before producing output. Finally, some of the parameters used for ActiveRegion determination and graph assembly are set to different default values in MuTect2 compared to HaplotypeCaller.
+ *
Note that MuTect2 is designed to produce somatic variant calls only, and includes some logic to skip variant sites that are very clearly germline based on the evidence present in the Normal sample compared to the Tumor sample. This is done at an early stage to avoid spending computational resources on germline events. As a result the tool is NOT capable of emitting records for variant calls that are clearly germline unless it is run in artifact-detection mode, which is used for Panel-Of-Normals creation.
+ *
Note also that the GVCF generation capabilities of HaplotypeCaller are NOT available in MuTect2, even though some of the relevant arguments are listed below. There are currently no plans to make GVCF calling available in MuTect2.
*
*
Usage examples
*
These are example commands that show how to run MuTect2 for typical use cases. Square brackets ("[ ]") indicate optional arguments. Note that parameter values shown here may not be the latest recommended; see the Best Practices documentation for detailed recommendations.
From 4188745074ef56981867428c2307d25ca9159124 Mon Sep 17 00:00:00 2001
From: Ron Levine
Date: Tue, 21 Mar 2017 12:55:30 -0400
Subject: [PATCH 18/58] Document how VTT orders samples in output table
---
.../tools/walkers/variantutils/VariantsToTable.java | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantsToTable.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantsToTable.java
index ef4546181..5a4b7e2b5 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantsToTable.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantsToTable.java
@@ -69,12 +69,10 @@ import java.util.*;
*
If a VCF record is missing a value, then the tool by default throws an error, but the special value NA can
- * be emitted instead if requested at the command line using --allowMissingData.
+ *
Caveats
+ *
+ *
Some annotations cannot be applied to all variant sites, so VCFs typically contain records where some annotation values are missing. By default this tool throws an error if you request export of an annotation for which not all records have values. You can override this behavior by setting --allowMissingData in the command line. As a result, the tool will emit the special value NA for the missing annotations in those records.
+ *
When you request export of sample-level annotations (FORMAT field annotations such as GT), the annotations will be identified per-sample. If multiple samples are present in the VCF, the columns will be ordered alphabetically by sample name (SM tag).
+ *
*
* @author Mark DePristo
* @since 2010
From 278985b8bd4c22e0858c6d9377cc41fe6233c253 Mon Sep 17 00:00:00 2001
From: Ron Levine
Date: Thu, 23 Mar 2017 11:46:38 -0400
Subject: [PATCH 19/58] Update PLINK link and change lines to columns
---
.../tools/walkers/variantutils/VariantsToBinaryPed.java | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantsToBinaryPed.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantsToBinaryPed.java
index 57c0fe7ff..8ec77ff2b 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantsToBinaryPed.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantsToBinaryPed.java
@@ -53,14 +53,14 @@ import java.util.*;
* Convert VCF to binary pedigree file
*
*
This tool takes a VCF and produces a binary pedigree as used by
- * PLINK, consisting of three associated files (.bed/.bim/.fam).
+ * PLINK, consisting of three associated files (.bed/.bim/.fam).
*
*
Inputs
*
* A VCF file and a metadata file.
*
*
-*
The metaData file can take two formats, the first of which is the first 6 lines of the standard pedigree file. This
+*
The metaData file can take two formats, the first of which is the first 6 columns of the standard pedigree file. This
* is what Plink describes as a .fam file. Note that the sex encoding convention is 1=male; 2=female; other=unknown. An example .fam file is as follows (note that there is no header):
- * A binary pedigree in PLINK format, composed of three files (.bed/.bim/.fam). See the PLINK format specification for more details.
+ * A binary pedigree in PLINK format, composed of three files (.bed/.bim/.fam). See the PLINK format specification for more details.
*
*
*
Example
From a950d0778ff3276102e4858f7176e10716725337 Mon Sep 17 00:00:00 2001
From: David Roazen
Date: Fri, 31 Mar 2017 12:36:53 -0400
Subject: [PATCH 20/58] Add the Intel GKL as a dependency, and turn on the
Intel inflater/deflater by default
---
public/gatk-engine/pom.xml | 4 ++++
.../gatk/engine/GenomeAnalysisEngine.java | 22 +++++++++++++++++++
.../arguments/GATKArgumentCollection.java | 13 +++++++++++
public/gatk-root/pom.xml | 5 +++++
4 files changed, 44 insertions(+)
diff --git a/public/gatk-engine/pom.xml b/public/gatk-engine/pom.xml
index e35726a06..db1e33bb5 100644
--- a/public/gatk-engine/pom.xml
+++ b/public/gatk-engine/pom.xml
@@ -24,6 +24,10 @@
gatk-utils${project.version}
+
+ com.intel.gkl
+ gkl
+ net.java.dev.jets3tjets3t
diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java
index c564d78d6..6a8479645 100644
--- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java
+++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java
@@ -26,11 +26,15 @@
package org.broadinstitute.gatk.engine;
import com.google.java.contract.Ensures;
+import com.intel.gkl.compression.IntelDeflaterFactory;
+import com.intel.gkl.compression.IntelInflaterFactory;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFile;
+import htsjdk.samtools.util.BlockCompressedOutputStream;
+import htsjdk.samtools.util.BlockGunzipper;
import htsjdk.variant.vcf.VCFConstants;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
@@ -268,6 +272,9 @@ public class GenomeAnalysisEngine {
if (args.nonDeterministicRandomSeed)
Utils.resetRandomGenerator(System.currentTimeMillis());
+ // Try to use the accelerated Intel zlib implementations if possible, or fall back to the JDK implementation if necessary (or requested)
+ initializeCompressionAndDecompression();
+
// if the use specified an input BQSR recalibration table then enable on the fly recalibration
if (args.BQSR_RECAL_FILE != null) {
if (args.BQSR_RECAL_FILE.exists()) {
@@ -386,6 +393,21 @@ public class GenomeAnalysisEngine {
return Collections.unmodifiableList(filters);
}
+ public void initializeCompressionAndDecompression() {
+ // Use the Intel Inflater/Deflater for accelerated BAM reading/writing, if possible:
+ if (! getArguments().useJdkDeflater) {
+ BlockCompressedOutputStream.setDefaultDeflaterFactory(new IntelDeflaterFactory());
+ }
+ if (! getArguments().useJdkInflater) {
+ BlockGunzipper.setDefaultInflaterFactory(new IntelInflaterFactory());
+ }
+
+ final boolean usingIntelDeflater = (BlockCompressedOutputStream.getDefaultDeflaterFactory() instanceof IntelDeflaterFactory && ((IntelDeflaterFactory)BlockCompressedOutputStream.getDefaultDeflaterFactory()).usingIntelDeflater());
+ logger.info("Deflater: " + (usingIntelDeflater ? "IntelDeflater": "JdkDeflater"));
+ final boolean usingIntelInflater = (BlockGunzipper.getDefaultInflaterFactory() instanceof IntelInflaterFactory && ((IntelInflaterFactory)BlockGunzipper.getDefaultInflaterFactory()).usingIntelInflater());
+ logger.info("Inflater: " + (usingIntelInflater ? "IntelInflater": "JdkInflater"));
+ }
+
/**
* Returns a list of active, initialized read transformers
*
diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java
index f7d7ca0a1..b7d0d5a4b 100644
--- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java
+++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java
@@ -402,6 +402,19 @@ public class GATKArgumentCollection {
@Advanced
@Argument(fullName = "unsafe", shortName = "U", doc = "Enable unsafe operations: nothing will be checked at runtime", required = false)
public ValidationExclusion.TYPE unsafe;
+
+ /**
+ * There are two different libraries that can be used for compression when writing BAM files: IntelDeflater (the new default in GATK version 3.8) and the JDK Deflater (the previous GATK default) which is an older implementation and is slower in our tests. Use this flag to disable the IntelDeflater and use the JDK Deflater in its place.
+ */
+ @Argument(fullName = "use_jdk_deflater", shortName = "jdk_deflater", doc = "Use the JDK Deflater instead of the IntelDeflater for writing BAMs")
+ public boolean useJdkDeflater = false;
+
+ /**
+ * There are two different libraries that can be used for decompression when reading BAM files: IntelInflater (the new default in GATK version 3.8) and the JDK Inflater (the previous GATK default) which is an older implementation and is slower in our tests. Use this flag to disable the IntelInflater and use the JDK Inflater in its place.
+ */
+ @Argument(fullName = "use_jdk_inflater", shortName = "jdk_inflater", doc = "Use the JDK Inflater instead of the IntelInflater for reading BAMs")
+ public boolean useJdkInflater = false;
+
/**
* Not recommended for general use. Disables both auto-generation of index files and index file locking
* when reading VCFs and other rods and an index isn't present or is out-of-date. The file locking necessary for auto index
diff --git a/public/gatk-root/pom.xml b/public/gatk-root/pom.xml
index 5937b95ff..e6427dae5 100644
--- a/public/gatk-root/pom.xml
+++ b/public/gatk-root/pom.xml
@@ -86,6 +86,11 @@
picard${picard.version}
+
+ com.intel.gkl
+ gkl
+ 0.4.3
+ log4jlog4j
From 4fe4ace232c6f01a9d1066ed4e0219d83b446bf3 Mon Sep 17 00:00:00 2001
From: skwalker
Date: Wed, 26 Apr 2017 10:42:54 -0400
Subject: [PATCH 21/58] minor bug fix in depth of coverage for partitioning by
library + test (#1574)
minor bug fix in depth of coverage for partitioning by library
---
.../tools/walkers/coverage/CoverageUtils.java | 9 ++++---
.../DepthOfCoverageIntegrationTest.java | 26 +++++++++++++++++++
2 files changed, 31 insertions(+), 4 deletions(-)
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/coverage/CoverageUtils.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/coverage/CoverageUtils.java
index f222380dc..62880ac19 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/coverage/CoverageUtils.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/coverage/CoverageUtils.java
@@ -206,12 +206,13 @@ public class CoverageUtils {
for (PileupElement e : countPileup) {
SAMReadGroupRecord readGroup = getReadGroup(e.getRead());
- String readGroupId = readGroup.getSample() + "_" + readGroup.getReadGroupId();
- int[] counts = countsByRGName.get(readGroupId);
+ // uniqueReadGroupID is unique across the library, read group ID, and the sample
+ String uniqueReadGroupId = readGroup.getSample() + "_" + readGroup.getReadGroupId() + "_" + readGroup.getLibrary() + "_" + readGroup.getPlatformUnit();
+ int[] counts = countsByRGName.get(uniqueReadGroupId);
if (counts == null) {
counts = new int[6];
- countsByRGName.put(readGroupId, counts);
- RGByName.put(readGroupId, readGroup);
+ countsByRGName.put(uniqueReadGroupId, counts);
+ RGByName.put(uniqueReadGroupId, readGroup);
}
updateCounts(counts, e);
diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/coverage/DepthOfCoverageIntegrationTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/coverage/DepthOfCoverageIntegrationTest.java
index 9515978cf..767339724 100644
--- a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/coverage/DepthOfCoverageIntegrationTest.java
+++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/coverage/DepthOfCoverageIntegrationTest.java
@@ -255,4 +255,30 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 0, new ArrayList());
execute("testMissingSAMHeaderReadGroupSample", spec);
}
+
+ @Test
+ public void testPartitionByLibrary() {
+ final String[] intervals = {"1:2329607"};
+ final String[] bams = {
+ // These 3 bams all have the same 2 read group IDs and the same sample name (HapMapVal_10plex)
+ privateTestDir + "partitionByLibraryExample1.bam",
+ privateTestDir + "partitionByLibraryExample2.bam",
+ privateTestDir + "partitionByLibraryExample3.bam"};
+
+ final String cmd = buildRootCmd(hg19Reference, new ArrayList<>(Arrays.asList(bams)), new ArrayList<>(Arrays.asList(intervals))) + " -mmq 20 -mbq 20 -pt library";
+ final WalkerTestSpec spec = new WalkerTestSpec(cmd, 0, new ArrayList());
+
+ final File baseOutputFile = createTempFile("depthofcoveragepartitionbylibrary", ".tmp");
+ spec.setOutputFileLocation(baseOutputFile);
+
+ spec.addAuxFile("e0f0ab161225bf24fed20eb86777508e", baseOutputFile);
+ spec.addAuxFile("744e4d654536ef327288c9ee62adea15", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_counts"));
+ spec.addAuxFile("dc8b3aa0fee653f0e310cc8ada2bdb10", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions"));
+ spec.addAuxFile("a1050deac4a01fd661149bb47253b590", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_statistics"));
+ spec.addAuxFile("e3f0f03233e0dc7d17811ed84101d8d8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
+ spec.addAuxFile("76a0fde72b9c42546ee5ca1f88d4fb09", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics"));
+ spec.addAuxFile("ed5c74dc512eae95987ed3b52f9a8743", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary"));
+
+ execute("testPartitionByLibrary", spec);
+ }
}
From a8f70c891f3ea1fe4f44b935913d32624128854b Mon Sep 17 00:00:00 2001
From: Samuel Friedman
Date: Fri, 4 Nov 2016 15:05:23 -0400
Subject: [PATCH 22/58] Create GMM from model reports in VQSR
---
.../GaussianMixtureModel.java | 9 +
.../MultivariateGaussian.java | 10 +
.../VariantRecalibrator.java | 209 +++++++++++++++++-
.../gatk/utils/report/GATKReportTable.java | 1 -
4 files changed, 216 insertions(+), 13 deletions(-)
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/GaussianMixtureModel.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/GaussianMixtureModel.java
index 0995fc10c..59a5af92d 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/GaussianMixtureModel.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/GaussianMixtureModel.java
@@ -86,6 +86,11 @@ public class GaussianMixtureModel {
gaussians = new ArrayList<>( numGaussians );
for( int iii = 0; iii < numGaussians; iii++ ) {
final MultivariateGaussian gaussian = new MultivariateGaussian( numAnnotations );
+ gaussian.pMixtureLog10 = Math.log10( 1.0 / ((double)numGaussians) );
+ gaussian.sumProb = 1.0 / ((double) numGaussians);
+ gaussian.hyperParameter_a = priorCounts;
+ gaussian.hyperParameter_b = shrinkage;
+ gaussian.hyperParameter_lambda = dirichletParameter;
gaussians.add( gaussian );
}
this.shrinkage = shrinkage;
@@ -190,6 +195,9 @@ public class GaussianMixtureModel {
final double[] pVarInGaussianNormalized = MathUtils.normalizeFromLog10( pVarInGaussianLog10, false );
gaussianIndex = 0;
for( final MultivariateGaussian gaussian : gaussians ) {
+ if (Double.isNaN(pVarInGaussianNormalized[gaussianIndex])){
+ logger.info(" Got a NaN at gaussian:" + Integer.toString(gaussianIndex) + " datum:" + datum.toString());
+ }
gaussian.assignPVarInGaussian( pVarInGaussianNormalized[gaussianIndex++] );
}
}
@@ -315,4 +323,5 @@ public class GaussianMixtureModel {
protected List getModelGaussians() {return Collections.unmodifiableList(gaussians);}
protected int getNumAnnotations() {return empiricalMu.length;}
+
}
\ No newline at end of file
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/MultivariateGaussian.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/MultivariateGaussian.java
index db46b0c33..5e9d18ffd 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/MultivariateGaussian.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/MultivariateGaussian.java
@@ -271,4 +271,14 @@ public class MultivariateGaussian {
resetPVarInGaussian(); // clean up some memory
}
+
+
+ public void setSumProb( final List data ) {
+ sumProb = 0.0;
+
+ for( int datumIndex = 0; datumIndex < data.size(); datumIndex++ ) {
+ final double prob = pVarInGaussian.get(datumIndex);
+ if(!Double.isNaN(prob)) sumProb += prob;
+ }
+ }
}
\ No newline at end of file
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java
index 416d72c67..470ce3cdb 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java
@@ -66,6 +66,7 @@ import org.broadinstitute.gatk.utils.R.RScriptExecutor;
import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.utils.help.HelpConstants;
import org.broadinstitute.gatk.utils.report.GATKReport;
+import org.broadinstitute.gatk.utils.report.GATKReportColumn;
import org.broadinstitute.gatk.utils.report.GATKReportTable;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.vcf.VCFHeader;
@@ -80,10 +81,15 @@ import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
+import java.nio.file.Files;
import java.util.*;
import Jama.Matrix;
+
+import java.io.FileWriter;
+import java.io.BufferedWriter;
+import java.io.IOException;
/**
* Build a recalibration model to score variant quality for filtering purposes
*
@@ -274,6 +280,8 @@ public class VariantRecalibrator extends RodWalker inputCollection : inputCollections )
input.addAll(inputCollection.getRodBindings());
+
+
}
+
//---------------------------------------------------------------------------------------------------------------
//
// map
@@ -480,18 +491,76 @@ public class VariantRecalibrator extends RodWalker positiveTrainingData = dataManager.getTrainingData();
- final GaussianMixtureModel goodModel = engine.generateModel(positiveTrainingData, VRAC.MAX_GAUSSIANS);
- engine.evaluateData(dataManager.getData(), goodModel, false);
+ final List negativeTrainingData;
- // Generate the negative model using the worst performing data and evaluate each variant contrastively
- final List negativeTrainingData = dataManager.selectWorstVariants();
- final GaussianMixtureModel badModel = engine.generateModel(negativeTrainingData, Math.min(VRAC.MAX_GAUSSIANS_FOR_NEGATIVE_MODEL, VRAC.MAX_GAUSSIANS));
- dataManager.dropAggregateData(); // Don't need the aggregate data anymore so let's free up the memory
- engine.evaluateData(dataManager.getData(), badModel, true);
+ File inputFile = new File(inputModel);
+ if (inputFile.exists()){ // Load GMM from a file
+ GATKReport reportIn = new GATKReport(inputFile);
+ GATKReportTable amTable = reportIn.getTable("AnnotationMeans");
+ GATKReportTable astdTable = reportIn.getTable("AnnotationStdevs");
- if (badModel.failedToConverge || goodModel.failedToConverge) {
- throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe. Please consider " + (badModel.failedToConverge ? "raising the number of variants used to train the negative model (via --minNumBadVariants 5000, for example)." : "lowering the maximum number of Gaussians allowed for use in the model (via --maxGaussians 4, for example)."));
+ GATKReportTable nmcTable = reportIn.getTable("NegativeModelCovariances");
+ GATKReportTable nmmTable = reportIn.getTable("NegativeModelMeans");
+ GATKReportTable nPMixTable = reportIn.getTable("BadGaussianPMix");
+ GATKReportTable pmcTable = reportIn.getTable("PositiveModelCovariances");
+ GATKReportTable pmmTable = reportIn.getTable("PositiveModelMeans");
+ GATKReportTable pPMixTable = reportIn.getTable("GoodGaussianPMix");
+
+ double[] meanVector;
+ double[] stdVector;
+ int numAnnotations = 0;
+
+ for (GATKReportColumn reportColumn : amTable.getColumnInfo() ) {
+ if (reportColumn.getColumnName().equals("Mean")) {
+ meanVector = new double[amTable.getNumRows()];
+ numAnnotations = amTable.getNumRows();
+ for (int row = 0; row < amTable.getNumRows(); row++) {
+ meanVector[row] = (double) amTable.get(row, reportColumn.getColumnName());
+ }
+ logger.info("Got mean Vector:" + Arrays.toString(meanVector));
+ }
+ }
+
+ for (GATKReportColumn reportColumn : astdTable.getColumnInfo() ) {
+ logger.info("Report column name is:" + reportColumn.getColumnName());
+ if (reportColumn.getColumnName().equals("Standarddeviation")) {
+ stdVector = new double[astdTable.getNumRows()];
+
+ for (int row = 0; row < astdTable.getNumRows(); row++) {
+ stdVector[row] = (double) astdTable.get(row, reportColumn.getColumnName());
+ }
+ }
+ }
+
+ goodModel = GMMFromTables(pmmTable, pmcTable, pPMixTable, numAnnotations);
+ //Utils.getRandomGenerator().setSeed(12878);
+ engine.evaluateData(dataManager.getData(), goodModel, false);
+ negativeTrainingData = dataManager.selectWorstVariants();
+ badModel = GMMFromTables(nmmTable, nmcTable, nPMixTable, numAnnotations);
+ logger.info("Loaded GMM from file:" + inputModel);
+
+ dataManager.dropAggregateData(); // Don't need the aggregate data anymore so let's free up the memory
+ //Utils.getRandomGenerator().setSeed(12878);
+ engine.evaluateData(dataManager.getData(), badModel, true);
+
+ } else { // Generate the GMMs from scratch
+ goodModel = engine.generateModel(positiveTrainingData, VRAC.MAX_GAUSSIANS);
+ //Utils.getRandomGenerator().setSeed(12878);
+ engine.evaluateData(dataManager.getData(), goodModel, false);
+ // Generate the negative model using the worst performing data and evaluate each variant contrastively
+ negativeTrainingData = dataManager.selectWorstVariants();
+
+ badModel = engine.generateModel(negativeTrainingData, Math.min(VRAC.MAX_GAUSSIANS_FOR_NEGATIVE_MODEL, VRAC.MAX_GAUSSIANS));
+ dataManager.dropAggregateData(); // Don't need the aggregate data anymore so let's free up the memory
+ //Utils.getRandomGenerator().setSeed(12878);
+ engine.evaluateData(dataManager.getData(), badModel, true);
+
+
+ if (badModel.failedToConverge || goodModel.failedToConverge) {
+ throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe. Please consider " + (badModel.failedToConverge ? "raising the number of variants used to train the negative model (via --minNumBadVariants 5000, for example)." : "lowering the maximum number of Gaussians allowed for use in the model (via --maxGaussians 4, for example)."));
+ }
}
if (outputModel) {
@@ -499,6 +568,9 @@ public class VariantRecalibrator extends RodWalker gaussianList = new ArrayList<>();
+
+ int curAnnotation = 0;
+ for (GATKReportColumn reportColumn : muTable.getColumnInfo() ) {
+ logger.info("Report column name is:" + reportColumn.getColumnName());
+ if (!reportColumn.getColumnName().equals("Gaussian")) {
+ for (int row = 0; row < muTable.getNumRows(); row++) {
+ if (gaussianList.size() <= row){
+ MultivariateGaussian mg = new MultivariateGaussian(numAnnotations);
+ gaussianList.add(mg);
+ }
+ gaussianList.get(row).mu[curAnnotation] = (double) muTable.get(row, reportColumn.getColumnName());
+ }
+ curAnnotation++;
+ }
+ }
+
+ for (GATKReportColumn reportColumn : pmixTable.getColumnInfo() ) {
+ if (reportColumn.getColumnName().equals("pMixLog10")) {
+ for (int row = 0; row < pmixTable.getNumRows(); row++) {
+ gaussianList.get(row).pMixtureLog10 = (double) pmixTable.get(row, reportColumn.getColumnName());
+ }
+ }
+ }
+
+ int curJ = 0;
+ for (GATKReportColumn reportColumn : sigmaTable.getColumnInfo() ) {
+ if (reportColumn.getColumnName().equals("Gaussian")) continue;
+ if (reportColumn.getColumnName().equals("Annotation")) continue;
+
+ for (int row = 0; row < sigmaTable.getNumRows(); row++) {
+ int curGaussian = row / numAnnotations;
+ int curI = row % numAnnotations;
+ double curVal = (double) sigmaTable.get(row, reportColumn.getColumnName());
+ gaussianList.get(curGaussian).sigma.set(curI, curJ, curVal);
+
+ }
+ curJ++;
+
+ }
+
+ return new GaussianMixtureModel(gaussianList, VRAC.SHRINKAGE, VRAC.DIRICHLET_PARAMETER, VRAC.PRIOR_COUNTS);
+
+ }
+
+ private void writeFeaturesFiles(List positiveTrainingData, List negativeTrainingData){
+ //Begin Sam Hacking
+ try {
+ File file = new File("/Users/sam/data/haploid_features.txt");
+ file.createNewFile();
+ File badFile = new File("/Users/sam/data/haploid_bad_features.txt");
+ badFile.createNewFile();
+
+ FileWriter fw = new FileWriter(file.getAbsoluteFile());
+ BufferedWriter bw = new BufferedWriter(fw);
+ for(int jj = 0; jj < positiveTrainingData.size(); jj++){
+ VariantDatum v = positiveTrainingData.get(jj);
+ for(int kk = 0; kk < v.annotations.length; kk++){
+ bw.write(Double.toString(v.annotations[kk]));
+ bw.write(" ");
+ }
+ bw.write("\n");
+ }
+ bw.close();
+
+ fw = new FileWriter(badFile.getAbsoluteFile());
+ bw = new BufferedWriter(fw);
+ for(int jj = 0; jj < negativeTrainingData.size(); jj++){
+ VariantDatum v = negativeTrainingData.get(jj);
+ for(int kk = 0; kk < v.annotations.length; kk++){
+ bw.write(Double.toString(v.annotations[kk]));
+ bw.write(" ");
+ }
+ bw.write("\n");
+ }
+ bw.close();
+ }catch(IOException e){
+ e.printStackTrace();
+ }
+ // End Sam Hacking
+ }
+
protected GATKReport writeModelReport(final GaussianMixtureModel goodModel, final GaussianMixtureModel badModel, List annotationList) {
- final String formatString = "%.3f";
+ final String formatString = "%.25f";
final GATKReport report = new GATKReport();
if (dataManager != null) { //for unit test
@@ -547,10 +702,36 @@ public class VariantRecalibrator extends RodWalker gaussianStrings = new ArrayList<>();
+ final double[] pMixtureLog10s = new double[goodModel.getModelGaussians().size()];
+ int idx = 0;
+
+ for( final MultivariateGaussian gaussian : goodModel.getModelGaussians() ) {
+ pMixtureLog10s[idx] = gaussian.pMixtureLog10;
+ logger.info("Good normalize PMix log 10 is:" + Double.toString(gaussian.pMixtureLog10) );
+ gaussianStrings.add(Integer.toString(idx++) );
+ }
+
+ GATKReportTable goodPMix = makeVectorTable("GoodGaussianPMix", "Pmixture log 10 used to evaluate model", gaussianStrings, pMixtureLog10s, "pMixLog10", formatString, "Gaussian");
+ report.addTable(goodPMix);
+
+ gaussianStrings.clear();
+ final double[] pMixtureLog10sBad = new double[badModel.getModelGaussians().size()];
+ idx = 0;
+
+ for( final MultivariateGaussian gaussian : badModel.getModelGaussians() ) {
+ pMixtureLog10sBad[idx] = gaussian.pMixtureLog10;
+ logger.info("Bad normalize PMix log 10 is:" + Double.toString(gaussian.pMixtureLog10));
+ gaussianStrings.add(Integer.toString(idx++));
+ }
+ GATKReportTable badPMix = makeVectorTable("BadGaussianPMix", "Pmixture log 10 used to evaluate model", gaussianStrings, pMixtureLog10sBad, "pMixLog10", formatString, "Gaussian");
+ report.addTable(badPMix);
+
+
//The model and Gaussians don't know what the annotations are, so get them from this class
//VariantDataManager keeps the annotation in the same order as the argument list
GATKReportTable positiveMeans = makeMeansTable("PositiveModelMeans", "Vector of annotation values to describe the (normalized) mean for each Gaussian in the positive model", annotationList, goodModel, formatString);
@@ -570,8 +751,12 @@ public class VariantRecalibrator extends RodWalker annotationList, final double[] perAnnotationValues, final String columnName, final String formatString) {
+ return makeVectorTable(tableName, tableDescription, annotationList, perAnnotationValues, columnName, formatString, "Annotation");
+ }
+
+ protected GATKReportTable makeVectorTable(final String tableName, final String tableDescription, final List annotationList, final double[] perAnnotationValues, final String columnName, final String formatString, final String firstColumn) {
GATKReportTable vectorTable = new GATKReportTable(tableName, tableDescription, annotationList.size(), GATKReportTable.TableSortingWay.DO_NOT_SORT);
- vectorTable.addColumn("Annotation");
+ vectorTable.addColumn(firstColumn);
vectorTable.addColumn(columnName, formatString);
for (int i = 0; i < perAnnotationValues.length; i++) {
vectorTable.addRowIDMapping(annotationList.get(i), i, true);
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/report/GATKReportTable.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/report/GATKReportTable.java
index 85d2386a0..4d87bab84 100644
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/report/GATKReportTable.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/report/GATKReportTable.java
@@ -150,7 +150,6 @@ public class GATKReportTable {
// read a data line
final String dataLine = reader.readLine();
final List lineSplits = Arrays.asList(TextFormattingUtils.splitFixedWidth(dataLine, columnStarts));
-
underlyingData.add(new Object[nColumns]);
for ( int columnIndex = 0; columnIndex < nColumns; columnIndex++ ) {
From 57c064eaa3d3a287f95b29ca701d6d9ae7a0cc7b Mon Sep 17 00:00:00 2001
From: Samuel Friedman
Date: Wed, 26 Apr 2017 16:01:16 -0400
Subject: [PATCH 23/58] small code cleanup
---
.../VariantRecalibrator.java | 114 +++++++-----------
1 file changed, 46 insertions(+), 68 deletions(-)
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java
index 470ce3cdb..bb6f80835 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java
@@ -280,7 +280,7 @@ public class VariantRecalibrator extends RodWalker gaussianList = new ArrayList<>();
int curAnnotation = 0;
@@ -621,7 +604,7 @@ public class VariantRecalibrator extends RodWalker positiveTrainingData, List negativeTrainingData){
- //Begin Sam Hacking
- try {
- File file = new File("/Users/sam/data/haploid_features.txt");
- file.createNewFile();
- File badFile = new File("/Users/sam/data/haploid_bad_features.txt");
- badFile.createNewFile();
+ private double[] getStandardDeviationsFromTable(GATKReportTable astdTable){
+ double[] stdVector = {};
- FileWriter fw = new FileWriter(file.getAbsoluteFile());
- BufferedWriter bw = new BufferedWriter(fw);
- for(int jj = 0; jj < positiveTrainingData.size(); jj++){
- VariantDatum v = positiveTrainingData.get(jj);
- for(int kk = 0; kk < v.annotations.length; kk++){
- bw.write(Double.toString(v.annotations[kk]));
- bw.write(" ");
+ for (GATKReportColumn reportColumn : astdTable.getColumnInfo() ) {
+ logger.info("Report column name is:" + reportColumn.getColumnName());
+ if (reportColumn.getColumnName().equals("Standarddeviation")) {
+ stdVector = new double[astdTable.getNumRows()];
+ for (int row = 0; row < astdTable.getNumRows(); row++) {
+ stdVector[row] = Double.parseDouble((String) astdTable.get(row, reportColumn.getColumnName()));
}
- bw.write("\n");
}
- bw.close();
-
- fw = new FileWriter(badFile.getAbsoluteFile());
- bw = new BufferedWriter(fw);
- for(int jj = 0; jj < negativeTrainingData.size(); jj++){
- VariantDatum v = negativeTrainingData.get(jj);
- for(int kk = 0; kk < v.annotations.length; kk++){
- bw.write(Double.toString(v.annotations[kk]));
- bw.write(" ");
- }
- bw.write("\n");
- }
- bw.close();
- }catch(IOException e){
- e.printStackTrace();
}
- // End Sam Hacking
+
+ return stdVector;
+ }
+
+ private double[] getMeansFromTable(GATKReportTable amTable){
+ double[] meanVector = {};
+
+ for (GATKReportColumn reportColumn : amTable.getColumnInfo() ) {
+ if (reportColumn.getColumnName().equals("Mean")) {
+ meanVector = new double[amTable.getNumRows()];
+ for (int row = 0; row < amTable.getNumRows(); row++) {
+ meanVector[row] = Double.parseDouble((String) amTable.get(row, reportColumn.getColumnName()));
+ }
+ logger.info("Got mean Vector:" + Arrays.toString(meanVector));
+ }
+ }
+
+ return meanVector;
}
protected GATKReport writeModelReport(final GaussianMixtureModel goodModel, final GaussianMixtureModel badModel, List annotationList) {
From 85cb1c281068e563c31e8bce00012586a40d5df9 Mon Sep 17 00:00:00 2001
From: Samuel Friedman
Date: Wed, 26 Apr 2017 17:33:06 -0400
Subject: [PATCH 24/58] dont spam on NaNs
---
.../variantrecalibration/GaussianMixtureModel.java | 3 ---
.../variantrecalibration/MultivariateGaussian.java | 9 ---------
2 files changed, 12 deletions(-)
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/GaussianMixtureModel.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/GaussianMixtureModel.java
index 59a5af92d..17b3a63d7 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/GaussianMixtureModel.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/GaussianMixtureModel.java
@@ -195,9 +195,6 @@ public class GaussianMixtureModel {
final double[] pVarInGaussianNormalized = MathUtils.normalizeFromLog10( pVarInGaussianLog10, false );
gaussianIndex = 0;
for( final MultivariateGaussian gaussian : gaussians ) {
- if (Double.isNaN(pVarInGaussianNormalized[gaussianIndex])){
- logger.info(" Got a NaN at gaussian:" + Integer.toString(gaussianIndex) + " datum:" + datum.toString());
- }
gaussian.assignPVarInGaussian( pVarInGaussianNormalized[gaussianIndex++] );
}
}
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/MultivariateGaussian.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/MultivariateGaussian.java
index 5e9d18ffd..51662dc92 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/MultivariateGaussian.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/MultivariateGaussian.java
@@ -272,13 +272,4 @@ public class MultivariateGaussian {
resetPVarInGaussian(); // clean up some memory
}
-
- public void setSumProb( final List data ) {
- sumProb = 0.0;
-
- for( int datumIndex = 0; datumIndex < data.size(); datumIndex++ ) {
- final double prob = pVarInGaussian.get(datumIndex);
- if(!Double.isNaN(prob)) sumProb += prob;
- }
- }
}
\ No newline at end of file
From f55b932cfc04359b32907f8f38be7a5bd3740002 Mon Sep 17 00:00:00 2001
From: Samuel Friedman
Date: Thu, 27 Apr 2017 17:16:57 -0400
Subject: [PATCH 25/58] addressed review comments
---
.../GaussianMixtureModel.java | 1 -
.../VariantRecalibrator.java | 43 ++++++-------------
2 files changed, 14 insertions(+), 30 deletions(-)
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/GaussianMixtureModel.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/GaussianMixtureModel.java
index 17b3a63d7..2a9b2b5cc 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/GaussianMixtureModel.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/GaussianMixtureModel.java
@@ -320,5 +320,4 @@ public class GaussianMixtureModel {
protected List getModelGaussians() {return Collections.unmodifiableList(gaussians);}
protected int getNumAnnotations() {return empiricalMu.length;}
-
}
\ No newline at end of file
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java
index bb6f80835..b5145f76d 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java
@@ -490,57 +490,46 @@ public class VariantRecalibrator extends RodWalker positiveTrainingData = dataManager.getTrainingData();
final List negativeTrainingData;
- File inputFile = new File(inputModel);
+ final File inputFile = new File(inputModel);
if (inputFile.exists()){ // Load GMM from a file
logger.info("Loading model from:"+inputModel);
- GATKReport reportIn = new GATKReport(inputFile);
+ final GATKReport reportIn = new GATKReport(inputFile);
// Read all the tables
- GATKReportTable amTable = reportIn.getTable("AnnotationMeans");
- GATKReportTable astdTable = reportIn.getTable("AnnotationStdevs");
+ final GATKReportTable nmcTable = reportIn.getTable("NegativeModelCovariances");
+ final GATKReportTable nmmTable = reportIn.getTable("NegativeModelMeans");
+ final GATKReportTable nPMixTable = reportIn.getTable("BadGaussianPMix");
+ final GATKReportTable pmcTable = reportIn.getTable("PositiveModelCovariances");
+ final GATKReportTable pmmTable = reportIn.getTable("PositiveModelMeans");
+ final GATKReportTable pPMixTable = reportIn.getTable("GoodGaussianPMix");
+ final int numAnnotations = dataManager.getMeanVector().length;
- // Should have same number of means and standard deviations.
- assert(amTable.getNumRows() == astdTable.getNumRows() );
-
- GATKReportTable nmcTable = reportIn.getTable("NegativeModelCovariances");
- GATKReportTable nmmTable = reportIn.getTable("NegativeModelMeans");
- GATKReportTable nPMixTable = reportIn.getTable("BadGaussianPMix");
- GATKReportTable pmcTable = reportIn.getTable("PositiveModelCovariances");
- GATKReportTable pmmTable = reportIn.getTable("PositiveModelMeans");
- GATKReportTable pPMixTable = reportIn.getTable("GoodGaussianPMix");
-
- int numAnnotations = amTable.getNumRows();
goodModel = GMMFromTables(pmmTable, pmcTable, pPMixTable, numAnnotations);
engine.evaluateData(dataManager.getData(), goodModel, false);
negativeTrainingData = dataManager.selectWorstVariants();
badModel = GMMFromTables(nmmTable, nmcTable, nPMixTable, numAnnotations);
- logger.info("Loaded GMM from file:" + inputModel);
- dataManager.dropAggregateData(); // Don't need the aggregate data anymore so let's free up the memory
- engine.evaluateData(dataManager.getData(), badModel, true);
} else { // Generate the GMMs from scratch
+ // Generate the positive model using the training data and evaluate each variant
goodModel = engine.generateModel(positiveTrainingData, VRAC.MAX_GAUSSIANS);
- //Utils.getRandomGenerator().setSeed(12878);
engine.evaluateData(dataManager.getData(), goodModel, false);
// Generate the negative model using the worst performing data and evaluate each variant contrastively
negativeTrainingData = dataManager.selectWorstVariants();
badModel = engine.generateModel(negativeTrainingData, Math.min(VRAC.MAX_GAUSSIANS_FOR_NEGATIVE_MODEL, VRAC.MAX_GAUSSIANS));
- dataManager.dropAggregateData(); // Don't need the aggregate data anymore so let's free up the memory
- //Utils.getRandomGenerator().setSeed(12878);
- engine.evaluateData(dataManager.getData(), badModel, true);
-
if (badModel.failedToConverge || goodModel.failedToConverge) {
throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe. Please consider " + (badModel.failedToConverge ? "raising the number of variants used to train the negative model (via --minNumBadVariants 5000, for example)." : "lowering the maximum number of Gaussians allowed for use in the model (via --maxGaussians 4, for example)."));
}
}
+ dataManager.dropAggregateData(); // Don't need the aggregate data anymore so let's free up the memory
+ engine.evaluateData(dataManager.getData(), badModel, true);
+
if (outputModel) {
GATKReport report = writeModelReport(goodModel, badModel, USE_ANNOTATIONS);
report.print(modelReport);
@@ -592,7 +581,7 @@ public class VariantRecalibrator extends RodWalker gaussianList = new ArrayList<>();
int curAnnotation = 0;
@@ -642,7 +631,6 @@ public class VariantRecalibrator extends RodWalker
Date: Thu, 27 Apr 2017 14:12:43 -0400
Subject: [PATCH 26/58] Fixes rounding errors in FS using the same solution as
R
---
.../gatk/tools/walkers/annotator/StrandBiasTableUtils.java | 4 +++-
.../tools/walkers/annotator/StrandBiasTableUtilsTest.java | 2 ++
.../haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java | 6 +++---
.../haplotypecaller/HaplotypeCallerIntegrationTest.java | 2 +-
.../walkers/variantutils/GenotypeGVCFsIntegrationTest.java | 4 ++--
5 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasTableUtils.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasTableUtils.java
index 7768d2e6d..5abe90a93 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasTableUtils.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasTableUtils.java
@@ -65,6 +65,8 @@ public class StrandBiasTableUtils {
private final static Logger logger = Logger.getLogger(StrandBiasTableUtils.class);
+ private static final double REL_ERR = 1 + 10e-7;
+
//For now this is only for 2x2 contingency tables
protected static final int ARRAY_DIM = 2;
protected static final int ARRAY_SIZE = ARRAY_DIM * ARRAY_DIM;
@@ -117,7 +119,7 @@ public class StrandBiasTableUtils {
final HypergeometricDistribution dist = new HypergeometricDistribution(N, numberOfSuccesses, sampleSize);
//Then we determine a given probability with the sampled successes (k = a) from the first entry in the table.
- double pCutoff = dist.probability(table[0][0]);
+ double pCutoff = dist.probability(table[0][0]) * REL_ERR;
double pValue = 0.0;
/**
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasTableUtilsTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasTableUtilsTest.java
index 1b0156090..b472f48d6 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasTableUtilsTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasTableUtilsTest.java
@@ -79,6 +79,8 @@ public class StrandBiasTableUtilsTest {
tests.add(new Object[]{9,13,12,10, 0.5466948});
tests.add(new Object[]{12,10,9,13, 0.5466948});
tests.add(new Object[]{9,12,11,9, 0.5377362});
+ tests.add(new Object[]{12,4,26,7, 1.0}); //tests rounding the probabilities from the Hypergeometric
+ tests.add(new Object[]{12,26,4,7, 1.0}); //tests rounding the probabilities from the Hypergeometric
tests.add(new Object[]{0, 0, 0, 0, 1.0});
tests.add(new Object[]{100000, 100000, 100000, 100000, 1.0} );
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java
index b671764c0..cbc10ef2d 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java
@@ -108,7 +108,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;
// this functionality can be adapted to provide input data for whatever you might want in your data
- tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "f2807ff921854059746da2954dc44a7b"});
+ tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "cd21856eec2f1c1920408f20fd08411b"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "d146c8dc4fc0605b3776ab5fec837d53"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "c317193f0d1c9a8168f2625c8bf1dd2b"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "63ff771eed3e62340c8938b4963d0add"});
@@ -131,7 +131,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;
// this functionality can be adapted to provide input data for whatever you might want in your data
- tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "126527c225d24a2a0bb329ad9b3f682a"});
+ tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "093f129861ea93526f6b80b4c8c70178"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "6c727b804084a2324ecd1c98b72734b9"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "190cef14684c95ba290d7a5fa13fdc07"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "6ad7855dbf6dda2060aa93a3ee010b3e"});
@@ -149,7 +149,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;
// this functionality can be adapted to provide input data for whatever you might want in your data
- tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "8e17f26d07fbba596d3cfd2e344c4cd2"});
+ tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "468550db971e29b3696e5a14f3e31bfc"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "48521b89cecceb9846e4dfc0dd415874"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "eaacbeaff99a37ffa07e1f11e7f1deb2"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "af0fe243e3b96e59097187cd16ba1597"});
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
index befb87b39..d2d35c5d8 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@@ -319,7 +319,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestDBSNPAnnotationWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub " + HMM_SUB_IMPLEMENTATION + " " + ALWAYS_LOAD_VECTOR_HMM + " -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1,
- Arrays.asList("04ff9b301bd6f50df848800fbe09de5c"));
+ Arrays.asList("fc71471b01f93bc531e3cf19cdf78b1f"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java
index 00f08851c..1ab774e7d 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java
@@ -83,7 +83,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
final WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -V " + privateTestDir + "testUpdatePGT.vcf", b37KGReference),
1,
- Collections.singletonList("cdff1a18cd820c9d9c2b5b05ab7ef8a9"));
+ Collections.singletonList("326ec5afa27ade4d0c562aa227997d88"));
executeTest("testUpdatePGT", spec);
}
@@ -93,7 +93,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
final WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -V " + privateTestDir + "testUpdatePGT.vcf -A StrandAlleleCountsBySample -log " + logFileName, b37KGReference),
1,
- Collections.singletonList("7a459c5ff606239620e5f7b089186dfb"));
+ Collections.singletonList("b995bf820c3edff5b721338ea9cf44aa"));
executeTest("testUpdatePGTStrandAlleleCountsBySample", spec);
final File file = new File(logFileName);
From d06a6c7318d1428cb40221709b80badff88d0cf1 Mon Sep 17 00:00:00 2001
From: Samuel Friedman
Date: Fri, 28 Apr 2017 17:32:36 -0400
Subject: [PATCH 27/58] string cast bug
---
.../VariantRecalibrator.java | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java
index b5145f76d..9ed65b87d 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java
@@ -593,7 +593,7 @@ public class VariantRecalibrator extends RodWalker
Date: Mon, 1 May 2017 15:52:45 -0400
Subject: [PATCH 29/58] move model file parsing to initialize
---
.../VariantRecalibrator.java | 67 +++++++++----------
1 file changed, 33 insertions(+), 34 deletions(-)
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java
index 9ed65b87d..f305c780b 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java
@@ -319,6 +319,8 @@ public class VariantRecalibrator extends RodWalker ignoreInputFilterSet = new TreeSet<>();
private final VariantRecalibratorEngine engine = new VariantRecalibratorEngine( VRAC );
+ private GaussianMixtureModel goodModel = null;
+ private GaussianMixtureModel badModel = null;
//---------------------------------------------------------------------------------------------------------------
//
@@ -356,7 +358,28 @@ public class VariantRecalibrator extends RodWalker hInfo = new HashSet<>();
+ final File inputFile = new File(inputModel);
+ if (inputFile.exists()) { // Load GMM from a file
+ logger.info("Loading model from:" + inputModel);
+ final GATKReport reportIn = new GATKReport(inputFile);
+
+ // Read all the tables
+ final GATKReportTable nmcTable = reportIn.getTable("NegativeModelCovariances");
+ final GATKReportTable nmmTable = reportIn.getTable("NegativeModelMeans");
+ final GATKReportTable nPMixTable = reportIn.getTable("BadGaussianPMix");
+ final GATKReportTable pmcTable = reportIn.getTable("PositiveModelCovariances");
+ final GATKReportTable pmmTable = reportIn.getTable("PositiveModelMeans");
+ final GATKReportTable pPMixTable = reportIn.getTable("GoodGaussianPMix");
+ final int numAnnotations = dataManager.getMeanVector().length;
+
+ goodModel = GMMFromTables(pmmTable, pmcTable, pPMixTable, numAnnotations);
+ badModel = GMMFromTables(nmmTable, nmcTable, nPMixTable, numAnnotations);
+ }
+
+
+
+
+ final Set hInfo = new HashSet<>();
ApplyRecalibration.addVQSRStandardHeaderLines(hInfo);
recalWriter.writeHeader( new VCFHeader(hInfo) );
@@ -490,29 +513,14 @@ public class VariantRecalibrator extends RodWalker positiveTrainingData = dataManager.getTrainingData();
final List negativeTrainingData;
- final File inputFile = new File(inputModel);
- if (inputFile.exists()){ // Load GMM from a file
- logger.info("Loading model from:"+inputModel);
- final GATKReport reportIn = new GATKReport(inputFile);
-
- // Read all the tables
- final GATKReportTable nmcTable = reportIn.getTable("NegativeModelCovariances");
- final GATKReportTable nmmTable = reportIn.getTable("NegativeModelMeans");
- final GATKReportTable nPMixTable = reportIn.getTable("BadGaussianPMix");
- final GATKReportTable pmcTable = reportIn.getTable("PositiveModelCovariances");
- final GATKReportTable pmmTable = reportIn.getTable("PositiveModelMeans");
- final GATKReportTable pPMixTable = reportIn.getTable("GoodGaussianPMix");
- final int numAnnotations = dataManager.getMeanVector().length;
-
- goodModel = GMMFromTables(pmmTable, pmcTable, pPMixTable, numAnnotations);
+ if (goodModel != null && badModel != null){ // GMMs were loaded from a file
+ // Keeping this to maintain reproducibility between runs with and without serialized GMMs
engine.evaluateData(dataManager.getData(), goodModel, false);
negativeTrainingData = dataManager.selectWorstVariants();
- badModel = GMMFromTables(nmmTable, nmcTable, nPMixTable, numAnnotations);
-
} else { // Generate the GMMs from scratch
// Generate the positive model using the training data and evaluate each variant
goodModel = engine.generateModel(positiveTrainingData, VRAC.MAX_GAUSSIANS);
@@ -586,14 +594,13 @@ public class VariantRecalibrator extends RodWalker annotationList) {
- final String formatString = "%.25f";
+ final String formatString = "%.8f";
final GATKReport report = new GATKReport();
if (dataManager != null) { //for unit test
From 505383e5048472d856c0204605fe3e3571e49fb2 Mon Sep 17 00:00:00 2001
From: Ron Levine
Date: Tue, 2 May 2017 08:50:12 -0400
Subject: [PATCH 30/58] Remove obsolete unmapped reads test for
FindCoveredIntervals (#1580)
Remove obsolete unmapped reads test for FindCoveredIntervals
---
.../examples/UnmappedExcludedQueueTest.scala | 16 ----------------
.../gatk/engine/walkers/ActiveRegionWalker.java | 4 ----
2 files changed, 20 deletions(-)
diff --git a/protected/gatk-queue-extensions-distribution/src/test/scala/org/broadinstitute/gatk/queue/pipeline/examples/UnmappedExcludedQueueTest.scala b/protected/gatk-queue-extensions-distribution/src/test/scala/org/broadinstitute/gatk/queue/pipeline/examples/UnmappedExcludedQueueTest.scala
index 12c227f51..e08e3a8c8 100644
--- a/protected/gatk-queue-extensions-distribution/src/test/scala/org/broadinstitute/gatk/queue/pipeline/examples/UnmappedExcludedQueueTest.scala
+++ b/protected/gatk-queue-extensions-distribution/src/test/scala/org/broadinstitute/gatk/queue/pipeline/examples/UnmappedExcludedQueueTest.scala
@@ -60,22 +60,6 @@ class UnmappedExcludedQueueTest {
@Test(timeOut=36000000)
def testUnmappedExclusion(): Unit = {
- //FindCoveredIntervals is an ActiveRegionWalker, which throws an exception if it encounters unmapped reads
- //But it's partitioned by contigs, which by default includes unmapped reads. Verify that the unmapped reads
- //are correctly not added in this case
- val testOut = "fci.out"
- val spec = new QueueTestSpec
- spec.name = "findcoveredintervals"
- spec.args = Array(
- " -S " + QueueTest.protectedQScriptsPackageDir + "examples/ExampleFindCoveredIntervals.scala",
- " -R " + BaseTest.publicTestDir + "exampleFASTA.fasta",
- " -I " + BaseTest.publicTestDir + "exampleBAM.bam",
- " -out " + testOut).mkString
-
- //The output file is blank - the real test is simply that it runs to completion
- spec.fileMD5s += testOut -> "d41d8cd98f00b204e9800998ecf8427e"
- QueueTest.executeTest(spec)
-
//Regression Test: HaplotypeCaller is also an ActiveRegionWalker, and is much more widely used. Explicitly test
//it as well
val hcTestOut = "hctest.vcf"
diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/ActiveRegionWalker.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/ActiveRegionWalker.java
index 006b2530b..21af352f4 100644
--- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/ActiveRegionWalker.java
+++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/ActiveRegionWalker.java
@@ -189,10 +189,6 @@ public abstract class ActiveRegionWalker extends Walker
*
- *
Caveat
+ *
Caveats
*
Only gVCF files produced by HaplotypeCaller (or CombineGVCFs) can be used as input for this tool. Some other
* programs produce files that they call gVCFs but those lack some important information (accurate genotype likelihoods
* for every position) that GenotypeGVCFs requires for its operation.
+ *
If the gVCF files contain allele specific annotations, add -G Standard -G AS_Standard to the command line.
Only gVCF files produced by HaplotypeCaller (or CombineGVCFs) can be used as input for this tool. Some other
* programs produce files that they call gVCFs but those lack some important information (accurate genotype likelihoods
* for every position) that GenotypeGVCFs requires for its operation.
+ *
If the gVCF files contain allele specific annotations, add -G Standard -G AS_Standard to the command line.
*
*
Special note on ploidy
*
This tool is able to handle any ploidy (or mix of ploidies) intelligently; there is no need to specify ploidy
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ReferenceConfidenceVariantContextMerger.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ReferenceConfidenceVariantContextMerger.java
index 05c0a930d..7443c8d8d 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ReferenceConfidenceVariantContextMerger.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ReferenceConfidenceVariantContextMerger.java
@@ -78,6 +78,8 @@ public class ReferenceConfidenceVariantContextMerger {
private final static Logger logger = Logger.getLogger(ReferenceConfidenceVariantContextMerger.class);
+ static final String ADD_AS_STANDARD_MSG = " Add -G Standard -G AS_Standard to the command to annotate in the final VC.";
+
private static Comparable combineAnnotationValues( final List array ) {
return MathUtils.median(array); // right now we take the median but other options could be explored
}
@@ -244,7 +246,9 @@ public class ReferenceConfidenceVariantContextMerger {
}
} catch (final NumberFormatException e) {
- logger.warn("WARNING: remaining (non-reducible) annotations are assumed to be ints or doubles or booleans, but " + value.getRawData() + " doesn't parse and will not be annotated in the final VC.");
+ final String baseMsg = "Remaining (non-reducible) annotations are assumed to be ints or doubles, but " + value.getRawData() + " doesn't parse and will not be annotated in the final VC.";
+ final String msg = value.getRawData().contains("|") ? baseMsg + ADD_AS_STANDARD_MSG : baseMsg;
+ logger.warn(msg);
}
}
parsedAnnotations.put(currentData.getKey(),annotationValues);
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java
index 286ef6604..6945d82b3 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java
@@ -51,6 +51,7 @@
package org.broadinstitute.gatk.tools.walkers.variantutils;
+import org.apache.commons.io.FileUtils;
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
import org.broadinstitute.gatk.engine.GATKVCFUtils;
import htsjdk.variant.variantcontext.VariantContext;
@@ -58,6 +59,7 @@ import org.testng.Assert;
import org.testng.annotations.Test;
import java.io.File;
+import java.io.IOException;
import java.util.Arrays;
import java.util.List;
@@ -282,6 +284,18 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
executeTest("testAlleleSpecificAnnotations", spec);
}
+ @Test
+ public void testMissingAlleleSpecificAnnotationGroup() throws IOException {
+ final File logFile = createTempFile("testMissingAlleleSpecificAnnotationGroup.log", ".tmp");
+ final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -V "
+ + privateTestDir + "NA12878.AS.chr20snippet.g.vcf -V " + privateTestDir + "NA12892.AS.chr20snippet.g.vcf -log " +
+ logFile.getAbsolutePath();
+ final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList(""));
+ spec.disableShadowBCF();
+ executeTest("testMissingAlleleSpecificAnnotationGroup", spec);
+ Assert.assertTrue(FileUtils.readFileToString(logFile).contains(ReferenceConfidenceVariantContextMerger.ADD_AS_STANDARD_MSG));
+ }
+
@Test
public void testASMateRankSumAnnotation() throws Exception {
final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard -A AS_MQMateRankSumTest -V "
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java
index 2531fb570..3639f236f 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java
@@ -604,6 +604,16 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
executeTest("testAlleleSpecificAnnotations", spec);
}
+ @Test
+ public void testMissingAlleleSpecificAnnotationGroup() throws IOException {
+ final File logFile = createTempFile("testMissingAlleleSpecificAnnotationGroup.log", ".tmp");
+ final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header --disableDithering -V "
+ + privateTestDir + "NA12878.AS.chr20snippet.g.vcf -V " + privateTestDir + "NA12892.AS.chr20snippet.g.vcf -log " + logFile.getAbsolutePath();
+ final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Collections.singletonList(""));
+ spec.disableShadowBCF();
+ executeTest("testMissingAlleleSpecificAnnotationGroup", spec);
+ }
+
@Test
public void testASMateRankSumAnnotation() {
final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard -A AS_MQMateRankSumTest --disableDithering -V "
From 1d6756fbd09f14d31e0bd9bec4e22eb357211aad Mon Sep 17 00:00:00 2001
From: Ron Levine
Date: Fri, 12 May 2017 17:07:05 -0400
Subject: [PATCH 40/58] Add assert to testMissingAlleleSpecificAnnotationGroup
(#1587)
---
.../tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java | 1 +
1 file changed, 1 insertion(+)
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java
index 3639f236f..0a7eb5d0f 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java
@@ -612,6 +612,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Collections.singletonList(""));
spec.disableShadowBCF();
executeTest("testMissingAlleleSpecificAnnotationGroup", spec);
+ Assert.assertTrue(FileUtils.readFileToString(logFile).contains(ReferenceConfidenceVariantContextMerger.ADD_AS_STANDARD_MSG));
}
@Test
From 3de98263f545b8e69265d36b7b896fa7cb474c24 Mon Sep 17 00:00:00 2001
From: Ron Levine
Date: Tue, 30 May 2017 15:51:57 -0400
Subject: [PATCH 41/58] Copy Program Records (PG) from input BAM
---
.../haplotypeBAMWriter/ReadDestination.java | 1 +
.../cancer/m2/MuTect2IntegrationTest.java | 2 +-
.../HaplotypeCallerIntegrationTest.java | 38 +++++++++++++--
.../gatk/engine/GenomeAnalysisEngine.java | 5 +-
.../engine/walkers/ActiveRegionWalker.java | 1 -
.../gatk/engine/walkers/LocusWalker.java | 1 -
.../engine/walkers/RemoveProgramRecords.java | 46 -------------------
7 files changed, 36 insertions(+), 58 deletions(-)
delete mode 100644 public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/RemoveProgramRecords.java
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/haplotypeBAMWriter/ReadDestination.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/haplotypeBAMWriter/ReadDestination.java
index 9eab1e239..000224472 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/haplotypeBAMWriter/ReadDestination.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/haplotypeBAMWriter/ReadDestination.java
@@ -83,6 +83,7 @@ public abstract class ReadDestination {
bamHeader = new SAMFileHeader();
bamHeader.setSequenceDictionary(header.getSequenceDictionary());
bamHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
+ bamHeader.setProgramRecords(header.getProgramRecords());
// include the original read groups plus an artificial one for haplotypes
final List readGroups = new ArrayList<>(header.getReadGroups());
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2IntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2IntegrationTest.java
index 1c99946db..feedfeb35 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2IntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2IntegrationTest.java
@@ -138,7 +138,7 @@ public class MuTect2IntegrationTest extends WalkerTest {
public void testTruePositivesDream3TrackedDropped() {
M2TestWithDroppedReads(DREAM3_TUMOR_BAM, DREAM3_NORMAL_BAM, "21:10935369", "",
"48a446d47bb10434cb7f0ee726d15721",
- "b536e76870326b4be01b8d6b83c1cf1c");
+ "6ecaeb74893249dfa5723b2266c957e2");
}
/**
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
index d2d35c5d8..f7dde74c5 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@@ -51,6 +51,10 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
+import htsjdk.samtools.SAMProgramRecord;
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.SamReaderFactory;
+import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.tribble.AbstractFeatureReader;
@@ -64,6 +68,7 @@ import org.apache.commons.io.FileUtils;
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
+import org.broadinstitute.gatk.utils.MD5DB;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.engine.GATKVCFUtils;
import org.broadinstitute.gatk.utils.exceptions.UserException;
@@ -107,9 +112,30 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
executeTest("testHaplotypeCallerBamout", spec);
}
+ /**
+ * Check that the bamout program records (@PG) contain all of the program records forwarded from the input BAMs
+ */
+ private void validateForwardedProgramRecords(final List bamInFiles, final String bamOutMd5) throws FileNotFoundException {
+ final List bamInProgramRecords = new ArrayList<>();
+ for (final File file : bamInFiles) {
+ final SamReader bamInReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(file);
+ bamInProgramRecords.addAll(bamInReader.getFileHeader().getProgramRecords());
+ }
+ final String bamOutFilePath = new MD5DB().getMD5FilePath(bamOutMd5, null);
+ if (bamOutFilePath == null) {
+ throw new FileNotFoundException("Could not find " + bamOutMd5 + " in the MD5 DB");
+ }
+ final SamReader bamOutReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).
+ open(new File(bamOutFilePath));
+ final List bamOutProgramRecords = bamOutReader.getFileHeader().getProgramRecords();
+ Assert.assertTrue(bamOutProgramRecords.containsAll(bamInProgramRecords));
+ }
+
@Test
public void testHaplotypeBAMOutFlags() throws IOException {
- HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "6588123afd06ff6acc9f10ea25250f54", "9d6bd79cdae3e3222fa93f542fbca153");
+ final String md5BAMOut = "69aae17f8cd384666ec7c3c1f3d3eb57";
+ HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "6588123afd06ff6acc9f10ea25250f54", md5BAMOut);
+ validateForwardedProgramRecords(new ArrayList<>(Arrays.asList(new File(NA12878_BAM))), md5BAMOut);
}
@Test
@@ -301,14 +327,15 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
private static final String LEFT_ALIGNMENT_BAMOUT_TEST_OUTPUT = privateTestDir + "/bamout-indel-left-align-bugfix-expected-output.bam";
@Test
- public void testLeftAlignmentBamOutBugFix() {
+ public void testLeftAlignmentBamOutBugFix() throws FileNotFoundException {
+ final String md5BAMOut = "27e729df3b166c81792a62a5b57ef7b3";
final String base = String.format("-T HaplotypeCaller -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, LEFT_ALIGNMENT_BAMOUT_TEST_INPUT)
+ " --no_cmdline_in_header -bamout %s -o /dev/null -L 1:11740000-11740700 --allowNonUniqueKmersInRef";
- final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList("01deba68f7a7d562b0e466f6858d42e3"));
+ final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(md5BAMOut));
executeTest("LeftAlignmentBamOutBugFix", spec);
+ validateForwardedProgramRecords(new ArrayList<>(Arrays.asList(new File(LEFT_ALIGNMENT_BAMOUT_TEST_INPUT))), md5BAMOut);
}
-
// --------------------------------------------------------------------------------------------------------------
//
// test dbSNP annotation
@@ -513,11 +540,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testHaplotypeCallerReadPosRankSum() throws IOException {
final File testBAM = new File(privateTestDir + "testReadPos.snippet.bam");
final String md5Variants = "03b3c464f22a3572f7d66890c18bdda4";
- final String md5BAMOut = "2e0843f6e8e90c407825e9c47ce4a32d";
+ final String md5BAMOut = "3ef35732e49980093ad445e3ac5731fa";
final String base = String.format("-T HaplotypeCaller -R %s -I %s -L 1:3753063 -ip 100 ", REF, testBAM) +
" --no_cmdline_in_header -o %s -bamout %s";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList(md5Variants, md5BAMOut));
executeTest("testHaplotypeCallerReadPosRankSum", spec);
+ validateForwardedProgramRecords(new ArrayList(Arrays.asList(testBAM)), md5BAMOut);
}
@Test
diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java
index 6a8479645..57570ed87 100644
--- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java
+++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java
@@ -944,10 +944,7 @@ public class GenomeAnalysisEngine {
if (argCollection.removeProgramRecords && argCollection.keepProgramRecords)
throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options");
- boolean removeProgramRecords = argCollection.removeProgramRecords || walker.getClass().isAnnotationPresent(RemoveProgramRecords.class);
-
- if (argCollection.keepProgramRecords)
- removeProgramRecords = false;
+ final boolean removeProgramRecords = argCollection.keepProgramRecords ? false : argCollection.removeProgramRecords;
final boolean keepReadsInLIBS = walker instanceof ActiveRegionWalker;
diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/ActiveRegionWalker.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/ActiveRegionWalker.java
index 21af352f4..053c482bb 100644
--- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/ActiveRegionWalker.java
+++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/ActiveRegionWalker.java
@@ -60,7 +60,6 @@ import java.util.*;
@ActiveRegionTraversalParameters(extension=50,maxRegion=1500)
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
@Downsample(by = DownsampleType.BY_SAMPLE, toCoverage = 1000)
-@RemoveProgramRecords
public abstract class ActiveRegionWalker extends Walker {
/**
* If provided, this walker will write out its activity profile (per bp probabilities of being active)
diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/LocusWalker.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/LocusWalker.java
index 90ff64f33..d1bace4b5 100644
--- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/LocusWalker.java
+++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/LocusWalker.java
@@ -46,7 +46,6 @@ import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
@PartitionBy(PartitionType.LOCUS)
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
@Downsample(by = DownsampleType.BY_SAMPLE, toCoverage = 1000)
-@RemoveProgramRecords
public abstract class LocusWalker extends Walker {
// Do we actually want to operate on the context?
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/RemoveProgramRecords.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/RemoveProgramRecords.java
deleted file mode 100644
index af7df6363..000000000
--- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/RemoveProgramRecords.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
-* Copyright 2012-2016 Broad Institute, Inc.
-*
-* Permission is hereby granted, free of charge, to any person
-* obtaining a copy of this software and associated documentation
-* files (the "Software"), to deal in the Software without
-* restriction, including without limitation the rights to use,
-* copy, modify, merge, publish, distribute, sublicense, and/or sell
-* copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following
-* conditions:
-*
-* The above copyright notice and this permission notice shall be
-* included in all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
-* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-package org.broadinstitute.gatk.engine.walkers;
-
-/**
- * Created with IntelliJ IDEA.
- * User: thibault
- * Date: 8/2/12
- * Time: 1:58 PM
- * To change this template use File | Settings | File Templates.
- */
-
-import java.lang.annotation.*;
-
-/**
- * Indicates that program records should be removed from SAM headers by default for this walker
- */
-@Documented
-@Inherited
-@Retention(RetentionPolicy.RUNTIME)
-@Target(ElementType.TYPE)
-public @interface RemoveProgramRecords {
-}
From 70b2575c2937bf8f5674a7785132a4e3e07dff45 Mon Sep 17 00:00:00 2001
From: Ron Levine
Date: Thu, 1 Jun 2017 16:36:52 -0400
Subject: [PATCH 42/58] Move htsjdk to ver 2.10.0 and picard to ver 2.9.2
---
.../UnifiedGenotyperIndelCallingIntegrationTest.java | 4 +++-
.../haplotypecaller/HaplotypeCallerIntegrationTest.java | 6 ++++--
.../HaplotypeCallerModesIntegrationTest.java | 6 ++++--
public/gatk-root/pom.xml | 4 ++--
4 files changed, 13 insertions(+), 7 deletions(-)
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java
index 5c54252d7..0d9b8628c 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java
@@ -204,8 +204,10 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
// No testing of MD5 here, we previously blew up due to a 0 length haplotypes, so we just need to pass
@Test
public void testHaplotype0Length() {
+ final String outputVCF = createTempFile("temp", ".vcf").getAbsolutePath();
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
- "-T UnifiedGenotyper --disableDithering -I " + privateTestDir + "haplotype0.bam -L 20:47507681 -R " + b37KGReference + " -baq CALCULATE_AS_NECESSARY -glm BOTH -o /dev/null",
+ "-T UnifiedGenotyper --disableDithering -I " + privateTestDir + "haplotype0.bam -L 20:47507681 -R " + b37KGReference +
+ " -baq CALCULATE_AS_NECESSARY -glm BOTH -o " + outputVCF,
0,
Collections.emptyList());
executeTest("testHaplotype0Length", spec);
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
index f7dde74c5..59f77e3c9 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@@ -297,7 +297,8 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void HCTestDoesNotFailOnBadRefBase() {
// don't care about the output - just want to make sure it doesn't fail
- final String base = String.format("-T HaplotypeCaller --disableDithering -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, privateTestDir + "NA12878.readsOverBadBase.chr3.bam") + " --no_cmdline_in_header -o /dev/null -L 3:60830000-60840000 --minPruning 3 -stand_call_conf 2";
+ final String outputVCF = createTempFile("temp", ".vcf").getAbsolutePath();
+ final String base = String.format("-T HaplotypeCaller --disableDithering -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, privateTestDir + "NA12878.readsOverBadBase.chr3.bam") + " --no_cmdline_in_header -o " + outputVCF + " -L 3:60830000-60840000 --minPruning 3 -stand_call_conf 2";
final WalkerTestSpec spec = new WalkerTestSpec(base, Collections.emptyList());
executeTest("HCTestDoesNotFailOnBadRefBase: ", spec);
}
@@ -328,9 +329,10 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testLeftAlignmentBamOutBugFix() throws FileNotFoundException {
+ final String outputVCF = createTempFile("temp", ".vcf").getAbsolutePath();
final String md5BAMOut = "27e729df3b166c81792a62a5b57ef7b3";
final String base = String.format("-T HaplotypeCaller -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, LEFT_ALIGNMENT_BAMOUT_TEST_INPUT)
- + " --no_cmdline_in_header -bamout %s -o /dev/null -L 1:11740000-11740700 --allowNonUniqueKmersInRef";
+ + " --no_cmdline_in_header -bamout %s -o " + outputVCF + " -L 1:11740000-11740700 --allowNonUniqueKmersInRef";
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(md5BAMOut));
executeTest("LeftAlignmentBamOutBugFix", spec);
validateForwardedProgramRecords(new ArrayList<>(Arrays.asList(new File(LEFT_ALIGNMENT_BAMOUT_TEST_INPUT))), md5BAMOut);
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerModesIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerModesIntegrationTest.java
index d1bf66c97..3cc756cdb 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerModesIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerModesIntegrationTest.java
@@ -84,9 +84,11 @@ public class HaplotypeCallerModesIntegrationTest extends WalkerTest {
}
public void HCTestBamWriter(final HaplotypeBAMWriter.Type type, final String md5) {
+ final String outputVCF = createTempFile("temp", ".vcf").getAbsolutePath();
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
- "-T HaplotypeCaller -pairHMMSub " + HMM_SUB_IMPLEMENTATION + " " + ALWAYS_LOAD_VECTOR_HMM + " -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "PCRFree.2x250.Illumina.20_10_11.bam -o /dev/null " +
- "-bamout %s -L 20:10,000,000-10,010,000 -bamWriterType " + type, 1,
+ "-T HaplotypeCaller -pairHMMSub " + HMM_SUB_IMPLEMENTATION + " " + ALWAYS_LOAD_VECTOR_HMM + " -R " + b37KGReference +
+ " --no_cmdline_in_header -I " + privateTestDir + "PCRFree.2x250.Illumina.20_10_11.bam -o " + outputVCF +
+ " -bamout %s -L 20:10,000,000-10,010,000 -bamWriterType " + type, 1,
Arrays.asList(md5));
executeTest("HC writing bams with mode " + type, spec);
}
diff --git a/public/gatk-root/pom.xml b/public/gatk-root/pom.xml
index e6427dae5..96481ce57 100644
--- a/public/gatk-root/pom.xml
+++ b/public/gatk-root/pom.xml
@@ -44,8 +44,8 @@
org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter
- 2.9.1
- 2.9.0
+ 2.10.0
+ 2.9.2
From e8b42c4dfd7b2ebbdceafd28b0e5af420c77219e Mon Sep 17 00:00:00 2001
From: Ernesto Brau
Date: Thu, 6 Apr 2017 10:42:56 -0400
Subject: [PATCH 43/58] Use GKL's implementation of Vector Logless Pair HMM
Fixed MuTect2 tests by removing old -alwaysloadVectorHmm flag; upgraded to GKL 0.5.2; FASTEST_AVAILABLE pairhmm option; improved documenting comments in code
Made FASTEST_AVAIALABLE the default for the pairHMM cmdline argument
Make LOGLESS_CACHING the default pairHMM unified cmdline argument, fix error message if choose an unrecognized pairHMM
Updae MD5s
---
.../gatk/tools/walkers/cancer/m2/MuTect2.java | 4 +-
.../genotyper/UnifiedGenotypingEngine.java | 2 +-
.../haplotypecaller/HaplotypeCaller.java | 4 +-
.../LikelihoodEngineArgumentCollection.java | 29 +-
.../PairHMMLikelihoodCalculationEngine.java | 39 +-
.../PairHMMNativeArgumentCollection.java | 79 +++
.../utils/pairhmm/DebugJNILoglessPairHMM.java | 5 +-
.../utils/pairhmm/VectorLoglessPairHMM.java | 273 ++-------
.../cancer/m2/MuTect2IntegrationTest.java | 4 +-
...LikelihoodCalculationEnginesBenchmark.java | 2 +-
...lexAndSymbolicVariantsIntegrationTest.java | 17 +-
.../HaplotypeCallerGVCFIntegrationTest.java | 75 ++-
.../HaplotypeCallerIntegrationTest.java | 61 +-
.../HaplotypeCallerModesIntegrationTest.java | 4 +-
...aplotypeCallerParallelIntegrationTest.java | 4 +-
...MMLikelihoodCalculationEngineUnitTest.java | 2 +-
...ngLikelihoodCalculationEngineUnitTest.java | 2 +-
public/VectorPairHMM/README.md | 72 ---
public/VectorPairHMM/pom.xml | 122 ----
public/VectorPairHMM/src/main/c++/.gitignore | 16 -
.../src/main/c++/LoadTimeInitializer.cc | 205 -------
.../src/main/c++/LoadTimeInitializer.h | 86 ---
public/VectorPairHMM/src/main/c++/Makefile | 126 ----
public/VectorPairHMM/src/main/c++/Sandbox.cc | 106 ----
public/VectorPairHMM/src/main/c++/Sandbox.h | 96 ---
.../VectorPairHMM/src/main/c++/Sandbox.java | 306 ----------
.../c++/Sandbox_JNIHaplotypeDataHolderClass.h | 13 -
.../main/c++/Sandbox_JNIReadDataHolderClass.h | 13 -
.../main/c++/avx_function_instantiations.cc | 45 --
public/VectorPairHMM/src/main/c++/baseline.cc | 157 -----
.../src/main/c++/common_data_structure.h | 215 -------
.../src/main/c++/define-double.h | 205 -------
.../VectorPairHMM/src/main/c++/define-float.h | 206 -------
.../src/main/c++/define-sse-double.h | 173 ------
.../src/main/c++/define-sse-float.h | 173 ------
public/VectorPairHMM/src/main/c++/headers.h | 71 ---
.../VectorPairHMM/src/main/c++/jni_common.h | 60 --
public/VectorPairHMM/src/main/c++/jnidebug.h | 191 ------
...tk_utils_pairhmm_DebugJNILoglessPairHMM.cc | 175 ------
...atk_utils_pairhmm_DebugJNILoglessPairHMM.h | 86 ---
...gatk_utils_pairhmm_VectorLoglessPairHMM.cc | 416 -------------
..._gatk_utils_pairhmm_VectorLoglessPairHMM.h | 96 ---
.../src/main/c++/pairhmm-1-base.cc | 65 --
.../src/main/c++/pairhmm-template-kernel.cc | 380 ------------
.../src/main/c++/pairhmm-template-main.cc | 113 ----
public/VectorPairHMM/src/main/c++/run.sh | 32 -
.../src/main/c++/shift_template.c | 113 ----
.../main/c++/sse_function_instantiations.cc | 44 --
public/VectorPairHMM/src/main/c++/template.h | 119 ----
public/VectorPairHMM/src/main/c++/utils.cc | 567 ------------------
public/VectorPairHMM/src/main/c++/utils.h | 85 ---
public/gatk-root/pom.xml | 2 +-
public/gatk-tools-public/pom.xml | 5 +
.../gatk/utils/pairhmm/PairHMM.java | 31 +-
.../utils/pairhmm/libVectorLoglessPairHMM.so | Bin 479437 -> 0 bytes
55 files changed, 262 insertions(+), 5330 deletions(-)
create mode 100644 protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/PairHMMNativeArgumentCollection.java
delete mode 100644 public/VectorPairHMM/README.md
delete mode 100644 public/VectorPairHMM/pom.xml
delete mode 100644 public/VectorPairHMM/src/main/c++/.gitignore
delete mode 100644 public/VectorPairHMM/src/main/c++/LoadTimeInitializer.cc
delete mode 100644 public/VectorPairHMM/src/main/c++/LoadTimeInitializer.h
delete mode 100644 public/VectorPairHMM/src/main/c++/Makefile
delete mode 100644 public/VectorPairHMM/src/main/c++/Sandbox.cc
delete mode 100644 public/VectorPairHMM/src/main/c++/Sandbox.h
delete mode 100644 public/VectorPairHMM/src/main/c++/Sandbox.java
delete mode 100644 public/VectorPairHMM/src/main/c++/Sandbox_JNIHaplotypeDataHolderClass.h
delete mode 100644 public/VectorPairHMM/src/main/c++/Sandbox_JNIReadDataHolderClass.h
delete mode 100644 public/VectorPairHMM/src/main/c++/avx_function_instantiations.cc
delete mode 100644 public/VectorPairHMM/src/main/c++/baseline.cc
delete mode 100644 public/VectorPairHMM/src/main/c++/common_data_structure.h
delete mode 100644 public/VectorPairHMM/src/main/c++/define-double.h
delete mode 100644 public/VectorPairHMM/src/main/c++/define-float.h
delete mode 100644 public/VectorPairHMM/src/main/c++/define-sse-double.h
delete mode 100644 public/VectorPairHMM/src/main/c++/define-sse-float.h
delete mode 100644 public/VectorPairHMM/src/main/c++/headers.h
delete mode 100644 public/VectorPairHMM/src/main/c++/jni_common.h
delete mode 100644 public/VectorPairHMM/src/main/c++/jnidebug.h
delete mode 100644 public/VectorPairHMM/src/main/c++/org_broadinstitute_gatk_utils_pairhmm_DebugJNILoglessPairHMM.cc
delete mode 100644 public/VectorPairHMM/src/main/c++/org_broadinstitute_gatk_utils_pairhmm_DebugJNILoglessPairHMM.h
delete mode 100644 public/VectorPairHMM/src/main/c++/org_broadinstitute_gatk_utils_pairhmm_VectorLoglessPairHMM.cc
delete mode 100644 public/VectorPairHMM/src/main/c++/org_broadinstitute_gatk_utils_pairhmm_VectorLoglessPairHMM.h
delete mode 100644 public/VectorPairHMM/src/main/c++/pairhmm-1-base.cc
delete mode 100644 public/VectorPairHMM/src/main/c++/pairhmm-template-kernel.cc
delete mode 100644 public/VectorPairHMM/src/main/c++/pairhmm-template-main.cc
delete mode 100755 public/VectorPairHMM/src/main/c++/run.sh
delete mode 100644 public/VectorPairHMM/src/main/c++/shift_template.c
delete mode 100644 public/VectorPairHMM/src/main/c++/sse_function_instantiations.cc
delete mode 100644 public/VectorPairHMM/src/main/c++/template.h
delete mode 100644 public/VectorPairHMM/src/main/c++/utils.cc
delete mode 100644 public/VectorPairHMM/src/main/c++/utils.h
delete mode 100644 public/gatk-utils/src/main/resources/org/broadinstitute/gatk/utils/pairhmm/libVectorLoglessPairHMM.so
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java
index 2f5ad5a38..f329c4f6c 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java
@@ -101,6 +101,8 @@ import org.broadinstitute.gatk.utils.sam.*;
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
+import org.broadinstitute.gatk.nativebindings.pairhmm.PairHMMNativeArguments;
+
import java.io.FileNotFoundException;
import java.util.*;
@@ -874,7 +876,7 @@ public class MuTect2 extends ActiveRegionWalker, Integer> i
* @return never {@code null}.
*/
private ReadLikelihoodCalculationEngine createLikelihoodCalculationEngine() {
- return new PairHMMLikelihoodCalculationEngine( (byte)LEAC.gcpHMM, LEAC.pairHMM, LEAC.pairHMMSub, LEAC.alwaysLoadVectorLoglessPairHMMLib, log10GlobalReadMismappingRate, LEAC.noFpga, pcrErrorModel );
+ return new PairHMMLikelihoodCalculationEngine( (byte)LEAC.gcpHMM, LEAC.pairHMM, LEAC.pairHMMNativeArgs.getPairHMMArgs(), log10GlobalReadMismappingRate, LEAC.noFpga, pcrErrorModel );
}
/**
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotypingEngine.java
index d1774160e..ce56ed04e 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotypingEngine.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotypingEngine.java
@@ -248,7 +248,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine, In
private ReadLikelihoodCalculationEngine createLikelihoodCalculationEngine() {
switch (likelihoodEngineImplementation) {
case PairHMM:
- return new PairHMMLikelihoodCalculationEngine( (byte) LEAC.gcpHMM, LEAC.pairHMM, LEAC.pairHMMSub, LEAC.alwaysLoadVectorLoglessPairHMMLib, log10GlobalReadMismappingRate, LEAC.noFpga, pcrErrorModel );
+ return new PairHMMLikelihoodCalculationEngine( (byte) LEAC.gcpHMM, LEAC.pairHMM, LEAC.pairHMMNativeArgs.getPairHMMArgs(), log10GlobalReadMismappingRate, LEAC.noFpga, pcrErrorModel );
case GraphBased:
return new GraphBasedLikelihoodCalculationEngine( (byte) LEAC.gcpHMM,log10GlobalReadMismappingRate, heterogeneousKmerSizeResolution, HCAC.DEBUG, RTAC.debugGraphTransformations);
case Random:
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/LikelihoodEngineArgumentCollection.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/LikelihoodEngineArgumentCollection.java
index d65daad3e..a82cdcdf3 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/LikelihoodEngineArgumentCollection.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/LikelihoodEngineArgumentCollection.java
@@ -53,6 +53,7 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import org.broadinstitute.gatk.utils.commandline.Advanced;
import org.broadinstitute.gatk.utils.commandline.Argument;
+import org.broadinstitute.gatk.utils.commandline.ArgumentCollection;
import org.broadinstitute.gatk.utils.commandline.Hidden;
import org.broadinstitute.gatk.utils.pairhmm.PairHMM;
@@ -71,29 +72,7 @@ public class LikelihoodEngineArgumentCollection {
*/
@Hidden
@Argument(fullName = "pair_hmm_implementation", shortName = "pairHMM", doc = "The PairHMM implementation to use for genotype likelihood calculations", required = false)
- public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.VECTOR_LOGLESS_CACHING;
-
- /**
- * This argument is intended for use in the test suite only. It gives developers the ability to select of the
- * hardware dependent vectorized implementation of the vectorized PairHMM library (pairHMM=VECTOR_LOGLESS_CACHING).
- * For normal usage, you should rely on the architecture auto-detection.
- */
- @Hidden
- @Advanced
- @Argument(fullName = "pair_hmm_sub_implementation", shortName = "pairHMMSub", doc = "The PairHMM machine-dependent sub-implementation to use for genotype likelihood calculations", required = false)
- public PairHMM.HMM_SUB_IMPLEMENTATION pairHMMSub = PairHMM.HMM_SUB_IMPLEMENTATION.ENABLE_ALL;
-
- /**
- * This argument is intended for use in the test suite only. It gives developers the ability to load different
- * hardware dependent sub-implementations (-pairHMMSub) of the vectorized PairHMM library (-pairHMM=VECTOR_LOGLESS_CACHING)
- * for each test. Without this option, the library is only loaded once (for the first test executed in the suite) even if
- * subsequent tests specify a different implementation.
- * Each test will output the corresponding library loading messages.
- */
- @Hidden
- @Advanced
- @Argument(fullName = "always_load_vector_logless_PairHMM_lib", shortName = "alwaysloadVectorHMM", doc = "Load the vector logless PairHMM library each time a GATK run is initiated in the test suite", required = false)
- public boolean alwaysLoadVectorLoglessPairHMMLib = false;
+ public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.FASTEST_AVAILABLE;
/**
* The phredScaledGlobalReadMismappingRate reflects the average global mismapping rate of all reads, regardless of their
@@ -115,6 +94,6 @@ public class LikelihoodEngineArgumentCollection {
@Argument(fullName="noFpga", shortName="noFpga", doc="Disable the use of the FPGA HMM implementation", required = false)
public boolean noFpga = false;
-
-
+ @ArgumentCollection
+ public PairHMMNativeArgumentCollection pairHMMNativeArgs = new PairHMMNativeArgumentCollection();
}
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java
index 9ae44688a..5b22138fb 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java
@@ -67,6 +67,7 @@ import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.pairhmm.*;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.broadinstitute.gatk.utils.variant.TandemRepeatFinder;
+import org.broadinstitute.gatk.nativebindings.pairhmm.PairHMMNativeArguments;
import java.io.File;
import java.io.FileNotFoundException;
@@ -82,8 +83,7 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
private final double log10globalReadMismappingRate;
private final PairHMM.HMM_IMPLEMENTATION hmmType;
- private final PairHMM.HMM_SUB_IMPLEMENTATION hmmSubType;
- private final boolean alwaysLoadVectorLoglessPairHMMLib;
+ private final PairHMMNativeArguments pairHmmNativeArgs;
private final boolean noFpga;
private final ThreadLocal pairHMMThreadLocal = new ThreadLocal() {
@@ -98,24 +98,34 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
else
return new CnyPairHMM();
case VECTOR_LOGLESS_CACHING:
- try
- {
- return new VectorLoglessPairHMM(hmmSubType, alwaysLoadVectorLoglessPairHMMLib);
+ return new VectorLoglessPairHMM(VectorLoglessPairHMM.Implementation.AVX, pairHmmNativeArgs);
+ case VECTOR_LOGLESS_CACHING_OMP:
+ return new VectorLoglessPairHMM(VectorLoglessPairHMM.Implementation.OMP, pairHmmNativeArgs);
+ case FASTEST_AVAILABLE:
+ try {
+ return new VectorLoglessPairHMM(VectorLoglessPairHMM.Implementation.OMP, pairHmmNativeArgs);
}
- catch(UnsatisfiedLinkError ule)
- {
- logger.warn("Failed to load native library for VectorLoglessPairHMM - using Java implementation of LOGLESS_CACHING");
+ catch(UserException.HardwareFeatureException hfe) {
+ logger.warn("OpenMP multi-threaded AVX-accelerated native PairHMM implementation is not supported");
+ }
+ try {
+ return new VectorLoglessPairHMM(VectorLoglessPairHMM.Implementation.AVX, pairHmmNativeArgs);
+ }
+ catch(UserException.HardwareFeatureException hfe) {
+ logger.warn("AVX-accelerated native PairHMM implementation is not supported. Falling back to slower LOGLESS_CACHING implementation");
return new LoglessPairHMM();
}
case DEBUG_VECTOR_LOGLESS_CACHING:
- return new DebugJNILoglessPairHMM(PairHMM.HMM_IMPLEMENTATION.VECTOR_LOGLESS_CACHING, hmmSubType, alwaysLoadVectorLoglessPairHMMLib);
+ return new DebugJNILoglessPairHMM(PairHMM.HMM_IMPLEMENTATION.VECTOR_LOGLESS_CACHING, pairHmmNativeArgs);
case ARRAY_LOGLESS:
if (noFpga || !CnyPairHMM.isAvailable())
return new ArrayLoglessPairHMM();
else
return new CnyPairHMM();
default:
- throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the HaplotypeCaller. Acceptable options are ORIGINAL, EXACT, CACHING, LOGLESS_CACHING, and ARRAY_LOGLESS.");
+ throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or " +
+ "incompatible with the HaplotypeCaller. Acceptable options are ORIGINAL, EXACT, CACHING, LOGLESS_CACHING, " +
+ "VECTOR_LOGLESS_CACHING, VECTOR_LOGLESS_CACHING_OMP, and ARRAY_LOGLESS.");
}
}
};
@@ -160,8 +170,7 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
*
* @param constantGCP the gap continuation penalty to use with the PairHMM
* @param hmmType the type of the HMM to use
- * @param hmmSubType the type of the machine dependent sub-implementation of HMM to use
- * @param alwaysLoadVectorLoglessPairHMMLib always load the vector logless HMM library instead of once
+ * @param pairHmmNativeArgs arguments to the vector logless PairHMM implementation
* @param log10globalReadMismappingRate the global mismapping probability, in log10(prob) units. A value of
* -3 means that the chance that a read doesn't actually belong at this
* location in the genome is 1 in 1000. The effect of this parameter is
@@ -173,11 +182,9 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
* @param noFpga disable FPGA acceleration
* @param pcrErrorModel model to correct for PCR indel artifacts
*/
- public PairHMMLikelihoodCalculationEngine( final byte constantGCP, final PairHMM.HMM_IMPLEMENTATION hmmType, final PairHMM.HMM_SUB_IMPLEMENTATION hmmSubType,
- final boolean alwaysLoadVectorLoglessPairHMMLib, final double log10globalReadMismappingRate, final boolean noFpga, final PCR_ERROR_MODEL pcrErrorModel ) {
+ public PairHMMLikelihoodCalculationEngine( final byte constantGCP, final PairHMM.HMM_IMPLEMENTATION hmmType, final PairHMMNativeArguments pairHmmNativeArgs, final double log10globalReadMismappingRate, final boolean noFpga, final PCR_ERROR_MODEL pcrErrorModel ) {
this.hmmType = hmmType;
- this.hmmSubType = hmmSubType;
- this.alwaysLoadVectorLoglessPairHMMLib = alwaysLoadVectorLoglessPairHMMLib;
+ this.pairHmmNativeArgs = pairHmmNativeArgs;
this.constantGCP = constantGCP;
this.log10globalReadMismappingRate = log10globalReadMismappingRate;
this.noFpga = noFpga;
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/PairHMMNativeArgumentCollection.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/PairHMMNativeArgumentCollection.java
new file mode 100644
index 000000000..baf246bd3
--- /dev/null
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/PairHMMNativeArgumentCollection.java
@@ -0,0 +1,79 @@
+/*
+* By downloading the PROGRAM you agree to the following terms of use:
+*
+* BROAD INSTITUTE
+* SOFTWARE LICENSE AGREEMENT
+* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
+*
+* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE").
+*
+* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
+* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
+* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
+*
+* 1. DEFINITIONS
+* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
+*
+* 2. LICENSE
+* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
+* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
+* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
+* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
+*
+* 3. PHONE-HOME FEATURE
+* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
+*
+* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
+* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
+* Copyright 2012-2016 Broad Institute, Inc.
+* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
+* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
+*
+* 5. INDEMNIFICATION
+* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
+*
+* 6. NO REPRESENTATIONS OR WARRANTIES
+* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
+*
+* 7. ASSIGNMENT
+* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
+*
+* 8. MISCELLANEOUS
+* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
+* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
+* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
+* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
+* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
+* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
+* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
+*/
+
+package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
+
+import org.broadinstitute.gatk.nativebindings.pairhmm.PairHMMNativeArguments;
+import org.broadinstitute.gatk.utils.commandline.Argument;
+import org.broadinstitute.gatk.utils.commandline.Hidden;
+
+/**
+ * Arguments for native PairHMM implementations
+ */
+public class PairHMMNativeArgumentCollection {
+
+ @Hidden
+ @Argument(fullName = "nativePairHmmThreads", shortName = "threads", doc="How many threads should a native pairHMM implementation use", required = false)
+ private int pairHmmNativeThreads = 1;
+
+ @Hidden
+ @Argument(fullName = "useDoublePrecision", shortName = "useDoublePrecision", doc="use double precision in the native pairHmm. " +
+ "This is slower but matches the java implementation better", required = false)
+ private boolean useDoublePrecision = false;
+
+ public PairHMMNativeArguments getPairHMMArgs(){
+ final PairHMMNativeArguments args = new PairHMMNativeArguments();
+ args.maxNumberOfThreads = pairHmmNativeThreads;
+ args.useDoublePrecision = useDoublePrecision;
+ return args;
+ }
+
+}
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/pairhmm/DebugJNILoglessPairHMM.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/pairhmm/DebugJNILoglessPairHMM.java
index 645039614..595b28212 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/pairhmm/DebugJNILoglessPairHMM.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/pairhmm/DebugJNILoglessPairHMM.java
@@ -59,6 +59,7 @@ import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
+import org.broadinstitute.gatk.nativebindings.pairhmm.PairHMMNativeArguments;
import java.io.BufferedWriter;
import java.io.File;
@@ -92,11 +93,11 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
protected HashMap filenameToWriter = new HashMap();
private JNILoglessPairHMM jniPairHMM = null;
- public DebugJNILoglessPairHMM(final PairHMM.HMM_IMPLEMENTATION hmmType, PairHMM.HMM_SUB_IMPLEMENTATION pairHMMSub, final boolean alwaysLoadVectorLoglessPairHMMLib) {
+ public DebugJNILoglessPairHMM(final PairHMM.HMM_IMPLEMENTATION hmmType, PairHMMNativeArguments pairHmmNativeArgs) {
super();
switch(hmmType) {
case VECTOR_LOGLESS_CACHING:
- jniPairHMM = new VectorLoglessPairHMM(pairHMMSub, alwaysLoadVectorLoglessPairHMMLib);
+ jniPairHMM = new VectorLoglessPairHMM(VectorLoglessPairHMM.Implementation.AVX, pairHmmNativeArgs);
break;
default:
throw new UserException.BadArgumentValue("pairHMM","Specified JNIPairHMM implementation is unrecognized or incompatible with the HaplotypeCaller. Acceptable options are VECTOR_LOGLESS_CACHING");
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/pairhmm/VectorLoglessPairHMM.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/pairhmm/VectorLoglessPairHMM.java
index f535cf2ac..d096bbf33 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/pairhmm/VectorLoglessPairHMM.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/pairhmm/VectorLoglessPairHMM.java
@@ -51,7 +51,13 @@
package org.broadinstitute.gatk.utils.pairhmm;
+import com.intel.gkl.pairhmm.IntelPairHmm;
+import com.intel.gkl.pairhmm.IntelPairHmmOMP;
import org.apache.log4j.Logger;
+import org.broadinstitute.gatk.nativebindings.pairhmm.HaplotypeDataHolder;
+import org.broadinstitute.gatk.nativebindings.pairhmm.PairHMMNativeArguments;
+import org.broadinstitute.gatk.nativebindings.pairhmm.PairHMMNativeBinding;
+import org.broadinstitute.gatk.nativebindings.pairhmm.ReadDataHolder;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
@@ -71,100 +77,61 @@ import java.util.Map;
*/
public class VectorLoglessPairHMM extends JNILoglessPairHMM {
+ /**
+ * Implementation of PairHMM to use
+ */
+ public enum Implementation
+ {
+ /* Use AVX acceleration */
+ AVX,
+ /* Use AVX acceleration with mult-threading via OpenMP */
+ OMP
+ }
+
protected final static Logger logger = Logger.getLogger(VectorLoglessPairHMM.class);
-
- //Used to copy references to byteArrays to JNI from reads
- protected class JNIReadDataHolderClass {
- public byte[] readBases = null;
- public byte[] readQuals = null;
- public byte[] insertionGOP = null;
- public byte[] deletionGOP = null;
- public byte[] overallGCP = null;
- }
-
- //Used to copy references to byteArrays to JNI from haplotypes
- protected class JNIHaplotypeDataHolderClass {
- public byte[] haplotypeBases = null;
- }
-
- /**
- * Return 64-bit mask representing machine capabilities
- * Bit 0 is LSB, bit 63 MSB
- * Bit 0 represents sse4.1 availability
- * Bit 1 represents sse4.2 availability
- * Bit 2 represents AVX availability
- */
- public native long jniGetMachineType();
-
- /**
- * Function to initialize the fields of JNIReadDataHolderClass and JNIHaplotypeDataHolderClass from JVM.
- * C++ codegets FieldIDs for these classes once and re-uses these IDs for the remainder of the program. Field IDs do not
- * change per JVM session
- *
- * @param readDataHolderClass class type of JNIReadDataHolderClass
- * @param haplotypeDataHolderClass class type of JNIHaplotypeDataHolderClass
- * @param mask a 64 bit integer identical to the one received from jniGetMachineType(). Users can disable usage of some hardware features by zeroing bits in the mask
- */
- private native void jniInitializeClassFieldsAndMachineMask(Class> readDataHolderClass, Class> haplotypeDataHolderClass, long mask);
-
- private static Boolean isVectorLoglessPairHMMLibraryLoaded = false;
+ private final PairHMMNativeBinding pairHmm;
//The constructor is called only once inside PairHMMLikelihoodCalculationEngine
- public VectorLoglessPairHMM(final PairHMM.HMM_SUB_IMPLEMENTATION pairHMMSub, final boolean alwaysLoadVectorLoglessPairHMMLib) throws UserException.HardwareFeatureException {
- super();
+ public VectorLoglessPairHMM(Implementation implementation, PairHMMNativeArguments args) throws UserException.HardwareFeatureException {
+ final boolean isSupported;
- synchronized (isVectorLoglessPairHMMLibraryLoaded) {
- // Get the mask for the requested hardware sub-implementation
- // If a specifically requested hardware feature can not be supported, throw an exception
- long mask = pairHMMSub.getMask();
- throwIfHardwareFeatureNotSupported(mask, pairHMMSub);
-
- // Load the library and initialize the FieldIDs
- // Load if not loaded or if the the always load flag is true
- if (!isVectorLoglessPairHMMLibraryLoaded || alwaysLoadVectorLoglessPairHMMLib) {
- try
- {
- //Try loading from Java's library path first
- //Useful if someone builds his/her own library and wants to override the bundled
- //implementation without modifying the Java code
- System.loadLibrary("VectorLoglessPairHMM");
- logger.info("libVectorLoglessPairHMM found in JVM library path");
- } catch (UnsatisfiedLinkError ule) {
- //Could not load from Java's library path - try unpacking from jar
- try
- {
- logger.debug("libVectorLoglessPairHMM not found in JVM library path - trying to unpack from GATK jar file");
- loadLibraryFromJar("/org/broadinstitute/gatk/utils/pairhmm/libVectorLoglessPairHMM.so");
- logger.info("libVectorLoglessPairHMM unpacked successfully from GATK jar file");
- } catch (IOException ioe) {
- //Throw the UnsatisfiedLinkError to make it clear to the user what failed
- throw ule;
- }
+ switch (implementation) {
+ case AVX:
+ pairHmm = new IntelPairHmm();
+ isSupported = pairHmm.load(null);
+ if (!isSupported) {
+ throw new UserException.HardwareFeatureException("Machine does not support AVX PairHMM.");
}
- logger.info("Using vectorized implementation of PairHMM");
- isVectorLoglessPairHMMLibraryLoaded = true;
+ logger.info("Using AVX-accelerated native PairHMM implementation");
+ break;
- //need to do this only once
- jniInitializeClassFieldsAndMachineMask(JNIReadDataHolderClass.class, JNIHaplotypeDataHolderClass.class, mask);
- }
+ case OMP:
+ pairHmm = new IntelPairHmmOMP();
+ isSupported = pairHmm.load(null);
+ if (!isSupported) {
+ throw new UserException.HardwareFeatureException("Machine does not support OpenMP AVX PairHMM.");
+ }
+ logger.info("Using OpenMP multi-threaded AVX-accelerated native PairHMM implementation");
+ break;
+
+ default:
+ throw new UserException.HardwareFeatureException("Unknown PairHMM implementation.");
}
- }
- private native void jniInitializeHaplotypes(final int numHaplotypes, JNIHaplotypeDataHolderClass[] haplotypeDataArray);
+ // instantiate and initialize IntelPairHmm
+ pairHmm.initialize(args);
+ }
//Hold the mapping between haplotype and index in the list of Haplotypes passed to initialize
//Use this mapping in computeLikelihoods to find the likelihood value corresponding to a given Haplotype
private HashMap haplotypeToHaplotypeListIdxMap = new HashMap<>();
- private JNIHaplotypeDataHolderClass[] mHaplotypeDataArray;
+ private HaplotypeDataHolder[] mHaplotypeDataArray;
@Override
public HashMap getHaplotypeToHaplotypeListIdxMap() {
return haplotypeToHaplotypeListIdxMap;
}
- //Used to transfer data to JNI
- //Since the haplotypes are the same for all calls to computeLikelihoods within a region, transfer the haplotypes only once to the JNI per region
-
/**
* {@inheritDoc}
*/
@@ -172,54 +139,35 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
public void initialize(final List haplotypes, final Map> perSampleReadList,
final int readMaxLength, final int haplotypeMaxLength) {
int numHaplotypes = haplotypes.size();
- mHaplotypeDataArray = new JNIHaplotypeDataHolderClass[numHaplotypes];
+ mHaplotypeDataArray = new HaplotypeDataHolder[numHaplotypes];
int idx = 0;
haplotypeToHaplotypeListIdxMap.clear();
for (final Haplotype currHaplotype : haplotypes) {
- mHaplotypeDataArray[idx] = new JNIHaplotypeDataHolderClass();
+ mHaplotypeDataArray[idx] = new HaplotypeDataHolder();
mHaplotypeDataArray[idx].haplotypeBases = currHaplotype.getBases();
haplotypeToHaplotypeListIdxMap.put(currHaplotype, idx);
++idx;
}
- jniInitializeHaplotypes(numHaplotypes, mHaplotypeDataArray);
}
- /**
- * Tell JNI to release arrays - really important if native code is directly accessing Java memory, if not
- * accessing Java memory directly, still important to release memory from C++
- */
- private native void jniFinalizeRegion();
-
- /**
- * {@inheritDoc}
- */
- @Override
- public void finalizeRegion()
- {
- jniFinalizeRegion();
- }
-
- /**
- * Real compute kernel
- */
- private native void jniComputeLikelihoods(int numReads, int numHaplotypes, JNIReadDataHolderClass[] readDataArray,
- JNIHaplotypeDataHolderClass[] haplotypeDataArray, double[] likelihoodArray, int maxNumThreadsToUse);
-
/**
* {@inheritDoc}
*/
@Override
public void computeLikelihoods(final ReadLikelihoods.Matrix likelihoods, final List processedReads, final Map gcp) {
- if (processedReads.isEmpty())
+ if (processedReads.isEmpty()) {
return;
- if (doProfiling)
+ }
+ if (doProfiling) {
startTime = System.nanoTime();
+ }
int readListSize = processedReads.size();
int numHaplotypes = likelihoods.alleleCount();
- JNIReadDataHolderClass[] readDataArray = new JNIReadDataHolderClass[readListSize];
+
+ ReadDataHolder[] readDataArray = new ReadDataHolder[readListSize];
int idx = 0;
for (GATKSAMRecord read : processedReads) {
- readDataArray[idx] = new JNIReadDataHolderClass();
+ readDataArray[idx] = new ReadDataHolder();
readDataArray[idx].readBases = read.getReadBases();
readDataArray[idx].readQuals = read.getBaseQualities();
readDataArray[idx].insertionGOP = read.getBaseInsertionQualities();
@@ -229,12 +177,13 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
}
mLikelihoodArray = new double[readListSize * numHaplotypes]; //to store results
- if (doProfiling)
+ if (doProfiling) {
threadLocalSetupTimeDiff = (System.nanoTime() - startTime);
+ }
//for(reads)
// for(haplotypes)
// compute_full_prob()
- jniComputeLikelihoods(readListSize, numHaplotypes, readDataArray, mHaplotypeDataArray, mLikelihoodArray, 12);
+ pairHmm.computeLikelihoods(readDataArray, mHaplotypeDataArray, mLikelihoodArray);
int readIdx = 0;
for (int r = 0; r < readListSize; r++) {
@@ -249,6 +198,7 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
}
readIdx += numHaplotypes;
}
+
if (doProfiling) {
threadLocalPairHMMComputeTimeDiff = (System.nanoTime() - startTime);
//synchronized(doProfiling)
@@ -259,120 +209,11 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
}
}
- /**
- * Print final profiling information from native code
- */
- public native void jniClose();
-
@Override
public void close() {
+ pairHmm.done();
if (doProfiling)
logger.info("Time spent in setup for JNI call : " + (pairHMMSetupTime * 1e-9));
super.close();
- jniClose();
- }
-
- //Copied from http://frommyplayground.com/how-to-load-native-jni-library-from-jar
-
- /**
- * Loads library from current JAR archive
- *
- * The file from JAR is copied into system temporary directory and then loaded. The temporary file is deleted after exiting.
- * Method uses String as filename because the pathname is "abstract", not system-dependent.
- *
- * @param path The filename inside JAR as absolute path (beginning with '/'), e.g. /package/File.ext
- * @throws IOException If temporary file creation or read/write operation fails
- * @throws IllegalArgumentException If source file (param path) does not exist
- * @throws IllegalArgumentException If the path is not absolute or if the filename is shorter than three characters (restriction of {@see File#createTempFile(java.lang.String, java.lang.String)}).
- */
- public static void loadLibraryFromJar(String path) throws IOException {
-
- if (!path.startsWith("/")) {
- throw new IllegalArgumentException("The path to be absolute (start with '/').");
- }
-
- // Obtain filename from path
- String[] parts = path.split("/");
- String filename = (parts.length > 1) ? parts[parts.length - 1] : null;
-
- // Split filename to prexif and suffix (extension)
- String prefix = "";
- String suffix = null;
- if (filename != null) {
- parts = filename.split("\\.", 2);
- prefix = parts[0];
- suffix = (parts.length > 1) ? "." + parts[parts.length - 1] : null; // Thanks, davs! :-)
- }
-
- // Check if the filename is okay
- if (filename == null || prefix.length() < 3) {
- throw new IllegalArgumentException("The filename has to be at least 3 characters long.");
- }
-
- // Prepare temporary file
- File temp = File.createTempFile(prefix, suffix);
- //System.out.println("Temp lib file "+temp.getAbsolutePath());
- temp.deleteOnExit();
-
- if (!temp.exists()) {
- throw new FileNotFoundException("File " + temp.getAbsolutePath() + " does not exist.");
- }
-
- // Prepare buffer for data copying
- byte[] buffer = new byte[1024];
- int readBytes;
-
- // Open and check input stream
- InputStream is = VectorLoglessPairHMM.class.getResourceAsStream(path);
- if (is == null) {
- throw new FileNotFoundException("File " + path + " was not found inside JAR.");
- }
-
- // Open output stream and copy data between source file in JAR and the temporary file
- OutputStream os = new FileOutputStream(temp);
- try {
- while ((readBytes = is.read(buffer)) != -1) {
- os.write(buffer, 0, readBytes);
- }
- } finally {
- // If read/write fails, close streams safely before throwing an exception
- os.close();
- is.close();
- }
-
- // Finally, load the library
- System.load(temp.getAbsolutePath());
- }
-
- /**
- * If the machine does not support the requested hardware feature, throw an exception
- *
- * If requesting a specific hardware feature, check if the machine supports this feature.
- * If it does not, throw an exception.
- *
- * @param mask a 64 bit integer identical to the one received from jniGetMachineType(). Users can disable usage of some hardware features by zeroing some bits in the mask.
- * @param pairHMMSub the PairHMM machine dependent sub-implementation to use for genotype likelihood calculations
- * @throws UserException.HardwareFeatureException if the hardware feature is not supported
- */
- private void throwIfHardwareFeatureNotSupported(long mask, PairHMM.HMM_SUB_IMPLEMENTATION pairHMMSub) throws UserException.HardwareFeatureException
- {
- if ( pairHMMSub.getIsSpecificHardwareRequest() ) {
- if ( !isHardwareFeatureSupported(mask) )
- throw new UserException.HardwareFeatureException("Machine does not support pairHMM hardware dependent sub-type = " + pairHMMSub);
- }
- }
-
- /**
- * Check if the machine supports the requested hardware feature
- *
- * Mask the bits for the hardware feature and check if they are set by the machine
- * If the bits are set, the machine supports this feature
- *
- * @param mask a 64 bit integer identical to the one received from jniGetMachineType(). Users can disable usage of some hardware features by zeroing some bits in the mask.
- * @return true of machine supports the requested hardware feature, false otherwise
- */
- private boolean isHardwareFeatureSupported(long mask)
- {
- return (mask & jniGetMachineType()) != 0x0;
}
}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2IntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2IntegrationTest.java
index feedfeb35..ae5b9d47c 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2IntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2IntegrationTest.java
@@ -71,7 +71,7 @@ public class MuTect2IntegrationTest extends WalkerTest {
final static String DREAM3_FP_INTERVALS_FILE = privateTestDir + "m2_dream3.fp.intervals";
final String commandLine =
- "-T MuTect2 --no_cmdline_in_header -dt NONE --disableDithering -alwaysloadVectorHMM -pairHMM LOGLESS_CACHING -ip 50 -R %s --dbsnp %s --cosmic %s --normal_panel %s -I:tumor %s -I:normal %s -L %s";
+ "-T MuTect2 --no_cmdline_in_header -dt NONE --disableDithering -pairHMM LOGLESS_CACHING -ip 50 -R %s --dbsnp %s --cosmic %s --normal_panel %s -I:tumor %s -I:normal %s -L %s";
private void M2Test(String tumorBam, String normalBam, String intervals, String args, String md5) {
final String base = String.format(
@@ -90,7 +90,7 @@ public class MuTect2IntegrationTest extends WalkerTest {
private void m2TumorOnlyTest(String tumorBam, String intervals, String args, String md5) {
final String base = String.format(
- "-T MuTect2 --no_cmdline_in_header -dt NONE --disableDithering -alwaysloadVectorHMM -pairHMM LOGLESS_CACHING -ip 50 -R %s --dbsnp %s --cosmic %s --normal_panel %s -I:tumor %s -L %s",
+ "-T MuTect2 --no_cmdline_in_header -dt NONE --disableDithering -pairHMM LOGLESS_CACHING -ip 50 -R %s --dbsnp %s --cosmic %s --normal_panel %s -I:tumor %s -L %s",
hg19Reference, DBSNP, COSMIC, PON, tumorBam, intervals) +
" -o %s ";
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HCLikelihoodCalculationEnginesBenchmark.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HCLikelihoodCalculationEnginesBenchmark.java
index 7877ee59a..abc1136e7 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HCLikelihoodCalculationEnginesBenchmark.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HCLikelihoodCalculationEnginesBenchmark.java
@@ -126,7 +126,7 @@ public class HCLikelihoodCalculationEnginesBenchmark extends SimpleBenchmark {
public void timeLoglessPairHMM(final int reps) {
for (int i = 0; i < reps; i++) {
final PairHMMLikelihoodCalculationEngine engine = new PairHMMLikelihoodCalculationEngine((byte) 10,
- PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, PairHMM.HMM_SUB_IMPLEMENTATION.UNVECTORIZED, true, -3, true, PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.NONE);
+ PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, null, -3, true, PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.NONE);
engine.computeReadLikelihoods(dataSet.assemblyResultSet(), SampleListUtils.singletonList("anonymous"), Collections.singletonMap("anonymous", dataSet.readList()));
}
}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java
index e7c2c4030..72b7ba61b 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java
@@ -61,22 +61,19 @@ import static org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCal
public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends WalkerTest {
- final static String HMM_SUB_IMPLEMENTATION = "UNVECTORIZED";
- final static String ALWAYS_LOAD_VECTOR_HMM = "-alwaysloadVectorHMM";
-
private void HCTestComplexVariants(String bam, String args, String md5) {
- final String base = String.format("-T HaplotypeCaller --contamination_fraction_to_filter 0.05 --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, bam) + " -L 20:10028767-10028967 -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 4";
+ final String base = String.format("-T HaplotypeCaller --contamination_fraction_to_filter 0.05 --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, bam) + " -L 20:10028767-10028967 -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 4";
final WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testHaplotypeCallerComplexVariants: args=" + args, spec);
}
@Test
public void testHaplotypeCallerMultiSampleComplex1() {
- HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "590428bdfe466159cb8e1637aaa4f47c");
+ HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "93f89c04b4c74463f75da798cdcf5064");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
- final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, bam) + " -L 20:5947969-5948369 -L 20:61091236-61091636 --no_cmdline_in_header -o %s -minPruning 1";
+ final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, bam) + " -L 20:5947969-5948369 -L 20:61091236-61091636 --no_cmdline_in_header -o %s -minPruning 1";
final WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testHaplotypeCallerSymbolicVariants: args=" + args, spec);
}
@@ -88,7 +85,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
}
private void HCTestComplexGGA(String bam, String args, String md5) {
- final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, bam) + " --no_cmdline_in_header -o %s -minPruning 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf";
+ final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, bam) + " --no_cmdline_in_header -o %s -minPruning 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testHaplotypeCallerComplexGGA: args=" + args, spec);
}
@@ -102,11 +99,11 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
- "82b53501bc3254def885e09866377e7c");
+ "d50cf0d1efd94227e930565a6d31de91");
}
private void HCTestComplexConsensusMode(String bam, String args, String md5) {
- final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, bam) + " --no_cmdline_in_header -o %s -consensus -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf -alleles " + validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf";
+ final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, bam) + " --no_cmdline_in_header -o %s -consensus -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf -alleles " + validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testHaplotypeCallerComplexConsensusMode: args=" + args, spec);
}
@@ -114,7 +111,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
- "47894766b0ce7d4aecd89e4938ac1c85");
+ "390236e17de3465c8ba778041f1670ad");
}
}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java
index 5e16a2ec0..c9d5fbb18 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java
@@ -74,9 +74,6 @@ import java.util.zip.GZIPInputStream;
public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
- final static String HMM_SUB_IMPLEMENTATION = "UNVECTORIZED";
- final static String ALWAYS_LOAD_VECTOR_HMM = "-alwaysloadVectorHMM";
-
@DataProvider(name = "MyDataProviderHaploid")
public Object[][] makeMyDataProviderHaploid() {
List