*
* VQSR is probably the hardest part of the Best Practices to get right, so be sure to read the
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFs.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFs.java
index fffd52e4f..50b4c98d2 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFs.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFs.java
@@ -165,7 +165,6 @@ public class CombineGVCFs extends RodWalker vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit());
final Set headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
- headerLines.add(new VCFSimpleHeaderLine(GATKVCFConstants.SYMBOLIC_ALLELE_DEFINITION_HEADER_TAG, GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE_NAME, "Represents any possible spanning deletion allele at this location"));
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY)); // needed for gVCFs without DP tags
final Set samples = SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java
index 9e71a0818..b0bbe02ec 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java
@@ -219,7 +219,6 @@ public class GenotypeGVCFs extends RodWalker(vc, isSpanningEvent ? replaceWithNoCallsAndDels(vc) : remapAlleles(vc, refAllele, finalAlleleSet)));
}
// Add and to the end if at all required in in the output.
- if ( sawSpanningDeletion && (sawNonSpanningEvent || !removeNonRefSymbolicAllele) ) finalAlleleSet.add(GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE);
+ if ( sawSpanningDeletion && (sawNonSpanningEvent || !removeNonRefSymbolicAllele) ) finalAlleleSet.add(Allele.SPAN_DEL);
if (!removeNonRefSymbolicAllele) finalAlleleSet.add(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
final List allelesList = new ArrayList<>(finalAlleleSet);
@@ -286,10 +288,15 @@ public class ReferenceConfidenceVariantContextMerger {
for (final Allele a : vc.getAlternateAlleles()) {
if (a.isSymbolic()) {
result.add(a);
- // we always skip when adding to finalAlleles; this is done outside if applies.
- // we also skip <*DEL> if there isn't a real alternate allele.
+ // we always skip when adding to finalAlleles; this is done outside if it applies.
+ // we also skip <*:DEL> if there isn't a real alternate allele.
if ( !a.equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE) && !vc.isSymbolic() )
finalAlleles.add(a);
+ } else if ( a == Allele.SPAN_DEL ) {
+ result.add(a);
+ // we skip * if there isn't a real alternate allele.
+ if ( !vc.isBiallelic() )
+ finalAlleles.add(a);
} else if (a.isCalled()) {
final Allele newAllele;
if (extraBaseCount > 0) {
@@ -328,7 +335,7 @@ public class ReferenceConfidenceVariantContextMerger {
if ( allele.equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE) )
replacement = allele;
else if ( allele.length() < vc.getReference().length() )
- replacement = GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE;
+ replacement = Allele.SPAN_DEL;
else
replacement = Allele.NO_CALL;
@@ -430,13 +437,72 @@ public class ReferenceConfidenceVariantContextMerger {
// create the index mapping, using the allele whenever such a mapping doesn't exist
for ( int i = 1; i < targetAlleles.size(); i++ ) {
- final int indexOfRemappedAllele = remappedAlleles.indexOf(targetAlleles.get(i));
+ final Allele targetAllele = targetAlleles.get(i);
+
+ // if there’s more than 1 DEL allele then we need to use the best one
+ if ( targetAllele == Allele.SPAN_DEL && g.hasPL() ) {
+ final int occurrences = Collections.frequency(remappedAlleles, Allele.SPAN_DEL);
+ if ( occurrences > 1 ) {
+ final int indexOfBestDel = indexOfBestDel(remappedAlleles, g.getPL(), g.getPloidy());
+ indexMapping[i] = ( indexOfBestDel == -1 ? indexOfNonRef : indexOfBestDel );
+ continue;
+ }
+ }
+
+ final int indexOfRemappedAllele = remappedAlleles.indexOf(targetAllele);
indexMapping[i] = indexOfRemappedAllele == -1 ? indexOfNonRef : indexOfRemappedAllele;
}
return indexMapping;
}
+ /**
+ * Returns the index of the best spanning deletion allele based on AD counts
+ *
+ * @param alleles the list of alleles
+ * @param PLs the list of corresponding PL values
+ * @param ploidy the ploidy of the sample
+ * @return the best index or -1 if not found
+ */
+ private static int indexOfBestDel(final List alleles, final int[] PLs, final int ploidy) {
+ int bestIndex = -1;
+ int bestPL = Integer.MAX_VALUE;
+
+ for ( int i = 0; i < alleles.size(); i++ ) {
+ if ( alleles.get(i) == Allele.SPAN_DEL ) {
+ final int homAltIndex = findHomIndex(i, ploidy, alleles.size());
+ final int PL = PLs[homAltIndex];
+ if ( PL < bestPL ) {
+ bestIndex = i;
+ bestPL = PL;
+ }
+ }
+ }
+
+ return bestIndex;
+ }
+
+ /**
+ * Returns the index of the PL that represents the homozygous genotype of the given i'th allele
+ *
+ * @param i the index of the allele with the list of alleles
+ * @param ploidy the ploidy of the sample
+ * @param numAlleles the total number of alleles
+ * @return the hom index
+ */
+ private static int findHomIndex(final int i, final int ploidy, final int numAlleles) {
+ // some quick optimizations for the common case
+ if ( ploidy == 2 )
+ return i + (i * (i + 1) / 2); // this is straight from the VCF spec on PLs
+ if ( ploidy == 1 )
+ return i;
+
+ final GenotypeLikelihoodCalculator calculator = GenotypeLikelihoodCalculators.getInstance(ploidy, numAlleles);
+ final int[] alleleIndexes = new int[ploidy];
+ Arrays.fill(alleleIndexes, i);
+ return calculator.allelesToIndex(alleleIndexes);
+ }
+
/**
* Generates a new AD array by adding zeros for missing alleles given the set of indexes of the Genotype's current
* alleles from the original AD.
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltrationIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltrationIntegrationTest.java
index 0816b00f2..6c139b2e9 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltrationIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltrationIntegrationTest.java
@@ -147,11 +147,19 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
@Test
public void testInvertFilter() {
WalkerTestSpec spec = new WalkerTestSpec(
- baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000 --invert_filter_expression", 1,
+ baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000 --invertFilterExpression", 1,
Arrays.asList("d478fd6bcf0884133fe2a47adf4cd765"));
executeTest("test inversion of selection of filter with separate names #2", spec);
}
+ @Test
+ public void testInvertJexlFilter() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ baseTestString() + " --filterName ABF -filter 'AlleleBalance >= 0.7' --filterName FSF -filter 'FisherStrand != 1.4' --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
+ Arrays.asList("6fa6cd89bfc8b6b4dfc3da25eb36d08b")); // Differs from testInvertFilter() because their VCF header FILTER description uses the -filter argument. Their filter statuses are identical.
+ executeTest("test inversion of selection of filter via JEXL with separate names #2", spec);
+ }
+
@Test
public void testGenotypeFilters1() {
WalkerTestSpec spec1 = new WalkerTestSpec(
@@ -207,8 +215,40 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testInvertGenotypeFilterExpression() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
- + " --genotypeFilterExpression 'DP < 8' --genotypeFilterName highDP -V " + privateTestDir + "filteringDepthInFormat.vcf --invert_genotype_filter_expression", 1,
+ + " --genotypeFilterExpression 'DP < 8' --genotypeFilterName highDP -V " + privateTestDir + "filteringDepthInFormat.vcf --invertGenotypeFilterExpression", 1,
Arrays.asList("d2664870e7145eb73a2295766482c823"));
executeTest("testInvertGenotypeFilterExpression", spec);
}
+
+ @Test
+ public void testInvertJexlGenotypeFilterExpression() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
+ + " --genotypeFilterExpression 'DP >= 8' --genotypeFilterName highDP -V " + privateTestDir + "filteringDepthInFormat.vcf", 1,
+ Arrays.asList("8ddd8f3b5ee351c4ab79cb186b1d45ba")); // Differs from testInvertFilter because FILTER description uses the -genotypeFilterExpression argument
+ executeTest("testInvertJexlGenotypeFilterExpression", spec);
+ }
+
+ @Test
+ public void testSetFilteredGtoNocall() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
+ + " --genotypeFilterExpression 'DP < 8' --genotypeFilterName lowDP -V " + privateTestDir + "filteringDepthInFormat.vcf --setFilteredGtToNocall", 1,
+ Arrays.asList("9ff801dd726eb4fc562b278ccc6854b1"));
+ executeTest("testSetFilteredGtoNocall", spec);
+ }
+
+ @Test
+ public void testSetVcfFilteredGtoNocall() {
+ String testfile = privateTestDir + "filteredSamples.vcf";
+
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T SelectVariants --setFilteredGtToNocall -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header",
+ 1,
+ Arrays.asList("81b99386a64a8f2b857a7ef2bca5856e")
+ );
+
+ spec.disableShadowBCF();
+ executeTest("testSetVcfFilteredGtoNocall--" + testfile, spec);
+ }
}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java
index e19cfca29..778b38093 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java
@@ -129,7 +129,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
eventLength = 5;
alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
- Assert.assertEquals(alleles.size(),0);
+ Assert.assertEquals(alleles.size(),2);
}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java
index 42c8c6285..6f5634b56 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java
@@ -224,6 +224,18 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
executeTest("testGVCFIndexNoThrow", spec);
}
+ /**
+ * Test HaplotypeCaller to ensure it does not throw an exception when a .g.vcf.gz output file is specified and the indexing arguments are omitted
+ */
+ @Test()
+ public void testGVCFGzIndexNoThrow() {
+ final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF",
+ HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000000-17000100");
+ final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(GATKVCFUtils.GVCF_GZ_EXT), Arrays.asList(""));
+ spec.disableShadowBCF();
+ executeTest("testGVCFIndexNoThrow", spec);
+ }
+
@Test()
public void testWrongParameterGVCFIndexException() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF -variant_index_type %s -variant_index_parameter %d",
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtilsUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtilsUnitTest.java
index 26a4a92f7..740f2759c 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtilsUnitTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtilsUnitTest.java
@@ -81,6 +81,7 @@ public class PhasingUtilsUnitTest extends BaseTest {
private ReferenceSequenceFile referenceFile;
private Genotype genotype1;
private Genotype genotype2;
+ private Genotype genotype2unphased;
private String contig;
private List alleleList1;
private List alleleList2;
@@ -93,8 +94,9 @@ public class PhasingUtilsUnitTest extends BaseTest {
genomeLocParser = new GenomeLocParser(referenceFile);
alleleList1 = Arrays.asList(Allele.create("T", true), Allele.create("C", false));
alleleList2 = Arrays.asList(Allele.create("G", true), Allele.create("A", false));
- genotype1 = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-1", "10-2"}).attribute("PQ", 100.0).alleles(alleleList1).make();
- genotype2 = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-2", "10-1"}).attribute("PQ", 200.0).alleles(alleleList2).make();
+ genotype1 = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-1", "10-2"}).attribute("PQ", 100.0).alleles(alleleList1).phased(true).make();
+ genotype2 = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-2", "10-1"}).attribute("PQ", 200.0).alleles(alleleList2).phased(true).make();
+ genotype2unphased = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-2", "10-1"}).attribute("PQ", 200.0).alleles(alleleList2).phased(false).make();
contig = new String("1");
vc1 = new VariantContextBuilder().chr(contig).id("id1").source("TC").start(start).stop(start).alleles(alleleList1).genotypes(genotype1).make();
vc2 = new VariantContextBuilder().chr(contig).id("id2").source("GA").start(start+1).stop(start+1).alleles(alleleList2).genotypes(genotype2).make();
@@ -184,6 +186,7 @@ public class PhasingUtilsUnitTest extends BaseTest {
@Test
public void TestAlleleSegregationIsKnown(){
Assert.assertTrue(PhasingUtils.alleleSegregationIsKnown(genotype1, genotype2));
+ Assert.assertFalse(PhasingUtils.alleleSegregationIsKnown(genotype1, genotype2unphased));
}
@Test
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasingIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasingIntegrationTest.java
index 88a0ee7fe..8933314c8 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasingIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasingIntegrationTest.java
@@ -138,7 +138,21 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
+ " -L chr20:332341-802503",
1,
Arrays.asList("ac41d1aa9c9a67c07d894f485c29c574"));
- executeTest("Use trio-phased VCF, adding read-backed phasing infomration in HP tag (as is now standard for RBP) [TEST SEVEN]", spec);
+ executeTest("Use trio-phased VCF, adding read-backed phasing information in HP tag (as is now standard for RBP) [TEST SEVEN]", spec);
}
+ @Test
+ public void testDoNotMergeUnphasedSNPs() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T ReadBackedPhasing" +
+ " -R " + hg19Reference +
+ " -I " + privateTestDir + "S17C1-8.KRAS.bam" +
+ " --variant " + privateTestDir + "S17C1-8_bwa_mutect_filtered.vcf" +
+ " --phaseQualityThresh 20.0 -enableMergeToMNP -maxDistMNP 3 -L 12:25398281-25398284" +
+ " -o %s" +
+ " --no_cmdline_in_header",
+ 1,
+ Arrays.asList("59ee67d657ee955477bca94d07014ac3"));
+ executeTest("Do not merge unphased SNPs", spec);
+ }
}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
index 3d5463d0f..473ab8cd4 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
@@ -94,14 +94,14 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
VRTest lowPass = new VRTest(validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf",
"41e2d951a17de433fe378bb3d9ec75d4", // tranches
- "04336b2453202f286da05b69e57f66ed", // recal file
- "d29fd0bdc1c8c3a171e10d29f7ffeaec"); // cut VCF
+ "19c77724f08d90896914d3d348807399", // recal file
+ "c6a186a1a9271f5de35f1e5aeb8749a6"); // cut VCF
VRTest lowPassPlusExomes = new VRTest(validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf",
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf",
"ce4bfc6619147fe7ce1f8331bbeb86ce", // tranches
- "1b33c10be7d8bf8e9accd11113835262", // recal file
- "4700d52a06f2ef3a5882719b86911e51"); // cut VCF
+ "b7cad6a0bbbf0330e0ac712a80c3144f", // recal file
+ "bee399765991636461599565c9634bcf"); // cut VCF
@DataProvider(name = "VRTest")
public Object[][] createData1() {
@@ -196,8 +196,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf",
"3ad7f55fb3b072f373cbce0b32b66df4", // tranches
- "e747c08131d58d9a4800720f6ca80e0c", // recal file
- "e5808af3af0f2611ba5a3d172ab2557b"); // cut VCF
+ "e91a5b25ea1eefdcff488e0326028b51", // recal file
+ "e6a0c5173d8c8fbd08afdc5e5e7d3a78"); // cut VCF
@DataProvider(name = "VRBCFTest")
public Object[][] createVRBCFTest() {
@@ -251,14 +251,14 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
VRTest indelUnfiltered = new VRTest(
validationDataLocation + "combined.phase1.chr20.raw.indels.unfiltered.sites.vcf", // all FILTERs as .
"9a331328370889168a7aa3a625f73620", // tranches
- "2cbbd146d68c40200b782e0226f71976", // recal file
- "64dd98a5ab80cf5fd9a36eb66b38268e"); // cut VCF
+ "689c7853fe2e63216da3b0d47e27740e", // recal file
+ "4147373ec8e0aba7ace3658677007990"); // cut VCF
VRTest indelFiltered = new VRTest(
validationDataLocation + "combined.phase1.chr20.raw.indels.filtered.sites.vcf", // all FILTERs as PASS
"9a331328370889168a7aa3a625f73620", // tranches
- "2cbbd146d68c40200b782e0226f71976", // recal file
- "c0ec662001e829f5779a9d13b1d77d80"); // cut VCF
+ "689c7853fe2e63216da3b0d47e27740e", // recal file
+ "8dd8ea31e419f68d80422b34b14e24e4"); // cut VCF
@DataProvider(name = "VRIndelTest")
public Object[][] createTestVariantRecalibratorIndel() {
@@ -316,7 +316,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
" -o %s" +
" -tranchesFile " + privateTestDir + "VQSR.mixedTest.tranches" +
" -recalFile " + privateTestDir + "VQSR.mixedTest.recal",
- Arrays.asList("03a0ed00af6aac76d39e569f90594a02"));
+ Arrays.asList("cd42484985179c7f549e652f0f6a94d0"));
final List outputFiles = executeTest("testApplyRecalibrationSnpAndIndelTogether", spec).getFirst();
setPDFsForDeletion(outputFiles);
}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java
index c9fe0eda2..8ac40aa74 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java
@@ -100,7 +100,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "tetraploid-gvcf-3.vcf" +
" -L " + privateTestDir + "tetraploid-gvcfs.intervals",
1,
- Arrays.asList("ebe26077809961f53d5244643d24fd45"));
+ Arrays.asList("7b3153135e4f8e1d137d3f4beb46f182"));
executeTest("combineSingleSamplePipelineGVCF", spec);
}
@@ -112,7 +112,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "diploid-gvcf-3.vcf" +
" -L " + privateTestDir + "tetraploid-gvcfs.intervals",
1,
- Arrays.asList("2d36a5f996cad47e5d05fcd78f6e572e"));
+ Arrays.asList("4f546634213ece6f08ec9258620b92bb"));
executeTest("combineSingleSamplePipelineGVCF", spec);
}
@@ -190,7 +190,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
@Test
public void testMD5s() throws Exception {
final String cmd = baseTestString(" -L 1:69485-69791");
- final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("83ea9f4a9aadb1218c21c9d3780e8009"));
+ final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("b7c753452ab0c05f9cee538e420b87fa"));
spec.disableShadowBCF();
executeTest("testMD5s", spec);
}
@@ -198,7 +198,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
@Test
public void testBasepairResolutionOutput() throws Exception {
final String cmd = baseTestString(" -L 1:69485-69791 --convertToBasePairResolution");
- final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("f153cb6e986efc9b50f0b8833fe5d3da"));
+ final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("bb6420ead95da4c72e76ca4bf5860ef0"));
spec.disableShadowBCF();
executeTest("testBasepairResolutionOutput", spec);
}
@@ -206,7 +206,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
@Test
public void testBreakBlocks() throws Exception {
final String cmd = baseTestString(" -L 1:69485-69791 --breakBandsAtMultiplesOf 5");
- final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("6626ff272e7e76fba091f5bde4a1f963"));
+ final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("dd31182124c4b78a8a03edb1e0cf618b"));
spec.disableShadowBCF();
executeTest("testBreakBlocks", spec);
}
@@ -217,16 +217,49 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
"-T CombineGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
" -V " + privateTestDir + "spanningDel.1.g.vcf -V " + privateTestDir + "spanningDel.2.g.vcf",
1,
- Arrays.asList("fba48ce2bf8761366ff2cd0b45d0421f"));
+ Arrays.asList("58984edf9a3a92c9fc97039b97755861"));
spec.disableShadowBCF();
executeTest("testSpanningDeletions", spec);
}
+ @Test
+ public void testMultipleSpanningDeletionsForOneSample() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T CombineGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
+ " -V " + privateTestDir + "spanningDel.many.g.vcf",
+ 1,
+ Arrays.asList("5c88e10211def13ba847c29d0fe9e191"));
+ spec.disableShadowBCF();
+ executeTest("testMultipleSpanningDeletionsForOneSample", spec);
+ }
+
+ @Test
+ public void testMultipleSpanningDeletionsForOneSampleHaploid() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T CombineGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
+ " -V " + privateTestDir + "spanningDel.many.haploid.g.vcf",
+ 1,
+ Arrays.asList("76fc5f6b949ac0b893061828af800bf8"));
+ spec.disableShadowBCF();
+ executeTest("testMultipleSpanningDeletionsForOneSampleHaploid", spec);
+ }
+
+ @Test
+ public void testMultipleSpanningDeletionsForOneSampleTetraploid() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T CombineGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
+ " -V " + privateTestDir + "spanningDel.many.tetraploid.g.vcf",
+ 1,
+ Arrays.asList("0ec79471550ec5e30540f68cb0651b14"));
+ spec.disableShadowBCF();
+ executeTest("testMultipleSpanningDeletionsForOneSampleTetraploid", spec);
+ }
+
@Test
public void testWrongReferenceBaseBugFix() throws Exception {
final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -V " + (privateTestDir + "combine-gvcf-wrong-ref-input1.vcf"
+ " -V " + (privateTestDir + "combine-gvcf-wrong-ref-input2.vcf") + " -o %s --no_cmdline_in_header");
- final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("331c1a4a6a72ea1617c1697a5d945d56"));
+ final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("c0fdba537399cf28b28771963e2c5174"));
spec.disableShadowBCF();
executeTest("testWrongReferenceBaseBugFix",spec);
@@ -235,7 +268,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
@Test
public void testBasepairResolutionInput() throws Exception {
final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -V " + privateTestDir + "gvcf.basepairResolution.vcf";
- final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("207e89b5677fbf0ef4d1ff768262cf0c"));
+ final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("6aeb88ca94cb5223f26175da72b985f2"));
spec.disableShadowBCF();
executeTest("testBasepairResolutionInput", spec);
}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariantsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariantsIntegrationTest.java
index 293e4b21a..b6d05eefb 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariantsIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariantsIntegrationTest.java
@@ -223,4 +223,18 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
executeTest("combiningGVCFsFails", spec);
} catch (Exception e) { } // do nothing
}
+
+ @Test
+ public void combineSymbolicVariants() {
+ // Just checking that this does not fail, hence no output files and MD5
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T CombineVariants --no_cmdline_in_header -o %s "
+ + " -R " + hg19RefereneWithChrPrefixInChromosomeNames
+ + " -V " + privateTestDir + "WES-chr1.DEL.vcf"
+ + " -V " + privateTestDir + "WGS-chr1.DEL.vcf"
+ + " -genotypeMergeOptions UNIQUIFY",
+ 0,
+ Arrays.asList(""));
+ executeTest("combineSymbolicVariants: ", spec);
+ }
}
\ No newline at end of file
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java
index d1ac6eca0..73f410786 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java
@@ -82,7 +82,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -V " + privateTestDir + "testUpdatePGT.vcf", b37KGReference),
1,
- Arrays.asList("4dfea9a9b1a77c4c6b9edc61f9ea8da2"));
+ Arrays.asList("23ff3e22262929138ca1f00fc111cadf"));
executeTest("testUpdatePGT", spec);
}
@@ -91,7 +91,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -V " + privateTestDir + "testUpdatePGT.vcf -A StrandAlleleCountsBySample", b37KGReference),
1,
- Arrays.asList("a96b79e7c3689c8d5506083cb6d27390"));
+ Arrays.asList("88fa4a021e4aac9a0e48bd54b2949ece"));
executeTest("testUpdatePGT, adding StrandAlleleCountsBySample annotation", spec);
}
@@ -103,7 +103,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
" -L 20:10,000,000-20,000,000", b37KGReference),
1,
- Arrays.asList("bf3c1982ab6ffee410cb6a1fff6e7105"));
+ Arrays.asList("06b4e2589c5b903f7c51ae9968bebe77"));
executeTest("combineSingleSamplePipelineGVCF", spec);
}
@@ -115,7 +115,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "tetraploid-gvcf-3.vcf" +
" -L " + privateTestDir + "tetraploid-gvcfs.intervals", b37KGReference),
1,
- Arrays.asList("47d454936dc1f17cf4c4f84f02841346"));
+ Arrays.asList("599394c205c1d6641b9bebabbd29e13c"));
executeTest("combineSingleSamplePipelineGVCF", spec);
}
@@ -127,7 +127,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "diploid-gvcf-3.vcf" +
" -L " + privateTestDir + "tetraploid-gvcfs.intervals", b37KGReference),
1,
- Arrays.asList("5d79ea9de8ada8520d01284cf0c9f720"));
+ Arrays.asList("f7d5344a85e6d7fc2437d4253b424cb0"));
executeTest("combineSingleSamplePipelineGVCF", spec);
}
@@ -139,7 +139,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
" --includeNonVariantSites -L 20:10,030,000-10,033,000 -L 20:10,386,000-10,386,500", b37KGReference),
1,
- Arrays.asList("d69b43cac448f45218e77308fc01e9e6"));
+ Arrays.asList("c9e4d1e52ee1f3a5233f1fb100f24d5e"));
executeTest("combineSingleSamplePipelineGVCF_includeNonVariants", spec);
}
@@ -152,7 +152,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
" -L 20:10,000,000-20,000,000", b37KGReference),
1,
- Arrays.asList("7c93d82758bfb6e7efec257ef8a46217"));
+ Arrays.asList("aa19980b9a525afed43e98c821114ae5"));
executeTest("combineSingleSamplePipelineGVCFHierarchical", spec);
}
@@ -164,7 +164,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
" -L 20:10,000,000-11,000,000 --dbsnp " + b37dbSNP132, b37KGReference),
1,
- Arrays.asList("5b60a7a9575ea83407aa61123960a0cc"));
+ Arrays.asList("f23c9d62542a69b5cbf0e9f89fdd235d"));
executeTest("combineSingleSamplePipelineGVCF_addDbsnp", spec);
}
@@ -174,7 +174,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:69485-69791 -o %s -R " + b37KGReference +
" -V " + privateTestDir + "gvcfExample1.vcf",
1,
- Arrays.asList("9e59b94c84dd673b8db9d35cae7e0f68"));
+ Arrays.asList("d602d9e5d336798e4ccb52d2b5f91677"));
executeTest("testJustOneSample", spec);
}
@@ -185,14 +185,14 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V " + privateTestDir + "gvcfExample1.vcf" +
" -V " + privateTestDir + "gvcfExample2.vcf",
1,
- Arrays.asList("8407cb9a1ab34e705e5a54a0d4146d84"));
+ Arrays.asList("6c6d6ef90386eb6c6ed649379aac0c13"));
executeTest("testSamplesWithDifferentLs", spec);
}
@Test(enabled = true)
public void testNoPLsException() {
// Test with input files with (1) 0/0 and (2) ./.
- final String md5 = "3e69805dc1c0ada0a050a65b89ecab30";
+ final String md5 = "d04b32cf2fa97d303ff7fdc779a653d4";
WalkerTestSpec spec1 = new WalkerTestSpec(
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:1115550-1115551 -o %s -R " + hg19Reference +
" --variant " + privateTestDir + "combined_genotype_gvcf_exception.vcf",
@@ -212,7 +212,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseBPResolutionString("-nda"),
1,
- Arrays.asList("5a036de16b7a87626d2b76727376d9df"));
+ Arrays.asList("7132a43d93a9855d03b27b4b0381194c"));
executeTest("testNDA", spec);
}
@@ -221,7 +221,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseBPResolutionString("-maxAltAlleles 1"),
1,
- Arrays.asList("2f3e6879fa27128a8be7b067ded78966"));
+ Arrays.asList("07844593a4e1ff1110ef8c1de42cc290"));
executeTest("testMaxAltAlleles", spec);
}
@@ -230,7 +230,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseBPResolutionString("-stand_call_conf 300 -stand_emit_conf 100"),
1,
- Arrays.asList("2e4a1ad71e8fc127b594077166c0344b"));
+ Arrays.asList("56caad762b26479ba5e2cc99222b9030"));
executeTest("testStandardConf", spec);
}
@@ -274,7 +274,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V:combined2 " + privateTestDir + "combine.single.sample.pipeline.combined.vcf" +
" --uniquifySamples", b37KGReference),
1,
- Arrays.asList("9a472c4e101fff4892efb9255c5cd8b3"));
+ Arrays.asList("ba36b36145e038e3cb004adf11bce96c"));
executeTest("testUniquifiedSamples", spec);
}
@@ -446,7 +446,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
}
- private static final String simpleSpanningDeletionsMD5 = "e8616a396d40b4918ad30189856ceb01";
+ private static final String simpleSpanningDeletionsMD5 = "1cf4ea1da40306741ec4b9a5fe1568b9";
@Test(enabled = true)
public void testSpanningDeletionsMD5() {
@@ -476,7 +476,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
"-T GenotypeGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
" -V " + privateTestDir + "spanningDel.1.g.vcf -V " + privateTestDir + "spanningDel.2.g.vcf -V " + privateTestDir + "spanningDel.3.g.vcf",
1,
- Arrays.asList("1c418229117bc8f148a69eda9c496309"));
+ Arrays.asList("0aa7ceae6af1dc4fda6732e978ace864"));
spec.disableShadowBCF();
executeTest("testMultipleSpanningDeletionsMD5", spec);
}
@@ -487,8 +487,31 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
"-T GenotypeGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
" -V " + privateTestDir + "spanningDel.delOnly.g.vcf",
1,
+ Arrays.asList("02cca337e097b86c5471929036ad4b64"));
+ spec.disableShadowBCF();
+ executeTest("testSpanningDeletionDoesNotGetGenotypedWithNoOtherAlleles", spec);
+ }
+
+ @Test(enabled = true)
+ public void testDeprecatedSpanningDeletionDoesNotGetGenotypedWithNoOtherAlleles() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T GenotypeGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
+ " -V " + privateTestDir + "spanningDel.depr.delOnly.g.vcf",
+ 1,
Arrays.asList("46169d08f93e5ff57856c7b64717314b"));
spec.disableShadowBCF();
executeTest("testSpanningDeletionDoesNotGetGenotypedWithNoOtherAlleles", spec);
}
+
+ @Test(enabled = true)
+ public void testGenotypingSpanningDeletionOverSpan() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T GenotypeGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
+ " -V " + privateTestDir + "spanningDel.delOverSpan.1.g.vcf -V " +
+ privateTestDir + "spanningDel.delOverSpan.2.g.vcf",
+ 0,
+ Arrays.asList("")); // we do not care about the md5; we just want to make sure it doesn't blow up with an error
+ spec.disableShadowBCF();
+ executeTest("testGenotypingSpanningDeletionOverSpan", spec);
+ }
}
\ No newline at end of file
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariantsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariantsIntegrationTest.java
index e0ce9405e..dce5ff522 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariantsIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariantsIntegrationTest.java
@@ -64,8 +64,10 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
return "-T SelectVariants -R " + b36KGReference + " -L 1 -o %s --no_cmdline_in_header" + args;
}
- private static final String sampleExclusionMD5 = "eea22fbf1e490e59389a663c3d6a6537";
- private static final String invertSelectionMD5 = "831bc0a5a723b0681a910d668ff3757b";
+ private static final String SAMPLE_EXCLUSION_MD5 = "eea22fbf1e490e59389a663c3d6a6537";
+ private static final String INVERT_SELECTION_MD5 = "831bc0a5a723b0681a910d668ff3757b";
+ private static final String MAX_FILTERED_GT_SELECTION_MD5 = "0365de1bbf7c037be00badace0a74d02";
+ private static final String MIN_FILTERED_GT_SELECTION_MD5 = "fcee8c8caa0696a6675961bb12664878";
@Test
public void testDiscordanceNoSampleSpecified() {
@@ -199,7 +201,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -xl_se '[CDH]' --variant " + testfile,
1,
- Arrays.asList(sampleExclusionMD5)
+ Arrays.asList(SAMPLE_EXCLUSION_MD5)
);
spec.disableShadowBCF();
@@ -216,7 +218,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -se '[^CDH]' --variant " + testfile,
1,
- Arrays.asList(sampleExclusionMD5)
+ Arrays.asList(SAMPLE_EXCLUSION_MD5)
);
spec.disableShadowBCF();
@@ -557,7 +559,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
}
/**
- * Test inverting the variant selection criteria
+ * Test inverting the variant selection criteria by the -invertSelect argument
*/
@Test
public void testInvertSelection() {
@@ -565,16 +567,16 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
WalkerTestSpec spec = new WalkerTestSpec(
- baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 20000' --invert_selection --variant " + testfile),
+ baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 20000' -invertSelect --variant " + testfile),
1,
- Arrays.asList(invertSelectionMD5)
+ Arrays.asList(INVERT_SELECTION_MD5)
);
spec.disableShadowBCF();
executeTest("testInvertSelection--" + testfile, spec);
}
/**
- * Test inverting the variant JEXL selection criteria
+ * Test inverting the variant selection criteria by inverting the JEXL expression logic following -select
*/
@Test
public void testInvertJexlSelection() {
@@ -584,7 +586,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP >= 20000'--variant " + testfile),
1,
- Arrays.asList(invertSelectionMD5)
+ Arrays.asList(INVERT_SELECTION_MD5)
);
spec.disableShadowBCF();
executeTest("testInvertJexlSelection--" + testfile, spec);
@@ -659,10 +661,76 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
String pedFile = privateTestDir + "CEUtrio.ped";
WalkerTestSpec spec = new WalkerTestSpec(
- "-T SelectVariants -R "+b37KGReference + " -mv -mvq 0 -inv_mv --variant " + testFile + " -ped " + pedFile + " -o %s --no_cmdline_in_header",
+ "-T SelectVariants -R "+b37KGReference + " -mv -mvq 0 -invMv --variant " + testFile + " -ped " + pedFile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("35921fb2dedca0ead83027a66b725794"));
executeTest("testMendelianViolationSelection--" + testFile, spec);
}
+
+ @Test
+ public void testMaxFilteredGenotypesSelection() {
+ String testfile = privateTestDir + "filteredSamples.vcf";
+
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T SelectVariants --maxFilteredGenotypes 1 -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header",
+ 1,
+ Arrays.asList(MAX_FILTERED_GT_SELECTION_MD5)
+ );
+ spec.disableShadowBCF();
+ executeTest("testMaxFilteredGenotypesSelection--" + testfile, spec);
+ }
+
+ @Test
+ public void testMinFilteredGenotypesSelection() {
+ String testfile = privateTestDir + "filteredSamples.vcf";
+
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T SelectVariants --minFilteredGenotypes 2 -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header",
+ 1,
+ Arrays.asList(MIN_FILTERED_GT_SELECTION_MD5)
+ );
+ spec.disableShadowBCF();
+ executeTest("testMinFilteredGenotypesSelection--" + testfile, spec);
+ }
+
+ @Test
+ public void testMaxFractionFilteredGenotypesSelection() {
+ String testfile = privateTestDir + "filteredSamples.vcf";
+
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T SelectVariants --maxFractionFilteredGenotypes 0.4 -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header",
+ 1,
+ Arrays.asList(MAX_FILTERED_GT_SELECTION_MD5)
+ );
+ spec.disableShadowBCF();
+ executeTest("testMaxFractionFilteredGenotypesSelection--" + testfile, spec);
+ }
+
+ @Test
+ public void testMinFractionFilteredGenotypesSelection() {
+ String testfile = privateTestDir + "filteredSamples.vcf";
+
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T SelectVariants --minFractionFilteredGenotypes 0.6 -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header",
+ 1,
+ Arrays.asList(MIN_FILTERED_GT_SELECTION_MD5)
+ );
+ spec.disableShadowBCF();
+ executeTest("testMinFractionFilteredGenotypesSelection--" + testfile, spec);
+ }
+
+ @Test
+ public void testSetFilteredGtoNocall() {
+ String testfile = privateTestDir + "filteredSamples.vcf";
+
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T SelectVariants --setFilteredGtToNocall -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header",
+ 1,
+ Arrays.asList("81b99386a64a8f2b857a7ef2bca5856e")
+ );
+
+ spec.disableShadowBCF();
+ executeTest("testSetFilteredGtoNocall--" + testfile, spec);
+ }
}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantContextMergerUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantContextMergerUnitTest.java
index 6290fdd96..642733d8f 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantContextMergerUnitTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantContextMergerUnitTest.java
@@ -95,7 +95,7 @@ public class VariantContextMergerUnitTest extends BaseTest {
ATCATCT = Allele.create("ATCATCT");
ATref = Allele.create("AT",true);
Anoref = Allele.create("A",false);
- del = GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE;
+ del = Allele.SPAN_DEL;
GT = Allele.create("GT",false);
genomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(new File(hg18Reference)));
}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantsToVCFIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantsToVCFIntegrationTest.java
index 5d8c21926..ff9b45b98 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantsToVCFIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantsToVCFIntegrationTest.java
@@ -67,23 +67,6 @@ import java.util.ArrayList;
*/
public class VariantsToVCFIntegrationTest extends WalkerTest {
- @Test
- public void testVariantsToVCFUsingDbsnpInput() {
- List md5 = new ArrayList();
- md5.add("72e6ce7aff7dec7ca9e7580be7ddd435");
-
- WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
- "-R " + b36KGReference +
- " --variant:OldDbsnp " + GATKDataLocation + "Comparisons/Validated/dbSNP/dbsnp_129_b36.rod" +
- " -T VariantsToVCF" +
- " -L 1:1-30,000,000" +
- " -o %s" +
- " --no_cmdline_in_header",
- 1, // just one output file
- md5);
- executeTest("testVariantsToVCFUsingDbsnpInput", spec).getFirst();
- }
-
@Test
public void testVariantsToVCFUsingGeliInput() {
List md5 = new ArrayList();
diff --git a/protected/pom.xml b/protected/pom.xml
index cae185e15..edfd9e7b3 100644
--- a/protected/pom.xml
+++ b/protected/pom.xml
@@ -5,7 +5,7 @@
org.broadinstitute.gatk
gatk-root
- 3.4
+ 3.5-SNAPSHOT
../public/gatk-root
diff --git a/public/VectorPairHMM/pom.xml b/public/VectorPairHMM/pom.xml
index 05c6a5851..ccbc99eef 100644
--- a/public/VectorPairHMM/pom.xml
+++ b/public/VectorPairHMM/pom.xml
@@ -5,7 +5,7 @@
org.broadinstitute.gatk
gatk-root
- 3.4
+ 3.5-SNAPSHOT
../../public/gatk-root
diff --git a/public/external-example/pom.xml b/public/external-example/pom.xml
index ac47625c3..02eaa9800 100644
--- a/public/external-example/pom.xml
+++ b/public/external-example/pom.xml
@@ -9,7 +9,7 @@
External Example
- 3.4
+ 3.5-SNAPSHOT
- 1.132
- 1.131
+ 1.134
+ 1.133
diff --git a/public/gatk-tools-public/pom.xml b/public/gatk-tools-public/pom.xml
index e5af4d8ff..347839ec7 100644
--- a/public/gatk-tools-public/pom.xml
+++ b/public/gatk-tools-public/pom.xml
@@ -5,7 +5,7 @@
org.broadinstitute.gatk
gatk-aggregator
- 3.4
+ 3.5-SNAPSHOT
../..
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/coverage/CoverageUtils.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/coverage/CoverageUtils.java
index e71860794..e027464d4 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/coverage/CoverageUtils.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/coverage/CoverageUtils.java
@@ -137,6 +137,9 @@ public class CoverageUtils {
public static Map getBaseCountsByReadGroup(AlignmentContext context, int minMapQ, int maxMapQ, byte minBaseQ, byte maxBaseQ, CountPileupType countType) {
Map countsByRG = new HashMap();
+ Map countsByRGName = new HashMap();
+ Map RGByName = new HashMap();
+
List countPileup = new LinkedList();
FragmentCollection fpile;
@@ -202,10 +205,20 @@ public class CoverageUtils {
for (PileupElement e : countPileup) {
SAMReadGroupRecord readGroup = getReadGroup(e.getRead());
- if (!countsByRG.keySet().contains(readGroup))
- countsByRG.put(readGroup, new int[6]);
- updateCounts(countsByRG.get(readGroup), e);
+ String readGroupId = readGroup.getSample() + "_" + readGroup.getReadGroupId();
+ int[] counts = countsByRGName.get(readGroupId);
+ if (counts == null) {
+ counts = new int[6];
+ countsByRGName.put(readGroupId, counts);
+ RGByName.put(readGroupId, readGroup);
+ }
+
+ updateCounts(counts, e);
+ }
+
+ for (String readGroupId : RGByName.keySet()) {
+ countsByRG.put(RGByName.get(readGroupId), countsByRGName.get(readGroupId));
}
return countsByRG;
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java
index aa2e96f1a..4c4d6f02d 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java
@@ -179,15 +179,21 @@ public class VariantFiltration extends RodWalker {
/**
* Invert the selection criteria for --filterExpression
*/
- @Argument(fullName="invert_filter_expression", shortName="inv_fil_sel", doc="Invert the selection criteria for --filterExpression", required=false)
+ @Argument(fullName="invertFilterExpression", shortName="invfilter", doc="Invert the selection criteria for --filterExpression", required=false)
protected boolean invertFilterExpression = false;
/**
* Invert the selection criteria for --genotypeFilterExpression
*/
- @Argument(fullName="invert_genotype_filter_expression", shortName="inv_gen_fil_sel", doc="Invert the selection criteria for --genotypeFilterExpression", required=false)
+ @Argument(fullName="invertGenotypeFilterExpression", shortName="invG_filter", doc="Invert the selection criteria for --genotypeFilterExpression", required=false)
protected boolean invertGenotypeFilterExpression = false;
+ /**
+ * If this argument is provided, set filtered genotypes to no-call (./.).
+ */
+ @Argument(fullName="setFilteredGtToNocall", required=false, doc="Set filtered genotypes to no-call")
+ private boolean setFilteredGenotypesToNocall = false;
+
// JEXL expressions for the filters
List filterExps;
List genotypeFilterExps;
@@ -201,8 +207,10 @@ public class VariantFiltration extends RodWalker {
private static final int WINDOW_SIZE = 10; // 10 variants on either end of the current one
private ArrayList windowInitializer = new ArrayList();
+ private final List diploidNoCallAlleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
+
/**
- * Prepend inverse phrase to description if --invert_filter_expression
+ * Prepend inverse phrase to description if --invertFilterExpression
*
* @param description the description
* @return the description with inverse prepended if --invert_filter_expression
@@ -379,7 +387,7 @@ public class VariantFiltration extends RodWalker {
final VariantContextBuilder builder = new VariantContextBuilder(vc);
// make new Genotypes based on filters
- if ( !genotypeFilterExps.isEmpty() ) {
+ if ( !genotypeFilterExps.isEmpty() || setFilteredGenotypesToNocall ) {
GenotypesContext genotypes = GenotypesContext.create(vc.getGenotypes().size());
// for each genotype, check filters then create a new object
@@ -388,12 +396,17 @@ public class VariantFiltration extends RodWalker {
final List filters = new ArrayList();
if ( g.isFiltered() ) filters.add(g.getFilters());
+ // Add if expression filters the variant context
for ( VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExps ) {
if ( Utils.invertLogic(VariantContextUtils.match(vc, g, exp), invertGenotypeFilterExpression) )
filters.add(exp.name);
}
- genotypes.add(new GenotypeBuilder(g).filters(filters).make());
+ // if sample is filtered and --setFilteredGtToNocall, set genotype to non-call
+ if ( !filters.isEmpty() && setFilteredGenotypesToNocall )
+ genotypes.add(new GenotypeBuilder(g).filters(filters).alleles(diploidNoCallAlleles).make());
+ else
+ genotypes.add(new GenotypeBuilder(g).filters(filters).make());
} else {
genotypes.add(g);
}
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariants.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariants.java
index 63971c497..d44fbc84f 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariants.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariants.java
@@ -146,7 +146,7 @@ import java.util.*;
* -o output.vcf \
* -se 'SAMPLE.+PARC' \
* -select "QD > 10.0"
- * --invert_selection
+ * -invertSelect
*
*
* Select a sample and exclude non-variant loci and filtered loci (trim remaining alleles by default):
@@ -207,7 +207,7 @@ import java.util.*;
* -sn mySample
*
*
- * Generating a VCF of all the variants that are mendelian violations. The optional argument `-mvq` restricts the selection to sites that have a QUAL score of 50 or more
+ * Generating a VCF of all the variants that are mendelian violations. The optional argument '-mvq' restricts the selection to sites that have a QUAL score of 50 or more
*
* java -jar GenomeAnalysisTK.jar \
* -T SelectVariants \
@@ -218,14 +218,14 @@ import java.util.*;
* -o violations.vcf
*
*
- * Generating a VCF of all the variants that are not mendelian violations. The optional argument `-mvq` together with '-inv_mv' restricts the selection to sites that have a QUAL score of 50 or less
+ * Generating a VCF of all the variants that are not mendelian violations. The optional argument '-mvq' together with '-invMv' restricts the selection to sites that have a QUAL score of 50 or less
*
* java -jar GenomeAnalysisTK.jar \
* -T SelectVariants \
* -R reference.fasta \
* -V input.vcf \
* -ped family.ped \
- * -mv -mvq 50 -inv_mv \
+ * -mv -mvq 50 -invMv \
* -o violations.vcf
*
*
@@ -275,7 +275,7 @@ import java.util.*;
*
* Select IDs in fileKeep and exclude IDs in fileExclude:
*
- * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ * java -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T SelectVariants \
* --variant input.vcf \
@@ -284,9 +284,35 @@ import java.util.*;
* -excludeIDs fileExclude
*
*
+ * Select sites where there are between 2 and 5 samples and between 10 and 50 percent of the sample genotypes are filtered:
+ *
+ * java -jar GenomeAnalysisTK.jar \
+ * -R ref.fasta \
+ * -T SelectVariants \
+ * --variant input.vcf \
+ * --maxFilteredGenotypes 5
+ * --minFilteredGenotypes 2
+ * --maxFractionFilteredGenotypes 0.60
+ * --minFractionFilteredGenotypes 0.10
+ *
+ *
+ * Set filtered genotypes to no-call (./.):
+ *
+ * java -jar GenomeAnalysisTK.jar \
+ * -R ref.fasta \
+ * -T SelectVariants \
+ * --variant input.vcf \
+ * --setFilteredGenotypesToNocall
+ *
+ *
*/
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
public class SelectVariants extends RodWalker implements TreeReducible {
+ static final int MAX_FILTERED_GENOTYPES_DEFAULT_VALUE = Integer.MAX_VALUE;
+ static final int MIN_FILTERED_GENOTYPES_DEFAULT_VALUE = 0;
+ static final double MAX_FRACTION_FILTERED_GENOTYPES_DEFAULT_VALUE = 1.0;
+ static final double MIN_FRACTION_FILTERED_GENOTYPES_DEFAULT_VALUE = 0.0;
+
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
/**
@@ -359,10 +385,10 @@ public class SelectVariants extends RodWalker implements TreeR
public ArrayList selectExpressions = new ArrayList<>();
/**
- * Invert the selection criteria for --select.
+ * Invert the selection criteria for -select.
*/
- @Argument(fullName="invert_selection", shortName="inv_sel", doc="Invert the selection criteria for --select", required=false)
- protected boolean invertSelection = false;
+ @Argument(shortName="invertSelect", doc="Invert the selection criteria for -select", required=false)
+ protected boolean invertSelect = false;
/*
* If this flag is enabled, sites that are found to be non-variant after the subsetting procedure (i.e. where none
@@ -436,8 +462,8 @@ public class SelectVariants extends RodWalker implements TreeR
* determined on the basis of family structure. Requires passing a pedigree file using the engine-level
* `-ped` argument.
*/
- @Argument(fullName="invert_mendelianViolation", shortName="inv_mv", doc="Output non-mendelian violation sites only", required=false)
- private Boolean invert_mendelianViolations = false;
+ @Argument(fullName="invertMendelianViolation", shortName="invMv", doc="Output non-mendelian violation sites only", required=false)
+ private Boolean invertMendelianViolations = false;
/**
* This argument specifies the genotype quality (GQ) threshold that all members of a trio must have in order
@@ -514,6 +540,36 @@ public class SelectVariants extends RodWalker implements TreeR
@Argument(fullName="minIndelSize", required=false, doc="Minimum size of indels to include")
private int minIndelSize = 0;
+ /**
+ * If this argument is provided, select sites where at most a maximum number of samples are filtered at the genotype level.
+ */
+ @Argument(fullName="maxFilteredGenotypes", required=false, doc="Maximum number of samples filtered at the genotype level")
+ private int maxFilteredGenotypes = MAX_FILTERED_GENOTYPES_DEFAULT_VALUE;
+
+ /**
+ * If this argument is provided, select sites where at least a minimum number of samples are filtered at the genotype level.
+ */
+ @Argument(fullName="minFilteredGenotypes", required=false, doc="Minimum number of samples filtered at the genotype level")
+ private int minFilteredGenotypes = MIN_FILTERED_GENOTYPES_DEFAULT_VALUE;
+
+ /**
+ * If this argument is provided, select sites where a fraction or less of the samples are filtered at the genotype level.
+ */
+ @Argument(fullName="maxFractionFilteredGenotypes", required=false, doc="Maximum fraction of samples filtered at the genotype level")
+ private double maxFractionFilteredGenotypes = MAX_FRACTION_FILTERED_GENOTYPES_DEFAULT_VALUE;
+
+ /**
+ * If this argument is provided, select sites where a fraction or more of the samples are filtered at the genotype level.
+ */
+ @Argument(fullName="minFractionFilteredGenotypes", required=false, doc="Maximum fraction of samples filtered at the genotype level")
+ private double minFractionFilteredGenotypes = MIN_FRACTION_FILTERED_GENOTYPES_DEFAULT_VALUE;
+
+ /**
+ * If this argument is provided, set filtered genotypes to no-call (./.).
+ */
+ @Argument(fullName="setFilteredGtToNocall", required=false, doc="Set filtered genotypes to no-call")
+ private boolean setFilteredGenotypesToNocall = false;
+
@Hidden
@Argument(fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES", required=false, doc="Allow samples other than those in the VCF to be specified on the command line. These samples will be ignored.")
private boolean allowNonOverlappingCommandLineSamples = false;
@@ -547,6 +603,8 @@ public class SelectVariants extends RodWalker implements TreeR
private Set IDsToRemove = null;
private Map vcfRods;
+ private final List diploidNoCallAlleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
+
/**
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
*/
@@ -619,7 +677,7 @@ public class SelectVariants extends RodWalker implements TreeR
selectedTypes.add(t);
}
- // remove specifed types
+ // remove specified types
for (VariantContext.Type t : typesToExclude)
selectedTypes.remove(t);
@@ -713,16 +771,16 @@ public class SelectVariants extends RodWalker implements TreeR
for (VariantContext vc : vcs) {
// an option for performance testing only
- if ( fullyDecode )
- vc = vc.fullyDecode(vcfRods.get(vc.getSource()), getToolkit().lenientVCFProcessing() );
+ if (fullyDecode)
+ vc = vc.fullyDecode(vcfRods.get(vc.getSource()), getToolkit().lenientVCFProcessing());
- if ( IDsToKeep != null && !IDsToKeep.contains(vc.getID()) )
+ if (IDsToKeep != null && !IDsToKeep.contains(vc.getID()))
continue;
- if ( IDsToRemove != null && IDsToRemove.contains(vc.getID()) )
+ if (IDsToRemove != null && IDsToRemove.contains(vc.getID()))
continue;
- if ( mendelianViolations && Utils.invertLogic(mv.countViolations(this.getSampleDB().getFamilies(samples), vc) == 0, invert_mendelianViolations) )
+ if (mendelianViolations && Utils.invertLogic(mv.countViolations(this.getSampleDB().getFamilies(samples), vc) == 0, invertMendelianViolations))
break;
if (discordanceOnly) {
@@ -745,20 +803,30 @@ public class SelectVariants extends RodWalker implements TreeR
if (!selectedTypes.contains(vc.getType()))
continue;
- if ( containsIndelLargerOrSmallerThan(vc, maxIndelSize, minIndelSize) )
+ if (containsIndelLargerOrSmallerThan(vc, maxIndelSize, minIndelSize))
continue;
+ if ( needNumFilteredGenotypes()) {
+ int numFilteredSamples = numFilteredGenotypes(vc);
+ double fractionFilteredGenotypes = samples.isEmpty() ? 0.0 : numFilteredSamples / samples.size();
+ if (numFilteredSamples > maxFilteredGenotypes || numFilteredSamples < minFilteredGenotypes ||
+ fractionFilteredGenotypes > maxFractionFilteredGenotypes || fractionFilteredGenotypes < minFractionFilteredGenotypes)
+ continue;
+ }
+
VariantContext sub = subsetRecord(vc, preserveAlleles, removeUnusedAlternates);
+ VariantContext filteredGenotypeToNocall = setFilteredGenotypeToNocall(sub, setFilteredGenotypesToNocall);
+
// Not excluding non-variants or subsetted polymorphic variants AND including filtered loci or subsetted variant is not filtered
- if ( (!XLnonVariants || sub.isPolymorphicInSamples()) && (!XLfiltered || !sub.isFiltered()) ) {
+ if ( (!XLnonVariants || filteredGenotypeToNocall.isPolymorphicInSamples()) && (!XLfiltered || !filteredGenotypeToNocall.isFiltered()) ) {
// Write the subsetted variant if it matches all of the expressions
boolean failedJexlMatch = false;
try {
for (VariantContextUtils.JexlVCMatchExp jexl : jexls) {
- if ( Utils.invertLogic(!VariantContextUtils.match(sub, jexl), invertSelection) ){
+ if ( Utils.invertLogic(!VariantContextUtils.match(filteredGenotypeToNocall, jexl), invertSelect) ){
failedJexlMatch = true;
break;
}
@@ -771,7 +839,7 @@ public class SelectVariants extends RodWalker implements TreeR
if ( !failedJexlMatch &&
!justRead &&
( !selectRandomFraction || Utils.getRandomGenerator().nextDouble() < fractionRandom ) ) {
- vcfWriter.add(sub);
+ vcfWriter.add(filteredGenotypeToNocall);
}
}
}
@@ -800,6 +868,26 @@ public class SelectVariants extends RodWalker implements TreeR
return false;
}
+ /**
+ * Find the number of filtered samples
+ *
+ * @param vc the variant rod VariantContext
+ * @return number of filtered samples
+ */
+ private int numFilteredGenotypes(final VariantContext vc){
+ if (vc == null)
+ return 0;
+
+ int numFiltered = 0;
+ // check if we find it in the variant rod
+ GenotypesContext genotypes = vc.getGenotypes(samples);
+ for (final Genotype g : genotypes)
+ if ( g.isFiltered() && !g.getFilters().isEmpty())
+ numFiltered++;
+
+ return numFiltered;
+ }
+
/**
* Checks if vc has a variant call for (at least one of) the samples.
*
@@ -807,7 +895,7 @@ public class SelectVariants extends RodWalker implements TreeR
* @param compVCs the comparison VariantContext (discordance)
* @return true VariantContexts are discordant, false otherwise
*/
- private boolean isDiscordant (VariantContext vc, Collection compVCs) {
+ private boolean isDiscordant (final VariantContext vc, final Collection compVCs) {
if (vc == null)
return false;
@@ -845,7 +933,7 @@ public class SelectVariants extends RodWalker implements TreeR
* @param compVCs the comparison VariantContext
* @return true if VariantContexts are concordant, false otherwise
*/
- private boolean isConcordant (VariantContext vc, Collection compVCs) {
+ private boolean isConcordant (final VariantContext vc, final Collection compVCs) {
if (vc == null || compVCs == null || compVCs.isEmpty())
return false;
@@ -876,7 +964,7 @@ public class SelectVariants extends RodWalker implements TreeR
return true;
}
- private boolean sampleHasVariant(Genotype g) {
+ private boolean sampleHasVariant(final Genotype g) {
return (g !=null && !g.isHomRef() && (g.isCalled() || (g.isFiltered() && !XLfiltered)));
}
@@ -945,8 +1033,7 @@ public class SelectVariants extends RodWalker implements TreeR
for ( Genotype genotype : newGC ) {
//Set genotype to no call if it falls in the fraction.
if(fractionGenotypes>0 && randomGenotypes.nextDouble() alleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
- genotypes.add(new GenotypeBuilder(genotype).alleles(alleles).noGQ().make());
+ genotypes.add(new GenotypeBuilder(genotype).alleles(diploidNoCallAlleles).noGQ().make());
}
else{
genotypes.add(genotype);
@@ -966,6 +1053,30 @@ public class SelectVariants extends RodWalker implements TreeR
return trimmed;
}
+ /**
+ * If --setFilteredGtToNocall, set filtered genotypes to no-call
+ *
+ * @param vc the VariantContext record to set filtered genotypes to no-call
+ * @param filteredGenotypesToNocall set filtered genotypes to non-call?
+ * @return the VariantContext with no-call genotypes if the sample was filtered
+ */
+ private VariantContext setFilteredGenotypeToNocall(final VariantContext vc, final boolean filteredGenotypesToNocall) {
+
+ if ( !filteredGenotypesToNocall )
+ return vc;
+
+ final VariantContextBuilder builder = new VariantContextBuilder(vc);
+ final GenotypesContext genotypes = GenotypesContext.create(vc.getGenotypes().size());
+
+ for ( final Genotype g : vc.getGenotypes() ) {
+ if ( g.isCalled() && g.isFiltered() )
+ genotypes.add(new GenotypeBuilder(g).alleles(diploidNoCallAlleles).make());
+ else
+ genotypes.add(g);
+ }
+
+ return builder.genotypes(genotypes).make();
+ }
/*
* Add annotations to the new VC
*
@@ -1061,4 +1172,16 @@ public class SelectVariants extends RodWalker implements TreeR
}
return result;
}
-}
+
+ /**
+ * Need the number of filtered genotypes samples?
+ *
+ * @return true if any of the filtered genotype samples arguments is used (not the default value), false otherwise
+ */
+ private boolean needNumFilteredGenotypes(){
+ return maxFilteredGenotypes != MAX_FILTERED_GENOTYPES_DEFAULT_VALUE ||
+ minFilteredGenotypes != MIN_FILTERED_GENOTYPES_DEFAULT_VALUE ||
+ maxFractionFilteredGenotypes != MAX_FRACTION_FILTERED_GENOTYPES_DEFAULT_VALUE ||
+ minFractionFilteredGenotypes != MIN_FRACTION_FILTERED_GENOTYPES_DEFAULT_VALUE;
+ }
+}
\ No newline at end of file
diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/CatVariantsIntegrationTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/CatVariantsIntegrationTest.java
index 18a2d6c12..69ae7a472 100644
--- a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/CatVariantsIntegrationTest.java
+++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/CatVariantsIntegrationTest.java
@@ -246,4 +246,20 @@ public class CatVariantsIntegrationTest {
ProcessSettings ps = new ProcessSettings(Utils.escapeExpressions(cmdLine));
pc.execAndCheck(ps);
}
+
+ @Test()
+ public void testCatVariantsGVCFGzIndexCreation() throws IOException{
+
+ String cmdLine = String.format("java -cp \"%s\" %s -R %s -V %s -V %s -out %s",
+ StringUtils.join(RuntimeUtils.getAbsoluteClassPaths(), File.pathSeparatorChar),
+ CatVariants.class.getCanonicalName(),
+ BaseTest.b37KGReference,
+ CatVariantsVcf1,
+ CatVariantsVcf2,
+ BaseTest.createTempFile("CatVariantsGVCFIndexCreationTest", "." + GATKVCFUtils.GVCF_GZ_EXT));
+
+ ProcessController pc = ProcessController.getThreadLocal();
+ ProcessSettings ps = new ProcessSettings(Utils.escapeExpressions(cmdLine));
+ pc.execAndCheck(ps);
+ }
}
\ No newline at end of file
diff --git a/public/gatk-utils/pom.xml b/public/gatk-utils/pom.xml
index fce54e9cc..25241183c 100644
--- a/public/gatk-utils/pom.xml
+++ b/public/gatk-utils/pom.xml
@@ -5,7 +5,7 @@
org.broadinstitute.gatk
gatk-aggregator
- 3.4
+ 3.5-SNAPSHOT
../..
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/SequenceDictionaryUtils.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/SequenceDictionaryUtils.java
index 22c7127c2..d0eb0b9f8 100644
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/SequenceDictionaryUtils.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/SequenceDictionaryUtils.java
@@ -165,7 +165,12 @@ public class SequenceDictionaryUtils {
}
case OUT_OF_ORDER: {
- UserException ex = new UserException.IncompatibleSequenceDictionaries("Relative ordering of overlapping contigs differs, which is unsafe", name1, dict1, name2, dict2);
+ UserException ex = new UserException.IncompatibleSequenceDictionaries(
+ "The contig order in " + name1 + " and " + name2 + "is not "
+ + "the same; to fix this please see: "
+ + "(https://www.broadinstitute.org/gatk/guide/article?id=1328), "
+ + " which describes reordering contigs in BAM and VCF files.",
+ name1, dict1, name2, dict2);
if ( allowNonFatalIncompabilities(validationExclusion) )
logger.warn(ex.getMessage());
else
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/VariantContextAdaptors.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/VariantContextAdaptors.java
index d2c11407a..f21975cce 100644
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/VariantContextAdaptors.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/VariantContextAdaptors.java
@@ -28,7 +28,6 @@ package org.broadinstitute.gatk.utils.refdata;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.tribble.Feature;
import htsjdk.tribble.annotation.Strand;
-import htsjdk.tribble.dbsnp.OldDbSNPFeature;
import htsjdk.tribble.gelitext.GeliTextFeature;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.GenomeLoc;
@@ -111,139 +110,6 @@ public class VariantContextAdaptors {
}
}
- // --------------------------------------------------------------------------------------------------------------
- //
- // dbSNP to VariantContext
- //
- // --------------------------------------------------------------------------------------------------------------
-
- private static class DBSnpAdaptor implements VCAdaptor {
- private static boolean isSNP(OldDbSNPFeature feature) {
- return feature.getVariantType().contains("single") && feature.getLocationType().contains("exact");
- }
-
- private static boolean isMNP(OldDbSNPFeature feature) {
- return feature.getVariantType().contains("mnp") && feature.getLocationType().contains("range");
- }
-
- private static boolean isInsertion(OldDbSNPFeature feature) {
- return feature.getVariantType().contains("insertion");
- }
-
- private static boolean isDeletion(OldDbSNPFeature feature) {
- return feature.getVariantType().contains("deletion");
- }
-
- private static boolean isIndel(OldDbSNPFeature feature) {
- return isInsertion(feature) || isDeletion(feature) || isComplexIndel(feature);
- }
-
- public static boolean isComplexIndel(OldDbSNPFeature feature) {
- return feature.getVariantType().contains("in-del");
- }
-
- /**
- * gets the alternate alleles. This method should return all the alleles present at the location,
- * NOT including the reference base. This is returned as a string list with no guarantee ordering
- * of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
- * frequency).
- *
- * @return an alternate allele list
- */
- public static List getAlternateAlleleList(OldDbSNPFeature feature) {
- List ret = new ArrayList();
- for (String allele : getAlleleList(feature))
- if (!allele.equals(String.valueOf(feature.getNCBIRefBase()))) ret.add(allele);
- return ret;
- }
-
- /**
- * gets the alleles. This method should return all the alleles present at the location,
- * including the reference base. The first allele should always be the reference allele, followed
- * by an unordered list of alternate alleles.
- *
- * @return an alternate allele list
- */
- public static List getAlleleList(OldDbSNPFeature feature) {
- List alleleList = new ArrayList();
- // add ref first
- if ( feature.getStrand() == Strand.POSITIVE )
- alleleList = Arrays.asList(feature.getObserved());
- else
- for (String str : feature.getObserved())
- alleleList.add(SequenceUtil.reverseComplement(str));
- if ( alleleList.size() > 0 && alleleList.contains(feature.getNCBIRefBase())
- && !alleleList.get(0).equals(feature.getNCBIRefBase()) )
- Collections.swap(alleleList, alleleList.indexOf(feature.getNCBIRefBase()), 0);
-
- return alleleList;
- }
-
- /**
- * Converts non-VCF formatted dbSNP records to VariantContext.
- * @return OldDbSNPFeature.
- */
- @Override
- public Class extends Feature> getAdaptableFeatureType() { return OldDbSNPFeature.class; }
-
- @Override
- public VariantContext convert(String name, Object input, ReferenceContext ref) {
- OldDbSNPFeature dbsnp = (OldDbSNPFeature)input;
-
- int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
- if ( index < 0 )
- return null; // we weren't given enough reference context to create the VariantContext
-
- final byte refBaseForIndel = ref.getBases()[index];
- final boolean refBaseIsDash = dbsnp.getNCBIRefBase().equals("-");
-
- boolean addPaddingBase;
- if ( isSNP(dbsnp) || isMNP(dbsnp) )
- addPaddingBase = false;
- else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") )
- addPaddingBase = refBaseIsDash || GATKVariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp)));
- else
- return null; // can't handle anything else
-
- Allele refAllele;
- if ( refBaseIsDash )
- refAllele = Allele.create(refBaseForIndel, true);
- else if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
- return null;
- else
- refAllele = Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + dbsnp.getNCBIRefBase(), true);
-
- final List alleles = new ArrayList();
- alleles.add(refAllele);
-
- // add all of the alt alleles
- for ( String alt : getAlternateAlleleList(dbsnp) ) {
- if ( Allele.wouldBeNullAllele(alt.getBytes()))
- alt = "";
- else if ( ! Allele.acceptableAlleleBases(alt) )
- return null;
-
- alleles.add(Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + alt, false));
- }
-
- final VariantContextBuilder builder = new VariantContextBuilder();
- builder.source(name).id(dbsnp.getRsID());
- builder.loc(dbsnp.getChr(), dbsnp.getStart() - (addPaddingBase ? 1 : 0), dbsnp.getEnd() - (addPaddingBase && refAllele.length() == 1 ? 1 : 0));
- builder.alleles(alleles);
- return builder.make();
- }
-
- private static List stripNullDashes(final List alleles) {
- final List newAlleles = new ArrayList(alleles.size());
- for ( final String allele : alleles ) {
- if ( allele.equals("-") )
- newAlleles.add("");
- else
- newAlleles.add(allele);
- }
- return newAlleles;
- }
- }
// --------------------------------------------------------------------------------------------------------------
//
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVCFConstants.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVCFConstants.java
index 95cf3e593..a7e9bf0de 100644
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVCFConstants.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVCFConstants.java
@@ -51,10 +51,14 @@ public final class GATKVCFConstants {
public static final String SPANNING_DELETIONS_KEY = "Dels";
public static final String ORIGINAL_DP_KEY = "DP_Orig"; //SelectVariants
public static final String DOWNSAMPLED_KEY = "DS";
+ public static final String EVENT_COUNT_IN_HAPLOTYPE_KEY = "ECNT"; //M2
+ public static final String EVENT_DISTANCE_MAX_KEY = "MAX_ED"; //M2
+ public static final String EVENT_DISTANCE_MIN_KEY = "MIN_ED"; //M2
public static final String FISHER_STRAND_KEY = "FS";
public static final String GC_CONTENT_KEY = "GC";
public static final String GQ_MEAN_KEY = "GQ_MEAN";
public static final String GQ_STDEV_KEY = "GQ_STDDEV";
+ public static final String HAPLOTYPE_COUNT_KEY = "HCNT"; //M2
public static final String HAPLOTYPE_SCORE_KEY = "HaplotypeScore";
public static final String HI_CONF_DENOVO_KEY = "hiConfDeNovo";
public static final String HOMOPOLYMER_RUN_KEY = "HRun";
@@ -80,8 +84,10 @@ public final class GATKVCFConstants {
public static final String ORIGINAL_CONTIG_KEY = "OriginalChr"; //LiftoverVariants
public static final String ORIGINAL_START_KEY = "OriginalStart"; //LiftoverVariants
public static final String N_BASE_COUNT_KEY = "PercentNBase";
+ public static final String NORMAL_LOD_KEY = "NLOD"; //M2
public static final String RBP_INCONSISTENT_KEY = "PhasingInconsistent"; //ReadBackedPhasing
public static final String GENOTYPE_PRIOR_KEY = "PG";
+ public static final String PANEL_OF_NORMALS_COUNT_KEY = "PON"; //M2
public static final String POSITIVE_LABEL_KEY = "POSITIVE_TRAIN_SITE";
public static final String QUAL_BY_DEPTH_KEY = "QD";
public static final String BEAGLE_R2_KEY = "R2"; //BeagleOutputToVCF
@@ -93,11 +99,18 @@ public final class GATKVCFConstants {
public static final String STRAND_ODDS_RATIO_KEY = "SOR";
public static final String STR_PRESENT_KEY = "STR";
public static final String TRANSMISSION_DISEQUILIBRIUM_KEY = "TDT";
+ public static final String TUMOR_LOD_KEY = "TLOD"; //M2
public static final String VARIANT_TYPE_KEY = "VariantType";
public static final String VQS_LOD_KEY = "VQSLOD";
+ public static final String OXOG_ALT_F1R2_KEY = "ALT_F1R2";
+ public static final String OXOG_ALT_F2R1_KEY = "ALT_F2R1";
+ public static final String OXOG_REF_F1R2_KEY = "REF_F1R2";
+ public static final String OXOG_REF_F2R1_KEY = "REF_F2R1";
+ public static final String OXOG_FRACTION_KEY = "FOXOG";
//FORMAT keys
public static final String ALLELE_BALANCE_KEY = "AB";
+ public static final String ALLELE_FRACTION_KEY = "AF"; //M2
public static final String PL_FOR_ALL_SNP_ALLELES_KEY = "APL";
public static final String RBP_HAPLOTYPE_KEY = "HP"; //ReadBackedPhasing
public static final String AVG_INTERVAL_DP_BY_SAMPLE_KEY = "IDP"; //DiagnoseTargets
@@ -110,6 +123,7 @@ public final class GATKVCFConstants {
public static final String HAPLOTYPE_CALLER_PHASING_GT_KEY = "PGT";
public static final String HAPLOTYPE_CALLER_PHASING_ID_KEY = "PID";
public static final String PHRED_SCALED_POSTERIORS_KEY = "PP"; //FamilyLikelihoodsUtils / PosteriorLikelihoodsUtils
+ public static final String QUALITY_SCORE_SUM_KEY = "QSS"; //M2
public static final String REFERENCE_GENOTYPE_QUALITY = "RGQ";
public static final String STRAND_COUNT_BY_SAMPLE_KEY = "SAC";
public static final String STRAND_BIAS_BY_SAMPLE_KEY = "SB";
@@ -120,13 +134,21 @@ public final class GATKVCFConstants {
/* Note that many filters used throughout GATK (most notably in VariantRecalibration) are dynamic,
their names (or descriptions) depend on some threshold. Those filters are not included here
*/
- public static final String BEAGLE_MONO_FILTER_NAME = "BGL_SET_TO_MONOMORPHIC";
- public static final String LOW_QUAL_FILTER_NAME = "LowQual";
+ public static final String ALT_ALLELE_IN_NORMAL_FILTER_NAME = "alt_allele_in_normal"; //M2
+ public static final String BEAGLE_MONO_FILTER_NAME = "BGL_SET_TO_MONOMORPHIC";
+ public static final String CLUSTERED_EVENTS_FILTER_NAME = "clustered_events"; //M2
+ public static final String GERMLINE_RISK_FILTER_NAME = "germline_risk"; //M2
+ public static final String HOMOLOGOUS_MAPPING_EVENT_FILTER_NAME = "homologous_mapping_event"; //M2
+ public static final String LOW_QUAL_FILTER_NAME = "LowQual";
+ public static final String MULTI_EVENT_ALT_ALLELE_IN_NORMAL_FILTER_NAME = "multi_event_alt_allele_in_normal"; //M2
+ public static final String PON_FILTER_NAME = "panel_of_normals"; //M2
+ public static final String STR_CONTRACTION_FILTER_NAME = "str_contraction"; //M2
+ public static final String TUMOR_LOD_FILTER_NAME = "t_lod_fstar"; //M2
// Symbolic alleles
public final static String SYMBOLIC_ALLELE_DEFINITION_HEADER_TAG = "ALT";
public final static String NON_REF_SYMBOLIC_ALLELE_NAME = "NON_REF";
public final static Allele NON_REF_SYMBOLIC_ALLELE = Allele.create("<"+NON_REF_SYMBOLIC_ALLELE_NAME+">", false); // represents any possible non-ref allele at this site
- public final static String SPANNING_DELETION_SYMBOLIC_ALLELE_NAME = "*:DEL";
- public final static Allele SPANNING_DELETION_SYMBOLIC_ALLELE = Allele.create("<"+SPANNING_DELETION_SYMBOLIC_ALLELE_NAME+">", false); // represents any possible spanning deletion allele at this site
+ public final static String SPANNING_DELETION_SYMBOLIC_ALLELE_NAME_DEPRECATED = "*:DEL";
+ public final static Allele SPANNING_DELETION_SYMBOLIC_ALLELE_DEPRECATED = Allele.create("<"+SPANNING_DELETION_SYMBOLIC_ALLELE_NAME_DEPRECATED+">", false); // represents any possible spanning deletion allele at this site
}
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVCFHeaderLines.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVCFHeaderLines.java
index 89b9510d2..fff7ea5f1 100644
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVCFHeaderLines.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVCFHeaderLines.java
@@ -25,16 +25,11 @@
package org.broadinstitute.gatk.utils.variant;
-import htsjdk.variant.vcf.VCFFilterHeaderLine;
-import htsjdk.variant.vcf.VCFFormatHeaderLine;
-import htsjdk.variant.vcf.VCFHeaderLineCount;
-import htsjdk.variant.vcf.VCFHeaderLineType;
-import htsjdk.variant.vcf.VCFInfoHeaderLine;
+import htsjdk.variant.vcf.*;
import static org.broadinstitute.gatk.utils.variant.GATKVCFConstants.*;
-import java.util.HashMap;
-import java.util.Map;
+import java.util.*;
/**
* This class contains the VCFHeaderLine definitions for the annotation keys in GATKVCFConstants.
@@ -66,6 +61,16 @@ public class GATKVCFHeaderLines {
addFilterLine(new VCFFilterHeaderLine(LOW_QUAL_FILTER_NAME, "Low quality"));
addFilterLine(new VCFFilterHeaderLine(BEAGLE_MONO_FILTER_NAME, "This site was set to monomorphic by Beagle"));
+ // M2-related filters
+ addFilterLine(new VCFFilterHeaderLine(GATKVCFConstants.ALT_ALLELE_IN_NORMAL_FILTER_NAME, "Evidence seen in the normal sample"));
+ addFilterLine(new VCFFilterHeaderLine(GATKVCFConstants.CLUSTERED_EVENTS_FILTER_NAME, "Clustered events observed in the tumor"));
+ addFilterLine(new VCFFilterHeaderLine(GATKVCFConstants.GERMLINE_RISK_FILTER_NAME, "Evidence indicates this site is germline, not somatic"));
+ addFilterLine(new VCFFilterHeaderLine(GATKVCFConstants.HOMOLOGOUS_MAPPING_EVENT_FILTER_NAME, "More than three events were observed in the tumor"));
+ addFilterLine(new VCFFilterHeaderLine(GATKVCFConstants.MULTI_EVENT_ALT_ALLELE_IN_NORMAL_FILTER_NAME, "Multiple events observed in tumor and normal"));
+ addFilterLine(new VCFFilterHeaderLine(GATKVCFConstants.PON_FILTER_NAME, "Seen in at least 2 samples in the panel of normals"));
+ addFilterLine(new VCFFilterHeaderLine(GATKVCFConstants.TUMOR_LOD_FILTER_NAME, "Tumor does not meet likelihood threshold"));
+ addFilterLine(new VCFFilterHeaderLine(GATKVCFConstants.STR_CONTRACTION_FILTER_NAME, "Site filtered due to contraction of short tandem repeat region"));
+
addFormatLine(new VCFFormatHeaderLine(ALLELE_BALANCE_KEY, 1, VCFHeaderLineType.Float, "Allele balance for each het genotype"));
addFormatLine(new VCFFormatHeaderLine(MAPPING_QUALITY_ZERO_BY_SAMPLE_KEY, 1, VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample"));
addFormatLine(new VCFFormatHeaderLine(MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"));
@@ -89,6 +94,15 @@ public class GATKVCFHeaderLines {
addFormatLine(new VCFFormatHeaderLine(JOINT_POSTERIOR_TAG_NAME, 1, VCFHeaderLineType.Integer, "Phred-scaled joint posterior probability of the genotype combination (after applying family priors)"));
addFormatLine(new VCFFormatHeaderLine(ORIGINAL_GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Original Genotype input to Beagle"));
+ // M2-related info lines
+ addFormatLine(new VCFFormatHeaderLine(GATKVCFConstants.ALLELE_FRACTION_KEY, 1, VCFHeaderLineType.Float, "Allele fraction of the event in the tumor"));
+ addFormatLine(new VCFFormatHeaderLine(GATKVCFConstants.OXOG_ALT_F1R2_KEY, 1, VCFHeaderLineType.Integer, "Count of reads in F1R2 pair orientation supporting the alternate allele"));
+ addFormatLine(new VCFFormatHeaderLine(GATKVCFConstants.OXOG_ALT_F2R1_KEY, 1, VCFHeaderLineType.Integer, "Count of reads in F2R1 pair orientation supporting the alternate allele"));
+ addFormatLine(new VCFFormatHeaderLine(GATKVCFConstants.OXOG_REF_F1R2_KEY, 1, VCFHeaderLineType.Integer, "Count of reads in F1R2 pair orientation supporting the reference allele"));
+ addFormatLine(new VCFFormatHeaderLine(GATKVCFConstants.OXOG_REF_F2R1_KEY, 1, VCFHeaderLineType.Integer, "Count of reads in F2R1 pair orientation supporting the reference allele"));
+ addFormatLine(new VCFFormatHeaderLine(GATKVCFConstants.OXOG_FRACTION_KEY, 1, VCFHeaderLineType.Float, "Fraction of alt reads indicating OxoG error"));
+
+
addInfoLine(new VCFInfoHeaderLine(MLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed"));
addInfoLine(new VCFInfoHeaderLine(MLE_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed"));
addInfoLine(new VCFInfoHeaderLine(DOWNSAMPLED_KEY, 0, VCFHeaderLineType.Flag, "Were any of the samples downsampled?"));
@@ -132,7 +146,7 @@ public class GATKVCFHeaderLines {
addInfoLine(new VCFInfoHeaderLine(ORIGINAL_DP_KEY, 1, VCFHeaderLineType.Integer, "Original DP"));
addInfoLine(new VCFInfoHeaderLine(ORIGINAL_CONTIG_KEY, 1, VCFHeaderLineType.String, "Original contig name for the record"));
addInfoLine(new VCFInfoHeaderLine(ORIGINAL_START_KEY, 1, VCFHeaderLineType.Integer, "Original start position for the record"));
- addInfoLine(new VCFInfoHeaderLine(VQS_LOD_KEY, 1, VCFHeaderLineType.Float, "Log odds ratio of being a true variant versus being false under the trained gaussian mixture model"));
+ addInfoLine(new VCFInfoHeaderLine(VQS_LOD_KEY, 1, VCFHeaderLineType.Float, "Log odds of being a true variant versus being false under the trained gaussian mixture model"));
addInfoLine(new VCFInfoHeaderLine(CULPRIT_KEY, 1, VCFHeaderLineType.String, "The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out"));
addInfoLine(new VCFInfoHeaderLine(POSITIVE_LABEL_KEY, 1, VCFHeaderLineType.Flag, "This variant was used to build the positive training set of good variants"));
addInfoLine(new VCFInfoHeaderLine(NEGATIVE_LABEL_KEY, 1, VCFHeaderLineType.Flag, "This variant was used to build the negative training set of bad variants"));
@@ -147,5 +161,15 @@ public class GATKVCFHeaderLines {
addInfoLine(new VCFInfoHeaderLine(BEAGLE_AC_COMP_KEY, 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site"));
addInfoLine(new VCFInfoHeaderLine(BEAGLE_AF_COMP_KEY, 1, VCFHeaderLineType.Integer, "Allele Frequency from Comparison ROD at this site"));
addInfoLine(new VCFInfoHeaderLine(BEAGLE_AN_COMP_KEY, 1, VCFHeaderLineType.Float, "Allele Number from Comparison ROD at this site"));
+
+ // M2-related info lines
+ addInfoLine(new VCFInfoHeaderLine(GATKVCFConstants.EVENT_COUNT_IN_HAPLOTYPE_KEY, 1, VCFHeaderLineType.String, "Number of events in this haplotype"));
+ addInfoLine(new VCFInfoHeaderLine(GATKVCFConstants.EVENT_DISTANCE_MAX_KEY, 1, VCFHeaderLineType.Integer, "Maximum distance between events in this active region"));
+ addInfoLine(new VCFInfoHeaderLine(GATKVCFConstants.EVENT_DISTANCE_MIN_KEY, 1, VCFHeaderLineType.Integer, "Minimum distance between events in this active region"));
+ addInfoLine(new VCFInfoHeaderLine(GATKVCFConstants.HAPLOTYPE_COUNT_KEY, 1, VCFHeaderLineType.String, "Number of haplotypes that support this variant"));
+ addInfoLine(new VCFInfoHeaderLine(GATKVCFConstants.NORMAL_LOD_KEY, 1, VCFHeaderLineType.String, "Normal LOD score"));
+ addInfoLine(new VCFInfoHeaderLine(GATKVCFConstants.PANEL_OF_NORMALS_COUNT_KEY, 1, VCFHeaderLineType.String, "Count from Panel of Normals"));
+ addInfoLine(new VCFInfoHeaderLine(GATKVCFConstants.TUMOR_LOD_KEY, 1, VCFHeaderLineType.String, "Tumor LOD score"));
+
}
}
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java
index 8f3e5c450..445828f58 100644
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java
@@ -393,7 +393,7 @@ public class GATKVariantContextUtils {
if (repetitionCount[0] == 0 || repetitionCount[1] == 0)
return null;
- if (lengths.size() == 0) {
+ if (lengths.isEmpty()) {
lengths.add(repetitionCount[0]); // add ref allele length only once
}
lengths.add(repetitionCount[1]); // add this alt allele's length
@@ -947,7 +947,7 @@ public class GATKVariantContextUtils {
final String setKey,
final boolean filteredAreUncalled,
final boolean mergeInfoWithMaxAC ) {
- if ( unsortedVCs == null || unsortedVCs.size() == 0 )
+ if ( unsortedVCs == null || unsortedVCs.isEmpty() )
return null;
if (priorityListOfVCs != null && originalNumOfVCs != priorityListOfVCs.size())
@@ -965,7 +965,7 @@ public class GATKVariantContextUtils {
VCs.add(vc);
}
- if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
+ if ( VCs.isEmpty() ) // everything is filtered out and we're filteredAreUncalled
return null;
// establish the baseline info from the first VC
@@ -1289,7 +1289,7 @@ public class GATKVariantContextUtils {
* @param currentAlleles the list of alleles already created
* @return a non-null mapping of original alleles to new (extended) ones
*/
- private static Map createAlleleMapping(final Allele refAllele,
+ protected static Map createAlleleMapping(final Allele refAllele,
final VariantContext oneVC,
final Collection currentAlleles) {
final Allele myRef = oneVC.getReference();
@@ -1305,6 +1305,8 @@ public class GATKVariantContextUtils {
if ( extended.equals(b) )
extended = b;
map.put(a, extended);
+ } else if ( a.isSymbolic() ) {
+ map.put(a, a);
}
}
@@ -1696,7 +1698,7 @@ public class GATKVariantContextUtils {
// otherVC has a type different than vc and its alleles are a subset of vc: remove otherVC from its list and add it to vc's type list
vcList.remove(k);
// avoid having empty lists
- if (vcList.size() == 0)
+ if (vcList.isEmpty())
mappedVCs.remove(type);
if ( !mappedVCs.containsKey(vcType) )
mappedVCs.put(vcType, new ArrayList());
diff --git a/public/gatk-utils/src/main/resources/org/broadinstitute/gatk/utils/pairhmm/libVectorLoglessPairHMM.so b/public/gatk-utils/src/main/resources/org/broadinstitute/gatk/utils/pairhmm/libVectorLoglessPairHMM.so
index 7913a51f8..e734211ba 100644
Binary files a/public/gatk-utils/src/main/resources/org/broadinstitute/gatk/utils/pairhmm/libVectorLoglessPairHMM.so and b/public/gatk-utils/src/main/resources/org/broadinstitute/gatk/utils/pairhmm/libVectorLoglessPairHMM.so differ
diff --git a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtilsUnitTest.java b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtilsUnitTest.java
index 860c54736..410bd848f 100644
--- a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtilsUnitTest.java
+++ b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtilsUnitTest.java
@@ -46,6 +46,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
Allele ATref;
Allele Anoref;
Allele GT;
+ Allele Symbolic;
private GenomeLocParser genomeLocParser;
@@ -63,6 +64,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
ATref = Allele.create("AT",true);
Anoref = Allele.create("A",false);
GT = Allele.create("GT",false);
+ Symbolic = Allele.create("", false);
genomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(new File(hg18Reference)));
}
@@ -1607,5 +1609,25 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
BaseUtils.fillWithRandomBases(bases, 1, bases.length);
return bases;
}
+
+ @Test
+ public void testCreateAlleleMapping(){
+ final List alleles = Arrays.asList(Aref,Symbolic,T);
+ final VariantContext vc = new VariantContextBuilder().chr("chr1").alleles(alleles).make();
+ Map map = GATKVariantContextUtils.createAlleleMapping(ATref, vc, alleles);
+
+ final List expectedAlleles = Arrays.asList(Allele.create("", false), Allele.create("TT", false));
+ for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ){
+ Assert.assertEquals(map.get(vc.getAlternateAlleles().get(i)), expectedAlleles.get(i));
+ }
+ }
+
+ @Test(expectedExceptions = IllegalStateException.class)
+ public void testCreateAlleleMappingException(){
+ final List alleles = Arrays.asList(Aref, Symbolic, T);
+ final VariantContext vc = new VariantContextBuilder().chr("chr1").alleles(alleles).make();
+ // Throws an exception if the ref allele length <= ref allele length to extend
+ Map map = GATKVariantContextUtils.createAlleleMapping(Aref, vc, alleles);
+ }
}
diff --git a/public/gsalib/pom.xml b/public/gsalib/pom.xml
index 00a84c6a5..5546ac79c 100644
--- a/public/gsalib/pom.xml
+++ b/public/gsalib/pom.xml
@@ -5,7 +5,7 @@
org.broadinstitute.gatk
gatk-aggregator
- 3.4
+ 3.5-SNAPSHOT
../..
diff --git a/public/package-tests/pom.xml b/public/package-tests/pom.xml
index 3683ccb67..2f2e74940 100644
--- a/public/package-tests/pom.xml
+++ b/public/package-tests/pom.xml
@@ -9,7 +9,7 @@
org.broadinstitute.gatk
gatk-root
- 3.4
+ 3.5-SNAPSHOT
../gatk-root
diff --git a/public/pom.xml b/public/pom.xml
index f4cea4f85..a166255a1 100644
--- a/public/pom.xml
+++ b/public/pom.xml
@@ -5,7 +5,7 @@
org.broadinstitute.gatk
gatk-root
- 3.4
+ 3.5-SNAPSHOT
gatk-root