From 0f30df0356436613a95cb7c8dc81031c3c9c2f6a Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 14 Feb 2014 15:02:00 -0500 Subject: [PATCH] Stopgap procedure to rescue Fisher Strand for cases where there's lots of data. This commit consists of 2 main changes: 1. When the strand table gets too large, we normalize it down to values that are more reasonable. 2. We don't include a particular sample's contribution unless the total ref and alt counts are each greater than 2 (MIN_COUNT); this is a heuristic method for dealing only with hets. MD5s change as expected. Hopefully we'll have a more robust implementation for GATK 3.1. --- .../gatk/walkers/annotator/FisherStrand.java | 86 ++++++++++++++++--- .../VariantAnnotatorIntegrationTest.java | 16 ++-- ...perGeneralPloidySuite1IntegrationTest.java | 6 +- ...perGeneralPloidySuite2IntegrationTest.java | 6 +- ...dGenotyperIndelCallingIntegrationTest.java | 14 +-- .../UnifiedGenotyperIntegrationTest.java | 4 +- ...GenotyperNormalCallingIntegrationTest.java | 12 +-- ...dGenotyperReducedReadsIntegrationTest.java | 4 +- ...lexAndSymbolicVariantsIntegrationTest.java | 4 +- .../HaplotypeCallerGVCFIntegrationTest.java | 8 +- .../HaplotypeCallerIntegrationTest.java | 14 +-- .../GenotypeGVCFsIntegrationTest.java | 8 +- 12 files changed, 121 insertions(+), 61 deletions(-) diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index f3785d63a..a04815e62 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -89,6 +89,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat private static final String FS = "FS"; private static final double MIN_PVALUE = 1E-320; private static final 
int MIN_QUAL_FOR_FILTERED_TEST = 17; + private static final int MIN_COUNT = 2; public Map annotate(final RefMetaDataTracker tracker, final AnnotatorCompatible walker, @@ -134,7 +135,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat private int[][] getTableFromSamples( final GenotypesContext genotypes ) { if( genotypes == null ) { throw new IllegalArgumentException("Genotypes cannot be null."); } - final int[] sbArray = {0,0,0,0}; // forward-reverse -by- alternate-reference + final int[] sbArray = {0,0,0,0}; // reference-forward-reverse -by- alternate-forward-reverse boolean foundData = false; for( final Genotype g : genotypes ) { @@ -144,14 +145,27 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat foundData = true; final String sbbsString = (String) g.getAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME); final int[] data = encodeSBBS(sbbsString); - for( int index = 0; index < sbArray.length; index++ ) { - sbArray[index] += data[index]; + if ( passesMinimumThreshold(data) ) { + for( int index = 0; index < sbArray.length; index++ ) { + sbArray[index] += data[index]; + } } } return ( foundData ? decodeSBBS(sbArray) : null ); } + /** + * Does this strand data array pass the minimum threshold for inclusion? 
+ * + * @param data the array + * @return true if it passes the minimum threshold, false otherwise + */ + private static boolean passesMinimumThreshold(final int[] data) { + // the ref and alt totals must each be greater than MIN_COUNT + return data[0] + data[1] > MIN_COUNT && data[2] + data[3] > MIN_COUNT; + } + /** * Create an annotation for the highest (i.e., least significant) p-value of table1 and table2 * @@ -236,7 +250,9 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat } private Double pValueForContingencyTable(int[][] originalTable) { - int [][] table = copyContingencyTable(originalTable); + final int[][] normalizedTable = normalizeContingencyTable(originalTable); + + int[][] table = copyContingencyTable(normalizedTable); double pCutoff = computePValue(table); //printTable(table, pCutoff); @@ -252,7 +268,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat } } - table = copyContingencyTable(originalTable); + table = copyContingencyTable(normalizedTable); while (unrotateTable(table)) { double pValuePiece = computePValue(table); @@ -270,6 +286,32 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat return Math.min(pValue, 1.0); } + // how large do we want the normalized table to be? + private static final double TARGET_TABLE_SIZE = 200.0; + + /** + * Normalize the table so that the entries are not too large. + * Note that this method does NOT necessarily make a copy of the table being passed in! 
+ * + * @param table the original table + * @return a normalized version of the table or the original table if it is already normalized + */ + private static int[][] normalizeContingencyTable(final int[][] table) { + final int sum = table[0][0] + table[0][1] + table[1][0] + table[1][1]; + if ( sum <= TARGET_TABLE_SIZE * 2 ) + return table; + + final double normalizationFactor = (double)sum / TARGET_TABLE_SIZE; + + final int[][] normalized = new int[2][2]; + for ( int i = 0; i < 2; i++ ) { + for ( int j = 0; j < 2; j++ ) + normalized[i][j] = (int)(table[i][j] / normalizationFactor); + } + + return normalized; + } + private static int [][] copyContingencyTable(int [][] t) { int[][] c = new int[2][2]; @@ -372,17 +414,33 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat final int[][] table = new int[2][2]; for (final PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) { + final int[] myTable = new int[4]; for (final Map.Entry> el : maps.getLikelihoodReadMap().entrySet()) { final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()); final GATKSAMRecord read = el.getKey(); final int representativeCount = read.isReducedRead() ? 
read.getReducedCount(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL)) : 1; - updateTable(table, mostLikelyAllele.getAlleleIfInformative(), read, ref, alt, representativeCount); + updateTable(myTable, mostLikelyAllele.getAlleleIfInformative(), read, ref, alt, representativeCount); } + if ( passesMinimumThreshold(myTable) ) + copyToMainTable(myTable, table); } return table; } + /** + * Helper method to copy the per-sample table to the main table + * + * @param perSampleTable per-sample table (single dimension) + * @param mainTable main table (two dimensions) + */ + private static void copyToMainTable(final int[] perSampleTable, final int[][] mainTable) { + mainTable[0][0] += perSampleTable[0]; + mainTable[0][1] += perSampleTable[1]; + mainTable[1][0] += perSampleTable[2]; + mainTable[1][1] += perSampleTable[3]; + } + /** Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this: * fw rc @@ -397,6 +455,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat int[][] table = new int[2][2]; for ( Map.Entry sample : stratifiedContexts.entrySet() ) { + final int[] myTable = new int[4]; for (PileupElement p : sample.getValue().getBasePileup()) { if ( ! 
isUsableBase(p) ) // ignore deletions and bad MQ @@ -405,8 +464,10 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat if ( p.getQual() < minQScoreToConsider || p.getMappingQual() < minQScoreToConsider ) continue; - updateTable(table, Allele.create(p.getBase(), false), p.getRead(), ref, alt, p.getRepresentativeCount()); + updateTable(myTable, Allele.create(p.getBase(), false), p.getRead(), ref, alt, p.getRepresentativeCount()); } + if ( passesMinimumThreshold(myTable) ) + copyToMainTable(myTable, table); } return table; @@ -426,13 +487,13 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat ((int) p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE); } - private static void updateTable(final int[][] table, final Allele allele, final GATKSAMRecord read, final Allele ref, final Allele alt, final int representativeCount) { + private static void updateTable(final int[] table, final Allele allele, final GATKSAMRecord read, final Allele ref, final Allele alt, final int representativeCount) { final boolean matchesRef = allele.equals(ref, true); final boolean matchesAlt = allele.equals(alt, true); if ( matchesRef || matchesAlt ) { - final int row = matchesRef ? 0 : 1; + final int offset = matchesRef ? 0 : 2; if ( read.isStrandless() ) { @@ -443,14 +504,13 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat // (the 1 is to ensure that a strandless read always counts as an observation on both strands, even // if the read is only seen once, because it's a merged read or other) final int toAdd = Math.max(representativeCount / 2, 1); - table[row][0] += toAdd; - table[row][1] += toAdd; + table[offset] += toAdd; + table[offset + 1] += toAdd; } } else { // a normal read with an actual strand final boolean isFW = !read.getReadNegativeStrandFlag(); - final int column = isFW ? 0 : 1; - table[row][column] += representativeCount; + table[offset + (isFW ? 
0 : 1)] += representativeCount; } } } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index d4a909821..287cd45d0 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -89,7 +89,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("823868a4b5b5ec2cdf080c059d04d31a")); + Arrays.asList("ff21ad7bb0d6bcabcee6b95d975570fc")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -97,7 +97,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("213560f395280e6a066d0b0497ce8881")); + Arrays.asList("cb463a56d0b5bc66940f844e56265c14")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -123,7 +123,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 
1:10,020,000-10,021,000", 1, - Arrays.asList("6f873b3152db291e18e3a04fbce2e117")); + Arrays.asList("d57ca04b4ceb2f25b31bc0cbd88bca6b")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -131,7 +131,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("d8089c5874ff35a7fd7e35ebd7d3b137")); + Arrays.asList("9cc0cf19070d951b1979e069552810f1")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } @@ -270,7 +270,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { executeTest("Testing SnpEff annotations (unsupported version, no GATK mode)", spec); } - @Test + @Test(enabled = true) public void testTDTAnnotation() { final String MD5 = "427dfdc665359b67eff210f909ebf8a2"; WalkerTestSpec spec = new WalkerTestSpec( @@ -281,7 +281,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { } - @Test + @Test(enabled = true) public void testChromosomeCountsPed() { final String MD5 = "6b5cbedf4a8b3385edf128d81c8a46f2"; WalkerTestSpec spec = new WalkerTestSpec( @@ -291,7 +291,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { executeTest("Testing ChromosomeCounts annotation with PED file", spec); } - @Test + @Test(enabled = true) public void testInbreedingCoeffPed() { final String MD5 = "159a771c1deaeffb786097e106943893"; WalkerTestSpec spec = new WalkerTestSpec( @@ -358,7 +358,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { final File outputVCFNoQD = executeTest("testQualByDepth calling without QD", specNoQD).getFirst().get(0); final String baseAnn = String.format("-T VariantAnnotator -R %s -V %s", REF, outputVCFNoQD.getAbsolutePath()) + " 
--no_cmdline_in_header -o %s -L 20:10130000-10134800 -A QualByDepth"; - final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("78b8b498fdc34e59208150caacb25b1c")); + final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("4ccdbebcfd02be87ae5b4ad94666f011")); specAnn.disableShadowBCF(); final File outputVCFAnn = executeTest("testQualByDepth re-annotation of QD", specAnn).getFirst().get(0); diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java index d1f13143f..8f71c35be 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java @@ -69,16 +69,16 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe @Test(enabled = true) public void testBOTH_GGA_Pools() { - executor.PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "0eec36459cf1f1e3e8739ab5b1cedb39"); + executor.PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "c2932cc77611f13cc8a14e87d055a8f8"); } @Test(enabled = true) public void testINDEL_GGA_Pools() { - executor.PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "73229442a8fe558e58dd5dd305eb2315"); + executor.PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 
-gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "a0648992f049ed59fab0ef753d2d0c03"); } @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "d38b9223a3234af4cd3aec245c72fb53"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "fcfe18bd4c6087b21959d3c31ec177da"); } } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java index 6b4b9e8e4..e16ca154f 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java @@ -58,16 +58,16 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","49f27dae0a86351128db87923735cb10"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","ef7a6ee4ec7e20e5ce28fc50d3362d3d"); } @Test(enabled = true) public void testMT_SNP_DISCOVERY_sp4() { - executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","5d55b71688a0777a7c0247c376401368"); + executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","fc75733fcdd8079e7f7743961a1f36be"); } @Test(enabled = true) public void testMT_SNP_GGA_sp10() { - executor.PC_MT_Test(CEUTRIO_BAM, 
String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "cf336d66a109c55f90e9ed2b3bc196c8"); + executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "86cdfc291f995036658bfc10773db107"); } } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java index d2f838779..8b8c82ea6 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java @@ -73,7 +73,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("8a4de9e1f59cffe80a4372cf02fe809e")); + Arrays.asList("bb8c1b2e9343c79133d8efb51ec2192e")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -100,7 +100,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("2b92df91a9337b9d9f03db5699bb41f2")); + Arrays.asList("f5e5148cac1526136f9f2559fe3b49fa")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -110,7 +110,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + 
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("422a114943a9e3e9bf5872b82cbc6340")); + Arrays.asList("209db887bfe1aac8bd62544aa8afa2b5")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec); } @@ -120,7 +120,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("d3d56be9e804132a8d085b5d0acb49f1")); + Arrays.asList("83b32ea956809654590abd5e0c029d4d")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } @@ -135,7 +135,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1, - Arrays.asList("505a0dfa1ec335af6850654f926ec051")); + Arrays.asList("25815c1968450ddd009b983d65809c50")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } @@ -175,7 +175,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { public void testMinIndelFraction0() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.0", 1, - Arrays.asList("4a45d5bd459565ec35c726894430e8df")); + Arrays.asList("af0b881d0a931f0789706f0289b72a64")); executeTest("test minIndelFraction 0.0", spec); } @@ -183,7 +183,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { public void testMinIndelFraction25() { WalkerTest.WalkerTestSpec spec = new 
WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.25", 1, - Arrays.asList("a78c663eff00b28b44f368f03b2acf1b")); + Arrays.asList("aa97a7941a861d57a3b746b3f6301eb6")); executeTest("test minIndelFraction 0.25", spec); } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index b30d124c4..ecfda9d8a 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -151,7 +151,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testNoPrior() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 -inputPrior 0.33333 -inputPrior 0.33333", 1, - Arrays.asList("7ac60bdc355d97c0939e644b58de47d7")); + Arrays.asList("9ee4f1ee1827a6726bfac1220a6a7c40")); executeTest("test no prior 1", spec1); } @@ -168,7 +168,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void emitPLsAtAllSites() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --output_mode EMIT_ALL_SITES -allSitePLs", 1, - Arrays.asList("552aced1b1ef7e4a554223f4719f9560")); + Arrays.asList("85dee5da72c4154e130527c4e6329c07")); // GDA: TODO: BCF encoder/decoder doesn't seem to support non-standard values in genotype fields. 
IE even if there is a field defined in FORMAT and in the header the BCF2 encoder will still fail spec1.disableShadowBCF(); diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java index 903979e9d..29b93e427 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java @@ -64,7 +64,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("710d379607129935b1b7b6960ca7b213")); + Arrays.asList("03ff28802a2e06e0a623d9a5df66d237")); executeTest("test MultiSample Pilot1", spec); } @@ -72,7 +72,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testWithAllelesPassedIn1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("ebfcc3dd8c1788929cb50050c5d456df")); + Arrays.asList("85d0e5c086dc642d55124f0e88e7326b")); executeTest("test MultiSample Pilot2 with alleles passed in", spec1); } @@ -80,7 +80,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testWithAllelesPassedIn2() { WalkerTest.WalkerTestSpec spec2 = new 
WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("3e646003c5b93da80c7d8e5d0ff2ee4e")); + Arrays.asList("11783a280df9bf621840c300edd0401a")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } @@ -96,7 +96,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("f5a62ecb8d32f6161b2ac7682c9f711d")); + Arrays.asList("eac8b071bd2fa89889d51de8be84624a")); executeTest("test Multiple SNP alleles", spec); } @@ -112,7 +112,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - Arrays.asList("bc5a143868e3ad3acc9bb7c09798cdf2")); + Arrays.asList("7f912aa5166f6ed16166daac1e5c0935")); executeTest("test reverse trim", spec); } @@ -120,7 +120,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMismatchedPLs() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + 
" --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, - Arrays.asList("8897652c7516a91d22bc678f2189131e")); + Arrays.asList("ab22f70f5c65d45f9754e7064e5a152c")); executeTest("test mismatched PLs", spec); } } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java index eae37f142..df749231e 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java @@ -69,12 +69,12 @@ public class UnifiedGenotyperReducedReadsIntegrationTest extends WalkerTest { @Test public void testReducedBamSNPs() { - testReducedCalling("SNP", "e8de8c523751ad2fa2ee20185ba5dea7"); + testReducedCalling("SNP", "cc0508b18028f2e84e6a42c1ff23721c"); } @Test public void testReducedBamINDELs() { - testReducedCalling("INDEL", "0281c3f46f7b1989c37b52ab7e337293"); + testReducedCalling("INDEL", "6fc00d5299b1bf334d39634c3409a69d"); } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 8d67b3baf..f547e12cc 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ 
-64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "e966ca14532ae80fe5d8898a1a7b4e74"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "65c316f1f3987d7bc94e887999920d45"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -88,7 +88,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "cdf6d200324949a3484668774d2289d7"); + "724a05b7df716647014f29c0fe86e071"); } @Test diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index 9f24eb22a..8ca67f31d 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -67,9 +67,9 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals; // this functionality can be adapted to provide input data for whatever you might want in your data - tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "53aa13711a1ceec1453f21c705723f04"}); - tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "61c70b7b6d03930420b015958df6b5a5"}); - tests.add(new 
Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "6fd946c4c8c9fd05ea921513e4523a4b"}); + tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "50323a284788c8220c9226037c7003b5"}); + tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "7c16aa8e35de9f418533efac3bae6551"}); + tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "7e1e193d70187774f9740d475e0f1cc1"}); tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"}); tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "d926d653500a970280ad7828d9ee2b84"}); tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "83ddc16e4f0900429b2da30e582994aa"}); @@ -149,7 +149,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { public void testNoCallGVCFMissingPLsBugFix() { final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("4fe4a9bfbbcc98d1158cd0c164b9cc65")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("4e2c20650c4c5ae6fa44b289eae5771d")); spec.disableShadowBCF(); executeTest("testNoCallGVCFMissingPLsBugFix", spec); } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java 
b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 047de62e1..615c62c43 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "abbfdcbf4bfed7547a48121091a7e16f"); + HCTest(CEUTRIO_BAM, "", "489073bf0034fe9f10e6472ab93a17eb"); } @Test @@ -104,7 +104,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerGraphBasedMultiSample() { - HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "6a89f40fbeec05481fa1f2bf16289d5d"); + HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "d45b2b26434dd3bd48df5a43b3d2954a"); } @Test(enabled = false) // can't annotate the rsID's yet @@ -115,7 +115,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "f62e874e2405689784764095b6abd1a7"); + "a1e59313516c2d5eeedae8348b0bdff1"); } @Test @@ -249,7 +249,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("51e63c0431817ca1824b01e56341a8ae")); + Arrays.asList("0864904254b2fa757991f8c2dac4932d")); executeTest("HC calling 
with dbSNP ID annotation on WGS intervals", spec); } @@ -266,7 +266,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGSGraphBased() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("a2ada5984fe835f7f2169f8393d122a6")); + Arrays.asList("df1f9410d23a550a143531ac0891f1dc")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } @@ -298,7 +298,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestAggressivePcrIndelModelWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1, - Arrays.asList("69bbadca5beb8202a77815daaa49e634")); + Arrays.asList("f426f4c2986e1dea8f3f55951ef8e013")); executeTest("HC calling with aggressive indel error modeling on WGS intervals", spec); } @@ -306,7 +306,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestConservativePcrIndelModelWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1, - Arrays.asList("061a5a9bde0739fe58b314bf8bf8eee3")); + Arrays.asList("616cc63d5a78765145914457dec475b0")); executeTest("HC calling with conservative indel error modeling on WGS intervals", spec); } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFsIntegrationTest.java 
b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFsIntegrationTest.java index 741141118..1ca23caba 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFsIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFsIntegrationTest.java @@ -65,7 +65,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" + " -L 20:10,000,000-20,000,000", b37KGReference), 1, - Arrays.asList("9c618890c03ee9cae1d269039fc29506")); + Arrays.asList("2be5f6f7e7f79841108906555d548683")); executeTest("combineSingleSamplePipelineGVCF", spec); } @@ -89,7 +89,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" + " -L 20:10,000,000-11,000,000 --dbsnp " + b37dbSNP132, b37KGReference), 1, - Arrays.asList("27f3e4700cf836c23a9af2dc1d1bbecb")); + Arrays.asList("e3c7452277898fece54bf60af9588666")); executeTest("combineSingleSamplePipelineGVCF_addDbsnp", spec); } @@ -99,7 +99,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { "-T GenotypeGVCFs --no_cmdline_in_header -L 1:69485-69791 -o %s -R " + b37KGReference + " -V " + privateTestDir + "gvcfExample1.vcf", 1, - Arrays.asList("2541e164056d5632ad7de784a9af3880")); + Arrays.asList("bee009201ec3ad7b4f42f913e7ef1367")); executeTest("testJustOneSample", spec); } @@ -110,7 +110,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V " + privateTestDir + "gvcfExample1.vcf" + " -V " + privateTestDir + "gvcfExample2.vcf", 1, - Arrays.asList("9daf9602338db9d06c075c6e9a15ee2c")); + Arrays.asList("67410d8ac490e3c9d19ba7a4cceaf8fb")); executeTest("testSamplesWithDifferentLs", spec); } } \ No newline at end of file