Stopgap procedure to rescue Fisher Strand for cases where there's lots of data.

This commit consists of 2 main changes:
1. When the strand table gets too large, we normalize it down to values that are more reasonable.
2. We don't include a particular sample's contribution unless the total ref and alt counts are each greater than 2
(the code tests each total with "> MIN_COUNT", where MIN_COUNT = 2); this is a heuristic method for dealing only with hets.

MD5s change as expected.
Hopefully we'll have a more robust implementation for GATK 3.1.
This commit is contained in:
Eric Banks 2014-02-14 15:02:00 -05:00
parent e8ea9f58d3
commit 0f30df0356
12 changed files with 121 additions and 61 deletions

View File

@ -89,6 +89,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
private static final String FS = "FS";
private static final double MIN_PVALUE = 1E-320;
private static final int MIN_QUAL_FOR_FILTERED_TEST = 17;
private static final int MIN_COUNT = 2;
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
@ -134,7 +135,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
private int[][] getTableFromSamples( final GenotypesContext genotypes ) {
if( genotypes == null ) { throw new IllegalArgumentException("Genotypes cannot be null."); }
final int[] sbArray = {0,0,0,0}; // forward-reverse -by- alternate-reference
final int[] sbArray = {0,0,0,0}; // reference-forward-reverse -by- alternate-forward-reverse
boolean foundData = false;
for( final Genotype g : genotypes ) {
@ -144,14 +145,27 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
foundData = true;
final String sbbsString = (String) g.getAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME);
final int[] data = encodeSBBS(sbbsString);
for( int index = 0; index < sbArray.length; index++ ) {
sbArray[index] += data[index];
if ( passesMinimumThreshold(data) ) {
for( int index = 0; index < sbArray.length; index++ ) {
sbArray[index] += data[index];
}
}
}
return ( foundData ? decodeSBBS(sbArray) : null );
}
/**
 * Decides whether one strand-count array carries enough observations to be included.
 *
 * The first two entries are summed as the ref total and the last two as the alt total;
 * each total has to be strictly greater than MIN_COUNT.
 *
 * @param data the strand counts to check (entries 0-1 are ref, entries 2-3 are alt)
 * @return true when both the ref total and the alt total exceed MIN_COUNT, false otherwise
 */
private static boolean passesMinimumThreshold(final int[] data) {
    // check the ref side first; the alt entries are only read when ref has passed
    final int refTotal = data[0] + data[1];
    if ( refTotal <= MIN_COUNT ) {
        return false;
    }

    final int altTotal = data[2] + data[3];
    return altTotal > MIN_COUNT;
}
/**
* Create an annotation for the highest (i.e., least significant) p-value of table1 and table2
*
@ -236,7 +250,9 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
}
private Double pValueForContingencyTable(int[][] originalTable) {
int [][] table = copyContingencyTable(originalTable);
final int[][] normalizedTable = normalizeContingencyTable(originalTable);
int[][] table = copyContingencyTable(normalizedTable);
double pCutoff = computePValue(table);
//printTable(table, pCutoff);
@ -252,7 +268,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
}
}
table = copyContingencyTable(originalTable);
table = copyContingencyTable(normalizedTable);
while (unrotateTable(table)) {
double pValuePiece = computePValue(table);
@ -270,6 +286,32 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
return Math.min(pValue, 1.0);
}
// how large do we want the normalized table to be?
private static final double TARGET_TABLE_SIZE = 200.0;
/**
 * Normalize the table so that the entries are not too large.
 * Note that this method does NOT necessarily make a copy of the table being passed in!
 *
 * A table whose four entries sum to at most twice TARGET_TABLE_SIZE is returned as-is (same
 * array instance). Otherwise a new table is returned in which every entry has been divided by
 * (sum / TARGET_TABLE_SIZE) and truncated to an int, so small entries may become 0.
 *
 * @param table the original 2x2 table
 * @return a normalized version of the table or the original table if it is already normalized
 */
private static int[][] normalizeContingencyTable(final int[][] table) {
    // Accumulate in a long: the sum of four int counts can exceed Integer.MAX_VALUE, and a wrapped
    // (negative or small) int sum would let a huge table slip past the size check un-normalized.
    final long sum = (long)table[0][0] + table[0][1] + table[1][0] + table[1][1];
    if ( sum <= TARGET_TABLE_SIZE * 2 )
        return table;

    // scale factor that brings the table total down to roughly TARGET_TABLE_SIZE
    final double normalizationFactor = (double)sum / TARGET_TABLE_SIZE;

    final int[][] normalized = new int[2][2];
    for ( int i = 0; i < 2; i++ ) {
        for ( int j = 0; j < 2; j++ )
            normalized[i][j] = (int)(table[i][j] / normalizationFactor);
    }

    return normalized;
}
private static int [][] copyContingencyTable(int [][] t) {
int[][] c = new int[2][2];
@ -372,17 +414,33 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
final int[][] table = new int[2][2];
for (final PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
final int[] myTable = new int[4];
for (final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
final GATKSAMRecord read = el.getKey();
final int representativeCount = read.isReducedRead() ? read.getReducedCount(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL)) : 1;
updateTable(table, mostLikelyAllele.getAlleleIfInformative(), read, ref, alt, representativeCount);
updateTable(myTable, mostLikelyAllele.getAlleleIfInformative(), read, ref, alt, representativeCount);
}
if ( passesMinimumThreshold(myTable) )
copyToMainTable(myTable, table);
}
return table;
}
/**
 * Adds the counts of a flat, four-entry per-sample table onto the 2x2 main table.
 *
 * Entry (2 * row + column) of the per-sample table is added to mainTable[row][column];
 * the per-sample table itself is not modified.
 *
 * @param perSampleTable per-sample table (single dimension)
 * @param mainTable      main table (two dimensions), updated in place
 */
private static void copyToMainTable(final int[] perSampleTable, final int[][] mainTable) {
    for ( int row = 0; row < 2; row++ ) {
        for ( int column = 0; column < 2; column++ ) {
            mainTable[row][column] += perSampleTable[2 * row + column];
        }
    }
}
/**
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
* fw rc
@ -397,6 +455,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
int[][] table = new int[2][2];
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
final int[] myTable = new int[4];
for (PileupElement p : sample.getValue().getBasePileup()) {
if ( ! isUsableBase(p) ) // ignore deletions and bad MQ
@ -405,8 +464,10 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
if ( p.getQual() < minQScoreToConsider || p.getMappingQual() < minQScoreToConsider )
continue;
updateTable(table, Allele.create(p.getBase(), false), p.getRead(), ref, alt, p.getRepresentativeCount());
updateTable(myTable, Allele.create(p.getBase(), false), p.getRead(), ref, alt, p.getRepresentativeCount());
}
if ( passesMinimumThreshold(myTable) )
copyToMainTable(myTable, table);
}
return table;
@ -426,13 +487,13 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
((int) p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE);
}
private static void updateTable(final int[][] table, final Allele allele, final GATKSAMRecord read, final Allele ref, final Allele alt, final int representativeCount) {
private static void updateTable(final int[] table, final Allele allele, final GATKSAMRecord read, final Allele ref, final Allele alt, final int representativeCount) {
final boolean matchesRef = allele.equals(ref, true);
final boolean matchesAlt = allele.equals(alt, true);
if ( matchesRef || matchesAlt ) {
final int row = matchesRef ? 0 : 1;
final int offset = matchesRef ? 0 : 2;
if ( read.isStrandless() ) {
@ -443,14 +504,13 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
// (the 1 is to ensure that a strandless read always counts as an observation on both strands, even
// if the read is only seen once, because it's a merged read or other)
final int toAdd = Math.max(representativeCount / 2, 1);
table[row][0] += toAdd;
table[row][1] += toAdd;
table[offset] += toAdd;
table[offset + 1] += toAdd;
}
} else {
// a normal read with an actual strand
final boolean isFW = !read.getReadNegativeStrandFlag();
final int column = isFW ? 0 : 1;
table[row][column] += representativeCount;
table[offset + (isFW ? 0 : 1)] += representativeCount;
}
}
}

View File

@ -89,7 +89,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("823868a4b5b5ec2cdf080c059d04d31a"));
Arrays.asList("ff21ad7bb0d6bcabcee6b95d975570fc"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -97,7 +97,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("213560f395280e6a066d0b0497ce8881"));
Arrays.asList("cb463a56d0b5bc66940f844e56265c14"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -123,7 +123,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("6f873b3152db291e18e3a04fbce2e117"));
Arrays.asList("d57ca04b4ceb2f25b31bc0cbd88bca6b"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -131,7 +131,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("d8089c5874ff35a7fd7e35ebd7d3b137"));
Arrays.asList("9cc0cf19070d951b1979e069552810f1"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
@ -270,7 +270,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
executeTest("Testing SnpEff annotations (unsupported version, no GATK mode)", spec);
}
@Test
@Test(enabled = true)
public void testTDTAnnotation() {
final String MD5 = "427dfdc665359b67eff210f909ebf8a2";
WalkerTestSpec spec = new WalkerTestSpec(
@ -281,7 +281,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
}
@Test
@Test(enabled = true)
public void testChromosomeCountsPed() {
final String MD5 = "6b5cbedf4a8b3385edf128d81c8a46f2";
WalkerTestSpec spec = new WalkerTestSpec(
@ -291,7 +291,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
executeTest("Testing ChromosomeCounts annotation with PED file", spec);
}
@Test
@Test(enabled = true)
public void testInbreedingCoeffPed() {
final String MD5 = "159a771c1deaeffb786097e106943893";
WalkerTestSpec spec = new WalkerTestSpec(
@ -358,7 +358,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
final File outputVCFNoQD = executeTest("testQualByDepth calling without QD", specNoQD).getFirst().get(0);
final String baseAnn = String.format("-T VariantAnnotator -R %s -V %s", REF, outputVCFNoQD.getAbsolutePath()) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -A QualByDepth";
final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("78b8b498fdc34e59208150caacb25b1c"));
final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("4ccdbebcfd02be87ae5b4ad94666f011"));
specAnn.disableShadowBCF();
final File outputVCFAnn = executeTest("testQualByDepth re-annotation of QD", specAnn).getFirst().get(0);

View File

@ -69,16 +69,16 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testBOTH_GGA_Pools() {
executor.PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "0eec36459cf1f1e3e8739ab5b1cedb39");
executor.PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "c2932cc77611f13cc8a14e87d055a8f8");
}
@Test(enabled = true)
public void testINDEL_GGA_Pools() {
executor.PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "73229442a8fe558e58dd5dd305eb2315");
executor.PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "a0648992f049ed59fab0ef753d2d0c03");
}
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "d38b9223a3234af4cd3aec245c72fb53");
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "fcfe18bd4c6087b21959d3c31ec177da");
}
}

View File

@ -58,16 +58,16 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","49f27dae0a86351128db87923735cb10");
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","ef7a6ee4ec7e20e5ce28fc50d3362d3d");
}
@Test(enabled = true)
public void testMT_SNP_DISCOVERY_sp4() {
executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","5d55b71688a0777a7c0247c376401368");
executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","fc75733fcdd8079e7f7743961a1f36be");
}
@Test(enabled = true)
public void testMT_SNP_GGA_sp10() {
executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "cf336d66a109c55f90e9ed2b3bc196c8");
executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "86cdfc291f995036658bfc10773db107");
}
}

View File

@ -73,7 +73,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("8a4de9e1f59cffe80a4372cf02fe809e"));
Arrays.asList("bb8c1b2e9343c79133d8efb51ec2192e"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -100,7 +100,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("2b92df91a9337b9d9f03db5699bb41f2"));
Arrays.asList("f5e5148cac1526136f9f2559fe3b49fa"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -110,7 +110,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("422a114943a9e3e9bf5872b82cbc6340"));
Arrays.asList("209db887bfe1aac8bd62544aa8afa2b5"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
}
@ -120,7 +120,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("d3d56be9e804132a8d085b5d0acb49f1"));
Arrays.asList("83b32ea956809654590abd5e0c029d4d"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
}
@ -135,7 +135,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1,
Arrays.asList("505a0dfa1ec335af6850654f926ec051"));
Arrays.asList("25815c1968450ddd009b983d65809c50"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
}
@ -175,7 +175,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
public void testMinIndelFraction0() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
Arrays.asList("4a45d5bd459565ec35c726894430e8df"));
Arrays.asList("af0b881d0a931f0789706f0289b72a64"));
executeTest("test minIndelFraction 0.0", spec);
}
@ -183,7 +183,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
public void testMinIndelFraction25() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
Arrays.asList("a78c663eff00b28b44f368f03b2acf1b"));
Arrays.asList("aa97a7941a861d57a3b746b3f6301eb6"));
executeTest("test minIndelFraction 0.25", spec);
}

View File

@ -151,7 +151,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testNoPrior() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 -inputPrior 0.33333 -inputPrior 0.33333", 1,
Arrays.asList("7ac60bdc355d97c0939e644b58de47d7"));
Arrays.asList("9ee4f1ee1827a6726bfac1220a6a7c40"));
executeTest("test no prior 1", spec1);
}
@ -168,7 +168,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void emitPLsAtAllSites() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --output_mode EMIT_ALL_SITES -allSitePLs", 1,
Arrays.asList("552aced1b1ef7e4a554223f4719f9560"));
Arrays.asList("85dee5da72c4154e130527c4e6329c07"));
// GDA: TODO: BCF encoder/decoder doesn't seem to support non-standard values in genotype fields. IE even if there is a field defined in FORMAT and in the header the BCF2 encoder will still fail
spec1.disableShadowBCF();

View File

@ -64,7 +64,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("710d379607129935b1b7b6960ca7b213"));
Arrays.asList("03ff28802a2e06e0a623d9a5df66d237"));
executeTest("test MultiSample Pilot1", spec);
}
@ -72,7 +72,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testWithAllelesPassedIn1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("ebfcc3dd8c1788929cb50050c5d456df"));
Arrays.asList("85d0e5c086dc642d55124f0e88e7326b"));
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
}
@ -80,7 +80,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testWithAllelesPassedIn2() {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("3e646003c5b93da80c7d8e5d0ff2ee4e"));
Arrays.asList("11783a280df9bf621840c300edd0401a"));
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
}
@ -96,7 +96,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
Arrays.asList("f5a62ecb8d32f6161b2ac7682c9f711d"));
Arrays.asList("eac8b071bd2fa89889d51de8be84624a"));
executeTest("test Multiple SNP alleles", spec);
}
@ -112,7 +112,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testReverseTrim() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
Arrays.asList("bc5a143868e3ad3acc9bb7c09798cdf2"));
Arrays.asList("7f912aa5166f6ed16166daac1e5c0935"));
executeTest("test reverse trim", spec);
}
@ -120,7 +120,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testMismatchedPLs() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
Arrays.asList("8897652c7516a91d22bc678f2189131e"));
Arrays.asList("ab22f70f5c65d45f9754e7064e5a152c"));
executeTest("test mismatched PLs", spec);
}
}

View File

@ -69,12 +69,12 @@ public class UnifiedGenotyperReducedReadsIntegrationTest extends WalkerTest {
@Test
public void testReducedBamSNPs() {
testReducedCalling("SNP", "e8de8c523751ad2fa2ee20185ba5dea7");
testReducedCalling("SNP", "cc0508b18028f2e84e6a42c1ff23721c");
}
@Test
public void testReducedBamINDELs() {
testReducedCalling("INDEL", "0281c3f46f7b1989c37b52ab7e337293");
testReducedCalling("INDEL", "6fc00d5299b1bf334d39634c3409a69d");
}

View File

@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleComplex1() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "e966ca14532ae80fe5d8898a1a7b4e74");
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "65c316f1f3987d7bc94e887999920d45");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
@ -88,7 +88,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleGGAComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
"cdf6d200324949a3484668774d2289d7");
"724a05b7df716647014f29c0fe86e071");
}
@Test

View File

@ -67,9 +67,9 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;
// this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "53aa13711a1ceec1453f21c705723f04"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "61c70b7b6d03930420b015958df6b5a5"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "6fd946c4c8c9fd05ea921513e4523a4b"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "50323a284788c8220c9226037c7003b5"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "7c16aa8e35de9f418533efac3bae6551"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "7e1e193d70187774f9740d475e0f1cc1"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "d926d653500a970280ad7828d9ee2b84"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "83ddc16e4f0900429b2da30e582994aa"});
@ -149,7 +149,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
public void testNoCallGVCFMissingPLsBugFix() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("4fe4a9bfbbcc98d1158cd0c164b9cc65"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("4e2c20650c4c5ae6fa44b289eae5771d"));
spec.disableShadowBCF();
executeTest("testNoCallGVCFMissingPLsBugFix", spec);
}

View File

@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "abbfdcbf4bfed7547a48121091a7e16f");
HCTest(CEUTRIO_BAM, "", "489073bf0034fe9f10e6472ab93a17eb");
}
@Test
@ -104,7 +104,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerGraphBasedMultiSample() {
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "6a89f40fbeec05481fa1f2bf16289d5d");
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "d45b2b26434dd3bd48df5a43b3d2954a");
}
@Test(enabled = false) // can't annotate the rsID's yet
@ -115,7 +115,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"f62e874e2405689784764095b6abd1a7");
"a1e59313516c2d5eeedae8348b0bdff1");
}
@Test
@ -249,7 +249,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestDBSNPAnnotationWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("51e63c0431817ca1824b01e56341a8ae"));
Arrays.asList("0864904254b2fa757991f8c2dac4932d"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
}
@ -266,7 +266,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestDBSNPAnnotationWGSGraphBased() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("a2ada5984fe835f7f2169f8393d122a6"));
Arrays.asList("df1f9410d23a550a143531ac0891f1dc"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
}
@ -298,7 +298,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestAggressivePcrIndelModelWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
Arrays.asList("69bbadca5beb8202a77815daaa49e634"));
Arrays.asList("f426f4c2986e1dea8f3f55951ef8e013"));
executeTest("HC calling with aggressive indel error modeling on WGS intervals", spec);
}
@ -306,7 +306,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestConservativePcrIndelModelWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
Arrays.asList("061a5a9bde0739fe58b314bf8bf8eee3"));
Arrays.asList("616cc63d5a78765145914457dec475b0"));
executeTest("HC calling with conservative indel error modeling on WGS intervals", spec);
}

View File

@ -65,7 +65,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
" -L 20:10,000,000-20,000,000", b37KGReference),
1,
Arrays.asList("9c618890c03ee9cae1d269039fc29506"));
Arrays.asList("2be5f6f7e7f79841108906555d548683"));
executeTest("combineSingleSamplePipelineGVCF", spec);
}
@ -89,7 +89,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
" -L 20:10,000,000-11,000,000 --dbsnp " + b37dbSNP132, b37KGReference),
1,
Arrays.asList("27f3e4700cf836c23a9af2dc1d1bbecb"));
Arrays.asList("e3c7452277898fece54bf60af9588666"));
executeTest("combineSingleSamplePipelineGVCF_addDbsnp", spec);
}
@ -99,7 +99,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:69485-69791 -o %s -R " + b37KGReference +
" -V " + privateTestDir + "gvcfExample1.vcf",
1,
Arrays.asList("2541e164056d5632ad7de784a9af3880"));
Arrays.asList("bee009201ec3ad7b4f42f913e7ef1367"));
executeTest("testJustOneSample", spec);
}
@ -110,7 +110,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V " + privateTestDir + "gvcfExample1.vcf" +
" -V " + privateTestDir + "gvcfExample2.vcf",
1,
Arrays.asList("9daf9602338db9d06c075c6e9a15ee2c"));
Arrays.asList("67410d8ac490e3c9d19ba7a4cceaf8fb"));
executeTest("testSamplesWithDifferentLs", spec);
}
}