Stopgap procedure to rescue Fisher Strand for cases where there's lots of data.
This commit consists of two main changes: (1) when the strand table gets too large, we normalize it down to more reasonable values; (2) we don't include a particular sample's contribution unless its total ref and alt counts are at least 2 each, which is a heuristic for restricting the test to hets (note that, as implemented, each total must be strictly greater than MIN_COUNT = 2). MD5s change as expected. Hopefully we'll have a more robust implementation for GATK 3.1.
This commit is contained in:
parent
e8ea9f58d3
commit
0f30df0356
|
|
@ -89,6 +89,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
private static final String FS = "FS";
|
||||
private static final double MIN_PVALUE = 1E-320;
|
||||
private static final int MIN_QUAL_FOR_FILTERED_TEST = 17;
|
||||
private static final int MIN_COUNT = 2;
|
||||
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
|
|
@ -134,7 +135,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
private int[][] getTableFromSamples( final GenotypesContext genotypes ) {
|
||||
if( genotypes == null ) { throw new IllegalArgumentException("Genotypes cannot be null."); }
|
||||
|
||||
final int[] sbArray = {0,0,0,0}; // forward-reverse -by- alternate-reference
|
||||
final int[] sbArray = {0,0,0,0}; // reference-forward-reverse -by- alternate-forward-reverse
|
||||
boolean foundData = false;
|
||||
|
||||
for( final Genotype g : genotypes ) {
|
||||
|
|
@ -144,14 +145,27 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
foundData = true;
|
||||
final String sbbsString = (String) g.getAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME);
|
||||
final int[] data = encodeSBBS(sbbsString);
|
||||
for( int index = 0; index < sbArray.length; index++ ) {
|
||||
sbArray[index] += data[index];
|
||||
if ( passesMinimumThreshold(data) ) {
|
||||
for( int index = 0; index < sbArray.length; index++ ) {
|
||||
sbArray[index] += data[index];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ( foundData ? decodeSBBS(sbArray) : null );
|
||||
}
|
||||
|
||||
/**
|
||||
* Does this strand data array pass the minimum threshold for inclusion?
|
||||
*
|
||||
* @param data the array
|
||||
* @return true if it passes the minimum threshold, false otherwise
|
||||
*/
|
||||
private static boolean passesMinimumThreshold(final int[] data) {
|
||||
// the ref and alt totals must each be greater than MIN_COUNT
|
||||
return data[0] + data[1] > MIN_COUNT && data[2] + data[3] > MIN_COUNT;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an annotation for the highest (i.e., least significant) p-value of table1 and table2
|
||||
*
|
||||
|
|
@ -236,7 +250,9 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
}
|
||||
|
||||
private Double pValueForContingencyTable(int[][] originalTable) {
|
||||
int [][] table = copyContingencyTable(originalTable);
|
||||
final int[][] normalizedTable = normalizeContingencyTable(originalTable);
|
||||
|
||||
int[][] table = copyContingencyTable(normalizedTable);
|
||||
|
||||
double pCutoff = computePValue(table);
|
||||
//printTable(table, pCutoff);
|
||||
|
|
@ -252,7 +268,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
}
|
||||
}
|
||||
|
||||
table = copyContingencyTable(originalTable);
|
||||
table = copyContingencyTable(normalizedTable);
|
||||
while (unrotateTable(table)) {
|
||||
double pValuePiece = computePValue(table);
|
||||
|
||||
|
|
@ -270,6 +286,32 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
return Math.min(pValue, 1.0);
|
||||
}
|
||||
|
||||
// how large do we want the normalized table to be?
|
||||
private static final double TARGET_TABLE_SIZE = 200.0;
|
||||
|
||||
/**
|
||||
* Normalize the table so that the entries are not too large.
|
||||
* Note that this method does NOT necessarily make a copy of the table being passed in!
|
||||
*
|
||||
* @param table the original table
|
||||
* @return a normalized version of the table or the original table if it is already normalized
|
||||
*/
|
||||
private static int[][] normalizeContingencyTable(final int[][] table) {
|
||||
final int sum = table[0][0] + table[0][1] + table[1][0] + table[1][1];
|
||||
if ( sum <= TARGET_TABLE_SIZE * 2 )
|
||||
return table;
|
||||
|
||||
final double normalizationFactor = (double)sum / TARGET_TABLE_SIZE;
|
||||
|
||||
final int[][] normalized = new int[2][2];
|
||||
for ( int i = 0; i < 2; i++ ) {
|
||||
for ( int j = 0; j < 2; j++ )
|
||||
normalized[i][j] = (int)(table[i][j] / normalizationFactor);
|
||||
}
|
||||
|
||||
return normalized;
|
||||
}
|
||||
|
||||
private static int [][] copyContingencyTable(int [][] t) {
|
||||
int[][] c = new int[2][2];
|
||||
|
||||
|
|
@ -372,17 +414,33 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
final int[][] table = new int[2][2];
|
||||
|
||||
for (final PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
|
||||
final int[] myTable = new int[4];
|
||||
for (final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
|
||||
final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
|
||||
final GATKSAMRecord read = el.getKey();
|
||||
final int representativeCount = read.isReducedRead() ? read.getReducedCount(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL)) : 1;
|
||||
updateTable(table, mostLikelyAllele.getAlleleIfInformative(), read, ref, alt, representativeCount);
|
||||
updateTable(myTable, mostLikelyAllele.getAlleleIfInformative(), read, ref, alt, representativeCount);
|
||||
}
|
||||
if ( passesMinimumThreshold(myTable) )
|
||||
copyToMainTable(myTable, table);
|
||||
}
|
||||
|
||||
return table;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method to copy the per-sample table to the main table
|
||||
*
|
||||
* @param perSampleTable per-sample table (single dimension)
|
||||
* @param mainTable main table (two dimensions)
|
||||
*/
|
||||
private static void copyToMainTable(final int[] perSampleTable, final int[][] mainTable) {
|
||||
mainTable[0][0] += perSampleTable[0];
|
||||
mainTable[0][1] += perSampleTable[1];
|
||||
mainTable[1][0] += perSampleTable[2];
|
||||
mainTable[1][1] += perSampleTable[3];
|
||||
}
|
||||
|
||||
/**
|
||||
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
|
||||
* fw rc
|
||||
|
|
@ -397,6 +455,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
int[][] table = new int[2][2];
|
||||
|
||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
||||
final int[] myTable = new int[4];
|
||||
for (PileupElement p : sample.getValue().getBasePileup()) {
|
||||
|
||||
if ( ! isUsableBase(p) ) // ignore deletions and bad MQ
|
||||
|
|
@ -405,8 +464,10 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
if ( p.getQual() < minQScoreToConsider || p.getMappingQual() < minQScoreToConsider )
|
||||
continue;
|
||||
|
||||
updateTable(table, Allele.create(p.getBase(), false), p.getRead(), ref, alt, p.getRepresentativeCount());
|
||||
updateTable(myTable, Allele.create(p.getBase(), false), p.getRead(), ref, alt, p.getRepresentativeCount());
|
||||
}
|
||||
if ( passesMinimumThreshold(myTable) )
|
||||
copyToMainTable(myTable, table);
|
||||
}
|
||||
|
||||
return table;
|
||||
|
|
@ -426,13 +487,13 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
((int) p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE);
|
||||
}
|
||||
|
||||
private static void updateTable(final int[][] table, final Allele allele, final GATKSAMRecord read, final Allele ref, final Allele alt, final int representativeCount) {
|
||||
private static void updateTable(final int[] table, final Allele allele, final GATKSAMRecord read, final Allele ref, final Allele alt, final int representativeCount) {
|
||||
|
||||
final boolean matchesRef = allele.equals(ref, true);
|
||||
final boolean matchesAlt = allele.equals(alt, true);
|
||||
|
||||
if ( matchesRef || matchesAlt ) {
|
||||
final int row = matchesRef ? 0 : 1;
|
||||
final int offset = matchesRef ? 0 : 2;
|
||||
|
||||
if ( read.isStrandless() ) {
|
||||
|
||||
|
|
@ -443,14 +504,13 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
// (the 1 is to ensure that a strandless read always counts as an observation on both strands, even
|
||||
// if the read is only seen once, because it's a merged read or other)
|
||||
final int toAdd = Math.max(representativeCount / 2, 1);
|
||||
table[row][0] += toAdd;
|
||||
table[row][1] += toAdd;
|
||||
table[offset] += toAdd;
|
||||
table[offset + 1] += toAdd;
|
||||
}
|
||||
} else {
|
||||
// a normal read with an actual strand
|
||||
final boolean isFW = !read.getReadNegativeStrandFlag();
|
||||
final int column = isFW ? 0 : 1;
|
||||
table[row][column] += representativeCount;
|
||||
table[offset + (isFW ? 0 : 1)] += representativeCount;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -89,7 +89,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("823868a4b5b5ec2cdf080c059d04d31a"));
|
||||
Arrays.asList("ff21ad7bb0d6bcabcee6b95d975570fc"));
|
||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -97,7 +97,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("213560f395280e6a066d0b0497ce8881"));
|
||||
Arrays.asList("cb463a56d0b5bc66940f844e56265c14"));
|
||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -123,7 +123,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("6f873b3152db291e18e3a04fbce2e117"));
|
||||
Arrays.asList("d57ca04b4ceb2f25b31bc0cbd88bca6b"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -131,7 +131,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("d8089c5874ff35a7fd7e35ebd7d3b137"));
|
||||
Arrays.asList("9cc0cf19070d951b1979e069552810f1"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -270,7 +270,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
executeTest("Testing SnpEff annotations (unsupported version, no GATK mode)", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(enabled = true)
|
||||
public void testTDTAnnotation() {
|
||||
final String MD5 = "427dfdc665359b67eff210f909ebf8a2";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
|
|
@ -281,7 +281,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
|
||||
@Test
|
||||
@Test(enabled = true)
|
||||
public void testChromosomeCountsPed() {
|
||||
final String MD5 = "6b5cbedf4a8b3385edf128d81c8a46f2";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
|
|
@ -291,7 +291,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
executeTest("Testing ChromosomeCounts annotation with PED file", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(enabled = true)
|
||||
public void testInbreedingCoeffPed() {
|
||||
final String MD5 = "159a771c1deaeffb786097e106943893";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
|
|
@ -358,7 +358,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
final File outputVCFNoQD = executeTest("testQualByDepth calling without QD", specNoQD).getFirst().get(0);
|
||||
|
||||
final String baseAnn = String.format("-T VariantAnnotator -R %s -V %s", REF, outputVCFNoQD.getAbsolutePath()) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -A QualByDepth";
|
||||
final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("78b8b498fdc34e59208150caacb25b1c"));
|
||||
final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("4ccdbebcfd02be87ae5b4ad94666f011"));
|
||||
specAnn.disableShadowBCF();
|
||||
final File outputVCFAnn = executeTest("testQualByDepth re-annotation of QD", specAnn).getFirst().get(0);
|
||||
|
||||
|
|
|
|||
|
|
@ -69,16 +69,16 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testBOTH_GGA_Pools() {
|
||||
executor.PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "0eec36459cf1f1e3e8739ab5b1cedb39");
|
||||
executor.PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "c2932cc77611f13cc8a14e87d055a8f8");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_GGA_Pools() {
|
||||
executor.PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "73229442a8fe558e58dd5dd305eb2315");
|
||||
executor.PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "a0648992f049ed59fab0ef753d2d0c03");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
|
||||
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "d38b9223a3234af4cd3aec245c72fb53");
|
||||
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "fcfe18bd4c6087b21959d3c31ec177da");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -58,16 +58,16 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
|
||||
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","49f27dae0a86351128db87923735cb10");
|
||||
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","ef7a6ee4ec7e20e5ce28fc50d3362d3d");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_DISCOVERY_sp4() {
|
||||
executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","5d55b71688a0777a7c0247c376401368");
|
||||
executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","fc75733fcdd8079e7f7743961a1f36be");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_GGA_sp10() {
|
||||
executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "cf336d66a109c55f90e9ed2b3bc196c8");
|
||||
executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "86cdfc291f995036658bfc10773db107");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("8a4de9e1f59cffe80a4372cf02fe809e"));
|
||||
Arrays.asList("bb8c1b2e9343c79133d8efb51ec2192e"));
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
||||
|
|
@ -100,7 +100,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("2b92df91a9337b9d9f03db5699bb41f2"));
|
||||
Arrays.asList("f5e5148cac1526136f9f2559fe3b49fa"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -110,7 +110,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("422a114943a9e3e9bf5872b82cbc6340"));
|
||||
Arrays.asList("209db887bfe1aac8bd62544aa8afa2b5"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
||||
}
|
||||
|
||||
|
|
@ -120,7 +120,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("d3d56be9e804132a8d085b5d0acb49f1"));
|
||||
Arrays.asList("83b32ea956809654590abd5e0c029d4d"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
||||
}
|
||||
|
||||
|
|
@ -135,7 +135,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1,
|
||||
Arrays.asList("505a0dfa1ec335af6850654f926ec051"));
|
||||
Arrays.asList("25815c1968450ddd009b983d65809c50"));
|
||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -175,7 +175,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction0() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
|
||||
Arrays.asList("4a45d5bd459565ec35c726894430e8df"));
|
||||
Arrays.asList("af0b881d0a931f0789706f0289b72a64"));
|
||||
executeTest("test minIndelFraction 0.0", spec);
|
||||
}
|
||||
|
||||
|
|
@ -183,7 +183,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction25() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
|
||||
Arrays.asList("a78c663eff00b28b44f368f03b2acf1b"));
|
||||
Arrays.asList("aa97a7941a861d57a3b746b3f6301eb6"));
|
||||
executeTest("test minIndelFraction 0.25", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -151,7 +151,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testNoPrior() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 -inputPrior 0.33333 -inputPrior 0.33333", 1,
|
||||
Arrays.asList("7ac60bdc355d97c0939e644b58de47d7"));
|
||||
Arrays.asList("9ee4f1ee1827a6726bfac1220a6a7c40"));
|
||||
executeTest("test no prior 1", spec1);
|
||||
|
||||
}
|
||||
|
|
@ -168,7 +168,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void emitPLsAtAllSites() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --output_mode EMIT_ALL_SITES -allSitePLs", 1,
|
||||
Arrays.asList("552aced1b1ef7e4a554223f4719f9560"));
|
||||
Arrays.asList("85dee5da72c4154e130527c4e6329c07"));
|
||||
// GDA: TODO: BCF encoder/decoder doesn't seem to support non-standard values in genotype fields. IE even if there is a field defined in FORMAT and in the header the BCF2 encoder will still fail
|
||||
spec1.disableShadowBCF();
|
||||
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("710d379607129935b1b7b6960ca7b213"));
|
||||
Arrays.asList("03ff28802a2e06e0a623d9a5df66d237"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -72,7 +72,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testWithAllelesPassedIn1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("ebfcc3dd8c1788929cb50050c5d456df"));
|
||||
Arrays.asList("85d0e5c086dc642d55124f0e88e7326b"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -80,7 +80,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testWithAllelesPassedIn2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("3e646003c5b93da80c7d8e5d0ff2ee4e"));
|
||||
Arrays.asList("11783a280df9bf621840c300edd0401a"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -96,7 +96,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("f5a62ecb8d32f6161b2ac7682c9f711d"));
|
||||
Arrays.asList("eac8b071bd2fa89889d51de8be84624a"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -112,7 +112,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testReverseTrim() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||
Arrays.asList("bc5a143868e3ad3acc9bb7c09798cdf2"));
|
||||
Arrays.asList("7f912aa5166f6ed16166daac1e5c0935"));
|
||||
executeTest("test reverse trim", spec);
|
||||
}
|
||||
|
||||
|
|
@ -120,7 +120,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testMismatchedPLs() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
|
||||
Arrays.asList("8897652c7516a91d22bc678f2189131e"));
|
||||
Arrays.asList("ab22f70f5c65d45f9754e7064e5a152c"));
|
||||
executeTest("test mismatched PLs", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -69,12 +69,12 @@ public class UnifiedGenotyperReducedReadsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testReducedBamSNPs() {
|
||||
testReducedCalling("SNP", "e8de8c523751ad2fa2ee20185ba5dea7");
|
||||
testReducedCalling("SNP", "cc0508b18028f2e84e6a42c1ff23721c");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedBamINDELs() {
|
||||
testReducedCalling("INDEL", "0281c3f46f7b1989c37b52ab7e337293");
|
||||
testReducedCalling("INDEL", "6fc00d5299b1bf334d39634c3409a69d");
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex1() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "e966ca14532ae80fe5d8898a1a7b4e74");
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "65c316f1f3987d7bc94e887999920d45");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
|
|
@ -88,7 +88,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAComplex() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
|
||||
"cdf6d200324949a3484668774d2289d7");
|
||||
"724a05b7df716647014f29c0fe86e071");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -67,9 +67,9 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;
|
||||
|
||||
// this functionality can be adapted to provide input data for whatever you might want in your data
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "53aa13711a1ceec1453f21c705723f04"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "61c70b7b6d03930420b015958df6b5a5"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "6fd946c4c8c9fd05ea921513e4523a4b"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "50323a284788c8220c9226037c7003b5"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "7c16aa8e35de9f418533efac3bae6551"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "7e1e193d70187774f9740d475e0f1cc1"});
|
||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"});
|
||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "d926d653500a970280ad7828d9ee2b84"});
|
||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "83ddc16e4f0900429b2da30e582994aa"});
|
||||
|
|
@ -149,7 +149,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
public void testNoCallGVCFMissingPLsBugFix() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
|
||||
b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("4fe4a9bfbbcc98d1158cd0c164b9cc65"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("4e2c20650c4c5ae6fa44b289eae5771d"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testNoCallGVCFMissingPLsBugFix", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "abbfdcbf4bfed7547a48121091a7e16f");
|
||||
HCTest(CEUTRIO_BAM, "", "489073bf0034fe9f10e6472ab93a17eb");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -104,7 +104,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerGraphBasedMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "6a89f40fbeec05481fa1f2bf16289d5d");
|
||||
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "d45b2b26434dd3bd48df5a43b3d2954a");
|
||||
}
|
||||
|
||||
@Test(enabled = false) // can't annotate the rsID's yet
|
||||
|
|
@ -115,7 +115,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"f62e874e2405689784764095b6abd1a7");
|
||||
"a1e59313516c2d5eeedae8348b0bdff1");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -249,7 +249,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestDBSNPAnnotationWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||
Arrays.asList("51e63c0431817ca1824b01e56341a8ae"));
|
||||
Arrays.asList("0864904254b2fa757991f8c2dac4932d"));
|
||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -266,7 +266,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestDBSNPAnnotationWGSGraphBased() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||
Arrays.asList("a2ada5984fe835f7f2169f8393d122a6"));
|
||||
Arrays.asList("df1f9410d23a550a143531ac0891f1dc"));
|
||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -298,7 +298,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestAggressivePcrIndelModelWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
|
||||
Arrays.asList("69bbadca5beb8202a77815daaa49e634"));
|
||||
Arrays.asList("f426f4c2986e1dea8f3f55951ef8e013"));
|
||||
executeTest("HC calling with aggressive indel error modeling on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -306,7 +306,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestConservativePcrIndelModelWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
|
||||
Arrays.asList("061a5a9bde0739fe58b314bf8bf8eee3"));
|
||||
Arrays.asList("616cc63d5a78765145914457dec475b0"));
|
||||
executeTest("HC calling with conservative indel error modeling on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-20,000,000", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("9c618890c03ee9cae1d269039fc29506"));
|
||||
Arrays.asList("2be5f6f7e7f79841108906555d548683"));
|
||||
executeTest("combineSingleSamplePipelineGVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -89,7 +89,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-11,000,000 --dbsnp " + b37dbSNP132, b37KGReference),
|
||||
1,
|
||||
Arrays.asList("27f3e4700cf836c23a9af2dc1d1bbecb"));
|
||||
Arrays.asList("e3c7452277898fece54bf60af9588666"));
|
||||
executeTest("combineSingleSamplePipelineGVCF_addDbsnp", spec);
|
||||
}
|
||||
|
||||
|
|
@ -99,7 +99,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:69485-69791 -o %s -R " + b37KGReference +
|
||||
" -V " + privateTestDir + "gvcfExample1.vcf",
|
||||
1,
|
||||
Arrays.asList("2541e164056d5632ad7de784a9af3880"));
|
||||
Arrays.asList("bee009201ec3ad7b4f42f913e7ef1367"));
|
||||
executeTest("testJustOneSample", spec);
|
||||
}
|
||||
|
||||
|
|
@ -110,7 +110,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V " + privateTestDir + "gvcfExample1.vcf" +
|
||||
" -V " + privateTestDir + "gvcfExample2.vcf",
|
||||
1,
|
||||
Arrays.asList("9daf9602338db9d06c075c6e9a15ee2c"));
|
||||
Arrays.asList("67410d8ac490e3c9d19ba7a4cceaf8fb"));
|
||||
executeTest("testSamplesWithDifferentLs", spec);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue