Calculate StrandBiasBySample across all alternate alleles, comparing ref vs. any alt

This commit is contained in:
Laura Gauthier 2014-08-20 10:27:27 -04:00
parent b69b256003
commit bc7202fff7
11 changed files with 42 additions and 38 deletions

View File

@ -113,8 +113,8 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
@Override
protected Map<String, Object> calculateAnnotationFromStratifiedContexts(final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc){
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT);
final int[][] tableFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), MIN_QUAL_FOR_FILTERED_TEST, MIN_COUNT);
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAlternateAlleles(), -1, MIN_COUNT);
final int[][] tableFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAlternateAlleles(), MIN_QUAL_FOR_FILTERED_TEST, MIN_COUNT);
printTable("unfiltered", tableNoFiltering);
printTable("filtered", tableFiltering);
return pValueForBestTable(tableFiltering, tableNoFiltering);

View File

@ -193,7 +193,7 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
*/
protected static int[][] getSNPContingencyTable(final Map<String, AlignmentContext> stratifiedContexts,
final Allele ref,
final Allele alt,
final List<Allele> allAlts,
final int minQScoreToConsider,
final int minCount ) {
int[][] table = new int[2][2];
@ -208,11 +208,13 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
if ( p.getQual() < minQScoreToConsider || p.getMappingQual() < minQScoreToConsider )
continue;
updateTable(myTable, Allele.create(p.getBase(), false), p.getRead(), ref, alt);
updateTable(myTable, Allele.create(p.getBase(), false), p.getRead(), ref, allAlts);
}
if ( passesMinimumThreshold( myTable, minCount ) )
if ( passesMinimumThreshold( myTable, minCount ) ) {
copyToMainTable(myTable, table);
}
}
return table;
@ -233,6 +235,7 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
final Allele ref = vc.getReference();
final Allele alt = vc.getAltAlleleWithHighestAlleleCount();
final List<Allele> allAlts = vc.getAlternateAlleles();
final int[][] table = new int[2][2];
for (final PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
@ -240,7 +243,7 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
for (final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
final GATKSAMRecord read = el.getKey();
updateTable(myTable, mostLikelyAllele.getAlleleIfInformative(), read, ref, alt);
updateTable(myTable, mostLikelyAllele.getAlleleIfInformative(), read, ref, allAlts);
}
if ( passesMinimumThreshold(myTable, minCount) )
copyToMainTable(myTable, table);
@ -277,12 +280,13 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
((int) p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE);
}
private static void updateTable(final int[] table, final Allele allele, final GATKSAMRecord read, final Allele ref, final Allele alt) {
private static void updateTable(final int[] table, final Allele allele, final GATKSAMRecord read, final Allele ref, final List<Allele> allAlts) {
final boolean matchesRef = allele.equals(ref, true);
final boolean matchesAlt = allele.equals(alt, true);
final boolean matchesAlt = allele.equals(allAlts.get(0), true);
final boolean matchesAnyAlt = allAlts.contains(allele);
if ( matchesRef || matchesAlt ) {
if ( matchesRef || matchesAnyAlt ) {
final int offset = matchesRef ? 0 : 2;
if ( read.isStrandless() ) {

View File

@ -118,7 +118,7 @@ public class StrandOddsRatio extends StrandBiasTest implements StandardAnnotatio
@Override
protected Map<String, Object> calculateAnnotationFromStratifiedContexts(Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc){
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT);
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAlternateAlleles(), -1, MIN_COUNT);
final double ratio = calculateSOR(tableNoFiltering);
return annotationForOneTable(ratio);
}

View File

@ -94,7 +94,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("62b6dacf131695f81eccbfe2b1efa128"));
Arrays.asList("4f7ebd519451a776c1aa61493ff33943"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -128,7 +128,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("6a93ce9ce7bda8de285c9c67d93a0b10"));
Arrays.asList("a4df0258a61170c74c85b3cd516c8153"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -144,7 +144,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testExcludeAnnotations() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("9c1aa104de4735be4f7e418014b6536b"));
Arrays.asList("11935c8d5cc5a170d06f0b624b31079f"));
executeTest("test exclude annotations", spec);
}

View File

@ -74,7 +74,7 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testBOTH_GGA_Pools() {
executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "71b196793025aa1f99cb8f6f9929d0bf");
executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "972c8db4b1cc971bd714fd9c1a72b65a");
}
@Test(enabled = true)
@ -84,6 +84,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "ea5b4124be3ab15a14b670506a98fd9b");
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "6007e0735aa5a680da92396345824077");
}
}

View File

@ -63,7 +63,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","e0060cbb6d5e1af3b274a1e577ba47a9");
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","fdcdfbed14fb7d703cd991ee7d2821a6");
}
@Test(enabled = true)

View File

@ -140,7 +140,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1,
Arrays.asList("053913cb29fee481158e1f497a4fffdc"));
Arrays.asList("781d305993aedcc1a4c199a5c63ac54c"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
}

View File

@ -102,7 +102,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
Arrays.asList("37594ce48695bf443c9251f70006f2f0"));
Arrays.asList("837847b512c8f60d7c572dd6a80239d8"));
executeTest("test Multiple SNP alleles", spec);
}
@ -118,7 +118,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testReverseTrim() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
Arrays.asList("ad2be9f69ae8c6776b3bfba069735f50"));
Arrays.asList("0fc44ff26d3f913e7012b000a4de9682"));
executeTest("test reverse trim", spec);
}
@ -126,7 +126,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testMismatchedPLs() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
Arrays.asList("1cc9c3e45e0296bb33042b409db18ca4"));
Arrays.asList("0dba3bc42c0eb43fea205d528739e9da"));
executeTest("test mismatched PLs", spec);
}
}

View File

@ -69,7 +69,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleComplex1() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "f4018f734d64f1f88b3ac4b712311567");
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "438933e7d6345b80ff09d9be40bdb42e");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
@ -93,7 +93,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleGGAComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
"9de64c4405e0dab99c70c2fae54d4841");
"a571eb20a5bd17a41b2bd29d00adacc1");
}
@Test
@ -111,7 +111,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
"272e096b7dc2839d11343f35e5d5442d");
"4c8352d71877a585b8a9b74567be72e6");
}
}

View File

@ -72,8 +72,8 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
// this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "646ec07bd026da1e72b5e789f5aa3a3d"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "9acbee336e91cbfc1abeebd41bbcc9dd"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "e696ffc927af7f7a36dc7d49dad2c4f8"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "cb598bee733db0461f6a24d265daed45"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "d875592d1fd8057250dafad793768535"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "c223d6fe112d2bb698811600c3b7f6af"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "dacb94af2632e4dc4a1948306dd1661c"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "ef9093880efedac09b78c8fb26420e84"});
@ -90,9 +90,9 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;
// this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "0a982ba98be666d56452791df32109d7"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "5edcfa5ab96bc327783484c2bbe1c06f"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "b60c70fac56f129af78eaff9ad769557"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "83b96e3e364821c51e6a2c2a64616b24"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "f38178834961798d79e7190dbca004bf"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "762dde6b938dd7bb988f132dd9e4b76f"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "1c3570461e96ad6d66c6abb0fd6ee865"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "66019a0914f905522da6bd3b557a57d1"});
@ -113,9 +113,9 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;
// this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "7db552463cf779644335bfa09fcddf82"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "90600d209cf778fdfca6a844b8ee4acb"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "db3c99786c5726b20dbfe47e31e50d60"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "254b4bed7fc8ef5d3c4cb4ebf4ea73c2"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "3f9e2b721c3bc8a6d76aa75bb7544f28"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "dac179dc0c314e2ac7e39ecb9d334493"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "eac793500fbc7de46259000dbbcdd27d"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "4a431e0e387de3f791318f67d8855b0b"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "1058d3fe6553e07f002f994759c9647d"});

View File

@ -90,7 +90,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "5468f50b4ed198e6e9b05a67c3103f72");
HCTest(CEUTRIO_BAM, "", "bd0c2401f0c0a1f35ca0563a74672116");
}
@Test
@ -101,7 +101,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleHaploid() {
HCTest(CEUTRIO_BAM,
"-ploidy 1", "5046d3f77a56fcc4ccc8a216670effac");
"-ploidy 1", "1adc1d54b9d0cd493d85c6406ae4d4a7");
}
@Test
@ -136,12 +136,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerGraphBasedMultiSampleHaploid() {
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "1425b46f3cd50040a1272c8775672fc0");
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "1cf7e07f659c0aa462e17d2fe2981613");
}
@Test
public void testHaplotypeCallerGraphBasedMultiSample() {
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "987c0bb684fc03bcc46cb619e8269fe4");
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "f9388b9a6c8bd76862dc716adfb9fd5d");
}
@Test
@ -153,19 +153,19 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf" +
" -isr INTERSECTION -L " + GGA_INTERVALS_FILE,
"86a060e9514eaf90c14ddaa7e6d07259");
"48b11c06729a99d4d54c5aa99663343c");
}
@Test
public void testHaplotypeCallerMultiSampleGGAHaploid() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 1 -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf -isr INTERSECTION -L 20:10080000-10100000",
"2d32509234571132ec1fc84ebbc0c48b");
"5c619c63de7ecf35b29562cf155ff5a0");
}
@Test
public void testHaplotypeCallerMultiSampleGGATetraploid() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 4 -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf -isr INTERSECTION -L 20:10080000-10100000",
"b47d6e7b99a8e3413fa94d83a9e760fa");
"506f0b55414dbdb900c8705609f69d57");
}
@Test