The Unified Genotyper no longer emits the SLOD/SB (strand bias) annotation by default; pass --computeSLOD to have it computed and emitted

This commit is contained in:
Eric Banks 2011-08-02 10:14:21 -04:00
parent 3a9b6eacdf
commit 5626199bb6
4 changed files with 34 additions and 47 deletions

View File

@ -58,8 +58,8 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)", required = false)
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
@Argument(fullName = "noSLOD", shortName = "nsl", doc = "If provided, we will not calculate the SLOD", required = false)
public boolean NO_SLOD = false;
@Argument(fullName = "computeSLOD", shortName = "sl", doc = "If provided, we will calculate the SLOD", required = false)
public boolean COMPUTE_SLOD = false;
// control the error modes
@ -154,7 +154,7 @@ public class UnifiedArgumentCollection {
uac.PCR_error = PCR_error;
uac.GenotypingMode = GenotypingMode;
uac.OutputMode = OutputMode;
uac.NO_SLOD = NO_SLOD;
uac.COMPUTE_SLOD = COMPUTE_SLOD;
uac.ASSUME_SINGLE_SAMPLE = ASSUME_SINGLE_SAMPLE;
uac.STANDARD_CONFIDENCE_FOR_CALLING = STANDARD_CONFIDENCE_FOR_CALLING;
uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING;

View File

@ -144,7 +144,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
// annotation (INFO) fields from UnifiedGenotyper
if ( !UAC.NO_SLOD )
if ( UAC.COMPUTE_SLOD )
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.STRAND_BIAS_KEY, 1, VCFHeaderLineType.Float, "Strand Bias"));
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.DOWNSAMPLED_KEY, 0, VCFHeaderLineType.Flag, "Were any of the samples downsampled?"));

View File

@ -372,8 +372,8 @@ public class UnifiedGenotyperEngine {
attributes.put(VCFConstants.DOWNSAMPLED_KEY, true);
if ( !UAC.NO_SLOD && bestAFguess != 0 ) {
final boolean DEBUG_SLOD = false;
if ( UAC.COMPUTE_SLOD && bestAFguess != 0 ) {
//final boolean DEBUG_SLOD = false;
// the overall lod
VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
@ -381,7 +381,7 @@ public class UnifiedGenotyperEngine {
afcm.get().getLog10PNonRef(tracker, refContext, vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
// the forward lod
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
@ -390,7 +390,7 @@ public class UnifiedGenotyperEngine {
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
// the reverse lod
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
@ -399,11 +399,11 @@ public class UnifiedGenotyperEngine {
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF;
if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
//if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
// strand score is max bias between forward and reverse strands
double strandScore = Math.max(forwardLod, reverseLod);

View File

@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("c97829259463d04b0159591bb6fb44af"));
Arrays.asList("16b0c7b47745abcd1ddaa2e261719530"));
executeTest("test MultiSample Pilot1", spec);
}
@ -54,12 +54,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("2b69667f4770e8c0c894066b7f27e440"));
Arrays.asList("811ddc0bd8322b14f14f58df8c627aa9"));
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("b77fe007c2a97fcd59dfd5eef94d8b95"));
Arrays.asList("5cf08dd7ac3d218082f7be3915ce0b15"));
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
}
@ -67,7 +67,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("ee8a5e63ddd470726a749e69c0c20f60"));
Arrays.asList("75156264696563c2f47620fef9424f7c"));
executeTest("test SingleSample Pilot2", spec);
}
@ -77,7 +77,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "ef31654a2b85b9b2d3bba4f4a75a17b6";
private final static String COMPRESSED_OUTPUT_MD5 = "7255e03430549cb97d8fcae34cbffb02";
@Test
public void testCompressedOutput() {
@ -107,7 +107,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
String md5 = "46868a9c4134651c54535fb46b408aee";
String md5 = "7912109e83fda21dae90ef8d5dd0140d";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
@ -138,9 +138,10 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testCallingParameters() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "--min_base_quality_score 26", "5043c9a101e691602eb7a3f9704bdf20" );
e.put( "--min_mapping_quality_score 26", "71a833eb8fd93ee62ae0d5a430f27940" );
e.put( "--p_nonref_model GRID_SEARCH", "ddf443e9dcadef367476b26b4d52c134" );
e.put( "--min_base_quality_score 26", "6d3aa9f783ca63f37c952f83eeda593c" );
e.put( "--min_mapping_quality_score 26", "51bfdf777123bf49de5d92ffde5c74e7" );
e.put( "--p_nonref_model GRID_SEARCH", "333328ab2c8da2875fade599e80a271f" );
e.put( "--computeSLOD", "226caa28a4fa9fe34f3beb8a23f3d53d" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -153,9 +154,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOutputParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-sites_only", "eaad6ceb71ab94290650a70bea5ab951" );
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "05bf7db8a3d19ef4a3d14772c90b732f" );
e.put( "--output_mode EMIT_ALL_SITES", "e4b86740468d7369f0156550855586c7" );
e.put( "-sites_only", "5f659dee408710d3709ed72005cd863a" );
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "55d09bf13149bddc06cc36be0801507b" );
e.put( "--output_mode EMIT_ALL_SITES", "727f49dcb2439b18446829efc3b1561c" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -169,12 +170,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("71a833eb8fd93ee62ae0d5a430f27940"));
Arrays.asList("51bfdf777123bf49de5d92ffde5c74e7"));
executeTest("test confidence 1", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("79968844dc3ddecb97748c1acf2984c7"));
Arrays.asList("c67c285e70fd4457c9f9ce7bd878ddca"));
executeTest("test confidence 2", spec2);
}
@ -186,8 +187,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testHeterozyosity() {
HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 0.01, "4e878664f61d2d800146d3762303fde1" );
e.put( 1.0 / 1850, "9204caec095ff5e63ca21a10b6fab453" );
e.put( 0.01, "7ecc564d4db97d5932cef2e558550ed2" );
e.put( 1.0 / 1850, "aa9e101bb9f9e111fe292fec467d915a" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -211,7 +212,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("1a58ec52df545f946f80cc16c5736a91"));
Arrays.asList("2efd686186b2c5129be4cf89274a24dd"));
executeTest(String.format("test multiple technologies"), spec);
}
@ -230,25 +231,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
Arrays.asList("62d0f6d9de344ce68ce121c13b1e78b1"));
Arrays.asList("2892d35331fe9fc141ba19269ec7caed"));
executeTest(String.format("test calling with BAQ"), spec);
}
/**
 * Integration test: runs the shared base command against the multi-technology
 * NA12878 BAM over 1:10,000,000-10,100,000 with {@code -baq OFF} and checks the
 * result against a fixed expected checksum.
 *
 * NOTE(review): the literal {@code 1} passed to the spec is presumably the number
 * of {@code %s} output placeholders in the command line -- confirm against
 * WalkerTest.WalkerTestSpec.
 */
@Test
public void testCallingWithBAQOff() {
    // Input BAM containing reads from all sequencing technologies.
    final String inputBam = validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam";

    // Same argument order as the other BAQ test, but with BAQ switched off.
    final String commandLine = baseCommand
            + " -I " + inputBam
            + " -o %s"
            + " -L 1:10,000,000-10,100,000"
            + " -baq OFF";

    final WalkerTest.WalkerTestSpec baqOffSpec = new WalkerTest.WalkerTestSpec(
            commandLine,
            1,
            Arrays.asList("1a58ec52df545f946f80cc16c5736a91"));

    // The test name has no format specifiers, so it is passed as a plain literal.
    executeTest("test calling with BAQ OFF", baqOffSpec);
}
// --------------------------------------------------------------------------------------------------------------
//
// testing indel caller
@ -263,7 +250,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("631ae1f1eb6bc4c1a4136b8495250536"));
Arrays.asList("8c2afb4289ed44521933d1a74c8d6c7f"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -278,7 +265,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -minIndelCnt 1" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("fd556585c79e2b892a5976668f45aa43"));
Arrays.asList("b6fb70590a10e1c27fb611732916f27d"));
executeTest(String.format("test indel caller in SLX witn low min allele count"), spec);
}
@ -291,7 +278,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("9cd56feedd2787919e571383889fde70"));
Arrays.asList("61642502bd08cc03cdaaeb83a5426b46"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -301,14 +288,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("315e1b78d7a403d7fcbcf0caa8c496b8"));
Arrays.asList("69b0b3f089c80b9864294d838a061336"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf "
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("cf89e0c54f14482a23c105b73a333d8a"));
Arrays.asList("c90174cfd7dd68bdef36fe2c60145e10"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
}