Reverting back to the old definition of QD because it works better with large numbers of samples. The new QD is relegated to a new annotation: sumGLbyD. Tweaks to the new HaplotypeScore based on evaluation with better QD calculation. The default qual threshold in GenerateVariantClusters is updated to be in line with the variant quality scores coming from the exact model.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4984 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2011-01-13 14:12:30 +00:00
parent e0092bb160
commit ce3d226183
7 changed files with 33 additions and 43 deletions

View File

@ -46,7 +46,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
private final static boolean DEBUG = false;
private final static int MIN_CONTEXT_WING_SIZE = 20;
private final static int MIN_CONTEXT_WING_SIZE = 10;
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50;
private final static char REGEXP_WILDCARD = '.';

View File

@ -18,7 +18,7 @@ import java.util.List;
import java.util.Arrays;
public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation {
public class QualByDepth extends AnnotationByDepth implements InfoFieldAnnotation, StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
@ -55,13 +55,18 @@ public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation {
if ( qual == 0.0 )
qual = 10.0 * vc.getNegLog10PError();
double QbyD = qual / (double)depth;
double sumGLbyD = qual / (double)depth;
int qDepth = annotationByVariantDepth(genotypes, stratifiedContexts);
double QD = 10.0 * vc.getNegLog10PError() / (double)qDepth;
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%.2f", QbyD));
map.put(getKeyNames().get(0), String.format("%.2f", QD));
map.put(getKeyNames().get(1), String.format("%.2f", sumGLbyD));
return map;
}
public List<String> getKeyNames() { return Arrays.asList("QD"); }
public List<String> getKeyNames() { return Arrays.asList("QD", "sumGLbyD"); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth")); }

View File

@ -84,7 +84,7 @@ public class GenerateVariantClustersWalker extends RodWalker<ExpandingArrayList<
@Argument(fullName="stdThreshold", shortName="std", doc="If a variant has annotations more than -std standard deviations away from mean then don't use it for clustering.", required=false)
private double STD_THRESHOLD = 4.5;
@Argument(fullName="qualThreshold", shortName="qual", doc="If a known variant has raw QUAL value less than -qual then don't use it for clustering.", required=false)
private double QUAL_THRESHOLD = 800.0;
private double QUAL_THRESHOLD = 100.0;
@Argument(fullName="shrinkage", shortName="shrinkage", doc="The shrinkage parameter in variational Bayes algorithm.", required=false)
private double SHRINKAGE = 0.0001;
@Argument(fullName="dirichlet", shortName="dirichlet", doc="The dirichlet parameter in variational Bayes algoirthm.", required=false)

View File

@ -18,11 +18,7 @@ import java.util.Arrays;
*/
public class PileupElement {
public static final byte DELETION_BASE = BaseUtils.D;
public static final byte INSERTION_BASE_A = 87;
public static final byte INSERTION_BASE_C = 88;
public static final byte INSERTION_BASE_T = 89;
public static final byte INSERTION_BASE_G = 90;
public static final byte DELETION_QUAL = 18;
public static final byte DELETION_QUAL = 16;
protected SAMRecord read;
protected int offset;

View File

@ -548,17 +548,6 @@ public class AlignmentUtils {
switch( ce.getOperator() ) {
case I:
if( alignPos > 0 ) {
if(alignment[alignPos-1] == (byte) 'A') {
alignment[alignPos-1] = PileupElement.INSERTION_BASE_A;
} else if(alignment[alignPos-1] == (byte) 'C') {
alignment[alignPos-1] = PileupElement.INSERTION_BASE_C;
} else if(alignment[alignPos-1] == (byte) 'T') {
alignment[alignPos-1] = PileupElement.INSERTION_BASE_T;
} else if(alignment[alignPos-1] == (byte) 'G') {
alignment[alignPos-1] = PileupElement.INSERTION_BASE_G;
}
}
case S:
for ( int jjj = 0; jjj < elementLength; jjj++ ) {
readPos++;

View File

@ -31,7 +31,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("98861116ac1cdaf152adead3183664d8"));
Arrays.asList("c62ee353f7ddafa2348b461d9e107f2f"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -39,7 +39,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("53d083505fc82fb388566d3856ed20ba"));
Arrays.asList("0c43cfb9b08f63f49396e3bf8b3d843f"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -63,7 +63,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("c5807c5794c3a847d78e3800553d989a"));
Arrays.asList("3aa1415f7bc6d41c4f14343187377f96"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -71,7 +71,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("a9b2a7adee7ba8b0f5d7ff8d92e6dfbd"));
Arrays.asList("bf3bc299552b595f791486a1f8dbc509"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
@ -79,7 +79,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testOverwritingHeader() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
Arrays.asList("e3c839910aa82061742e33196b112cb0"));
Arrays.asList("28829de1afd9dcb41ed666f949c4f5cc"));
executeTest("test overwriting header", spec);
}

View File

@ -25,7 +25,7 @@ public class
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("ae901e034b00aef16d36295821b3ea63"));
Arrays.asList("6c04cc01cf6bebe4bdbc025fd45c5559"));
executeTest("testMultiSamplePilot1", spec);
}
@ -33,7 +33,7 @@ public class
public void testMultiSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1,
Arrays.asList("2ad026dee3fe592c124eb8724a843a5e"));
Arrays.asList("84efe068164891dbec7c85ff6cc33df3"));
executeTest("testMultiSamplePilot2", spec);
}
@ -41,7 +41,7 @@ public class
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("bc573090407ae9ec4401757eaa03de20"));
Arrays.asList("c7decebadb35067a38b9be3c43c1fb76"));
executeTest("testSingleSamplePilot2", spec);
}
@ -51,7 +51,7 @@ public class
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "1889ecc5aa1b23b2e77b1bd192577f1a";
private final static String COMPRESSED_OUTPUT_MD5 = "66bcb3f7b20d4fbe7849b616bfe69c0f";
@Test
public void testCompressedOutput() {
@ -78,7 +78,7 @@ public class
@Test
public void testParallelization() {
String md5 = "d19745ab31f903de8d5a8e853b4e52dd";
String md5 = "a0032a9952c94a55e0fa42e2dec33169";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
@ -105,12 +105,12 @@ public class
@Test
public void testParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-genotype", "37d3954e19309a24c386758afad93252" );
e.put( "-all_bases", "04568093c5dc70fa7965b4ab15fd0f7e" );
e.put( "--min_base_quality_score 26", "5d1886a9637183707645bc2dc6bf8282" );
e.put( "--min_mapping_quality_score 26", "78423524cf56cce1d0847847d542459f" );
e.put( "--max_mismatches_in_40bp_window 5", "2963c771aafe84b62082f475d20bad5e" );
e.put( "--p_nonref_model GRID_SEARCH", "c254a4e593b4ffb112299be874503ce0" );
e.put( "-genotype", "4a623d4a68fba4f9f8d7e915af0cc450" );
e.put( "-all_bases", "2ab5106795c70a63d8dd2353ee0f1427" );
e.put( "--min_base_quality_score 26", "9c9be22923b9ba0d588f3d37385ae4b0" );
e.put( "--min_mapping_quality_score 26", "aa36bb2f4fc5bd6b6c4206e0a084a577" );
e.put( "--max_mismatches_in_40bp_window 5", "4914ed25a8ca22c7aea983999cd6768a" );
e.put( "--p_nonref_model GRID_SEARCH", "dd4fb1fb304e44b82c9cc3d4cc257172" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -124,12 +124,12 @@ public class
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("c254a4e593b4ffb112299be874503ce0"));
Arrays.asList("dd4fb1fb304e44b82c9cc3d4cc257172"));
executeTest("testConfidence1", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("d2323a0234d27257393f4931fca70dbc"));
Arrays.asList("eef4e314d0cdae669454232ffbbab8ea"));
executeTest("testConfidence2", spec2);
}
@ -141,8 +141,8 @@ public class
@Test
public void testHeterozyosity() {
HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 0.01, "96e85b26cf5f0a523b4b0886dbb902b1" );
e.put( 1.0 / 1850, "5ffecb68b52169ded04312aa5dcdc137" );
e.put( 0.01, "1bbd2e24ddec902339eac481565d7f0a" );
e.put( 1.0 / 1850, "472d2d6c375a7c6d2edb464a12f10742" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -165,7 +165,7 @@ public class
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("e65c95a9d2a0995078d3e6835cf4ee61"));
Arrays.asList("eb4144006eda780d69b6979817ceb58d"));
executeTest(String.format("testMultiTechnologies"), spec);
}