Added qual-by-depth annotation

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2445 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-12-25 02:30:30 +00:00
parent 0571d9dcb9
commit 12990c5e7a
3 changed files with 95 additions and 16 deletions

View File

@ -0,0 +1,79 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.SampleBacked;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.List;
import java.util.ArrayList;
public class QualByDepth extends StandardVariantAnnotation {
public String annotate(ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation) {
if ( !(variation instanceof VariantBackedByGenotype) )
return null;
final List<Genotype> genotypes = ((VariantBackedByGenotype)variation).getGenotypes();
if ( genotypes == null || genotypes.size() == 0 )
return null;
//double QbyD = genotypeQualByDepth(ref.getBase(), genotypes, stratifiedContexts);
double QbyD = 10.0 * variation.getNegLog10PError() / (double)variationQualByDepth(ref.getBase(), genotypes, stratifiedContexts);
return String.format("%.2f", QbyD);
}
public String getKeyName() { return "QD"; }
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine(getKeyName(), 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Genotype Quality by Depth"); }
private int variationQualByDepth(char ref, final List<Genotype> genotypes, Map<String, StratifiedAlignmentContext> stratifiedContexts) {
int depth = 0;
for ( Genotype genotype : genotypes ) {
if ( !(genotype instanceof SampleBacked) )
continue;
// we care only about variant calls
if ( !genotype.isVariant(ref) )
continue;
String sample = ((SampleBacked)genotype).getSampleName();
StratifiedAlignmentContext context = stratifiedContexts.get(sample);
if ( context != null )
depth += context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size();
}
return depth;
}
private double genotypeQualByDepth(char ref, final List<Genotype> genotypes, Map<String, StratifiedAlignmentContext> stratifiedContexts) {
ArrayList<Double> qualsByDepth = new ArrayList<Double>();
for ( Genotype genotype : genotypes ) {
if ( !(genotype instanceof SampleBacked) )
continue;
// we care only about variant calls
if ( !genotype.isVariant(ref) )
continue;
String sample = ((SampleBacked)genotype).getSampleName();
StratifiedAlignmentContext context = stratifiedContexts.get(sample);
if ( context == null )
continue;
qualsByDepth.add(genotype.getNegLog10PError() / context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size());
}
double mean = 0.0;
for ( Double value : qualsByDepth )
mean += value;
mean /= qualsByDepth.size();
return mean;
}
}

View File

@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("da1e19a969cd79779ba401062d2fcf78"));
Arrays.asList("b0012ffce509196885bfdf1612086263"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("592e6eb532fa7b0250bfa410b62b1ae4"));
Arrays.asList("7e00e94a046e82ac41f6fd298a64cc74"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("0413be573f447450222a48223dd10095"));
Arrays.asList("a6630eec84161a089dde204e09744c45"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("0f0ef13834e2ef00fa2ea53ad82450b4"));
Arrays.asList("130658564083f650981b28728b813917"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}

View File

@ -22,7 +22,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1PointEM() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
Arrays.asList("b2e7854a474fbd24add057673ba469f1"));
Arrays.asList("caeb030b47503e9d79cf1e18b86e8bc9"));
executeTest("testMultiSamplePilot1 - Point Estimate EM", spec);
}
@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot2PointEM() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
Arrays.asList("c1ea2a36a7dacb278d82be9963dc6053"));
Arrays.asList("f87c182b694a7baeab886d8f75c91e28"));
executeTest("testMultiSamplePilot2 - Point Estimate EM", spec);
}
@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testPooled1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1,
Arrays.asList("c2364a23fcca9d08c7424e7a68c3ba04"));
Arrays.asList("acf8006174a460247fabbc650802c29b"));
executeTest("testPooled1", spec);
}
@ -56,7 +56,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("293ea49e8c6854aa04099a48293a2c65"));
Arrays.asList("92b32599938bc60d6d636b425c5e0a6c"));
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
}
@ -64,7 +64,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("44362bd78c9559c194149fa0be20004b"));
Arrays.asList("cc90c3dd5d30dd4ac0fceb748283ddb9"));
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
}
@ -72,7 +72,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("9013c944a894d1d941167c2d1d57f0f9"));
Arrays.asList("d04c02cdbf1e1adbdf84540c861a64f7"));
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
}
@ -85,11 +85,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-genotype", "9f7886f0f63de56ebcd187c22725b1f7" );
e.put( "-all_bases", "17f08368b48ebb5e18063da47f689e4e" );
e.put( "--min_base_quality_score 10", "cdac609651438aa8e74cb859aa04e2f0" );
e.put( "--min_mapping_quality_score 10", "ee1afd51eda0d005e5b3e908af63eb27" );
e.put( "--max_mismatches_in_40bp_window 5", "b9bec75e1aa4c3f45e0a88ee74b41321" );
e.put( "-genotype", "ce888ec4bdd85e24b1129198327ff315" );
e.put( "-all_bases", "95be4777a8f89f98d0a033fc3d75a3c1" );
e.put( "--min_base_quality_score 10", "390802b120cba6019af39d4775f504d1" );
e.put( "--min_mapping_quality_score 10", "f7fe79dace81157bc83c8e4d27e1ae40" );
e.put( "--max_mismatches_in_40bp_window 5", "0e7741a7a683a6d4d00876372bb70991" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -103,7 +103,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 10 ", 1,
Arrays.asList("022cf14445f7cd6beb04957c8fea9ae5"));
Arrays.asList("c8c4a463ab23585d8373f3e8a7fbec22"));
executeTest("testConfidence", spec);
}