- All annotations are now required to return their VCF INFO keys and descriptions
- Renamed keys to fit with the standard naming
- FisherStrand is no longer standard
- Integration tests no longer test experimental annotations since they're not stable

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2216 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
67179e2412
commit
7055a3ea2d
|
|
@ -40,9 +40,13 @@ public class AlleleBalance extends StandardVariantAnnotation {
|
|||
ratio = computeSingleBalance(ref.getBase(), genotypeStr, bases);
|
||||
}
|
||||
|
||||
return new Pair<String, String>("AlleleBalance", String.format("%.2f", ratio));
|
||||
return new Pair<String, String>(getKeyName(), String.format("%.2f", ratio));
|
||||
}
|
||||
|
||||
/**
 * Returns the key under which the allele balance value is reported.
 *
 * @return the constant key "AB"
 */
public String getKeyName() {
    return "AB";
}
|
||||
|
||||
/**
 * Returns the description entry for this annotation.
 * The leading key comes from getKeyName() instead of a repeated literal, so a
 * future key rename cannot leave the description out of sync.
 *
 * @return comma-separated entry: key, "1", "Float", and a quoted description
 *         (presumably the VCF INFO ID/Number/Type/Description fields - confirm against the writer)
 */
public String getDescription() {
    return getKeyName() + ",1,Float,\"Allele Balance (ref/(ref+alt))\"";
}
|
||||
|
||||
private double computeSingleBalance(char ref, final String genotypeStr, final String bases) {
|
||||
|
||||
char a = genotypeStr.charAt(0);
|
||||
|
|
|
|||
|
|
@ -13,8 +13,12 @@ public class DepthOfCoverage extends StandardVariantAnnotation {
|
|||
|
||||
/**
 * Annotates a site with its depth, taken as the number of reads returned by the pileup.
 * The ref, variation and genotypes arguments are not used in the visible body.
 *
 * @return a pair of annotation key and the depth formatted as a decimal integer
 */
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
|
||||
int depth = pileup.getReads().size();
|
||||
// NOTE(review): two return statements appear back to back. This span looks like a diff hunk in which
// the "DoC" line is presumably the removed version and the getKeyName() line its replacement - confirm.
return new Pair<String, String>("DoC", String.format("%d", depth));
|
||||
return new Pair<String, String>(getKeyName(), String.format("%d", depth));
|
||||
}
|
||||
|
||||
/**
 * Returns the key under which the depth value is reported.
 *
 * @return the constant key "DP"
 */
public String getKeyName() {
    return "DP";
}
|
||||
|
||||
/**
 * Returns the description entry for this annotation.
 * The leading key comes from getKeyName() instead of a repeated literal, so a
 * future key rename cannot leave the description out of sync.
 *
 * @return comma-separated entry: key, "1", "Integer", and a quoted description
 *         (presumably the VCF INFO ID/Number/Type/Description fields - confirm against the writer)
 */
public String getDescription() {
    return getKeyName() + ",1,Integer,\"Total Depth\"";
}
|
||||
|
||||
/**
 * Tells the caller whether this annotation wants zero-quality reads.
 * NOTE(review): "zero quality" presumably means mapping quality 0 - confirm against the caller.
 *
 * @return always false
 */
public boolean useZeroQualityReads() {
    return false;
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import cern.jet.math.Arithmetic;
|
|||
import java.util.List;
|
||||
|
||||
|
||||
public class FisherStrand extends StandardVariantAnnotation {
|
||||
public class FisherStrand implements VariantAnnotation {
|
||||
|
||||
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
|
||||
|
||||
|
|
@ -33,9 +33,13 @@ public class FisherStrand extends StandardVariantAnnotation {
|
|||
return null;
|
||||
|
||||
// use Math.abs to prevent -0's
|
||||
return new Pair<String, String>("FisherStrand", String.format("%.1f", Math.abs(10.0 * Math.log10(pvalue))));
|
||||
return new Pair<String, String>(getKeyName(), String.format("%.1f", Math.abs(10.0 * Math.log10(pvalue))));
|
||||
}
|
||||
|
||||
/**
 * Returns the key under which the strand-bias value is reported.
 *
 * @return the constant key "FisherStrand"
 */
public String getKeyName() {
    return "FisherStrand";
}
|
||||
|
||||
/**
 * Returns the description entry for this annotation.
 * The leading key comes from getKeyName() instead of a repeated literal, so a
 * future key rename cannot leave the description out of sync.
 *
 * @return comma-separated entry: key, "1", "Float", and a quoted description
 *         (presumably the VCF INFO ID/Number/Type/Description fields - confirm against the writer)
 */
public String getDescription() {
    return getKeyName() + ",1,Float,\"Phred-scaled p-value Using Fisher's Exact Test to Detect Strand Bias\"";
}
|
||||
|
||||
/**
 * Tells the caller whether this annotation wants zero-quality reads.
 * NOTE(review): "zero quality" presumably means mapping quality 0 - confirm against the caller.
 *
 * @return always false
 */
public boolean useZeroQualityReads() {
    return false;
}
|
||||
|
||||
private Double strandTest(ReadBackedPileup pileup, int allele1, int allele2) {
|
||||
|
|
|
|||
|
|
@ -18,9 +18,13 @@ public class HomopolymerRun extends StandardVariantAnnotation {
|
|||
return null;
|
||||
|
||||
int run = computeHomopolymerRun(variation.getAlternativeBaseForSNP(), ref);
|
||||
return new Pair<String, String>("HomopolymerRun", String.format("%d", run));
|
||||
return new Pair<String, String>(getKeyName(), String.format("%d", run));
|
||||
}
|
||||
|
||||
/**
 * Returns the key under which the homopolymer run length is reported.
 *
 * @return the constant key "HRun"
 */
public String getKeyName() {
    return "HRun";
}
|
||||
|
||||
/**
 * Returns the description entry for this annotation.
 * The leading key comes from getKeyName() instead of a repeated literal, so a
 * future key rename cannot leave the description out of sync.
 *
 * @return comma-separated entry: key, "1", "Integer", and a quoted description
 *         (presumably the VCF INFO ID/Number/Type/Description fields - confirm against the writer)
 */
public String getDescription() {
    return getKeyName() + ",1,Integer,\"Largest Contiguous Homopolymer Run of Variant Allele In Either Direction\"";
}
|
||||
|
||||
/**
 * Tells the caller whether this annotation wants zero-quality reads.
 * NOTE(review): "zero quality" presumably means mapping quality 0 - confirm against the caller.
 *
 * @return always false
 */
public boolean useZeroQualityReads() {
    return false;
}
|
||||
|
||||
private static int computeHomopolymerRun(char altAllele, ReferenceContext ref) {
|
||||
|
|
|
|||
|
|
@ -19,8 +19,12 @@ public class MappingQualityZero extends StandardVariantAnnotation {
|
|||
if ( reads.get(i).getMappingQuality() == 0 )
|
||||
MQ0Count++;
|
||||
}
|
||||
return new Pair<String, String>("MAPQ0", String.format("%d", MQ0Count));
|
||||
return new Pair<String, String>(getKeyName(), String.format("%d", MQ0Count));
|
||||
}
|
||||
|
||||
/**
 * Returns the key under which the count of mapping-quality-zero reads is reported.
 *
 * @return the constant key "MQ0"
 */
public String getKeyName() {
    return "MQ0";
}
|
||||
|
||||
/**
 * Returns the description entry for this annotation.
 * The leading key comes from getKeyName() instead of a repeated literal, so a
 * future key rename cannot leave the description out of sync.
 *
 * @return comma-separated entry: key, "1", "Integer", and a quoted description
 *         (presumably the VCF INFO ID/Number/Type/Description fields - confirm against the writer)
 */
public String getDescription() {
    return getKeyName() + ",1,Integer,\"Total Mapping Quality Zero Reads\"";
}
|
||||
|
||||
/**
 * Tells the caller whether this annotation wants zero-quality reads.
 * This annotation counts reads whose mapping quality is 0, so it asks for them.
 *
 * @return always true
 */
public boolean useZeroQualityReads() {
    return true;
}
|
||||
}
|
||||
|
|
@ -51,6 +51,10 @@ public class PrimaryBaseSecondaryBaseSymmetry implements VariantAnnotation{
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the key name of this annotation.
 *
 * @return the value of the KEY_NAME constant
 */
public String getKeyName() {
    return KEY_NAME;
}
|
||||
|
||||
/**
 * Returns the description entry for this annotation, prefixed with KEY_NAME.
 *
 * @return comma-separated entry: key, "1", "Float", and a quoted description
 */
public String getDescription() {
    final String details = ",1,Float,\"Primary Vs. Secondary Base Symmetry\"";
    return KEY_NAME + details;
}
|
||||
|
||||
private Pair<Integer,Double> getProportionOfReferenceSecondBasesThatSupportAlt( ReferenceContext ref, ReadBackedPileup p, byte snp ) {
|
||||
int depth = 0;
|
||||
int support = 0;
|
||||
|
|
|
|||
|
|
@ -19,8 +19,12 @@ public class RMSMappingQuality extends StandardVariantAnnotation {
|
|||
for (int i=0; i < reads.size(); i++)
|
||||
qualities[i] = reads.get(i).getMappingQuality();
|
||||
double rms = MathUtils.rms(qualities);
|
||||
return new Pair<String, String>("RMSMAPQ", String.format("%.2f", rms));
|
||||
return new Pair<String, String>(getKeyName(), String.format("%.2f", rms));
|
||||
}
|
||||
|
||||
/**
 * Returns the key under which the RMS mapping quality is reported.
 *
 * @return the constant key "MQ"
 */
public String getKeyName() {
    return "MQ";
}
|
||||
|
||||
/**
 * Returns the description entry for this annotation.
 * The leading key comes from getKeyName() instead of a repeated literal, so a
 * future key rename cannot leave the description out of sync.
 *
 * @return comma-separated entry: key, "1", "Float", and a quoted description
 *         (presumably the VCF INFO ID/Number/Type/Description fields - confirm against the writer)
 */
public String getDescription() {
    return getKeyName() + ",1,Float,\"RMS Mapping Quality\"";
}
|
||||
|
||||
/**
 * Tells the caller whether this annotation wants zero-quality reads.
 * NOTE(review): "zero quality" presumably means mapping quality 0 - confirm against the caller.
 *
 * @return always true
 */
public boolean useZeroQualityReads() {
    return true;
}
|
||||
}
|
||||
|
|
@ -40,9 +40,13 @@ public class RankSumTest implements VariantAnnotation {
|
|||
if ( MathUtils.compareDoubles(pvalue, 0.0) == 0 )
|
||||
return null;
|
||||
|
||||
return new Pair<String, String>("RankSum", String.format("%.1f", -10.0 * Math.log10(pvalue)));
|
||||
return new Pair<String, String>(getKeyName(), String.format("%.1f", -10.0 * Math.log10(pvalue)));
|
||||
}
|
||||
|
||||
/**
 * Returns the key under which the rank sum value is reported.
 *
 * @return the constant key "RankSum"
 */
public String getKeyName() {
    return "RankSum";
}
|
||||
|
||||
/**
 * Returns the description entry for this annotation.
 * The leading key comes from getKeyName() instead of a repeated literal, so a
 * future key rename cannot leave the description out of sync.
 *
 * @return comma-separated entry: key, "1", "Float", and a quoted description
 *         (presumably the VCF INFO ID/Number/Type/Description fields - confirm against the writer)
 */
public String getDescription() {
    return getKeyName() + ",1,Float,\"Phred-scaled p-value From Wilcoxon Rank Sum Test of Het Vs. Ref Base Qualities\"";
}
|
||||
|
||||
private void fillQualsFromPileup(char ref, char alt, ReadBackedPileup pileup, List<Integer> refQuals, List<Integer> altQuals) {
|
||||
for ( PileupElement p : pileup ) {
|
||||
// ignore deletions
|
||||
|
|
|
|||
|
|
@ -38,6 +38,10 @@ public class ResidualQuality implements VariantAnnotation{
|
|||
return new Pair<String,String>(KEY_NAME, String.format("%f", logResidQual ));
|
||||
}
|
||||
|
||||
/**
 * Returns the key name of this annotation, the same constant used for the annotation pair.
 *
 * @return the value of the KEY_NAME constant
 */
public String getKeyName() {
    return KEY_NAME;
}
|
||||
|
||||
/**
 * Returns the description entry for this annotation, prefixed with KEY_NAME.
 *
 * @return comma-separated entry: key, "1", "Float", and a quoted description
 */
public String getDescription() {
    final String details = ",1,Float,\"Log-scaled Residual Error\"";
    return KEY_NAME + details;
}
|
||||
|
||||
private Double getLogResidualQuality( ReadBackedPileup p, char ref, char snp ) {
|
||||
byte[] pbp = p.getBases();
|
||||
byte[] quals = p.getQuals();
|
||||
|
|
|
|||
|
|
@ -26,6 +26,10 @@ public class SecondBaseSkew implements VariantAnnotation {
|
|||
|
||||
/**
 * Tells the caller whether this annotation wants zero-quality reads.
 *
 * @return the value of the USE_ZERO_QUALITY_READS constant
 */
public boolean useZeroQualityReads() {
    return USE_ZERO_QUALITY_READS;
}
|
||||
|
||||
/**
 * Returns the key name of this annotation.
 *
 * @return the value of the KEY_NAME constant
 */
public String getKeyName() {
    return KEY_NAME;
}
|
||||
|
||||
/**
 * Returns the description entry for this annotation, prefixed with KEY_NAME.
 *
 * @return comma-separated entry: key, "1", "Float", and a quoted description
 */
public String getDescription() {
    final String details = ",1,Float,\"Chi-square Secondary Base Skew\"";
    return KEY_NAME + details;
}
|
||||
|
||||
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
|
||||
if ( variation.isSNP() && variation.isBiallelic() ) {
|
||||
char snp = variation.getAlternativeBaseForSNP();
|
||||
|
|
|
|||
|
|
@ -13,8 +13,12 @@ public class SpanningDeletions extends StandardVariantAnnotation {
|
|||
|
||||
/**
 * Annotates a site with the fraction of pileup elements that are deletions:
 * pileup.getNumberOfDeletions() divided by pileup.size(), formatted to two decimals.
 * The ref, variation and genotypes arguments are not used in the visible body.
 *
 * @return a pair of annotation key and the formatted fraction
 */
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
|
||||
int deletions = pileup.getNumberOfDeletions();
|
||||
// NOTE(review): if pileup.size() can be 0 here, the division below is 0.0/0.0 and the value is
// emitted as "NaN" - confirm whether an empty pileup can reach this method.
// NOTE(review): two return statements appear back to back. This span looks like a diff hunk in which
// the "SpanningDeletionFraction" line is presumably the removed version - confirm.
return new Pair<String, String>("SpanningDeletionFraction", String.format("%.2f", (double)deletions/(double)pileup.size()));
|
||||
return new Pair<String, String>(getKeyName(), String.format("%.2f", (double)deletions/(double)pileup.size()));
|
||||
}
|
||||
|
||||
/**
 * Returns the key under which the spanning-deletion fraction is reported.
 *
 * @return the constant key "Dels"
 */
public String getKeyName() {
    return "Dels";
}
|
||||
|
||||
/**
 * Returns the description entry for this annotation.
 * The leading key comes from getKeyName() instead of a repeated literal, so a
 * future key rename cannot leave the description out of sync.
 *
 * @return comma-separated entry: key, "1", "Float", and a quoted description
 *         (presumably the VCF INFO ID/Number/Type/Description fields - confirm against the writer)
 */
public String getDescription() {
    return getKeyName() + ",1,Float,\"Fraction of Reads Containing Spanning Deletions\"";
}
|
||||
|
||||
/**
 * Tells the caller whether this annotation wants zero-quality reads.
 * NOTE(review): "zero quality" presumably means mapping quality 0 - confirm against the caller.
 *
 * @return always false
 */
public boolean useZeroQualityReads() {
    return false;
}
|
||||
}
|
||||
|
|
@ -12,5 +12,7 @@ public interface VariantAnnotation {
|
|||
|
||||
/**
 * Computes this annotation for one site.
 *
 * @return a pair of annotation key and formatted value; some implementations return null
 *         when they produce no value for the site
 */
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes);
|
||||
/**
 * @return whether this annotation wants zero-quality reads
 *         (presumably reads with mapping quality 0 - confirm against the caller)
 */
public boolean useZeroQualityReads();
|
||||
/** @return the key name of this annotation (per the commit message, its VCF INFO key) */
public String getKeyName();
|
||||
/**
 * @return the description entry for this annotation; implementations return a comma-separated
 *         string of key, "1", a type name (Float or Integer) and a quoted description
 */
public String getDescription();
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
|||
protected String sampleName = null;
|
||||
// Optional command-line argument (-A / annotations): the annotation types to apply.
@Argument(fullName="annotations", shortName="A", doc="Annotation types to apply to variant calls", required=false)
|
||||
protected String[] ANNOTATIONS;
|
||||
// NOTE(review): two @Argument annotations precede USE_ALL_ANNOTATIONS. This span looks like a diff hunk
// in which "useAllAnnotations"/-all is presumably the removed version and
// "includeExperimentalAnnotations"/-exp its replacement - confirm against the actual file.
@Argument(fullName="useAllAnnotations", shortName="all", doc="Use all possible annotations", required=false)
|
||||
@Argument(fullName="includeExperimentalAnnotations", shortName="exp", doc="Use all possible annotations, including experimental ones", required=false)
|
||||
protected Boolean USE_ALL_ANNOTATIONS = false;
|
||||
// Optional command-line flag (-standard): use all standard annotations; off by default.
@Argument(fullName="useStandardAnnotations", shortName="standard", doc="Use all standard annotations", required=false)
|
||||
protected Boolean USE_STANDARD_ANNOTATIONS = false;
|
||||
|
|
|
|||
|
|
@ -65,16 +65,16 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
/**
 * Integration test #1 for an input VCF that already has annotations (vcfexample2.vcf)
 * when annotations are requested on the command line.
 */
@Test
|
||||
public void testHasAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// NOTE(review): two command-line/checksum pairs follow. This span looks like a diff hunk in which the
// "-all" pair is presumably the removed version and the "-standard" pair its replacement - confirm.
// The hex strings are presumably the expected MD5 of the output - verify against WalkerTest.
baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("aa4fd832eab85123b97c7961c9c8402a"));
|
||||
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("9a30487ad885f4d49569032fe6463af3"));
|
||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
/**
 * Integration test #2 for an input VCF that already has annotations (vcfexample3.vcf)
 * when annotations are requested on the command line.
 */
@Test
|
||||
public void testHasAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// NOTE(review): two command-line/checksum pairs follow. This span looks like a diff hunk in which the
// "-all" pair is presumably the removed version and the "-standard" pair its replacement - confirm.
// The hex strings are presumably the expected MD5 of the output - verify against WalkerTest.
baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("fcae8ca66020be99cc01cc699f489462"));
|
||||
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("ef01d735ea0bcfeb6e7394c65f2a1938"));
|
||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -97,16 +97,16 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
/**
 * Integration test #1 for an input VCF without annotations (vcfexample2empty.vcf)
 * when annotations are requested on the command line.
 */
@Test
|
||||
public void testNoAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// NOTE(review): two command-line/checksum pairs follow. This span looks like a diff hunk in which the
// "-all" pair is presumably the removed version and the "-standard" pair its replacement - confirm.
// The hex strings are presumably the expected MD5 of the output - verify against WalkerTest.
baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("ad3945cab44444d3b1c0bd35307814f7"));
|
||||
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("7e6ceb79e9a1f104723299ed68b236c6"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
/**
 * Integration test #2 for an input VCF without annotations (vcfexample3empty.vcf)
 * when annotations are requested on the command line.
 */
@Test
|
||||
public void testNoAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// NOTE(review): two command-line/checksum pairs follow. This span looks like a diff hunk in which the
// "-all" pair is presumably the removed version and the "-standard" pair its replacement - confirm.
// The hex strings are presumably the expected MD5 of the output - verify against WalkerTest.
baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("c31832ce6abf496180ebb64c1916b2e8"));
|
||||
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("87748d4c80ff76701dd01d7b0f803249"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1PointEM() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("b14783b877e4857366cf9b1f516de343"));
|
||||
Arrays.asList("e401eb288c167b72f2fb3d0b3f9c22da"));
|
||||
executeTest("testMultiSamplePilot1 - Point Estimate EM", spec);
|
||||
}
|
||||
|
||||
|
|
@ -55,7 +55,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot2PointEM() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("72bcbd0786bb94803d1ad867becf12e2"));
|
||||
Arrays.asList("ce9f37df3275ed4e7abaedf33d982889"));
|
||||
executeTest("testMultiSamplePilot2 - Point Estimate EM", spec);
|
||||
}
|
||||
|
||||
|
|
@ -68,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testPooled1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1,
|
||||
Arrays.asList("48f17694db2fc8744741e4eb73227e5b"));
|
||||
Arrays.asList("1905bc65b1abb56c776558d562de5ea1"));
|
||||
executeTest("testPooled1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -81,7 +81,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("e72379c9654db9cf6ee17e7b9126f23e"));
|
||||
Arrays.asList("aaecb9fe822a42576500a91973baff03"));
|
||||
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -89,7 +89,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("577ad8a1b1d16efc68e833a6d8e5b638"));
|
||||
Arrays.asList("d36a8ba5ddf1265ab5be2ed390fa56e1"));
|
||||
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -97,7 +97,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("b4a18843fc6fa76a878c39dfde1cedda"));
|
||||
Arrays.asList("029706a60440660c6a636091e9489122"));
|
||||
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -40,7 +40,6 @@
|
|||
<!-- And the annotations -->
|
||||
<class>org.broadinstitute.sting.gatk.walkers.annotator.AlleleBalance</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.annotator.DepthOfCoverage</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.annotator.FisherStrand</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.annotator.HomopolymerRun</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.annotator.MappingQualityZero</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.annotator.RMSMappingQuality</class>
|
||||
|
|
|
|||
Loading…
Reference in New Issue