From 8de6a8d24662d7c2bb37a95c21d7229e60df7da6 Mon Sep 17 00:00:00 2001 From: chartl Date: Thu, 28 Jan 2010 21:06:56 +0000 Subject: [PATCH] Lots of changes; all to do something relatively minor. 1) Changed VCF/RodVCF to allow for inquiries to whether or not the site is novel; isNovel() looks at the ID field, and those members of the info field that indicate membership in dbsnp, hapmap2, or hapmap3; and if none can be found, returns true. 2) Changed VariantAnnotator to annotate hapmap2 and hapmap3, if you bind rods to it with those names. Works in the same way as DBSNP does -- if you give it a rod named "hapmap2" it'll annotate membership in it. -- Passes integration tests 3) Changed UnifiedGenotyper to do the same thing (since it uses Annotations as a subroutine) -- Passes integration tests 4) Changed MultiSampleConcordanceWalker to take a flag --ignoreKnownSites (or -novels) to examine concordance only on sites that are not marked as in dbSNP or in Hapmap in the variant VCF 5) Changed VCFConcordanceCalculator (the object MultiSampleConcordanceWalker runs on) to output Concordant_Het_Calls and Concordant_Hom_Calls separately, rather than combined as Concordant_Calls 6) AlleleBalanceHistogramWalker -- I don't know what i did to this thing. I've been jerry rigging System.outs to do stuff it was never really intended to do; so there's probably some dumb System.out.print("HI I AM AT LOCUS:"+loc) stuck somewhere. It compiles at any rate. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2724 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/refdata/RodVCF.java | 10 +++++ .../walkers/annotator/VariantAnnotator.java | 38 +++++++++++++++---- .../walkers/genotyper/UnifiedGenotyper.java | 15 ++++++-- .../walkers/AlleleBalanceHistogramWalker.java | 20 ++++++++-- .../MultiSampleConcordanceWalker.java | 11 ++++-- .../multisample/VCFConcordanceCalculator.java | 15 +++++--- .../sting/utils/genotype/vcf/VCFRecord.java | 5 +++ 7 files changed, 92 insertions(+), 22 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java index 91639f7d6..3fa89d85e 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java @@ -126,6 +126,16 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, return mCurrentRecord.isSNP(); } + /** + * are we a novel site? Is there a DBSNP identifier + * or a hapmap entry for the site? + */ + + public boolean isNovel() { + assertNotNull(); + return mCurrentRecord.isNovel(); + } + /** * are we an insertion? * diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 056136b1f..4aeaab0cd 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -21,6 +21,7 @@ import java.io.*; //@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=VariationRod.class)) @Allows(value={DataSource.READS, DataSource.REFERENCE}) @Reference(window=@Window(start=-20,stop=20)) +@By(DataSource.REFERENCE) public class VariantAnnotator extends LocusWalker { @Argument(fullName="vcfOutput", shortName="vcf", doc="VCF file to which all variants should be written with annotations", required=true) protected File VCF_OUT; @@ -44,6 +45,10 @@ public class VariantAnnotator extends LocusWalker { // should we annotate dbsnp? private boolean annotateDbsnp = false; + // how about hapmap2? + private boolean annotateHapmap2 = false; + // how about hapmap3? + private boolean annotateHapmap3 = false; // mapping from class name to class private static HashMap allAnnotations = null; @@ -125,7 +130,12 @@ public class VariantAnnotator extends LocusWalker { ReferenceOrderedData rod = source.getReferenceOrderedData(); if ( rod.getType().equals(rodDbSNP.class) ) { annotateDbsnp = true; - break; + } + if ( rod.getName().equals("hapmap2") ) { + annotateHapmap2 = true; + } + if ( rod.getName().equals("hapmap3") ) { + annotateHapmap3 = true; } } @@ -137,6 +147,10 @@ public class VariantAnnotator extends LocusWalker { hInfo.addAll(getVCFAnnotationDescriptions(requestedAnnotations)); if ( annotateDbsnp ) hInfo.add(new VCFInfoHeaderLine(VCFRecord.DBSNP_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "dbSNP membership")); + if ( annotateHapmap2 ) + hInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP2_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Hapmap 2 membership")); + if ( annotateHapmap3 ) + hInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP3_KEY,1,VCFInfoHeaderLine.INFO_TYPE.Integer, "Hapmap 3 membership")); vcfWriter = new VCFWriter(VCF_OUT); vcfHeader = new VCFHeader(hInfo, samples); @@ -182,7 +196,7 @@ public class VariantAnnotator extends LocusWalker { if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) { Map stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup()); if ( stratifiedContexts != null ) - annotations = getAnnotations(tracker, ref, stratifiedContexts, variant, requestedAnnotations, annotateDbsnp); + annotations = getAnnotations(tracker, ref, stratifiedContexts, variant, requestedAnnotations, annotateDbsnp, annotateHapmap2, annotateHapmap3); } writeVCF(tracker, ref, context, variant, annotations); @@ -224,21 +238,21 @@ public class VariantAnnotator extends LocusWalker { } // option #1: don't specify annotations to be used: standard annotations are used by default - public static Map getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, Variation variation, boolean annotateDbsnp) { + public static Map getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, Variation variation, boolean annotateDbsnp, boolean annotateHapmap2, boolean annotateHapmap3) { if ( standardAnnotations == null ) determineAllAnnotations(); - return getAnnotations(tracker, ref, stratifiedContexts, variation, standardAnnotations.values(), annotateDbsnp); + return getAnnotations(tracker, ref, stratifiedContexts, variation, standardAnnotations.values(), annotateDbsnp, annotateHapmap2, annotateHapmap3); } // option #2: specify that all possible annotations be used - public static Map getAllAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, Variation variation, boolean annotateDbsnp) { + public static Map getAllAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, Variation variation, boolean annotateDbsnp, boolean annotateHapmap2, boolean annotateHapmap3) { if ( allAnnotations == null ) determineAllAnnotations(); - return getAnnotations(tracker, ref, stratifiedContexts, variation, allAnnotations.values(), annotateDbsnp); + return getAnnotations(tracker, ref, stratifiedContexts, variation, allAnnotations.values(), annotateDbsnp, annotateHapmap2, annotateHapmap3); } // option #3: specify the exact annotations to be used - public static Map getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, Variation variation, Collection annotations, boolean annotateDbsnp) { + public static Map getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, Variation variation, Collection annotations, boolean annotateDbsnp, boolean annotateHapmap2, boolean annotateHapmap3) { HashMap results = new HashMap(); @@ -248,6 +262,16 @@ public class VariantAnnotator extends LocusWalker { results.put(VCFRecord.DBSNP_KEY, dbsnp == null ? "0" : "1"); } + if ( annotateHapmap2 ) { + RODRecordList hapmap2 = tracker.getTrackData("hapmap2",null); + results.put(VCFRecord.HAPMAP2_KEY, hapmap2 == null? "0" : "1"); + } + + if ( annotateHapmap3 ) { + RODRecordList hapmap3 = tracker.getTrackData("hapmap3",null); + results.put( VCFRecord.HAPMAP3_KEY, hapmap3 == null ? "0" : "1"); + } + for ( VariantAnnotation annotator : annotations) { String annot = annotator.annotate(tracker, ref, stratifiedContexts, variation); if ( annot != null ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 0a6a3b861..1dbc28f3f 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -72,6 +72,10 @@ public class UnifiedGenotyper extends LocusWalker annotations; if ( UAC.ALL_ANNOTATIONS ) - annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp); + annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp, annotateHapmap2, annotateHapmap3); else - annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp); + annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp, annotateHapmap2, annotateHapmap3); ((ArbitraryFieldsBacked)call.variation).setFields(annotations); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AlleleBalanceHistogramWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AlleleBalanceHistogramWalker.java index dbe753db4..ea48defbe 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AlleleBalanceHistogramWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AlleleBalanceHistogramWalker.java @@ -68,15 +68,27 @@ public class AlleleBalanceHistogramWalker extends LocusWalker private HashMap getAlleleBalanceBySample(VCFRecord vcf, ReferenceContext ref, AlignmentContext context) { Map sampleContext = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup(),null,null); HashMap balances = new HashMap(); + System.out.println("----- "+ref.getLocus()+" -----"); + int returnedBalances = 0; for ( String sample : vcf.getSampleNames() ) { - balances.put(sample, getAlleleBalance(ref,sampleContext.get(sample),vcf.getGenotype(sample))); + Double balance = getAlleleBalance(ref,sampleContext.get(sample),vcf.getAlternativeBaseForSNP()); + balances.put(sample, balance); + if ( balance != null ) { + returnedBalances++; + System.out.println(sample+"\t"+getCoverage(sampleContext.get(sample))); + } } return balances; } - private Double getAlleleBalance(ReferenceContext ref, StratifiedAlignmentContext context, VCFGenotypeRecord vcf) { + private long getCoverage(StratifiedAlignmentContext context) { + return context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size(); + } + + private Double getAlleleBalance(ReferenceContext ref, StratifiedAlignmentContext context, char snpBase) { if ( context == null ) { + //System.out.println("Stratified context was null"); return null; } @@ -85,13 +97,14 @@ public class AlleleBalanceHistogramWalker extends LocusWalker AlignmentContext alicon = context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE); if ( alicon == null ) { + System.out.println("Alignment context from stratified was null"); return null; } for ( PileupElement e : alicon.getBasePileup() ) { if ( BaseUtils.basesAreEqual( e.getBase(), (byte) ref.getBase() ) ) { refBases++; - } else if ( BaseUtils.basesAreEqual(e.getBase(), (byte) vcf.toVariation(ref.getBase()).getAlternativeBaseForSNP() ) ) { + } else if ( BaseUtils.basesAreEqual(e.getBase(), (byte) snpBase ) ) { altBases++; } } @@ -99,6 +112,7 @@ public class AlleleBalanceHistogramWalker extends LocusWalker if ( refBases > 0 || altBases > 0) { return ( ( double ) altBases ) / ( ( double ) altBases + ( double ) refBases ); } else { + System.out.println("No ref or alt bases in pileup"); return null; } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java index 4a7478cb1..04b62e566 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java @@ -26,8 +26,11 @@ import org.broadinstitute.sting.utils.cmdLine.Argument; */ @Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="truth",type= RodVCF.class),@RMD(name="variants",type= RodVCF.class)}) public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInfo, MultiSampleConcordanceSet > { -@Argument(fullName="noLowDepthLoci", shortName="NLD", doc="Do not use loci in analysis where the variant depth (as specified in the VCF) is less than the given number; "+ - "DO NOT USE THIS IF YOUR VCF DOES NOT HAVE 'DP' IN THE FORMAT FIELD", required=false) private int minDepth = -1; + @Argument(fullName="noLowDepthLoci", shortName="NLD", doc="Do not use loci in analysis where the variant depth (as specified in the VCF) is less than the given number; "+ + "DO NOT USE THIS IF YOUR VCF DOES NOT HAVE 'DP' IN THE FORMAT FIELD", required=false) private int minDepth = -1; + @Argument(fullName = "ignoreKnownSites", shortName = "novel", doc="Only run concordance over novel sites (sites marked in the VCF as being in dbSNP or Hapmap 2 or 3)", required=false ) + boolean ignoreKnownSites = false; + public void initialize() { } @@ -37,7 +40,7 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf } public LocusConcordanceInfo map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext c) { - if ( tracker == null ) { + if ( tracker == null || ( ignoreKnownSites && ! ( (RodVCF) tracker.lookup("variants",null)).isNovel() ) ) { return null; } ReferenceOrderedDatum truthData = tracker.lookup("truth", null); @@ -94,7 +97,7 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf } public void onTraversalDone(MultiSampleConcordanceSet cSet) { - out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Sample_ID","Ignored_due_to_depth","Concordant_Refs","Concordant_Vars","Homs_called_het","Het_called_homs","False_Positives","False_Negatives_Due_To_Ref_Call","False_Negatives_Due_To_No_Call"); + out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Sample_ID","Ignored_due_to_depth","Concordant_Refs","Concordant_Homs","Concordant_Hets","Homs_called_het","Het_called_homs","False_Positives","False_Negatives_Due_To_Ref_Call","False_Negatives_Due_To_No_Call"); for ( VCFConcordanceCalculator sample : cSet.getConcordanceSet() ) { out.print(String.format("%s%n",sample)); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/multisample/VCFConcordanceCalculator.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/multisample/VCFConcordanceCalculator.java index fa12d950f..d5cad1c04 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/multisample/VCFConcordanceCalculator.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/multisample/VCFConcordanceCalculator.java @@ -21,7 +21,8 @@ class VCFConcordanceCalculator { private Set falseNegativeLociDueToNoCall; private Set hetsCalledHoms; private Set homsCalledHets; - private Set concordantCalls; + private Set concordantHomCalls; + private Set concordantHetCalls; private Set concordantGenotypeReferenceCalls; private Set chipNoCalls; private Set ignoredDueToDepth; @@ -33,7 +34,8 @@ class VCFConcordanceCalculator { falsePositiveLoci = new HashSet(); hetsCalledHoms = new HashSet(); homsCalledHets = new HashSet(); - concordantCalls = new HashSet(); + concordantHomCalls = new HashSet(); + concordantHetCalls = new HashSet(); concordantGenotypeReferenceCalls = new HashSet(); chipNoCalls = new HashSet(); ignoredDueToDepth = new HashSet(); @@ -45,7 +47,7 @@ class VCFConcordanceCalculator { } public String toString() { - return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,ignoredDueToDepth.size(),concordantGenotypeReferenceCalls.size(),concordantCalls.size(),homsCalledHets.size(),hetsCalledHoms.size(),falsePositiveLoci.size(),falseNegativeLoci.size(),falseNegativeLociDueToNoCall.size()); + return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,ignoredDueToDepth.size(),concordantGenotypeReferenceCalls.size(),concordantHomCalls.size(),concordantHetCalls.size(),homsCalledHets.size(),hetsCalledHoms.size(),falsePositiveLoci.size(),falseNegativeLoci.size(),falseNegativeLociDueToNoCall.size()); } private void compareGenotypes(VCFGenotypeRecord truth, VCFGenotypeRecord call, GenomeLoc loc, byte ref) { @@ -80,8 +82,11 @@ class VCFConcordanceCalculator { hetsCalledHoms.add(loc); } else if ( truth.isHom() && call.isHet() ) { homsCalledHets.add(loc); - } else if ( ( truth.isHet() && call.isHet() ) || ( truth.isHom() && call.isHom() ) ) { // be extra careful - concordantCalls.add(loc); + } else if ( ( truth.isHet() && call.isHet() ) ) { + concordantHetCalls.add(loc); + } else if ( truth.isHom() && call.isHom() ) { // be extra careful + concordantHomCalls.add(loc); } + } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java index 439e2a257..0aa5ae2bc 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java @@ -20,6 +20,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { public static final String DBSNP_KEY = "DB"; public static final String DEPTH_KEY = "DP"; public static final String HAPMAP2_KEY = "H2"; + public static final String HAPMAP3_KEY = "H3"; public static final String RMS_MAPPING_QUALITY_KEY = "MQ"; public static final String SAMPLE_NUMBER_KEY = "NS"; public static final String STRAND_BIAS_KEY = "SB"; @@ -323,6 +324,10 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { return getType() == VARIANT_TYPE.SNP; } + public boolean isNovel() { + return ( mID != null || mInfoFields.get(HAPMAP2_KEY) != null || mInfoFields.get(HAPMAP3_KEY) != null || mInfoFields.get(DBSNP_KEY) != null); + } + public char getAlternativeBaseForSNP() { if ( !isSNP() && !isBiallelic() ) throw new IllegalStateException("This record does not represent a SNP");