diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java index 91639f7d6..3fa89d85e 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java @@ -126,6 +126,16 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, return mCurrentRecord.isSNP(); } + /** + * are we a novel site? Is there a DBSNP identifier + * or a hapmap entry for the site? + */ + + public boolean isNovel() { + assertNotNull(); + return mCurrentRecord.isNovel(); + } + /** * are we an insertion? * diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 056136b1f..4aeaab0cd 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -21,6 +21,7 @@ import java.io.*; //@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=VariationRod.class)) @Allows(value={DataSource.READS, DataSource.REFERENCE}) @Reference(window=@Window(start=-20,stop=20)) +@By(DataSource.REFERENCE) public class VariantAnnotator extends LocusWalker { @Argument(fullName="vcfOutput", shortName="vcf", doc="VCF file to which all variants should be written with annotations", required=true) protected File VCF_OUT; @@ -44,6 +45,10 @@ public class VariantAnnotator extends LocusWalker { // should we annotate dbsnp? private boolean annotateDbsnp = false; + // how about hapmap2? + private boolean annotateHapmap2 = false; + // how about hapmap3? + private boolean annotateHapmap3 = false; // mapping from class name to class private static HashMap allAnnotations = null; @@ -125,7 +130,12 @@ public class VariantAnnotator extends LocusWalker { ReferenceOrderedData rod = source.getReferenceOrderedData(); if ( rod.getType().equals(rodDbSNP.class) ) { annotateDbsnp = true; - break; + } + if ( rod.getName().equals("hapmap2") ) { + annotateHapmap2 = true; + } + if ( rod.getName().equals("hapmap3") ) { + annotateHapmap3 = true; } } @@ -137,6 +147,10 @@ public class VariantAnnotator extends LocusWalker { hInfo.addAll(getVCFAnnotationDescriptions(requestedAnnotations)); if ( annotateDbsnp ) hInfo.add(new VCFInfoHeaderLine(VCFRecord.DBSNP_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "dbSNP membership")); + if ( annotateHapmap2 ) + hInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP2_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Hapmap 2 membership")); + if ( annotateHapmap3 ) + hInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP3_KEY,1,VCFInfoHeaderLine.INFO_TYPE.Integer, "Hapmap 3 membership")); vcfWriter = new VCFWriter(VCF_OUT); vcfHeader = new VCFHeader(hInfo, samples); @@ -182,7 +196,7 @@ public class VariantAnnotator extends LocusWalker { if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) { Map stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup()); if ( stratifiedContexts != null ) - annotations = getAnnotations(tracker, ref, stratifiedContexts, variant, requestedAnnotations, annotateDbsnp); + annotations = getAnnotations(tracker, ref, stratifiedContexts, variant, requestedAnnotations, annotateDbsnp, annotateHapmap2, annotateHapmap3); } writeVCF(tracker, ref, context, variant, annotations); @@ -224,21 +238,21 @@ public class VariantAnnotator extends LocusWalker { } // option #1: don't specify annotations to be used: standard annotations are used by default - public static Map getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, Variation variation, boolean annotateDbsnp) { + public static Map getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, Variation variation, boolean annotateDbsnp, boolean annotateHapmap2, boolean annotateHapmap3) { if ( standardAnnotations == null ) determineAllAnnotations(); - return getAnnotations(tracker, ref, stratifiedContexts, variation, standardAnnotations.values(), annotateDbsnp); + return getAnnotations(tracker, ref, stratifiedContexts, variation, standardAnnotations.values(), annotateDbsnp, annotateHapmap2, annotateHapmap3); } // option #2: specify that all possible annotations be used - public static Map getAllAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, Variation variation, boolean annotateDbsnp) { + public static Map getAllAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, Variation variation, boolean annotateDbsnp, boolean annotateHapmap2, boolean annotateHapmap3) { if ( allAnnotations == null ) determineAllAnnotations(); - return getAnnotations(tracker, ref, stratifiedContexts, variation, allAnnotations.values(), annotateDbsnp); + return getAnnotations(tracker, ref, stratifiedContexts, variation, allAnnotations.values(), annotateDbsnp, annotateHapmap2, annotateHapmap3); } // option #3: specify the exact annotations to be used - public static Map getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, Variation variation, Collection annotations, boolean annotateDbsnp) { + public static Map getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, Variation variation, Collection annotations, boolean annotateDbsnp, boolean annotateHapmap2, boolean annotateHapmap3) { HashMap results = new HashMap(); @@ -248,6 +262,16 @@ public class VariantAnnotator extends LocusWalker { results.put(VCFRecord.DBSNP_KEY, dbsnp == null ? "0" : "1"); } + if ( annotateHapmap2 ) { + RODRecordList hapmap2 = tracker.getTrackData("hapmap2",null); + results.put(VCFRecord.HAPMAP2_KEY, hapmap2 == null? "0" : "1"); + } + + if ( annotateHapmap3 ) { + RODRecordList hapmap3 = tracker.getTrackData("hapmap3",null); + results.put( VCFRecord.HAPMAP3_KEY, hapmap3 == null ? "0" : "1"); + } + for ( VariantAnnotation annotator : annotations) { String annot = annotator.annotate(tracker, ref, stratifiedContexts, variation); if ( annot != null ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 0a6a3b861..1dbc28f3f 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -72,6 +72,10 @@ public class UnifiedGenotyper extends LocusWalker annotations; if ( UAC.ALL_ANNOTATIONS ) - annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp); + annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp, annotateHapmap2, annotateHapmap3); else - annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp); + annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp, annotateHapmap2, annotateHapmap3); ((ArbitraryFieldsBacked)call.variation).setFields(annotations); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AlleleBalanceHistogramWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AlleleBalanceHistogramWalker.java index dbe753db4..ea48defbe 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AlleleBalanceHistogramWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AlleleBalanceHistogramWalker.java @@ -68,15 +68,27 @@ public class AlleleBalanceHistogramWalker extends LocusWalker private HashMap getAlleleBalanceBySample(VCFRecord vcf, ReferenceContext ref, AlignmentContext context) { Map sampleContext = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup(),null,null); HashMap balances = new HashMap(); + System.out.println("----- "+ref.getLocus()+" -----"); + int returnedBalances = 0; for ( String sample : vcf.getSampleNames() ) { - balances.put(sample, getAlleleBalance(ref,sampleContext.get(sample),vcf.getGenotype(sample))); + Double balance = getAlleleBalance(ref,sampleContext.get(sample),vcf.getAlternativeBaseForSNP()); + balances.put(sample, balance); + if ( balance != null ) { + returnedBalances++; + System.out.println(sample+"\t"+getCoverage(sampleContext.get(sample))); + } } return balances; } - private Double getAlleleBalance(ReferenceContext ref, StratifiedAlignmentContext context, VCFGenotypeRecord vcf) { + private long getCoverage(StratifiedAlignmentContext context) { + return context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size(); + } + + private Double getAlleleBalance(ReferenceContext ref, StratifiedAlignmentContext context, char snpBase) { if ( context == null ) { + //System.out.println("Stratified context was null"); return null; } @@ -85,13 +97,14 @@ public class AlleleBalanceHistogramWalker extends LocusWalker AlignmentContext alicon = context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE); if ( alicon == null ) { + System.out.println("Alignment context from stratified was null"); return null; } for ( PileupElement e : alicon.getBasePileup() ) { if ( BaseUtils.basesAreEqual( e.getBase(), (byte) ref.getBase() ) ) { refBases++; - } else if ( BaseUtils.basesAreEqual(e.getBase(), (byte) vcf.toVariation(ref.getBase()).getAlternativeBaseForSNP() ) ) { + } else if ( BaseUtils.basesAreEqual(e.getBase(), (byte) snpBase ) ) { altBases++; } } @@ -99,6 +112,7 @@ public class AlleleBalanceHistogramWalker extends LocusWalker if ( refBases > 0 || altBases > 0) { return ( ( double ) altBases ) / ( ( double ) altBases + ( double ) refBases ); } else { + System.out.println("No ref or alt bases in pileup"); return null; } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java index 4a7478cb1..04b62e566 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java @@ -26,8 +26,11 @@ import org.broadinstitute.sting.utils.cmdLine.Argument; */ @Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="truth",type= RodVCF.class),@RMD(name="variants",type= RodVCF.class)}) public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInfo, MultiSampleConcordanceSet > { -@Argument(fullName="noLowDepthLoci", shortName="NLD", doc="Do not use loci in analysis where the variant depth (as specified in the VCF) is less than the given number; "+ - "DO NOT USE THIS IF YOUR VCF DOES NOT HAVE 'DP' IN THE FORMAT FIELD", required=false) private int minDepth = -1; + @Argument(fullName="noLowDepthLoci", shortName="NLD", doc="Do not use loci in analysis where the variant depth (as specified in the VCF) is less than the given number; "+ + "DO NOT USE THIS IF YOUR VCF DOES NOT HAVE 'DP' IN THE FORMAT FIELD", required=false) private int minDepth = -1; + @Argument(fullName = "ignoreKnownSites", shortName = "novel", doc="Only run concordance over novel sites (sites marked in the VCF as being in dbSNP or Hapmap 2 or 3)", required=false ) + boolean ignoreKnownSites = false; + public void initialize() { } @@ -37,7 +40,7 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf } public LocusConcordanceInfo map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext c) { - if ( tracker == null ) { + if ( tracker == null || ( ignoreKnownSites && ! ( (RodVCF) tracker.lookup("variants",null)).isNovel() ) ) { return null; } ReferenceOrderedDatum truthData = tracker.lookup("truth", null); @@ -94,7 +97,7 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf } public void onTraversalDone(MultiSampleConcordanceSet cSet) { - out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Sample_ID","Ignored_due_to_depth","Concordant_Refs","Concordant_Vars","Homs_called_het","Het_called_homs","False_Positives","False_Negatives_Due_To_Ref_Call","False_Negatives_Due_To_No_Call"); + out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Sample_ID","Ignored_due_to_depth","Concordant_Refs","Concordant_Homs","Concordant_Hets","Homs_called_het","Het_called_homs","False_Positives","False_Negatives_Due_To_Ref_Call","False_Negatives_Due_To_No_Call"); for ( VCFConcordanceCalculator sample : cSet.getConcordanceSet() ) { out.print(String.format("%s%n",sample)); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/multisample/VCFConcordanceCalculator.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/multisample/VCFConcordanceCalculator.java index fa12d950f..d5cad1c04 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/multisample/VCFConcordanceCalculator.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/multisample/VCFConcordanceCalculator.java @@ -21,7 +21,8 @@ class VCFConcordanceCalculator { private Set falseNegativeLociDueToNoCall; private Set hetsCalledHoms; private Set homsCalledHets; - private Set concordantCalls; + private Set concordantHomCalls; + private Set concordantHetCalls; private Set concordantGenotypeReferenceCalls; private Set chipNoCalls; private Set ignoredDueToDepth; @@ -33,7 +34,8 @@ class VCFConcordanceCalculator { falsePositiveLoci = new HashSet(); hetsCalledHoms = new HashSet(); homsCalledHets = new HashSet(); - concordantCalls = new HashSet(); + concordantHomCalls = new HashSet(); + concordantHetCalls = new HashSet(); concordantGenotypeReferenceCalls = new HashSet(); chipNoCalls = new HashSet(); ignoredDueToDepth = new HashSet(); @@ -45,7 +47,7 @@ class VCFConcordanceCalculator { } public String toString() { - return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,ignoredDueToDepth.size(),concordantGenotypeReferenceCalls.size(),concordantCalls.size(),homsCalledHets.size(),hetsCalledHoms.size(),falsePositiveLoci.size(),falseNegativeLoci.size(),falseNegativeLociDueToNoCall.size()); + return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,ignoredDueToDepth.size(),concordantGenotypeReferenceCalls.size(),concordantHomCalls.size(),concordantHetCalls.size(),homsCalledHets.size(),hetsCalledHoms.size(),falsePositiveLoci.size(),falseNegativeLoci.size(),falseNegativeLociDueToNoCall.size()); } private void compareGenotypes(VCFGenotypeRecord truth, VCFGenotypeRecord call, GenomeLoc loc, byte ref) { @@ -80,8 +82,11 @@ class VCFConcordanceCalculator { hetsCalledHoms.add(loc); } else if ( truth.isHom() && call.isHet() ) { homsCalledHets.add(loc); - } else if ( ( truth.isHet() && call.isHet() ) || ( truth.isHom() && call.isHom() ) ) { // be extra careful - concordantCalls.add(loc); + } else if ( ( truth.isHet() && call.isHet() ) ) { + concordantHetCalls.add(loc); + } else if ( truth.isHom() && call.isHom() ) { // be extra careful + concordantHomCalls.add(loc); } + } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java index 439e2a257..0aa5ae2bc 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java @@ -20,6 +20,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { public static final String DBSNP_KEY = "DB"; public static final String DEPTH_KEY = "DP"; public static final String HAPMAP2_KEY = "H2"; + public static final String HAPMAP3_KEY = "H3"; public static final String RMS_MAPPING_QUALITY_KEY = "MQ"; public static final String SAMPLE_NUMBER_KEY = "NS"; public static final String STRAND_BIAS_KEY = "SB"; @@ -323,6 +324,10 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { return getType() == VARIANT_TYPE.SNP; } + public boolean isNovel() { + return ( mID != null || mInfoFields.get(HAPMAP2_KEY) != null || mInfoFields.get(HAPMAP3_KEY) != null || mInfoFields.get(DBSNP_KEY) != null); + } + public char getAlternativeBaseForSNP() { if ( !isSNP() && !isBiallelic() ) throw new IllegalStateException("This record does not represent a SNP");