Lots of changes; all to do something relatively minor.

1) Changed VCF/RodVCF to allow for inquiries to whether or not the site is novel; isNovel() looks at the ID field, and those members of the info field that indicate membership in dbsnp, hapmap2, or hapmap3; and if none can be found, returns true.

2) Changed VariantAnnotator to annotate hapmap2 and hapmap3, if you bind rods to it with those names. Works in the same way as DBSNP does -- if you give it a rod named "hapmap2" it'll annotate membership in it. -- Passes integration tests

3) Changed UnifiedGenotyper to do the same thing (since it uses Annotations as a subroutine) -- Passes integration tests

4) Changed MultiSampleConcordanceWalker to take a flag --ignoreKnownSites (or -novels) to examine concordance only on sites that are not marked as in dbSNP or in Hapmap in the variant VCF

5) Changed VCFConcordanceCalculator (the object MultiSampleConcordanceWalker runs on) to output Concordant_Het_Calls and Concordant_Hom_Calls separately, rather than combined as Concordant_Calls

6) AlleleBalanceHistogramWalker -- I don't know what i did to this thing. I've been jerry rigging System.outs to do stuff it was never really intended to do; so there's probably some dumb System.out.print("HI I AM AT LOCUS:"+loc) stuck somewhere. It compiles at any rate.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2724 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-01-28 21:06:56 +00:00
parent 6f11fe442a
commit 8de6a8d246
7 changed files with 92 additions and 22 deletions

View File

@ -126,6 +126,16 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
return mCurrentRecord.isSNP();
}
/**
* are we a novel site? Is there a DBSNP identifier
* or a hapmap entry for the site?
*/
public boolean isNovel() {
assertNotNull();
return mCurrentRecord.isNovel();
}
/**
* are we an insertion?
*

View File

@ -21,6 +21,7 @@ import java.io.*;
//@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=VariationRod.class))
@Allows(value={DataSource.READS, DataSource.REFERENCE})
@Reference(window=@Window(start=-20,stop=20))
@By(DataSource.REFERENCE)
public class VariantAnnotator extends LocusWalker<Integer, Integer> {
@Argument(fullName="vcfOutput", shortName="vcf", doc="VCF file to which all variants should be written with annotations", required=true)
protected File VCF_OUT;
@ -44,6 +45,10 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
// should we annotate dbsnp?
private boolean annotateDbsnp = false;
// how about hapmap2?
private boolean annotateHapmap2 = false;
// how about hapmap3?
private boolean annotateHapmap3 = false;
// mapping from class name to class
private static HashMap<String, VariantAnnotation> allAnnotations = null;
@ -125,7 +130,12 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
ReferenceOrderedData rod = source.getReferenceOrderedData();
if ( rod.getType().equals(rodDbSNP.class) ) {
annotateDbsnp = true;
break;
}
if ( rod.getName().equals("hapmap2") ) {
annotateHapmap2 = true;
}
if ( rod.getName().equals("hapmap3") ) {
annotateHapmap3 = true;
}
}
@ -137,6 +147,10 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
hInfo.addAll(getVCFAnnotationDescriptions(requestedAnnotations));
if ( annotateDbsnp )
hInfo.add(new VCFInfoHeaderLine(VCFRecord.DBSNP_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "dbSNP membership"));
if ( annotateHapmap2 )
hInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP2_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Hapmap 2 membership"));
if ( annotateHapmap3 )
hInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP3_KEY,1,VCFInfoHeaderLine.INFO_TYPE.Integer, "Hapmap 3 membership"));
vcfWriter = new VCFWriter(VCF_OUT);
vcfHeader = new VCFHeader(hInfo, samples);
@ -182,7 +196,7 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) {
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup());
if ( stratifiedContexts != null )
annotations = getAnnotations(tracker, ref, stratifiedContexts, variant, requestedAnnotations, annotateDbsnp);
annotations = getAnnotations(tracker, ref, stratifiedContexts, variant, requestedAnnotations, annotateDbsnp, annotateHapmap2, annotateHapmap3);
}
writeVCF(tracker, ref, context, variant, annotations);
@ -224,21 +238,21 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
}
// option #1: don't specify annotations to be used: standard annotations are used by default
public static Map<String, String> getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation, boolean annotateDbsnp) {
public static Map<String, String> getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation, boolean annotateDbsnp, boolean annotateHapmap2, boolean annotateHapmap3) {
if ( standardAnnotations == null )
determineAllAnnotations();
return getAnnotations(tracker, ref, stratifiedContexts, variation, standardAnnotations.values(), annotateDbsnp);
return getAnnotations(tracker, ref, stratifiedContexts, variation, standardAnnotations.values(), annotateDbsnp, annotateHapmap2, annotateHapmap3);
}
// option #2: specify that all possible annotations be used
public static Map<String, String> getAllAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation, boolean annotateDbsnp) {
public static Map<String, String> getAllAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation, boolean annotateDbsnp, boolean annotateHapmap2, boolean annotateHapmap3) {
if ( allAnnotations == null )
determineAllAnnotations();
return getAnnotations(tracker, ref, stratifiedContexts, variation, allAnnotations.values(), annotateDbsnp);
return getAnnotations(tracker, ref, stratifiedContexts, variation, allAnnotations.values(), annotateDbsnp, annotateHapmap2, annotateHapmap3);
}
// option #3: specify the exact annotations to be used
public static Map<String, String> getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation, Collection<VariantAnnotation> annotations, boolean annotateDbsnp) {
public static Map<String, String> getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation, Collection<VariantAnnotation> annotations, boolean annotateDbsnp, boolean annotateHapmap2, boolean annotateHapmap3) {
HashMap<String, String> results = new HashMap<String, String>();
@ -248,6 +262,16 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
results.put(VCFRecord.DBSNP_KEY, dbsnp == null ? "0" : "1");
}
if ( annotateHapmap2 ) {
RODRecordList hapmap2 = tracker.getTrackData("hapmap2",null);
results.put(VCFRecord.HAPMAP2_KEY, hapmap2 == null? "0" : "1");
}
if ( annotateHapmap3 ) {
RODRecordList hapmap3 = tracker.getTrackData("hapmap3",null);
results.put( VCFRecord.HAPMAP3_KEY, hapmap3 == null ? "0" : "1");
}
for ( VariantAnnotation annotator : annotations) {
String annot = annotator.annotate(tracker, ref, stratifiedContexts, variation);
if ( annot != null ) {

View File

@ -72,6 +72,10 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
// should we annotate dbsnp?
private boolean annotateDbsnp = false;
// how about hapmap2?
private boolean annotateHapmap2 = false;
// how about hapmap3?
private boolean annotateHapmap3 = false;
// Enable deletions in the pileup
public boolean includeReadsWithDeletionAtLoci() { return true; }
@ -175,7 +179,12 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
ReferenceOrderedData rod = source.getReferenceOrderedData();
if ( rod.getType().equals(rodDbSNP.class) ) {
annotateDbsnp = true;
break;
}
if ( rod.getName().equals("hapmap2") ) {
annotateHapmap2 = true;
}
if ( rod.getName().equals("hapmap3") ) {
annotateHapmap3 = true;
}
}
@ -298,9 +307,9 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
Map<String, String> annotations;
if ( UAC.ALL_ANNOTATIONS )
annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp);
annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp, annotateHapmap2, annotateHapmap3);
else
annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp);
annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp, annotateHapmap2, annotateHapmap3);
((ArbitraryFieldsBacked)call.variation).setFields(annotations);
}

View File

@ -68,15 +68,27 @@ public class AlleleBalanceHistogramWalker extends LocusWalker<Map<String,Double>
private HashMap<String,Double> getAlleleBalanceBySample(VCFRecord vcf, ReferenceContext ref, AlignmentContext context) {
Map<String, StratifiedAlignmentContext> sampleContext = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup(),null,null);
HashMap<String,Double> balances = new HashMap<String,Double>();
System.out.println("----- "+ref.getLocus()+" -----");
int returnedBalances = 0;
for ( String sample : vcf.getSampleNames() ) {
balances.put(sample, getAlleleBalance(ref,sampleContext.get(sample),vcf.getGenotype(sample)));
Double balance = getAlleleBalance(ref,sampleContext.get(sample),vcf.getAlternativeBaseForSNP());
balances.put(sample, balance);
if ( balance != null ) {
returnedBalances++;
System.out.println(sample+"\t"+getCoverage(sampleContext.get(sample)));
}
}
return balances;
}
private Double getAlleleBalance(ReferenceContext ref, StratifiedAlignmentContext context, VCFGenotypeRecord vcf) {
private long getCoverage(StratifiedAlignmentContext context) {
return context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size();
}
private Double getAlleleBalance(ReferenceContext ref, StratifiedAlignmentContext context, char snpBase) {
if ( context == null ) {
//System.out.println("Stratified context was null");
return null;
}
@ -85,13 +97,14 @@ public class AlleleBalanceHistogramWalker extends LocusWalker<Map<String,Double>
AlignmentContext alicon = context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
if ( alicon == null ) {
System.out.println("Alignment context from stratified was null");
return null;
}
for ( PileupElement e : alicon.getBasePileup() ) {
if ( BaseUtils.basesAreEqual( e.getBase(), (byte) ref.getBase() ) ) {
refBases++;
} else if ( BaseUtils.basesAreEqual(e.getBase(), (byte) vcf.toVariation(ref.getBase()).getAlternativeBaseForSNP() ) ) {
} else if ( BaseUtils.basesAreEqual(e.getBase(), (byte) snpBase ) ) {
altBases++;
}
}
@ -99,6 +112,7 @@ public class AlleleBalanceHistogramWalker extends LocusWalker<Map<String,Double>
if ( refBases > 0 || altBases > 0) {
return ( ( double ) altBases ) / ( ( double ) altBases + ( double ) refBases );
} else {
System.out.println("No ref or alt bases in pileup");
return null;
}
}

View File

@ -26,8 +26,11 @@ import org.broadinstitute.sting.utils.cmdLine.Argument;
*/
@Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="truth",type= RodVCF.class),@RMD(name="variants",type= RodVCF.class)})
public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInfo, MultiSampleConcordanceSet > {
@Argument(fullName="noLowDepthLoci", shortName="NLD", doc="Do not use loci in analysis where the variant depth (as specified in the VCF) is less than the given number; "+
"DO NOT USE THIS IF YOUR VCF DOES NOT HAVE 'DP' IN THE FORMAT FIELD", required=false) private int minDepth = -1;
@Argument(fullName="noLowDepthLoci", shortName="NLD", doc="Do not use loci in analysis where the variant depth (as specified in the VCF) is less than the given number; "+
"DO NOT USE THIS IF YOUR VCF DOES NOT HAVE 'DP' IN THE FORMAT FIELD", required=false) private int minDepth = -1;
@Argument(fullName = "ignoreKnownSites", shortName = "novel", doc="Only run concordance over novel sites (sites marked in the VCF as being in dbSNP or Hapmap 2 or 3)", required=false )
boolean ignoreKnownSites = false;
public void initialize() {
}
@ -37,7 +40,7 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
}
public LocusConcordanceInfo map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext c) {
if ( tracker == null ) {
if ( tracker == null || ( ignoreKnownSites && ! ( (RodVCF) tracker.lookup("variants",null)).isNovel() ) ) {
return null;
}
ReferenceOrderedDatum truthData = tracker.lookup("truth", null);
@ -94,7 +97,7 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
}
public void onTraversalDone(MultiSampleConcordanceSet cSet) {
out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Sample_ID","Ignored_due_to_depth","Concordant_Refs","Concordant_Vars","Homs_called_het","Het_called_homs","False_Positives","False_Negatives_Due_To_Ref_Call","False_Negatives_Due_To_No_Call");
out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Sample_ID","Ignored_due_to_depth","Concordant_Refs","Concordant_Homs","Concordant_Hets","Homs_called_het","Het_called_homs","False_Positives","False_Negatives_Due_To_Ref_Call","False_Negatives_Due_To_No_Call");
for ( VCFConcordanceCalculator sample : cSet.getConcordanceSet() ) {
out.print(String.format("%s%n",sample));
}

View File

@ -21,7 +21,8 @@ class VCFConcordanceCalculator {
private Set<GenomeLoc> falseNegativeLociDueToNoCall;
private Set<GenomeLoc> hetsCalledHoms;
private Set<GenomeLoc> homsCalledHets;
private Set<GenomeLoc> concordantCalls;
private Set<GenomeLoc> concordantHomCalls;
private Set<GenomeLoc> concordantHetCalls;
private Set<GenomeLoc> concordantGenotypeReferenceCalls;
private Set<GenomeLoc> chipNoCalls;
private Set<GenomeLoc> ignoredDueToDepth;
@ -33,7 +34,8 @@ class VCFConcordanceCalculator {
falsePositiveLoci = new HashSet<GenomeLoc>();
hetsCalledHoms = new HashSet<GenomeLoc>();
homsCalledHets = new HashSet<GenomeLoc>();
concordantCalls = new HashSet<GenomeLoc>();
concordantHomCalls = new HashSet<GenomeLoc>();
concordantHetCalls = new HashSet<GenomeLoc>();
concordantGenotypeReferenceCalls = new HashSet<GenomeLoc>();
chipNoCalls = new HashSet<GenomeLoc>();
ignoredDueToDepth = new HashSet<GenomeLoc>();
@ -45,7 +47,7 @@ class VCFConcordanceCalculator {
}
public String toString() {
return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,ignoredDueToDepth.size(),concordantGenotypeReferenceCalls.size(),concordantCalls.size(),homsCalledHets.size(),hetsCalledHoms.size(),falsePositiveLoci.size(),falseNegativeLoci.size(),falseNegativeLociDueToNoCall.size());
return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,ignoredDueToDepth.size(),concordantGenotypeReferenceCalls.size(),concordantHomCalls.size(),concordantHetCalls.size(),homsCalledHets.size(),hetsCalledHoms.size(),falsePositiveLoci.size(),falseNegativeLoci.size(),falseNegativeLociDueToNoCall.size());
}
private void compareGenotypes(VCFGenotypeRecord truth, VCFGenotypeRecord call, GenomeLoc loc, byte ref) {
@ -80,8 +82,11 @@ class VCFConcordanceCalculator {
hetsCalledHoms.add(loc);
} else if ( truth.isHom() && call.isHet() ) {
homsCalledHets.add(loc);
} else if ( ( truth.isHet() && call.isHet() ) || ( truth.isHom() && call.isHom() ) ) { // be extra careful
concordantCalls.add(loc);
} else if ( ( truth.isHet() && call.isHet() ) ) {
concordantHetCalls.add(loc);
} else if ( truth.isHom() && call.isHom() ) { // be extra careful
concordantHomCalls.add(loc);
}
}
}

View File

@ -20,6 +20,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
public static final String DBSNP_KEY = "DB";
public static final String DEPTH_KEY = "DP";
public static final String HAPMAP2_KEY = "H2";
public static final String HAPMAP3_KEY = "H3";
public static final String RMS_MAPPING_QUALITY_KEY = "MQ";
public static final String SAMPLE_NUMBER_KEY = "NS";
public static final String STRAND_BIAS_KEY = "SB";
@ -323,6 +324,10 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
return getType() == VARIANT_TYPE.SNP;
}
public boolean isNovel() {
return ( mID != null || mInfoFields.get(HAPMAP2_KEY) != null || mInfoFields.get(HAPMAP3_KEY) != null || mInfoFields.get(DBSNP_KEY) != null);
}
public char getAlternativeBaseForSNP() {
if ( !isSNP() && !isBiallelic() )
throw new IllegalStateException("This record does not represent a SNP");