Lots of changes; all to do something relatively minor.
1) Changed VCF/RodVCF to allow for inquiries to whether or not the site is novel; isNovel() looks at the ID field, and those members of the info field that indicate membership in dbsnp, hapmap2, or hapmap3; and if none can be found, returns true.
2) Changed VariantAnnotator to annotate hapmap2 and hapmap3, if you bind rods to it with those names. Works in the same way as DBSNP does -- if you give it a rod named "hapmap2" it'll annotate membership in it. -- Passes integration tests
3) Changed UnifiedGenotyper to do the same thing (since it uses Annotations as a subroutine) -- Passes integration tests
4) Changed MultiSampleConcordanceWalker to take a flag --ignoreKnownSites (or -novels) to examine concordance only on sites that are not marked as in dbSNP or in Hapmap in the variant VCF
5) Changed VCFConcordanceCalculator (the object MultiSampleConcordanceWalker runs on) to output Concordant_Het_Calls and Concordant_Hom_Calls separately, rather than combined as Concordant_Calls
6) AlleleBalanceHistogramWalker -- I don't know what i did to this thing. I've been jerry rigging System.outs to do stuff it was never really intended to do; so there's probably some dumb System.out.print("HI I AM AT LOCUS:"+loc) stuck somewhere. It compiles at any rate.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2724 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
6f11fe442a
commit
8de6a8d246
|
|
@ -126,6 +126,16 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
|||
return mCurrentRecord.isSNP();
|
||||
}
|
||||
|
||||
/**
|
||||
* are we a novel site? Is there a DBSNP identifier
|
||||
* or a hapmap entry for the site?
|
||||
*/
|
||||
|
||||
public boolean isNovel() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.isNovel();
|
||||
}
|
||||
|
||||
/**
|
||||
* are we an insertion?
|
||||
*
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ import java.io.*;
|
|||
//@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=VariationRod.class))
|
||||
@Allows(value={DataSource.READS, DataSource.REFERENCE})
|
||||
@Reference(window=@Window(start=-20,stop=20))
|
||||
@By(DataSource.REFERENCE)
|
||||
public class VariantAnnotator extends LocusWalker<Integer, Integer> {
|
||||
@Argument(fullName="vcfOutput", shortName="vcf", doc="VCF file to which all variants should be written with annotations", required=true)
|
||||
protected File VCF_OUT;
|
||||
|
|
@ -44,6 +45,10 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
|
|||
|
||||
// should we annotate dbsnp?
|
||||
private boolean annotateDbsnp = false;
|
||||
// how about hapmap2?
|
||||
private boolean annotateHapmap2 = false;
|
||||
// how about hapmap3?
|
||||
private boolean annotateHapmap3 = false;
|
||||
|
||||
// mapping from class name to class
|
||||
private static HashMap<String, VariantAnnotation> allAnnotations = null;
|
||||
|
|
@ -125,7 +130,12 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
|
|||
ReferenceOrderedData rod = source.getReferenceOrderedData();
|
||||
if ( rod.getType().equals(rodDbSNP.class) ) {
|
||||
annotateDbsnp = true;
|
||||
break;
|
||||
}
|
||||
if ( rod.getName().equals("hapmap2") ) {
|
||||
annotateHapmap2 = true;
|
||||
}
|
||||
if ( rod.getName().equals("hapmap3") ) {
|
||||
annotateHapmap3 = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -137,6 +147,10 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
|
|||
hInfo.addAll(getVCFAnnotationDescriptions(requestedAnnotations));
|
||||
if ( annotateDbsnp )
|
||||
hInfo.add(new VCFInfoHeaderLine(VCFRecord.DBSNP_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "dbSNP membership"));
|
||||
if ( annotateHapmap2 )
|
||||
hInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP2_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Hapmap 2 membership"));
|
||||
if ( annotateHapmap3 )
|
||||
hInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP3_KEY,1,VCFInfoHeaderLine.INFO_TYPE.Integer, "Hapmap 3 membership"));
|
||||
|
||||
vcfWriter = new VCFWriter(VCF_OUT);
|
||||
vcfHeader = new VCFHeader(hInfo, samples);
|
||||
|
|
@ -182,7 +196,7 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
|
|||
if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) {
|
||||
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup());
|
||||
if ( stratifiedContexts != null )
|
||||
annotations = getAnnotations(tracker, ref, stratifiedContexts, variant, requestedAnnotations, annotateDbsnp);
|
||||
annotations = getAnnotations(tracker, ref, stratifiedContexts, variant, requestedAnnotations, annotateDbsnp, annotateHapmap2, annotateHapmap3);
|
||||
}
|
||||
writeVCF(tracker, ref, context, variant, annotations);
|
||||
|
||||
|
|
@ -224,21 +238,21 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
// option #1: don't specify annotations to be used: standard annotations are used by default
|
||||
public static Map<String, String> getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation, boolean annotateDbsnp) {
|
||||
public static Map<String, String> getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation, boolean annotateDbsnp, boolean annotateHapmap2, boolean annotateHapmap3) {
|
||||
if ( standardAnnotations == null )
|
||||
determineAllAnnotations();
|
||||
return getAnnotations(tracker, ref, stratifiedContexts, variation, standardAnnotations.values(), annotateDbsnp);
|
||||
return getAnnotations(tracker, ref, stratifiedContexts, variation, standardAnnotations.values(), annotateDbsnp, annotateHapmap2, annotateHapmap3);
|
||||
}
|
||||
|
||||
// option #2: specify that all possible annotations be used
|
||||
public static Map<String, String> getAllAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation, boolean annotateDbsnp) {
|
||||
public static Map<String, String> getAllAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation, boolean annotateDbsnp, boolean annotateHapmap2, boolean annotateHapmap3) {
|
||||
if ( allAnnotations == null )
|
||||
determineAllAnnotations();
|
||||
return getAnnotations(tracker, ref, stratifiedContexts, variation, allAnnotations.values(), annotateDbsnp);
|
||||
return getAnnotations(tracker, ref, stratifiedContexts, variation, allAnnotations.values(), annotateDbsnp, annotateHapmap2, annotateHapmap3);
|
||||
}
|
||||
|
||||
// option #3: specify the exact annotations to be used
|
||||
public static Map<String, String> getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation, Collection<VariantAnnotation> annotations, boolean annotateDbsnp) {
|
||||
public static Map<String, String> getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation, Collection<VariantAnnotation> annotations, boolean annotateDbsnp, boolean annotateHapmap2, boolean annotateHapmap3) {
|
||||
|
||||
HashMap<String, String> results = new HashMap<String, String>();
|
||||
|
||||
|
|
@ -248,6 +262,16 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
|
|||
results.put(VCFRecord.DBSNP_KEY, dbsnp == null ? "0" : "1");
|
||||
}
|
||||
|
||||
if ( annotateHapmap2 ) {
|
||||
RODRecordList hapmap2 = tracker.getTrackData("hapmap2",null);
|
||||
results.put(VCFRecord.HAPMAP2_KEY, hapmap2 == null? "0" : "1");
|
||||
}
|
||||
|
||||
if ( annotateHapmap3 ) {
|
||||
RODRecordList hapmap3 = tracker.getTrackData("hapmap3",null);
|
||||
results.put( VCFRecord.HAPMAP3_KEY, hapmap3 == null ? "0" : "1");
|
||||
}
|
||||
|
||||
for ( VariantAnnotation annotator : annotations) {
|
||||
String annot = annotator.annotate(tracker, ref, stratifiedContexts, variation);
|
||||
if ( annot != null ) {
|
||||
|
|
|
|||
|
|
@ -72,6 +72,10 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
|
||||
// should we annotate dbsnp?
|
||||
private boolean annotateDbsnp = false;
|
||||
// how about hapmap2?
|
||||
private boolean annotateHapmap2 = false;
|
||||
// how about hapmap3?
|
||||
private boolean annotateHapmap3 = false;
|
||||
|
||||
// Enable deletions in the pileup
|
||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||
|
|
@ -175,7 +179,12 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
ReferenceOrderedData rod = source.getReferenceOrderedData();
|
||||
if ( rod.getType().equals(rodDbSNP.class) ) {
|
||||
annotateDbsnp = true;
|
||||
break;
|
||||
}
|
||||
if ( rod.getName().equals("hapmap2") ) {
|
||||
annotateHapmap2 = true;
|
||||
}
|
||||
if ( rod.getName().equals("hapmap3") ) {
|
||||
annotateHapmap3 = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -298,9 +307,9 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
|
||||
Map<String, String> annotations;
|
||||
if ( UAC.ALL_ANNOTATIONS )
|
||||
annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp);
|
||||
annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp, annotateHapmap2, annotateHapmap3);
|
||||
else
|
||||
annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp);
|
||||
annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp, annotateHapmap2, annotateHapmap3);
|
||||
((ArbitraryFieldsBacked)call.variation).setFields(annotations);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -68,15 +68,27 @@ public class AlleleBalanceHistogramWalker extends LocusWalker<Map<String,Double>
|
|||
private HashMap<String,Double> getAlleleBalanceBySample(VCFRecord vcf, ReferenceContext ref, AlignmentContext context) {
|
||||
Map<String, StratifiedAlignmentContext> sampleContext = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup(),null,null);
|
||||
HashMap<String,Double> balances = new HashMap<String,Double>();
|
||||
System.out.println("----- "+ref.getLocus()+" -----");
|
||||
int returnedBalances = 0;
|
||||
for ( String sample : vcf.getSampleNames() ) {
|
||||
balances.put(sample, getAlleleBalance(ref,sampleContext.get(sample),vcf.getGenotype(sample)));
|
||||
Double balance = getAlleleBalance(ref,sampleContext.get(sample),vcf.getAlternativeBaseForSNP());
|
||||
balances.put(sample, balance);
|
||||
if ( balance != null ) {
|
||||
returnedBalances++;
|
||||
System.out.println(sample+"\t"+getCoverage(sampleContext.get(sample)));
|
||||
}
|
||||
}
|
||||
|
||||
return balances;
|
||||
}
|
||||
|
||||
private Double getAlleleBalance(ReferenceContext ref, StratifiedAlignmentContext context, VCFGenotypeRecord vcf) {
|
||||
private long getCoverage(StratifiedAlignmentContext context) {
|
||||
return context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size();
|
||||
}
|
||||
|
||||
private Double getAlleleBalance(ReferenceContext ref, StratifiedAlignmentContext context, char snpBase) {
|
||||
if ( context == null ) {
|
||||
//System.out.println("Stratified context was null");
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
@ -85,13 +97,14 @@ public class AlleleBalanceHistogramWalker extends LocusWalker<Map<String,Double>
|
|||
AlignmentContext alicon = context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
|
||||
|
||||
if ( alicon == null ) {
|
||||
System.out.println("Alignment context from stratified was null");
|
||||
return null;
|
||||
}
|
||||
|
||||
for ( PileupElement e : alicon.getBasePileup() ) {
|
||||
if ( BaseUtils.basesAreEqual( e.getBase(), (byte) ref.getBase() ) ) {
|
||||
refBases++;
|
||||
} else if ( BaseUtils.basesAreEqual(e.getBase(), (byte) vcf.toVariation(ref.getBase()).getAlternativeBaseForSNP() ) ) {
|
||||
} else if ( BaseUtils.basesAreEqual(e.getBase(), (byte) snpBase ) ) {
|
||||
altBases++;
|
||||
}
|
||||
}
|
||||
|
|
@ -99,6 +112,7 @@ public class AlleleBalanceHistogramWalker extends LocusWalker<Map<String,Double>
|
|||
if ( refBases > 0 || altBases > 0) {
|
||||
return ( ( double ) altBases ) / ( ( double ) altBases + ( double ) refBases );
|
||||
} else {
|
||||
System.out.println("No ref or alt bases in pileup");
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,8 +26,11 @@ import org.broadinstitute.sting.utils.cmdLine.Argument;
|
|||
*/
|
||||
@Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="truth",type= RodVCF.class),@RMD(name="variants",type= RodVCF.class)})
|
||||
public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInfo, MultiSampleConcordanceSet > {
|
||||
@Argument(fullName="noLowDepthLoci", shortName="NLD", doc="Do not use loci in analysis where the variant depth (as specified in the VCF) is less than the given number; "+
|
||||
"DO NOT USE THIS IF YOUR VCF DOES NOT HAVE 'DP' IN THE FORMAT FIELD", required=false) private int minDepth = -1;
|
||||
@Argument(fullName="noLowDepthLoci", shortName="NLD", doc="Do not use loci in analysis where the variant depth (as specified in the VCF) is less than the given number; "+
|
||||
"DO NOT USE THIS IF YOUR VCF DOES NOT HAVE 'DP' IN THE FORMAT FIELD", required=false) private int minDepth = -1;
|
||||
@Argument(fullName = "ignoreKnownSites", shortName = "novel", doc="Only run concordance over novel sites (sites marked in the VCF as being in dbSNP or Hapmap 2 or 3)", required=false )
|
||||
boolean ignoreKnownSites = false;
|
||||
|
||||
public void initialize() {
|
||||
|
||||
}
|
||||
|
|
@ -37,7 +40,7 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
|
|||
}
|
||||
|
||||
public LocusConcordanceInfo map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext c) {
|
||||
if ( tracker == null ) {
|
||||
if ( tracker == null || ( ignoreKnownSites && ! ( (RodVCF) tracker.lookup("variants",null)).isNovel() ) ) {
|
||||
return null;
|
||||
}
|
||||
ReferenceOrderedDatum truthData = tracker.lookup("truth", null);
|
||||
|
|
@ -94,7 +97,7 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
|
|||
}
|
||||
|
||||
public void onTraversalDone(MultiSampleConcordanceSet cSet) {
|
||||
out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Sample_ID","Ignored_due_to_depth","Concordant_Refs","Concordant_Vars","Homs_called_het","Het_called_homs","False_Positives","False_Negatives_Due_To_Ref_Call","False_Negatives_Due_To_No_Call");
|
||||
out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Sample_ID","Ignored_due_to_depth","Concordant_Refs","Concordant_Homs","Concordant_Hets","Homs_called_het","Het_called_homs","False_Positives","False_Negatives_Due_To_Ref_Call","False_Negatives_Due_To_No_Call");
|
||||
for ( VCFConcordanceCalculator sample : cSet.getConcordanceSet() ) {
|
||||
out.print(String.format("%s%n",sample));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,7 +21,8 @@ class VCFConcordanceCalculator {
|
|||
private Set<GenomeLoc> falseNegativeLociDueToNoCall;
|
||||
private Set<GenomeLoc> hetsCalledHoms;
|
||||
private Set<GenomeLoc> homsCalledHets;
|
||||
private Set<GenomeLoc> concordantCalls;
|
||||
private Set<GenomeLoc> concordantHomCalls;
|
||||
private Set<GenomeLoc> concordantHetCalls;
|
||||
private Set<GenomeLoc> concordantGenotypeReferenceCalls;
|
||||
private Set<GenomeLoc> chipNoCalls;
|
||||
private Set<GenomeLoc> ignoredDueToDepth;
|
||||
|
|
@ -33,7 +34,8 @@ class VCFConcordanceCalculator {
|
|||
falsePositiveLoci = new HashSet<GenomeLoc>();
|
||||
hetsCalledHoms = new HashSet<GenomeLoc>();
|
||||
homsCalledHets = new HashSet<GenomeLoc>();
|
||||
concordantCalls = new HashSet<GenomeLoc>();
|
||||
concordantHomCalls = new HashSet<GenomeLoc>();
|
||||
concordantHetCalls = new HashSet<GenomeLoc>();
|
||||
concordantGenotypeReferenceCalls = new HashSet<GenomeLoc>();
|
||||
chipNoCalls = new HashSet<GenomeLoc>();
|
||||
ignoredDueToDepth = new HashSet<GenomeLoc>();
|
||||
|
|
@ -45,7 +47,7 @@ class VCFConcordanceCalculator {
|
|||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,ignoredDueToDepth.size(),concordantGenotypeReferenceCalls.size(),concordantCalls.size(),homsCalledHets.size(),hetsCalledHoms.size(),falsePositiveLoci.size(),falseNegativeLoci.size(),falseNegativeLociDueToNoCall.size());
|
||||
return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,ignoredDueToDepth.size(),concordantGenotypeReferenceCalls.size(),concordantHomCalls.size(),concordantHetCalls.size(),homsCalledHets.size(),hetsCalledHoms.size(),falsePositiveLoci.size(),falseNegativeLoci.size(),falseNegativeLociDueToNoCall.size());
|
||||
}
|
||||
|
||||
private void compareGenotypes(VCFGenotypeRecord truth, VCFGenotypeRecord call, GenomeLoc loc, byte ref) {
|
||||
|
|
@ -80,8 +82,11 @@ class VCFConcordanceCalculator {
|
|||
hetsCalledHoms.add(loc);
|
||||
} else if ( truth.isHom() && call.isHet() ) {
|
||||
homsCalledHets.add(loc);
|
||||
} else if ( ( truth.isHet() && call.isHet() ) || ( truth.isHom() && call.isHom() ) ) { // be extra careful
|
||||
concordantCalls.add(loc);
|
||||
} else if ( ( truth.isHet() && call.isHet() ) ) {
|
||||
concordantHetCalls.add(loc);
|
||||
} else if ( truth.isHom() && call.isHom() ) { // be extra careful
|
||||
concordantHomCalls.add(loc);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
|
|||
public static final String DBSNP_KEY = "DB";
|
||||
public static final String DEPTH_KEY = "DP";
|
||||
public static final String HAPMAP2_KEY = "H2";
|
||||
public static final String HAPMAP3_KEY = "H3";
|
||||
public static final String RMS_MAPPING_QUALITY_KEY = "MQ";
|
||||
public static final String SAMPLE_NUMBER_KEY = "NS";
|
||||
public static final String STRAND_BIAS_KEY = "SB";
|
||||
|
|
@ -323,6 +324,10 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
|
|||
return getType() == VARIANT_TYPE.SNP;
|
||||
}
|
||||
|
||||
public boolean isNovel() {
|
||||
return ( mID != null || mInfoFields.get(HAPMAP2_KEY) != null || mInfoFields.get(HAPMAP3_KEY) != null || mInfoFields.get(DBSNP_KEY) != null);
|
||||
}
|
||||
|
||||
public char getAlternativeBaseForSNP() {
|
||||
if ( !isSNP() && !isBiallelic() )
|
||||
throw new IllegalStateException("This record does not represent a SNP");
|
||||
|
|
|
|||
Loading…
Reference in New Issue