diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceSet.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceSet.java index 7bfd72155..6a6ac9032 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceSet.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceSet.java @@ -11,6 +11,7 @@ import java.util.Set; * To change this template use File | Settings | File Templates. */ class MultiSampleConcordanceSet { + private boolean treatTruthOnlyAsFalseNegative; private int minimumDepthForTest; private HashSet concordanceSet; private Set cachedSampleNames; @@ -18,14 +19,17 @@ class MultiSampleConcordanceSet { private long truthOnlyVariantSites; private long variantOnlySites; private long overlappingSites; + private int genotypeQuality; - public MultiSampleConcordanceSet(int minDepth) { + public MultiSampleConcordanceSet(int minDepth, boolean assumeRef, int genotypeQuality) { concordanceSet = new HashSet(); truthOnlySites = 0l; truthOnlyVariantSites = 0l; variantOnlySites = 0l; overlappingSites = 0l; minimumDepthForTest = minDepth; + treatTruthOnlyAsFalseNegative = assumeRef; + this.genotypeQuality = genotypeQuality; } public boolean hasBeenInstantiated() { @@ -35,7 +39,7 @@ class MultiSampleConcordanceSet { public void instantiate(Set samples) { cachedSampleNames = samples; for ( String s : samples ) { - concordanceSet.add(new VCFConcordanceCalculator(s,minimumDepthForTest)); + concordanceSet.add(new VCFConcordanceCalculator(s,minimumDepthForTest,genotypeQuality)); } } @@ -49,6 +53,11 @@ class MultiSampleConcordanceSet { truthOnlySites++; if ( info.isVariantSite() ) { truthOnlyVariantSites++; + if ( treatTruthOnlyAsFalseNegative ) { + for ( VCFConcordanceCalculator concordance : concordanceSet ) { + concordance.updateTruthOnly(info); + } + } } } else { variantOnlySites++; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java index 4e6588805..5922055cb 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java @@ -28,15 +28,20 @@ import org.broadinstitute.sting.utils.cmdLine.Argument; public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInfo, MultiSampleConcordanceSet > { @Argument(fullName="noLowDepthLoci", shortName="NLD", doc="Do not use loci in analysis where the variant depth (as specified in the VCF) is less than the given number; "+ "DO NOT USE THIS IF YOUR VCF DOES NOT HAVE 'DP' IN THE FORMAT FIELD", required=false) private int minDepth = -1; + @Argument(fullName="genotypeConfidence", shortName="GC", doc="The quality score for genotypes below which to count genotyping as a no-call", required=false) + int genotypeQuality = 0; @Argument(fullName = "ignoreKnownSites", shortName = "novel", doc="Only run concordance over novel sites (sites marked in the VCF as being in dbSNP or Hapmap 2 or 3)", required=false ) boolean ignoreKnownSites = false; + @Argument(fullName="missingLocusAsConfidentRef", shortName="assumeRef", doc="Assume a missing locus in the variant VCF is a confident ref call with sufficient depth"+ + "across all samples. Default: Missing locus = no call", required=false) + boolean assumeRef = false; public void initialize() { } public MultiSampleConcordanceSet reduceInit() { - return new MultiSampleConcordanceSet(minDepth); + return new MultiSampleConcordanceSet(minDepth,assumeRef,genotypeQuality); } public LocusConcordanceInfo map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext c) { @@ -97,7 +102,7 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf } public void onTraversalDone(MultiSampleConcordanceSet cSet) { - out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Sample_ID","Ignored_due_to_depth","Concordant_Refs","Concordant_Homs","Concordant_Hets","Homs_called_het","Het_called_homs","False_Positives","False_Negatives_Due_To_Ref_Call","False_Negatives_Due_To_No_Call"); + out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Sample_ID","Ignored_due_to_depth","Concordant_Refs","Concordant_Homs","Concordant_Hets","Correct_But_Low_Genotype_Qual","Homs_called_het","Het_called_homs","False_Positives","False_Negatives_Due_To_Ref_Call","False_Negatives_Due_To_No_Call"); for ( VCFConcordanceCalculator sample : cSet.getConcordanceSet() ) { out.print(String.format("%s%n",sample)); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/VCFConcordanceCalculator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/VCFConcordanceCalculator.java index 5f9d401bb..e5c0ae3a4 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/VCFConcordanceCalculator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/VCFConcordanceCalculator.java @@ -1,8 +1,10 @@ package org.broadinstitute.sting.oneoffprojects.walkers.varianteval.multisample; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding; import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; +import java.util.Arrays; import java.util.HashSet; import java.util.Set; @@ -14,40 +16,56 @@ import java.util.Set; * To change this template use File | Settings | File Templates. */ class VCFConcordanceCalculator { + private int minimumDepthForUpdate; + private int minimumGenotypeQuality; private String name; private Set falsePositiveLoci; private Set falseNegativeLoci; private Set falseNegativeLociDueToNoCall; private Set hetsCalledHoms; private Set homsCalledHets; + private Set nonConfidentGenotypeCalls; private Set concordantHomCalls; private Set concordantHetCalls; private Set concordantGenotypeReferenceCalls; private Set chipNoCalls; private Set ignoredDueToDepth; - public VCFConcordanceCalculator(String sampleName, int minimumDepth) { + public VCFConcordanceCalculator(String sampleName, int minimumDepth, int minGenQual) { name = sampleName; falseNegativeLoci = new HashSet(); falseNegativeLociDueToNoCall = new HashSet(); falsePositiveLoci = new HashSet(); hetsCalledHoms = new HashSet(); homsCalledHets = new HashSet(); + nonConfidentGenotypeCalls = new HashSet(); concordantHomCalls = new HashSet(); concordantHetCalls = new HashSet(); concordantGenotypeReferenceCalls = new HashSet(); chipNoCalls = new HashSet(); ignoredDueToDepth = new HashSet(); minimumDepthForUpdate = minimumDepth; + minimumGenotypeQuality = minGenQual; } public void update(LocusConcordanceInfo info) { compareGenotypes(info.getTruthGenotype(name), info.getVariantGenotype(name), info.getLoc(), info.getReferenceBase() ); } + public void updateTruthOnly(LocusConcordanceInfo info) { + if ( info.getTruthGenotype(name).isVariant( (char) info.getReferenceBase() ) ) { + falseNegativeLoci.add(info.getLoc()); + } else { + concordantGenotypeReferenceCalls.add(info.getLoc()); + } + } + public String toString() { - return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,ignoredDueToDepth.size(),concordantGenotypeReferenceCalls.size(),concordantHomCalls.size(),concordantHetCalls.size(),homsCalledHets.size(),hetsCalledHoms.size(),falsePositiveLoci.size(),falseNegativeLoci.size(),falseNegativeLociDueToNoCall.size()); + return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,ignoredDueToDepth.size(), + concordantGenotypeReferenceCalls.size(),concordantHomCalls.size(),concordantHetCalls.size(),nonConfidentGenotypeCalls.size(), + homsCalledHets.size(),hetsCalledHoms.size(),falsePositiveLoci.size(),falseNegativeLoci.size(), + falseNegativeLociDueToNoCall.size()); } private void compareGenotypes(VCFGenotypeRecord truth, VCFGenotypeRecord call, GenomeLoc loc, byte ref) { @@ -78,14 +96,21 @@ class VCFConcordanceCalculator { } private void checkGenotypeCall( VCFGenotypeRecord truth, VCFGenotypeRecord call, GenomeLoc loc ) { - if ( truth.isHet() && call.isHom() ) { - hetsCalledHoms.add(loc); - } else if ( truth.isHom() && call.isHet() ) { - homsCalledHets.add(loc); - } else if ( ( truth.isHet() && call.isHet() ) ) { - concordantHetCalls.add(loc); - } else if ( truth.isHom() && call.isHom() ) { // be extra careful - concordantHomCalls.add(loc); + if ( ! call.isFiltered() && 10*call.getNegLog10PError() > minimumGenotypeQuality) { + + if ( truth.isHet() && call.isHom() ) { + hetsCalledHoms.add(loc); + } else if ( truth.isHom() && call.isHet() ) { + homsCalledHets.add(loc); + } else if ( ( truth.isHet() && call.isHet() ) ) { + concordantHetCalls.add(loc); + } else if ( truth.isHom() && call.isHom() ) { // be extra careful + concordantHomCalls.add(loc); + } + + } else { + + nonConfidentGenotypeCalls.add(loc); } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/SimpleVCFIntersectWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/SimpleVCFIntersectWalker.java new file mode 100644 index 000000000..d398de5eb --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/SimpleVCFIntersectWalker.java @@ -0,0 +1,42 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.vcftools; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; +import java.lang.Long; + +/** + * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl + * + * @Author chartl + * @Date Jan 29, 2010 + */ +public abstract class SimpleVCFIntersectWalker extends RodWalker{ + + public void initialize() { + + } + + public Long reduceInit() { + return 0l; + } + + public VCFRecordPair map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + return null; + } + + public Long reduce(VCFRecordPair records, long prevReduce) { + return 0l; + } + + public void onTraversalDone(long finalReduce) { + return; + } +} + +class VCFRecordPair { + public VCFRecord rec1; + public VCFRecord rec2; +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java index 02c55858c..814b02542 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java @@ -193,6 +193,10 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked { return true; } + public boolean isFiltered() { + return ( mFields.get(FILTER_KEY) != null && ! mFields.get(FILTER_KEY).equals("0")); + } + public int getPloidy() { return 2; }