Added the ability to filter out variant (not truth) calls based on read depth. Using -NLD 5 will not update concordant counts for calls with 0, 1, 2, 3, or 4 reads supporting them. Not to be used with VCF files that do not have DP in the format field.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2716 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-01-27 23:28:04 +00:00
parent 1b9184a1c7
commit 23fc9737b4
3 changed files with 19 additions and 8 deletions

View File

@ -11,6 +11,7 @@ import java.util.Set;
* To change this template use File | Settings | File Templates.
*/
class MultiSampleConcordanceSet {
private int minimumDepthForTest;
private HashSet<VCFConcordanceCalculator> concordanceSet;
private Set<String> cachedSampleNames;
private long truthOnlySites;
@ -18,12 +19,13 @@ class MultiSampleConcordanceSet {
private long variantOnlySites;
private long overlappingSites;
public MultiSampleConcordanceSet() {
public MultiSampleConcordanceSet(int minDepth) {
concordanceSet = new HashSet<VCFConcordanceCalculator>();
truthOnlySites = 0l;
truthOnlyVariantSites = 0l;
variantOnlySites = 0l;
overlappingSites = 0l;
minimumDepthForTest = minDepth;
}
public boolean hasBeenInstantiated() {
@ -33,7 +35,7 @@ class MultiSampleConcordanceSet {
public void instantiate(Set<String> samples) {
cachedSampleNames = samples;
for ( String s : samples ) {
concordanceSet.add(new VCFConcordanceCalculator(s));
concordanceSet.add(new VCFConcordanceCalculator(s,minimumDepthForTest));
}
}

View File

@ -9,6 +9,7 @@ import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.RMD;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.cmdLine.Argument;
/**
* Created by IntelliJ IDEA.
@ -25,13 +26,14 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
*/
@Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="truth",type= RodVCF.class),@RMD(name="variants",type= RodVCF.class)})
public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInfo, MultiSampleConcordanceSet > {
@Argument(fullName="noLowDepthLoci", shortName="NLD", doc="Do not use loci in analysis where the variant depth (as specified in the VCF) is less than the given number; "+
"DO NOT USE THIS IF YOUR VCF DOES NOT HAVE 'DP' IN THE FORMAT FIELD", required=false) private int minDepth = -1;
public void initialize() {
}
public MultiSampleConcordanceSet reduceInit() {
return new MultiSampleConcordanceSet();
return new MultiSampleConcordanceSet(minDepth);
}
public LocusConcordanceInfo map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext c) {
@ -92,8 +94,7 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
}
public void onTraversalDone(MultiSampleConcordanceSet cSet) {
String[] header = {"Sample_ID","Concordant_Refs","Concordant_Vars","Homs_called_het","Het_called_homs","False_Positives","False_Negatives_Due_To_Ref_Call","False_Negatives_Due_To_No_Call"};
out.print(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n",header));
out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Sample_ID","Ignored_due_to_depth","Concordant_Refs","Concordant_Vars","Homs_called_het","Het_called_homs","False_Positives","False_Negatives_Due_To_Ref_Call","False_Negatives_Due_To_No_Call");
for ( VCFConcordanceCalculator sample : cSet.getConcordanceSet() ) {
out.print(String.format("%s%n",sample));
}

View File

@ -14,6 +14,7 @@ import java.util.Set;
* To change this template use File | Settings | File Templates.
*/
class VCFConcordanceCalculator {
private int minimumDepthForUpdate;
private String name;
private Set<GenomeLoc> falsePositiveLoci;
private Set<GenomeLoc> falseNegativeLoci;
@ -23,8 +24,9 @@ class VCFConcordanceCalculator {
private Set<GenomeLoc> concordantCalls;
private Set<GenomeLoc> concordantGenotypeReferenceCalls;
private Set<GenomeLoc> chipNoCalls;
private Set<GenomeLoc> ignoredDueToDepth;
public VCFConcordanceCalculator(String sampleName) {
public VCFConcordanceCalculator(String sampleName, int minimumDepth) {
name = sampleName;
falseNegativeLoci = new HashSet<GenomeLoc>();
falseNegativeLociDueToNoCall = new HashSet<GenomeLoc>();
@ -34,6 +36,8 @@ class VCFConcordanceCalculator {
concordantCalls = new HashSet<GenomeLoc>();
concordantGenotypeReferenceCalls = new HashSet<GenomeLoc>();
chipNoCalls = new HashSet<GenomeLoc>();
ignoredDueToDepth = new HashSet<GenomeLoc>();
minimumDepthForUpdate = minimumDepth;
}
public void update(LocusConcordanceInfo info) {
@ -41,10 +45,14 @@ class VCFConcordanceCalculator {
}
public String toString() {
return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,concordantGenotypeReferenceCalls.size(),concordantCalls.size(),homsCalledHets.size(),hetsCalledHoms.size(),falsePositiveLoci.size(),falseNegativeLoci.size(),falseNegativeLociDueToNoCall.size());
return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,ignoredDueToDepth.size(),concordantGenotypeReferenceCalls.size(),concordantCalls.size(),homsCalledHets.size(),hetsCalledHoms.size(),falsePositiveLoci.size(),falseNegativeLoci.size(),falseNegativeLociDueToNoCall.size());
}
private void compareGenotypes(VCFGenotypeRecord truth, VCFGenotypeRecord call, GenomeLoc loc, byte ref) {
if ( minimumDepthForUpdate > 0 && call.getReadCount() < minimumDepthForUpdate ) {
ignoredDueToDepth.add(loc);
return; // do not update; just go away
}
if ( truth.isNoCall() ) {
chipNoCalls.add(loc);
} else if ( truth.isVariant(( char) ref) ) {