Added the ability to filter out variant (not truth) calls based on read depth. Using -NLD 5 will not update concordant counts for calls with 0, 1, 2, 3, or 4 reads supporting them. Not to be used with VCF files that do not have DP in the format field.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2716 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1b9184a1c7
commit
23fc9737b4
|
|
@ -11,6 +11,7 @@ import java.util.Set;
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
class MultiSampleConcordanceSet {
|
class MultiSampleConcordanceSet {
|
||||||
|
private int minimumDepthForTest;
|
||||||
private HashSet<VCFConcordanceCalculator> concordanceSet;
|
private HashSet<VCFConcordanceCalculator> concordanceSet;
|
||||||
private Set<String> cachedSampleNames;
|
private Set<String> cachedSampleNames;
|
||||||
private long truthOnlySites;
|
private long truthOnlySites;
|
||||||
|
|
@ -18,12 +19,13 @@ class MultiSampleConcordanceSet {
|
||||||
private long variantOnlySites;
|
private long variantOnlySites;
|
||||||
private long overlappingSites;
|
private long overlappingSites;
|
||||||
|
|
||||||
public MultiSampleConcordanceSet() {
|
public MultiSampleConcordanceSet(int minDepth) {
|
||||||
concordanceSet = new HashSet<VCFConcordanceCalculator>();
|
concordanceSet = new HashSet<VCFConcordanceCalculator>();
|
||||||
truthOnlySites = 0l;
|
truthOnlySites = 0l;
|
||||||
truthOnlyVariantSites = 0l;
|
truthOnlyVariantSites = 0l;
|
||||||
variantOnlySites = 0l;
|
variantOnlySites = 0l;
|
||||||
overlappingSites = 0l;
|
overlappingSites = 0l;
|
||||||
|
minimumDepthForTest = minDepth;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasBeenInstantiated() {
|
public boolean hasBeenInstantiated() {
|
||||||
|
|
@ -33,7 +35,7 @@ class MultiSampleConcordanceSet {
|
||||||
public void instantiate(Set<String> samples) {
|
public void instantiate(Set<String> samples) {
|
||||||
cachedSampleNames = samples;
|
cachedSampleNames = samples;
|
||||||
for ( String s : samples ) {
|
for ( String s : samples ) {
|
||||||
concordanceSet.add(new VCFConcordanceCalculator(s));
|
concordanceSet.add(new VCFConcordanceCalculator(s,minimumDepthForTest));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RMD;
|
import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -25,13 +26,14 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
*/
|
*/
|
||||||
@Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="truth",type= RodVCF.class),@RMD(name="variants",type= RodVCF.class)})
|
@Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="truth",type= RodVCF.class),@RMD(name="variants",type= RodVCF.class)})
|
||||||
public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInfo, MultiSampleConcordanceSet > {
|
public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInfo, MultiSampleConcordanceSet > {
|
||||||
|
@Argument(fullName="noLowDepthLoci", shortName="NLD", doc="Do not use loci in analysis where the variant depth (as specified in the VCF) is less than the given number; "+
|
||||||
|
"DO NOT USE THIS IF YOUR VCF DOES NOT HAVE 'DP' IN THE FORMAT FIELD", required=false) private int minDepth = -1;
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public MultiSampleConcordanceSet reduceInit() {
|
public MultiSampleConcordanceSet reduceInit() {
|
||||||
return new MultiSampleConcordanceSet();
|
return new MultiSampleConcordanceSet(minDepth);
|
||||||
}
|
}
|
||||||
|
|
||||||
public LocusConcordanceInfo map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext c) {
|
public LocusConcordanceInfo map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext c) {
|
||||||
|
|
@ -92,8 +94,7 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
|
||||||
}
|
}
|
||||||
|
|
||||||
public void onTraversalDone(MultiSampleConcordanceSet cSet) {
|
public void onTraversalDone(MultiSampleConcordanceSet cSet) {
|
||||||
String[] header = {"Sample_ID","Concordant_Refs","Concordant_Vars","Homs_called_het","Het_called_homs","False_Positives","False_Negatives_Due_To_Ref_Call","False_Negatives_Due_To_No_Call"};
|
out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Sample_ID","Ignored_due_to_depth","Concordant_Refs","Concordant_Vars","Homs_called_het","Het_called_homs","False_Positives","False_Negatives_Due_To_Ref_Call","False_Negatives_Due_To_No_Call");
|
||||||
out.print(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n",header));
|
|
||||||
for ( VCFConcordanceCalculator sample : cSet.getConcordanceSet() ) {
|
for ( VCFConcordanceCalculator sample : cSet.getConcordanceSet() ) {
|
||||||
out.print(String.format("%s%n",sample));
|
out.print(String.format("%s%n",sample));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,7 @@ import java.util.Set;
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
class VCFConcordanceCalculator {
|
class VCFConcordanceCalculator {
|
||||||
|
private int minimumDepthForUpdate;
|
||||||
private String name;
|
private String name;
|
||||||
private Set<GenomeLoc> falsePositiveLoci;
|
private Set<GenomeLoc> falsePositiveLoci;
|
||||||
private Set<GenomeLoc> falseNegativeLoci;
|
private Set<GenomeLoc> falseNegativeLoci;
|
||||||
|
|
@ -23,8 +24,9 @@ class VCFConcordanceCalculator {
|
||||||
private Set<GenomeLoc> concordantCalls;
|
private Set<GenomeLoc> concordantCalls;
|
||||||
private Set<GenomeLoc> concordantGenotypeReferenceCalls;
|
private Set<GenomeLoc> concordantGenotypeReferenceCalls;
|
||||||
private Set<GenomeLoc> chipNoCalls;
|
private Set<GenomeLoc> chipNoCalls;
|
||||||
|
private Set<GenomeLoc> ignoredDueToDepth;
|
||||||
|
|
||||||
public VCFConcordanceCalculator(String sampleName) {
|
public VCFConcordanceCalculator(String sampleName, int minimumDepth) {
|
||||||
name = sampleName;
|
name = sampleName;
|
||||||
falseNegativeLoci = new HashSet<GenomeLoc>();
|
falseNegativeLoci = new HashSet<GenomeLoc>();
|
||||||
falseNegativeLociDueToNoCall = new HashSet<GenomeLoc>();
|
falseNegativeLociDueToNoCall = new HashSet<GenomeLoc>();
|
||||||
|
|
@ -34,6 +36,8 @@ class VCFConcordanceCalculator {
|
||||||
concordantCalls = new HashSet<GenomeLoc>();
|
concordantCalls = new HashSet<GenomeLoc>();
|
||||||
concordantGenotypeReferenceCalls = new HashSet<GenomeLoc>();
|
concordantGenotypeReferenceCalls = new HashSet<GenomeLoc>();
|
||||||
chipNoCalls = new HashSet<GenomeLoc>();
|
chipNoCalls = new HashSet<GenomeLoc>();
|
||||||
|
ignoredDueToDepth = new HashSet<GenomeLoc>();
|
||||||
|
minimumDepthForUpdate = minimumDepth;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void update(LocusConcordanceInfo info) {
|
public void update(LocusConcordanceInfo info) {
|
||||||
|
|
@ -41,10 +45,14 @@ class VCFConcordanceCalculator {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,concordantGenotypeReferenceCalls.size(),concordantCalls.size(),homsCalledHets.size(),hetsCalledHoms.size(),falsePositiveLoci.size(),falseNegativeLoci.size(),falseNegativeLociDueToNoCall.size());
|
return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,ignoredDueToDepth.size(),concordantGenotypeReferenceCalls.size(),concordantCalls.size(),homsCalledHets.size(),hetsCalledHoms.size(),falsePositiveLoci.size(),falseNegativeLoci.size(),falseNegativeLociDueToNoCall.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
private void compareGenotypes(VCFGenotypeRecord truth, VCFGenotypeRecord call, GenomeLoc loc, byte ref) {
|
private void compareGenotypes(VCFGenotypeRecord truth, VCFGenotypeRecord call, GenomeLoc loc, byte ref) {
|
||||||
|
if ( minimumDepthForUpdate > 0 && call.getReadCount() < minimumDepthForUpdate ) {
|
||||||
|
ignoredDueToDepth.add(loc);
|
||||||
|
return; // do not update; just go away
|
||||||
|
}
|
||||||
if ( truth.isNoCall() ) {
|
if ( truth.isNoCall() ) {
|
||||||
chipNoCalls.add(loc);
|
chipNoCalls.add(loc);
|
||||||
} else if ( truth.isVariant(( char) ref) ) {
|
} else if ( truth.isVariant(( char) ref) ) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue