diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java index 98271b00b..9a6873537 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java @@ -309,7 +309,8 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, qual = refQual; else if (lst.get(0).getFields().containsKey("GQ")) qual = Double.valueOf(lst.get(0).getFields().get("GQ")) / 10.0; - return new VCFGenotypeCall(this.getReference().charAt(0), this.getLocation(), Utils.join("", lst.get(0).getAlleles()), qual, lst.get(0).getSampleName()); + int coverage = (lst.get(0).getFields().containsKey("RD") ? Integer.valueOf(lst.get(0).getFields().get("RD")) : 0); + return new VCFGenotypeCall(this.getReference().charAt(0), this.getLocation(), Utils.join("", lst.get(0).getAlleles()), qual, coverage, lst.get(0).getSampleName()); } return null; } @@ -333,7 +334,8 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, qual = refQual; else if (rec.getFields().containsKey("GQ")) qual = Double.valueOf(rec.getFields().get("GQ")) / 10.0; - genotypes.add(new VCFGenotypeCall(this.getReference().charAt(0), this.getLocation(), Utils.join("", rec.getAlleles()), qual, rec.getSampleName())); + int coverage = (rec.getFields().containsKey("RD") ? Integer.valueOf(rec.getFields().get("RD")) : 0); + genotypes.add(new VCFGenotypeCall(this.getReference().charAt(0), this.getLocation(), Utils.join("", rec.getAlleles()), qual, coverage, rec.getSampleName())); } return genotypes; } @@ -353,7 +355,8 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, qual = this.getNegLog10PError(); else if (record.getFields().containsKey("GQ")) qual = Double.valueOf(record.getFields().get("GQ")) / 10.0; - return new VCFGenotypeCall(this.getReference().charAt(0), this.getLocation(), Utils.join("", record.getAlleles()), qual, record.getSampleName()); + int coverage = (record.getFields().containsKey("RD") ? Integer.valueOf(record.getFields().get("RD")) : 0); + return new VCFGenotypeCall(this.getReference().charAt(0), this.getLocation(), Utils.join("", record.getAlleles()), qual, coverage, record.getSampleName()); } } return null; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/CallsetConcordanceWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/CallsetConcordanceWalker.java index 9a4547cba..354a4d365 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/CallsetConcordanceWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/CallsetConcordanceWalker.java @@ -128,9 +128,6 @@ public class CallsetConcordanceWalker extends RodWalker { if ( vcfRods.size() == 0 ) return 0; - // create a merged record from all input VCFs - VCFRecord record = VCFUtils.mergeRecords(vcfRods, rodNamesToSampleNames); - // pull out all of the individual calls from the rods and insert into a map based on the // mapping from rod/sample to uniquified name HashMap samplesToRecords = new HashMap(); @@ -149,6 +146,9 @@ public class CallsetConcordanceWalker extends RodWalker { } } + // create a merged record from all input VCFs + VCFRecord record = VCFUtils.mergeRecords(vcfRods, rodNamesToSampleNames); + // add in the info fields to the new record based on the results of each of the relevant concordance tests for ( ConcordanceType type : requestedTypes ) { String result = type.computeConcordance(samplesToRecords, ref); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/SNPGenotypeConcordance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/SNPGenotypeConcordance.java index 44ebddd56..1cde7604a 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/SNPGenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/concordance/SNPGenotypeConcordance.java @@ -2,8 +2,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.concordance; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; -import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall; import java.util.*; @@ -13,7 +11,7 @@ import java.util.*; */ public class SNPGenotypeConcordance implements ConcordanceType { - private double LOD = 5.0; + private double Qscore = 30.0; private String sample1, sample2; @@ -23,8 +21,8 @@ public class SNPGenotypeConcordance implements ConcordanceType { if ( samples.size() != 2 ) throw new StingException("SNPGenotype concordance test cannot handle anything other than 2 VCF records"); - if ( args.get("lod") != null ) - LOD = Double.valueOf(args.get("lod")); + if ( args.get("qscore") != null ) + Qscore = Double.valueOf(args.get("qscore")); Iterator iter = samples.iterator(); sample1 = iter.next(); @@ -33,32 +31,28 @@ public class SNPGenotypeConcordance implements ConcordanceType { public String computeConcordance(Map samplesToRecords, ReferenceContext ref) { - VCFGenotypeCall vcfCall1 = samplesToRecords.get(sample1); - VCFGenotypeCall vcfCall2 = samplesToRecords.get(sample2); - Variation call1 = (vcfCall1 == null ? null : vcfCall1.toVariation()); - Variation call2 = (vcfCall2 == null ? null : vcfCall2.toVariation()); + VCFGenotypeCall call1 = samplesToRecords.get(sample1); + VCFGenotypeCall call2 = samplesToRecords.get(sample2); // the only reason they would be null is a lack of coverage if ( call1 == null || call2 == null ) { - if ( call1 != null && call1.isSNP() && call1.getNegLog10PError() >= LOD ) + if ( call1 != null && call1.isPointGenotype() && (10.0 * call1.getNegLog10PError()) >= Qscore ) return "set1ConfidentVariantSet2NoCoverage"; - else if ( call2 != null && call2.isSNP() && call2.getNegLog10PError() >= LOD ) + else if ( call2 != null && call2.isPointGenotype() && (10.0 * call2.getNegLog10PError()) >= Qscore ) return "set1NoCoverageSet2ConfidentVariant"; return null; } - if (!(call1 instanceof VariantBackedByGenotype) || !(call2 instanceof VariantBackedByGenotype)) - throw new StingException("Both ROD tracks must be backed by genotype data. Ensure that your rod(s) contain genotyping information"); - double bestVsRef1 = call1.getNegLog10PError(); - double bestVsRef2 = call2.getNegLog10PError(); - String genotype1 = ((VariantBackedByGenotype)call1).getCalledGenotype().getBases(); - String genotype2 = ((VariantBackedByGenotype)call2).getCalledGenotype().getBases(); + double confidence1 = 10.0 * call1.getNegLog10PError(); + double confidence2 = 10.0 * call2.getNegLog10PError(); + String genotype1 = call1.getBases(); + String genotype2 = call2.getBases(); // are they both variant SNPs? - if ( call1.isSNP() && call2.isSNP() ) { + if ( call1.isPointGenotype() && call2.isPointGenotype() ) { // are they both confident calls? - if ( bestVsRef1 >= LOD && bestVsRef2 >= LOD ) { + if ( confidence1 >= Qscore && confidence2 >= Qscore ) { // same genotype if ( genotype1.equals(genotype2) ) return "sameConfidentVariant"; @@ -73,20 +67,20 @@ public class SNPGenotypeConcordance implements ConcordanceType { } // confident only when combined - else if ( bestVsRef1 < LOD && bestVsRef2 < LOD && bestVsRef1 + bestVsRef2 >= LOD ) { + else if ( confidence1 < Qscore && confidence2 < Qscore && confidence1 + confidence2 >= Qscore ) { return "confidentVariantWhenCombined"; } // only one is confident variant - else if ( (bestVsRef1 < LOD && bestVsRef2 >= LOD) || (bestVsRef1 >= LOD && bestVsRef2 < LOD) ) { + else if ( (confidence1 < Qscore && confidence2 >= Qscore) || (confidence1 >= Qscore && confidence2 < Qscore) ) { return "bothVariantOnlyOneIsConfident"; } } // one is variant and the other is ref - else if ( call1.isSNP() && call2.isReference() && bestVsRef1 >= LOD ) + else if ( call1.isPointGenotype() && call2.isVariant() && confidence1 >= Qscore ) return "set1VariantSet2Ref"; - else if ( call2.isSNP() && call1.isReference() && bestVsRef2 >= LOD ) + else if ( call2.isPointGenotype() && call1.isVariant() && confidence2 >= Qscore ) return "set1RefSet2Variant"; return null; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java index a533dddae..e18ca77b5 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java @@ -21,6 +21,7 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked, private final GenomeLoc mLocation; private List mReads; + private int mCoverage = 0; private double[] mPosteriors; @@ -32,10 +33,7 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked, // the sample name, used to propulate the SampleBacked interface private String mSampleName; - /** - * Generate a single sample genotype object - * - */ + public VCFGenotypeCall(char ref, GenomeLoc loc) { mRefBase = ref; mLocation = loc; @@ -47,12 +45,13 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked, mReads = new ArrayList(); } - public VCFGenotypeCall(char ref, GenomeLoc loc, String genotype, double negLog10PError, String sample) { + public VCFGenotypeCall(char ref, GenomeLoc loc, String genotype, double negLog10PError, int coverage, String sample) { mRefBase = ref; mLocation = loc; mBestGenotype = DiploidGenotype.unorderedValueOf(genotype); mRefGenotype = DiploidGenotype.createHomGenotype(ref); mSampleName = sample; + mCoverage = coverage; // set general posteriors to min double value mPosteriors = new double[10]; @@ -245,7 +244,7 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked, * @return the number of reads we're backed by */ public int getReadCount() { - return mReads.size(); + return (mCoverage > 0 ? mCoverage : mReads.size()); } /**