-Some minor fixes to get accurate vcf record merging done

-Improvement to snp genotype concordance test

And with that, it looks like I get revision #2000.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2000 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-11-09 06:40:55 +00:00
parent ab705565cf
commit 74751a8ed3
4 changed files with 31 additions and 35 deletions

View File

@ -309,7 +309,8 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
qual = refQual;
else if (lst.get(0).getFields().containsKey("GQ"))
qual = Double.valueOf(lst.get(0).getFields().get("GQ")) / 10.0;
return new VCFGenotypeCall(this.getReference().charAt(0), this.getLocation(), Utils.join("", lst.get(0).getAlleles()), qual, lst.get(0).getSampleName());
int coverage = (lst.get(0).getFields().containsKey("RD") ? Integer.valueOf(lst.get(0).getFields().get("RD")) : 0);
return new VCFGenotypeCall(this.getReference().charAt(0), this.getLocation(), Utils.join("", lst.get(0).getAlleles()), qual, coverage, lst.get(0).getSampleName());
}
return null;
}
@ -333,7 +334,8 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
qual = refQual;
else if (rec.getFields().containsKey("GQ"))
qual = Double.valueOf(rec.getFields().get("GQ")) / 10.0;
genotypes.add(new VCFGenotypeCall(this.getReference().charAt(0), this.getLocation(), Utils.join("", rec.getAlleles()), qual, rec.getSampleName()));
int coverage = (rec.getFields().containsKey("RD") ? Integer.valueOf(rec.getFields().get("RD")) : 0);
genotypes.add(new VCFGenotypeCall(this.getReference().charAt(0), this.getLocation(), Utils.join("", rec.getAlleles()), qual, coverage, rec.getSampleName()));
}
return genotypes;
}
@ -353,7 +355,8 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
qual = this.getNegLog10PError();
else if (record.getFields().containsKey("GQ"))
qual = Double.valueOf(record.getFields().get("GQ")) / 10.0;
return new VCFGenotypeCall(this.getReference().charAt(0), this.getLocation(), Utils.join("", record.getAlleles()), qual, record.getSampleName());
int coverage = (record.getFields().containsKey("RD") ? Integer.valueOf(record.getFields().get("RD")) : 0);
return new VCFGenotypeCall(this.getReference().charAt(0), this.getLocation(), Utils.join("", record.getAlleles()), qual, coverage, record.getSampleName());
}
}
return null;

View File

@ -128,9 +128,6 @@ public class CallsetConcordanceWalker extends RodWalker<Integer, Integer> {
if ( vcfRods.size() == 0 )
return 0;
// create a merged record from all input VCFs
VCFRecord record = VCFUtils.mergeRecords(vcfRods, rodNamesToSampleNames);
// pull out all of the individual calls from the rods and insert into a map based on the
// mapping from rod/sample to uniquified name
HashMap<String, VCFGenotypeCall> samplesToRecords = new HashMap<String, VCFGenotypeCall>();
@ -149,6 +146,9 @@ public class CallsetConcordanceWalker extends RodWalker<Integer, Integer> {
}
}
// create a merged record from all input VCFs
VCFRecord record = VCFUtils.mergeRecords(vcfRods, rodNamesToSampleNames);
// add in the info fields to the new record based on the results of each of the relevant concordance tests
for ( ConcordanceType type : requestedTypes ) {
String result = type.computeConcordance(samplesToRecords, ref);

View File

@ -2,8 +2,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.concordance;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall;
import java.util.*;
@ -13,7 +11,7 @@ import java.util.*;
*/
public class SNPGenotypeConcordance implements ConcordanceType {
private double LOD = 5.0;
private double Qscore = 30.0;
private String sample1, sample2;
@ -23,8 +21,8 @@ public class SNPGenotypeConcordance implements ConcordanceType {
if ( samples.size() != 2 )
throw new StingException("SNPGenotype concordance test cannot handle anything other than 2 VCF records");
if ( args.get("lod") != null )
LOD = Double.valueOf(args.get("lod"));
if ( args.get("qscore") != null )
Qscore = Double.valueOf(args.get("qscore"));
Iterator<String> iter = samples.iterator();
sample1 = iter.next();
@ -33,32 +31,28 @@ public class SNPGenotypeConcordance implements ConcordanceType {
public String computeConcordance(Map<String, VCFGenotypeCall> samplesToRecords, ReferenceContext ref) {
VCFGenotypeCall vcfCall1 = samplesToRecords.get(sample1);
VCFGenotypeCall vcfCall2 = samplesToRecords.get(sample2);
Variation call1 = (vcfCall1 == null ? null : vcfCall1.toVariation());
Variation call2 = (vcfCall2 == null ? null : vcfCall2.toVariation());
VCFGenotypeCall call1 = samplesToRecords.get(sample1);
VCFGenotypeCall call2 = samplesToRecords.get(sample2);
// the only reason they would be null is a lack of coverage
if ( call1 == null || call2 == null ) {
if ( call1 != null && call1.isSNP() && call1.getNegLog10PError() >= LOD )
if ( call1 != null && call1.isPointGenotype() && (10.0 * call1.getNegLog10PError()) >= Qscore )
return "set1ConfidentVariantSet2NoCoverage";
else if ( call2 != null && call2.isSNP() && call2.getNegLog10PError() >= LOD )
else if ( call2 != null && call2.isPointGenotype() && (10.0 * call2.getNegLog10PError()) >= Qscore )
return "set1NoCoverageSet2ConfidentVariant";
return null;
}
if (!(call1 instanceof VariantBackedByGenotype) || !(call2 instanceof VariantBackedByGenotype))
throw new StingException("Both ROD tracks must be backed by genotype data. Ensure that your rod(s) contain genotyping information");
double bestVsRef1 = call1.getNegLog10PError();
double bestVsRef2 = call2.getNegLog10PError();
String genotype1 = ((VariantBackedByGenotype)call1).getCalledGenotype().getBases();
String genotype2 = ((VariantBackedByGenotype)call2).getCalledGenotype().getBases();
double confidence1 = 10.0 * call1.getNegLog10PError();
double confidence2 = 10.0 * call2.getNegLog10PError();
String genotype1 = call1.getBases();
String genotype2 = call2.getBases();
// are they both variant SNPs?
if ( call1.isSNP() && call2.isSNP() ) {
if ( call1.isPointGenotype() && call2.isPointGenotype() ) {
// are they both confident calls?
if ( bestVsRef1 >= LOD && bestVsRef2 >= LOD ) {
if ( confidence1 >= Qscore && confidence2 >= Qscore ) {
// same genotype
if ( genotype1.equals(genotype2) )
return "sameConfidentVariant";
@ -73,20 +67,20 @@ public class SNPGenotypeConcordance implements ConcordanceType {
}
// confident only when combined
else if ( bestVsRef1 < LOD && bestVsRef2 < LOD && bestVsRef1 + bestVsRef2 >= LOD ) {
else if ( confidence1 < Qscore && confidence2 < Qscore && confidence1 + confidence2 >= Qscore ) {
return "confidentVariantWhenCombined";
}
// only one is confident variant
else if ( (bestVsRef1 < LOD && bestVsRef2 >= LOD) || (bestVsRef1 >= LOD && bestVsRef2 < LOD) ) {
else if ( (confidence1 < Qscore && confidence2 >= Qscore) || (confidence1 >= Qscore && confidence2 < Qscore) ) {
return "bothVariantOnlyOneIsConfident";
}
}
// one is variant and the other is ref
else if ( call1.isSNP() && call2.isReference() && bestVsRef1 >= LOD )
else if ( call1.isPointGenotype() && call2.isVariant() && confidence1 >= Qscore )
return "set1VariantSet2Ref";
else if ( call2.isSNP() && call1.isReference() && bestVsRef2 >= LOD )
else if ( call2.isPointGenotype() && call1.isVariant() && confidence2 >= Qscore )
return "set1RefSet2Variant";
return null;

View File

@ -21,6 +21,7 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked,
private final GenomeLoc mLocation;
private List<SAMRecord> mReads;
private int mCoverage = 0;
private double[] mPosteriors;
@ -32,10 +33,7 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked,
// the sample name, used to propulate the SampleBacked interface
private String mSampleName;
/**
* Generate a single sample genotype object
*
*/
public VCFGenotypeCall(char ref, GenomeLoc loc) {
mRefBase = ref;
mLocation = loc;
@ -47,12 +45,13 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked,
mReads = new ArrayList<SAMRecord>();
}
public VCFGenotypeCall(char ref, GenomeLoc loc, String genotype, double negLog10PError, String sample) {
public VCFGenotypeCall(char ref, GenomeLoc loc, String genotype, double negLog10PError, int coverage, String sample) {
mRefBase = ref;
mLocation = loc;
mBestGenotype = DiploidGenotype.unorderedValueOf(genotype);
mRefGenotype = DiploidGenotype.createHomGenotype(ref);
mSampleName = sample;
mCoverage = coverage;
// set general posteriors to min double value
mPosteriors = new double[10];
@ -245,7 +244,7 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked,
* @return the number of reads we're backed by
*/
public int getReadCount() {
return mReads.size();
return (mCoverage > 0 ? mCoverage : mReads.size());
}
/**