Code cleanup in MVLR and SelectVariants. Should fix JIRA GSA-509 and GSA-510

This commit is contained in:
Christopher Hartl 2012-08-24 12:25:11 -07:00
parent 0545664f91
commit 752f44c332
2 changed files with 74 additions and 41 deletions

View File

@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
@ -21,21 +20,17 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*; import java.util.*;
/** /**
* Created by IntelliJ IDEA. * Given a variant context, uses the genotype likelihoods to assess the likelihood of the site being a mendelian violation
* User: chartl * versus the likelihood of the site transmitting according to mendelian rules. This assumes that the organism is
* Date: 9/14/11 * diploid. When multiple trios are present, the annotation is simply the maximum of the likelihood ratios, rather than
* Time: 12:24 PM * the strict 1-(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios.
*/ */
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation { public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
private MendelianViolation mendelianViolation = null; private MendelianViolation mendelianViolation = null;
public static final String MVLR_KEY = "MVLR";
private Set<Trio> trios; private Set<Trio> trios;
private class Trio {
String motherId;
String fatherId;
String childId;
}
public Map<String, Object> annotate(final RefMetaDataTracker tracker, public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker, final AnnotatorCompatible walker,
@ -44,7 +39,8 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
final VariantContext vc, final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) { final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
if ( mendelianViolation == null ) { if ( mendelianViolation == null ) {
if (checkAndSetSamples(((Walker) walker).getSampleDB())) { trios = checkAndSetSamples(((Walker) walker).getSampleDB());
if ( trios.size() > 0 ) {
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP ); mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
} }
else { else {
@ -52,15 +48,12 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
} }
} }
Map<String,Object> toRet = new HashMap<String,Object>(1); Map<String,Object> attributeMap = new HashMap<String,Object>(1);
//double pNoMV = 1.0; //double pNoMV = 1.0;
double maxMVLR = Double.MIN_VALUE; double maxMVLR = Double.MIN_VALUE;
for ( Trio trio : trios ) { for ( Trio trio : trios ) {
boolean hasAppropriateGenotypes = vc.hasGenotype(trio.motherId) && vc.getGenotype(trio.motherId).hasLikelihoods() && if ( contextHasTrioLikelihoods(vc,trio) ) {
vc.hasGenotype(trio.fatherId) && vc.getGenotype(trio.fatherId).hasLikelihoods() && Double likR = mendelianViolation.violationLikelihoodRatio(vc,trio.getMaternalID(),trio.getPaternalID(),trio.childId);
vc.hasGenotype(trio.childId) && vc.getGenotype(trio.childId).hasLikelihoods();
if ( hasAppropriateGenotypes ) {
Double likR = mendelianViolation.violationLikelihoodRatio(vc,trio.motherId,trio.fatherId,trio.childId);
maxMVLR = likR > maxMVLR ? likR : maxMVLR; maxMVLR = likR > maxMVLR ? likR : maxMVLR;
//pNoMV *= (1.0-Math.pow(10.0,likR)/(1+Math.pow(10.0,likR))); //pNoMV *= (1.0-Math.pow(10.0,likR)/(1+Math.pow(10.0,likR)));
} }
@ -68,34 +61,79 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
//double pSomeMV = 1.0-pNoMV; //double pSomeMV = 1.0-pNoMV;
//toRet.put("MVLR",Math.log10(pSomeMV)-Math.log10(1.0-pSomeMV)); //toRet.put("MVLR",Math.log10(pSomeMV)-Math.log10(1.0-pSomeMV));
toRet.put("MVLR",maxMVLR); if ( Double.compare(maxMVLR,Double.MIN_VALUE) != 0 )
return toRet; attributeMap.put(MVLR_KEY,maxMVLR);
return attributeMap;
} }
// return the descriptions used for the VCF INFO meta field // return the descriptions used for the VCF INFO meta field
public List<String> getKeyNames() { return Arrays.asList("MVLR"); } public List<String> getKeyNames() { return Arrays.asList(MVLR_KEY); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); } public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(MVLR_KEY, 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
private boolean checkAndSetSamples(SampleDB db){ private Set<Trio> checkAndSetSamples(SampleDB db){
trios = new HashSet<Trio>(); Set<Trio> trioSet = new HashSet<Trio>();
Set<String> families = db.getFamilyIDs(); for ( String familyString : db.getFamilyIDs() ) {
for ( String familyString : families ) {
Set<Sample> family = db.getFamily(familyString); Set<Sample> family = db.getFamily(familyString);
Iterator<Sample> sampleIterator = family.iterator(); for ( Sample sample : family) {
Sample sample;
for ( sample = sampleIterator.next(); sampleIterator.hasNext(); sample=sampleIterator.next()) {
if ( sample.getParents().size() == 2 ) { if ( sample.getParents().size() == 2 ) {
Trio trio = new Trio(); Trio trio = new Trio(sample.getMaternalID(),sample.getPaternalID(),sample.getID());
trio.childId = sample.getID(); trioSet.add(trio);
trio.fatherId = sample.getFather().getID();
trio.motherId = sample.getMother().getID();
trios.add(trio);
} }
} }
} }
return trios.size() > 0; return trioSet;
} }
private boolean contextHasTrioLikelihoods(VariantContext context, Trio trio) {
for ( String sample : trio ) {
if ( ! context.hasGenotype(sample) )
return false;
if ( ! context.getGenotype(sample).hasLikelihoods() )
return false;
}
return true;
}
private class Trio implements Iterable<String> {
private String maternalID;
private String paternalID;
private String childId;
public Trio(String mom, String dad, String child) {
this.maternalID = mom;
this.paternalID = dad;
this.childId = child;
}
public String getMaternalID() {
return this.maternalID;
}
public String getPaternalID() {
return this.paternalID;
}
public String getChildId() {
return this.childId;
}
public void setMaternalID(String id) {
this.maternalID = id;
}
public void setPaternalID(String id) {
this.paternalID = id;
}
public void setChildId(String id) {
this.childId = id;
}
public Iterator<String> iterator() {
return Arrays.asList(maternalID,paternalID,childId).iterator();
}
}
} }

View File

@ -579,14 +579,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
} }
private boolean badIndelSize(final VariantContext vc) { private boolean badIndelSize(final VariantContext vc) {
if ( vc.getReference().length() > maxIndelSize ) { for ( Integer indelLength : vc.getIndelLengths() ) {
return true; if ( indelLength > maxIndelSize )
}
for ( Allele a : vc.getAlternateAlleles() ) {
if ( a.length() > maxIndelSize ) {
return true; return true;
}
} }
return false; return false;