Code cleanup in MVLR and SelectVariants. Should fix JIRA GSA-509 and GSA-510

This commit is contained in:
Christopher Hartl 2012-08-24 12:25:11 -07:00
parent 0545664f91
commit 752f44c332
2 changed files with 74 additions and 41 deletions

View File

@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
@ -21,21 +20,17 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: 9/14/11
* Time: 12:24 PM
* Given a variant context, uses the genotype likelihoods to assess the likelihood of the site being a mendelian violation
* versus the likelihood of the site transmitting according to mendelian rules. This assumes that the organism is
* diploid. When multiple trios are present, the annotation is simply the maximum of the likelihood ratios, rather than
* the strict 1-(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios.
*/
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
private MendelianViolation mendelianViolation = null;
public static final String MVLR_KEY = "MVLR";
private Set<Trio> trios;
private class Trio {
String motherId;
String fatherId;
String childId;
}
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
@ -44,7 +39,8 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
if ( mendelianViolation == null ) {
if (checkAndSetSamples(((Walker) walker).getSampleDB())) {
trios = checkAndSetSamples(((Walker) walker).getSampleDB());
if ( trios.size() > 0 ) {
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
}
else {
@ -52,15 +48,12 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
}
}
Map<String,Object> toRet = new HashMap<String,Object>(1);
Map<String,Object> attributeMap = new HashMap<String,Object>(1);
//double pNoMV = 1.0;
double maxMVLR = Double.MIN_VALUE;
for ( Trio trio : trios ) {
boolean hasAppropriateGenotypes = vc.hasGenotype(trio.motherId) && vc.getGenotype(trio.motherId).hasLikelihoods() &&
vc.hasGenotype(trio.fatherId) && vc.getGenotype(trio.fatherId).hasLikelihoods() &&
vc.hasGenotype(trio.childId) && vc.getGenotype(trio.childId).hasLikelihoods();
if ( hasAppropriateGenotypes ) {
Double likR = mendelianViolation.violationLikelihoodRatio(vc,trio.motherId,trio.fatherId,trio.childId);
if ( contextHasTrioLikelihoods(vc,trio) ) {
Double likR = mendelianViolation.violationLikelihoodRatio(vc,trio.getMaternalID(),trio.getPaternalID(),trio.childId);
maxMVLR = likR > maxMVLR ? likR : maxMVLR;
//pNoMV *= (1.0-Math.pow(10.0,likR)/(1+Math.pow(10.0,likR)));
}
@ -68,34 +61,79 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
//double pSomeMV = 1.0-pNoMV;
//toRet.put("MVLR",Math.log10(pSomeMV)-Math.log10(1.0-pSomeMV));
toRet.put("MVLR",maxMVLR);
return toRet;
if ( Double.compare(maxMVLR,Double.MIN_VALUE) != 0 )
attributeMap.put(MVLR_KEY,maxMVLR);
return attributeMap;
}
// return the descriptions used for the VCF INFO meta field
public List<String> getKeyNames() { return Arrays.asList("MVLR"); }
public List<String> getKeyNames() { return Arrays.asList(MVLR_KEY); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(MVLR_KEY, 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
private boolean checkAndSetSamples(SampleDB db){
trios = new HashSet<Trio>();
Set<String> families = db.getFamilyIDs();
for ( String familyString : families ) {
private Set<Trio> checkAndSetSamples(SampleDB db){
Set<Trio> trioSet = new HashSet<Trio>();
for ( String familyString : db.getFamilyIDs() ) {
Set<Sample> family = db.getFamily(familyString);
Iterator<Sample> sampleIterator = family.iterator();
Sample sample;
for ( sample = sampleIterator.next(); sampleIterator.hasNext(); sample=sampleIterator.next()) {
for ( Sample sample : family) {
if ( sample.getParents().size() == 2 ) {
Trio trio = new Trio();
trio.childId = sample.getID();
trio.fatherId = sample.getFather().getID();
trio.motherId = sample.getMother().getID();
trios.add(trio);
Trio trio = new Trio(sample.getMaternalID(),sample.getPaternalID(),sample.getID());
trioSet.add(trio);
}
}
}
return trios.size() > 0;
return trioSet;
}
private boolean contextHasTrioLikelihoods(VariantContext context, Trio trio) {
for ( String sample : trio ) {
if ( ! context.hasGenotype(sample) )
return false;
if ( ! context.getGenotype(sample).hasLikelihoods() )
return false;
}
return true;
}
private class Trio implements Iterable<String> {
private String maternalID;
private String paternalID;
private String childId;
public Trio(String mom, String dad, String child) {
this.maternalID = mom;
this.paternalID = dad;
this.childId = child;
}
public String getMaternalID() {
return this.maternalID;
}
public String getPaternalID() {
return this.paternalID;
}
public String getChildId() {
return this.childId;
}
public void setMaternalID(String id) {
this.maternalID = id;
}
public void setPaternalID(String id) {
this.paternalID = id;
}
public void setChildId(String id) {
this.childId = id;
}
public Iterator<String> iterator() {
return Arrays.asList(maternalID,paternalID,childId).iterator();
}
}
}

View File

@ -579,14 +579,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
}
private boolean badIndelSize(final VariantContext vc) {
if ( vc.getReference().length() > maxIndelSize ) {
return true;
}
for ( Allele a : vc.getAlternateAlleles() ) {
if ( a.length() > maxIndelSize ) {
for ( Integer indelLength : vc.getIndelLengths() ) {
if ( indelLength > maxIndelSize )
return true;
}
}
return false;