Code cleanup in MVLR and SelectVariants. Should fix JIRA GSA-509 and GSA-510
This commit is contained in:
parent
0545664f91
commit
752f44c332
|
|
@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
|
|
@ -21,21 +20,17 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 9/14/11
|
||||
* Time: 12:24 PM
|
||||
* Given a variant context, uses the genotype likelihoods to assess the likelihood of the site being a mendelian violation
|
||||
* versus the likelihood of the site transmitting according to mendelian rules. This assumes that the organism is
|
||||
* diploid. When multiple trios are present, the annotation is simply the maximum of the likelihood ratios, rather than
|
||||
* the strict 1-∏(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios.
|
||||
*/
|
||||
|
||||
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
|
||||
|
||||
private MendelianViolation mendelianViolation = null;
|
||||
public static final String MVLR_KEY = "MVLR";
|
||||
private Set<Trio> trios;
|
||||
private class Trio {
|
||||
String motherId;
|
||||
String fatherId;
|
||||
String childId;
|
||||
}
|
||||
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
|
|
@ -44,7 +39,8 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
|
|||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if ( mendelianViolation == null ) {
|
||||
if (checkAndSetSamples(((Walker) walker).getSampleDB())) {
|
||||
trios = checkAndSetSamples(((Walker) walker).getSampleDB());
|
||||
if ( trios.size() > 0 ) {
|
||||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
||||
}
|
||||
else {
|
||||
|
|
@ -52,15 +48,12 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
|
|||
}
|
||||
}
|
||||
|
||||
Map<String,Object> toRet = new HashMap<String,Object>(1);
|
||||
Map<String,Object> attributeMap = new HashMap<String,Object>(1);
|
||||
//double pNoMV = 1.0;
|
||||
double maxMVLR = Double.MIN_VALUE;
|
||||
for ( Trio trio : trios ) {
|
||||
boolean hasAppropriateGenotypes = vc.hasGenotype(trio.motherId) && vc.getGenotype(trio.motherId).hasLikelihoods() &&
|
||||
vc.hasGenotype(trio.fatherId) && vc.getGenotype(trio.fatherId).hasLikelihoods() &&
|
||||
vc.hasGenotype(trio.childId) && vc.getGenotype(trio.childId).hasLikelihoods();
|
||||
if ( hasAppropriateGenotypes ) {
|
||||
Double likR = mendelianViolation.violationLikelihoodRatio(vc,trio.motherId,trio.fatherId,trio.childId);
|
||||
if ( contextHasTrioLikelihoods(vc,trio) ) {
|
||||
Double likR = mendelianViolation.violationLikelihoodRatio(vc,trio.getMaternalID(),trio.getPaternalID(),trio.childId);
|
||||
maxMVLR = likR > maxMVLR ? likR : maxMVLR;
|
||||
//pNoMV *= (1.0-Math.pow(10.0,likR)/(1+Math.pow(10.0,likR)));
|
||||
}
|
||||
|
|
@ -68,34 +61,79 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
|
|||
|
||||
//double pSomeMV = 1.0-pNoMV;
|
||||
//toRet.put("MVLR",Math.log10(pSomeMV)-Math.log10(1.0-pSomeMV));
|
||||
toRet.put("MVLR",maxMVLR);
|
||||
return toRet;
|
||||
if ( Double.compare(maxMVLR,Double.MIN_VALUE) != 0 )
|
||||
attributeMap.put(MVLR_KEY,maxMVLR);
|
||||
return attributeMap;
|
||||
}
|
||||
|
||||
// return the descriptions used for the VCF INFO meta field
|
||||
public List<String> getKeyNames() { return Arrays.asList("MVLR"); }
|
||||
public List<String> getKeyNames() { return Arrays.asList(MVLR_KEY); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(MVLR_KEY, 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
|
||||
|
||||
private boolean checkAndSetSamples(SampleDB db){
|
||||
trios = new HashSet<Trio>();
|
||||
Set<String> families = db.getFamilyIDs();
|
||||
for ( String familyString : families ) {
|
||||
private Set<Trio> checkAndSetSamples(SampleDB db){
|
||||
Set<Trio> trioSet = new HashSet<Trio>();
|
||||
for ( String familyString : db.getFamilyIDs() ) {
|
||||
Set<Sample> family = db.getFamily(familyString);
|
||||
Iterator<Sample> sampleIterator = family.iterator();
|
||||
Sample sample;
|
||||
for ( sample = sampleIterator.next(); sampleIterator.hasNext(); sample=sampleIterator.next()) {
|
||||
for ( Sample sample : family) {
|
||||
if ( sample.getParents().size() == 2 ) {
|
||||
Trio trio = new Trio();
|
||||
trio.childId = sample.getID();
|
||||
trio.fatherId = sample.getFather().getID();
|
||||
trio.motherId = sample.getMother().getID();
|
||||
trios.add(trio);
|
||||
Trio trio = new Trio(sample.getMaternalID(),sample.getPaternalID(),sample.getID());
|
||||
trioSet.add(trio);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return trios.size() > 0;
|
||||
return trioSet;
|
||||
}
|
||||
|
||||
private boolean contextHasTrioLikelihoods(VariantContext context, Trio trio) {
|
||||
for ( String sample : trio ) {
|
||||
if ( ! context.hasGenotype(sample) )
|
||||
return false;
|
||||
if ( ! context.getGenotype(sample).hasLikelihoods() )
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private class Trio implements Iterable<String> {
|
||||
private String maternalID;
|
||||
private String paternalID;
|
||||
private String childId;
|
||||
|
||||
public Trio(String mom, String dad, String child) {
|
||||
this.maternalID = mom;
|
||||
this.paternalID = dad;
|
||||
this.childId = child;
|
||||
}
|
||||
|
||||
public String getMaternalID() {
|
||||
return this.maternalID;
|
||||
}
|
||||
|
||||
public String getPaternalID() {
|
||||
return this.paternalID;
|
||||
}
|
||||
|
||||
public String getChildId() {
|
||||
return this.childId;
|
||||
}
|
||||
|
||||
public void setMaternalID(String id) {
|
||||
this.maternalID = id;
|
||||
}
|
||||
|
||||
public void setPaternalID(String id) {
|
||||
this.paternalID = id;
|
||||
}
|
||||
|
||||
public void setChildId(String id) {
|
||||
this.childId = id;
|
||||
}
|
||||
|
||||
public Iterator<String> iterator() {
|
||||
return Arrays.asList(maternalID,paternalID,childId).iterator();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -579,14 +579,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
}
|
||||
|
||||
private boolean badIndelSize(final VariantContext vc) {
|
||||
if ( vc.getReference().length() > maxIndelSize ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for ( Allele a : vc.getAlternateAlleles() ) {
|
||||
if ( a.length() > maxIndelSize ) {
|
||||
for ( Integer indelLength : vc.getIndelLengths() ) {
|
||||
if ( indelLength > maxIndelSize )
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
|
|
|
|||
Loading…
Reference in New Issue