diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java index 8aa961c75..a2a39da1f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java @@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation; -import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; @@ -21,21 +20,17 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; /** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 9/14/11 - * Time: 12:24 PM + * Given a variant context, uses the genotype likelihoods to assess the likelihood of the site being a mendelian violation + * versus the likelihood of the site transmitting according to mendelian rules. This assumes that the organism is + * diploid. When multiple trios are present, the annotation is simply the maximum of the likelihood ratios, rather than + * the strict 1-∏(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios. */ public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation { private MendelianViolation mendelianViolation = null; + public static final String MVLR_KEY = "MVLR"; private Set trios; - private class Trio { - String motherId; - String fatherId; - String childId; - } public Map annotate(final RefMetaDataTracker tracker, final AnnotatorCompatible walker, @@ -44,7 +39,8 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment final VariantContext vc, final Map stratifiedPerReadAlleleLikelihoodMap) { if ( mendelianViolation == null ) { - if (checkAndSetSamples(((Walker) walker).getSampleDB())) { + trios = checkAndSetSamples(((Walker) walker).getSampleDB()); + if ( trios.size() > 0 ) { mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP ); } else { @@ -52,15 +48,12 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment } } - Map toRet = new HashMap(1); + Map attributeMap = new HashMap(1); //double pNoMV = 1.0; double maxMVLR = Double.MIN_VALUE; for ( Trio trio : trios ) { - boolean hasAppropriateGenotypes = vc.hasGenotype(trio.motherId) && vc.getGenotype(trio.motherId).hasLikelihoods() && - vc.hasGenotype(trio.fatherId) && vc.getGenotype(trio.fatherId).hasLikelihoods() && - vc.hasGenotype(trio.childId) && vc.getGenotype(trio.childId).hasLikelihoods(); - if ( hasAppropriateGenotypes ) { - Double likR = mendelianViolation.violationLikelihoodRatio(vc,trio.motherId,trio.fatherId,trio.childId); + if ( contextHasTrioLikelihoods(vc,trio) ) { + Double likR = mendelianViolation.violationLikelihoodRatio(vc,trio.getMaternalID(),trio.getPaternalID(),trio.childId); maxMVLR = likR > maxMVLR ? likR : maxMVLR; //pNoMV *= (1.0-Math.pow(10.0,likR)/(1+Math.pow(10.0,likR))); } @@ -68,34 +61,79 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment //double pSomeMV = 1.0-pNoMV; //toRet.put("MVLR",Math.log10(pSomeMV)-Math.log10(1.0-pSomeMV)); - toRet.put("MVLR",maxMVLR); - return toRet; + if ( Double.compare(maxMVLR,Double.MIN_VALUE) != 0 ) + attributeMap.put(MVLR_KEY,maxMVLR); + return attributeMap; } // return the descriptions used for the VCF INFO meta field - public List getKeyNames() { return Arrays.asList("MVLR"); } + public List getKeyNames() { return Arrays.asList(MVLR_KEY); } - public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); } + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(MVLR_KEY, 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); } - private boolean checkAndSetSamples(SampleDB db){ - trios = new HashSet(); - Set families = db.getFamilyIDs(); - for ( String familyString : families ) { + private Set checkAndSetSamples(SampleDB db){ + Set trioSet = new HashSet(); + for ( String familyString : db.getFamilyIDs() ) { Set family = db.getFamily(familyString); - Iterator sampleIterator = family.iterator(); - Sample sample; - for ( sample = sampleIterator.next(); sampleIterator.hasNext(); sample=sampleIterator.next()) { + for ( Sample sample : family) { if ( sample.getParents().size() == 2 ) { - Trio trio = new Trio(); - trio.childId = sample.getID(); - trio.fatherId = sample.getFather().getID(); - trio.motherId = sample.getMother().getID(); - trios.add(trio); + Trio trio = new Trio(sample.getMaternalID(),sample.getPaternalID(),sample.getID()); + trioSet.add(trio); } } } - return trios.size() > 0; + return trioSet; } + private boolean contextHasTrioLikelihoods(VariantContext context, Trio trio) { + for ( String sample : trio ) { + if ( ! context.hasGenotype(sample) ) + return false; + if ( ! context.getGenotype(sample).hasLikelihoods() ) + return false; + } + + return true; + } + + private class Trio implements Iterable { + private String maternalID; + private String paternalID; + private String childId; + + public Trio(String mom, String dad, String child) { + this.maternalID = mom; + this.paternalID = dad; + this.childId = child; + } + + public String getMaternalID() { + return this.maternalID; + } + + public String getPaternalID() { + return this.paternalID; + } + + public String getChildId() { + return this.childId; + } + + public void setMaternalID(String id) { + this.maternalID = id; + } + + public void setPaternalID(String id) { + this.paternalID = id; + } + + public void setChildId(String id) { + this.childId = id; + } + + public Iterator iterator() { + return Arrays.asList(maternalID,paternalID,childId).iterator(); + } + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index fc29a7f02..567262756 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -579,14 +579,9 @@ public class SelectVariants extends RodWalker implements TreeR } private boolean badIndelSize(final VariantContext vc) { - if ( vc.getReference().length() > maxIndelSize ) { - return true; - } - - for ( Allele a : vc.getAlternateAlleles() ) { - if ( a.length() > maxIndelSize ) { + for ( Integer indelLength : vc.getIndelLengths() ) { + if ( indelLength > maxIndelSize ) return true; - } } return false;