From cd8a6d62bba320d276455d93b73290a61d6a203c Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Wed, 19 Oct 2011 17:42:37 -0400 Subject: [PATCH] You know how the wiki has a big section on commiting local changes to BRANCHES of the repository you clone it from? Yeah. It sucks if you don't do that. This commit contains: - IntronLossGenotyper is brought into its current incarnation - A couple of simple new filters (ReadName is super useful for debugging, MateUnmapped is useful for selecting out reads that may have a relevant unaligned mate) - RFA now matches my current local repository. It's in flux since I'm transitioning to the new traversal type. + the triggering read stash pilot required me to change the scope of some of the variables in the ReadClipping code, private -> protected. Those are all the changes there. - MendelianViolation restored to its former glory (and an annotator module that uses the likelihood calculation has been added) + use this rather than a hard GQ threshold if you're doing MV analyses. - Some miscellaneous QScripts --- .../sting/gatk/filters/ReadNameFilter.java | 23 ++++++++ .../walkers/annotator/MVLikelihoodRatio.java | 58 +++++++++++++++++++ .../sting/utils/MendelianViolation.java | 42 ++++++++++++++ 3 files changed, 123 insertions(+) create mode 100755 public/java/src/org/broadinstitute/sting/gatk/filters/ReadNameFilter.java create mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadNameFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadNameFilter.java new file mode 100755 index 000000000..a56af56d1 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadNameFilter.java @@ -0,0 +1,23 @@ +package org.broadinstitute.sting.gatk.filters; + +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Argument; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 9/19/11 + * Time: 4:09 PM + * To change this template use File | Settings | File Templates. + */ +public class ReadNameFilter extends ReadFilter { + @Argument(fullName = "readName", shortName = "rn", doc="Filter out all reads except those with this read name", required=true) + private String readName; + + public boolean filterOut(final SAMRecord rec) { + return ! rec.getReadName().equals(readName); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java new file mode 100755 index 000000000..e0a2329f8 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java @@ -0,0 +1,58 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.MendelianViolation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFilterHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 9/14/11 + * Time: 12:24 PM + * To change this template use File | Settings | File Templates. + */ +public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation { + + private MendelianViolation mendelianViolation = null; + + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + if ( mendelianViolation == null ) { + if ( walker instanceof VariantAnnotator ) { + mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).familyStr, ((VariantAnnotator)walker).minGenotypeQualityP ); + } + else { + throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator"); + } + } + + Map toRet = new HashMap(1); + boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) && + vc.hasGenotype(mendelianViolation.getSampleDad()) && + vc.hasGenotype(mendelianViolation.getSampleMom()); + if ( hasAppropriateGenotypes ) + toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc)); + + return toRet; + } + + // return the descriptions used for the VCF INFO meta field + public List getKeyNames() { return Arrays.asList("MVLR"); } + + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java index e62a7e512..cf45dab79 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java +++ b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java @@ -26,6 +26,9 @@ public class MendelianViolation { double minGenotypeQuality; + static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 }; + static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 }; + private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)"); public String getSampleMom() { @@ -134,4 +137,43 @@ public class MendelianViolation { return false; return true; } + + /** + * @return the likelihood ratio for a mendelian violation + */ + public double violationLikelihoodRatio(VariantContext vc) { + double[] logLikAssignments = new double[27]; + // the matrix to set up is + // MOM DAD CHILD + // |- AA + // AA AA | AB + // |- BB + // |- AA + // AA AB | AB + // |- BB + // etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs + double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector(); + double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector(); + double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector(); + int offset = 0; + for ( int oMom = 0; oMom < 3; oMom++ ) { + for ( int oDad = 0; oDad < 3; oDad++ ) { + for ( int oChild = 0; oChild < 3; oChild ++ ) { + logLikAssignments[offset++] = momGL[oMom] + dadGL[oDad] + childGL[oChild]; + } + } + } + double[] mvLiks = new double[12]; + double[] nonMVLiks = new double[15]; + for ( int i = 0; i < 12; i ++ ) { + mvLiks[i] = logLikAssignments[mvOffsets[i]]; + } + + for ( int i = 0; i < 15; i++) { + nonMVLiks[i] = logLikAssignments[nonMVOffsets[i]]; + } + + return MathUtils.log10sumLog10(mvLiks) - MathUtils.log10sumLog10(nonMVLiks); + } + }