From 6cadaa84c977d0cecedb3871c354ecf152526fce Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 19 Oct 2011 11:48:23 -0400 Subject: [PATCH 2/7] Just use validate() from super class since it does the same thing --- .../utils/variantcontext/MutableGenotype.java | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableGenotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableGenotype.java index 0cd684cb6..14419a2a0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableGenotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableGenotype.java @@ -40,19 +40,7 @@ public class MutableGenotype extends Genotype { */ public void setAlleles(List alleles) { this.alleles = new ArrayList(alleles); - - // todo -- add validation checking here - - if ( alleles == null ) throw new IllegalArgumentException("BUG: alleles cannot be null in setAlleles"); - if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0 in setAlleles"); - - int nNoCalls = 0; - for ( Allele allele : alleles ) { nNoCalls += allele.isNoCall() ? 1 : 0; } - if ( nNoCalls > 0 && nNoCalls != alleles.size() ) - throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this); - - for ( Allele allele : alleles ) - if ( allele == null ) throw new IllegalArgumentException("BUG: Cannot add a null allele to a genotype"); + validate(); } public void setPhase(boolean isPhased) { From 48c4a8cb335e9df5b336bc869a803a2f0b9e98ba Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 19 Oct 2011 11:49:16 -0400 Subject: [PATCH 3/7] Make error messages clearer (even I was confused) --- .../sting/utils/variantcontext/VariantContext.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index eac6d70b5..14a680af4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -1357,10 +1357,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati throw new IllegalArgumentException("Duplicate allele added to VariantContext: " + a); } - // deal with the case where the first allele isn't the reference + // deal with the case where the first allele isn't the reference if ( a.isReference() ) { if ( sawRef ) - throw new IllegalArgumentException("Alleles for a VariantContext must contain a single reference allele: " + alleles); + throw new IllegalArgumentException("Alleles for a VariantContext must contain at most one reference allele: " + alleles); alleleList.add(0, a); sawRef = true; } @@ -1372,7 +1372,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati throw new IllegalArgumentException("Cannot create a VariantContext with an empty allele list"); if ( alleleList.get(0).isNonReference() ) - throw new IllegalArgumentException("Alleles for a VariantContext must contain a single reference allele: " + alleles); + throw new IllegalArgumentException("Alleles for a VariantContext must contain at least one reference allele: " + alleles); return alleleList; } From 5a6468c11ea1f3e72e4079f2073d66c0bff00e99 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 19 Oct 2011 11:52:05 -0400 Subject: [PATCH 4/7] Allowing ./X genotypes and adding a unit test to ensure that this case is covered from now on (especially given that we may want to revert in the future). Reverting this change is really easy and entails uncommenting a few lines of code. But for now, despite Mark's objections, this case is allowed in the VCF spec and we are wrong not to allow it. --- .../sting/utils/variantcontext/Genotype.java | 10 ++++++---- .../variantcontext/VariantContextUnitTest.java | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index 7ab3f81f0..5639ef30e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -197,14 +197,16 @@ public class Genotype { if ( alleles == null ) return; if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0"); - int nNoCalls = 0; + // int nNoCalls = 0; for ( Allele allele : alleles ) { if ( allele == null ) throw new IllegalArgumentException("BUG: allele cannot be null in Genotype"); - nNoCalls += allele.isNoCall() ? 1 : 0; + // nNoCalls += allele.isNoCall() ? 1 : 0; } - if ( nNoCalls > 0 && nNoCalls != alleles.size() ) - throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this); + + // Technically, the spec does allow for the below case so this is not an illegal state + //if ( nNoCalls > 0 && nNoCalls != alleles.size() ) + // throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this); } public String getGenotypeString() { diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java index 9026f09d6..a5060e54e 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java @@ -252,6 +252,23 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertEquals(vc.getSampleNames().size(), 0); } + @Test + public void testCreatingPartiallyCalledGenotype() { + List alleles = Arrays.asList(Aref, C); + Genotype g = new Genotype("foo", Arrays.asList(C, Allele.NO_CALL), 10); + VariantContext vc = new VariantContext("test", snpLoc, snpLocStart, snpLocStop, alleles, Arrays.asList(g)); + + Assert.assertTrue(vc.isSNP()); + Assert.assertEquals(vc.getNAlleles(), 2); + Assert.assertTrue(vc.hasGenotypes()); + Assert.assertFalse(vc.isMonomorphic()); + Assert.assertTrue(vc.isPolymorphic()); + Assert.assertEquals(vc.getGenotype("foo"), g); + Assert.assertEquals(vc.getChromosomeCount(), 2); // we know that there are 2 chromosomes, even though one isn't called + Assert.assertEquals(vc.getChromosomeCount(Aref), 0); + Assert.assertEquals(vc.getChromosomeCount(C), 1); + } + @Test (expectedExceptions = IllegalArgumentException.class) public void testBadConstructorArgs1() { new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATCref)); From d8d73fe4f2ffef079edd495066ae499de14a919a Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 19 Oct 2011 15:11:13 -0400 Subject: [PATCH 5/7] Treat ./X genotypes as MIXED so that isHet, isHom, etc. still return the expected and correct values. Added docs to these accessors with contracts explicitly mentioned. Fixed case where NPE could be thrown. --- .../sting/utils/variantcontext/Genotype.java | 61 +++++++++++++++---- .../utils/variantcontext/VariantContext.java | 11 +++- .../VariantContextUnitTest.java | 6 ++ 3 files changed, 65 insertions(+), 13 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index 5639ef30e..e2e44e2b9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -108,14 +108,19 @@ public class Genotype { /** * @return the ploidy of this genotype */ - public int getPloidy() { return alleles.size(); } + public int getPloidy() { + if ( alleles == null ) + throw new ReviewedStingException("Requesting ploidy for an UNAVAILABLE genotype"); + return alleles.size(); + } public enum Type { NO_CALL, HOM_REF, HET, HOM_VAR, - UNAVAILABLE + UNAVAILABLE, + MIXED // no-call and call in the same genotype } public Type getType() { @@ -129,36 +134,68 @@ public class Genotype { if ( alleles == null ) return Type.UNAVAILABLE; - Allele firstAllele = alleles.get(0); + boolean sawNoCall = false, sawMultipleAlleles = false; + Allele observedAllele = null; - if ( firstAllele.isNoCall() ) { - return Type.NO_CALL; + for ( Allele allele : alleles ) { + if ( allele.isNoCall() ) + sawNoCall = true; + else if ( observedAllele == null ) + observedAllele = allele; + else if ( !allele.equals(observedAllele) ) + sawMultipleAlleles = true; } - for (Allele a : alleles) { - if ( ! firstAllele.equals(a) ) - return Type.HET; + if ( sawNoCall ) { + if ( observedAllele == null ) + return Type.NO_CALL; + return Type.MIXED; } - return firstAllele.isReference() ? Type.HOM_REF : Type.HOM_VAR; + + if ( observedAllele == null ) + throw new ReviewedStingException("BUG: there are no alleles present in this genotype but the alleles list is not null"); + + return sawMultipleAlleles ? Type.HET : observedAllele.isReference() ? Type.HOM_REF : Type.HOM_VAR; } /** - * @return true if all observed alleles are the same (regardless of whether they are ref or alt) + * @return true if all observed alleles are the same (regardless of whether they are ref or alt); if any alleles are no-calls, this method will return false. */ public boolean isHom() { return isHomRef() || isHomVar(); } + + /** + * @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false. + */ public boolean isHomRef() { return getType() == Type.HOM_REF; } + + /** + * @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false. + */ public boolean isHomVar() { return getType() == Type.HOM_VAR; } /** - * @return true if we're het (observed alleles differ) + * @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false. */ public boolean isHet() { return getType() == Type.HET; } /** - * @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF) + * @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false. */ public boolean isNoCall() { return getType() == Type.NO_CALL; } + + /** + * @return true if this genotype is comprised of any alleles that are not no-calls (even if some are). + */ public boolean isCalled() { return getType() != Type.NO_CALL && getType() != Type.UNAVAILABLE; } + + /** + * @return true if this genotype is comprised of both calls and no-calls. + */ + public boolean isMixed() { return getType() == Type.MIXED; } + + /** + * @return true if the type of this genotype is set. + */ public boolean isAvailable() { return getType() != Type.UNAVAILABLE; } // diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 14a680af4..f52a7087b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -998,7 +998,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati else if ( g.isHomVar() ) genotypeCounts[Genotype.Type.HOM_VAR.ordinal()]++; else - throw new IllegalStateException("Genotype of unknown type: " + g); + genotypeCounts[Genotype.Type.MIXED.ordinal()]++; } } } @@ -1042,6 +1042,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati return genotypeCounts[Genotype.Type.HOM_VAR.ordinal()]; } + /** + * Genotype-specific functions -- how many mixed calls are there in the genotypes? + * + * @return number of mixed calls + */ + public int getMixedCount() { + return genotypeCounts[Genotype.Type.MIXED.ordinal()]; + } + // --------------------------------------------------------------------------------------------------------- // // validation: extra-strict validation routines for paranoid users diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java index a5060e54e..a4d78b637 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java @@ -267,6 +267,12 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertEquals(vc.getChromosomeCount(), 2); // we know that there are 2 chromosomes, even though one isn't called Assert.assertEquals(vc.getChromosomeCount(Aref), 0); Assert.assertEquals(vc.getChromosomeCount(C), 1); + Assert.assertFalse(vc.getGenotype("foo").isHet()); + Assert.assertFalse(vc.getGenotype("foo").isHom()); + Assert.assertFalse(vc.getGenotype("foo").isNoCall()); + Assert.assertFalse(vc.getGenotype("foo").isHom()); + Assert.assertTrue(vc.getGenotype("foo").isMixed()); + Assert.assertEquals(vc.getGenotype("foo").getType(), Genotype.Type.MIXED); } @Test (expectedExceptions = IllegalArgumentException.class) From cd8a6d62bba320d276455d93b73290a61d6a203c Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Wed, 19 Oct 2011 17:42:37 -0400 Subject: [PATCH 6/7] You know how the wiki has a big section on commiting local changes to BRANCHES of the repository you clone it from? Yeah. It sucks if you don't do that. This commit contains: - IntronLossGenotyper is brought into its current incarnation - A couple of simple new filters (ReadName is super useful for debugging, MateUnmapped is useful for selecting out reads that may have a relevant unaligned mate) - RFA now matches my current local repository. It's in flux since I'm transitioning to the new traversal type. + the triggering read stash pilot required me to change the scope of some of the variables in the ReadClipping code, private -> protected. Those are all the changes there. - MendelianViolation restored to its former glory (and an annotator module that uses the likelihood calculation has been added) + use this rather than a hard GQ threshold if you're doing MV analyses. - Some miscellaneous QScripts --- .../sting/gatk/filters/ReadNameFilter.java | 23 ++++++++ .../walkers/annotator/MVLikelihoodRatio.java | 58 +++++++++++++++++++ .../sting/utils/MendelianViolation.java | 42 ++++++++++++++ 3 files changed, 123 insertions(+) create mode 100755 public/java/src/org/broadinstitute/sting/gatk/filters/ReadNameFilter.java create mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadNameFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadNameFilter.java new file mode 100755 index 000000000..a56af56d1 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadNameFilter.java @@ -0,0 +1,23 @@ +package org.broadinstitute.sting.gatk.filters; + +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Argument; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 9/19/11 + * Time: 4:09 PM + * To change this template use File | Settings | File Templates. + */ +public class ReadNameFilter extends ReadFilter { + @Argument(fullName = "readName", shortName = "rn", doc="Filter out all reads except those with this read name", required=true) + private String readName; + + public boolean filterOut(final SAMRecord rec) { + return ! rec.getReadName().equals(readName); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java new file mode 100755 index 000000000..e0a2329f8 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java @@ -0,0 +1,58 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.MendelianViolation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFilterHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 9/14/11 + * Time: 12:24 PM + * To change this template use File | Settings | File Templates. + */ +public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation { + + private MendelianViolation mendelianViolation = null; + + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + if ( mendelianViolation == null ) { + if ( walker instanceof VariantAnnotator ) { + mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).familyStr, ((VariantAnnotator)walker).minGenotypeQualityP ); + } + else { + throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator"); + } + } + + Map toRet = new HashMap(1); + boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) && + vc.hasGenotype(mendelianViolation.getSampleDad()) && + vc.hasGenotype(mendelianViolation.getSampleMom()); + if ( hasAppropriateGenotypes ) + toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc)); + + return toRet; + } + + // return the descriptions used for the VCF INFO meta field + public List getKeyNames() { return Arrays.asList("MVLR"); } + + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java index e62a7e512..cf45dab79 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java +++ b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java @@ -26,6 +26,9 @@ public class MendelianViolation { double minGenotypeQuality; + static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 }; + static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 }; + private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)"); public String getSampleMom() { @@ -134,4 +137,43 @@ public class MendelianViolation { return false; return true; } + + /** + * @return the likelihood ratio for a mendelian violation + */ + public double violationLikelihoodRatio(VariantContext vc) { + double[] logLikAssignments = new double[27]; + // the matrix to set up is + // MOM DAD CHILD + // |- AA + // AA AA | AB + // |- BB + // |- AA + // AA AB | AB + // |- BB + // etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs + double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector(); + double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector(); + double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector(); + int offset = 0; + for ( int oMom = 0; oMom < 3; oMom++ ) { + for ( int oDad = 0; oDad < 3; oDad++ ) { + for ( int oChild = 0; oChild < 3; oChild ++ ) { + logLikAssignments[offset++] = momGL[oMom] + dadGL[oDad] + childGL[oChild]; + } + } + } + double[] mvLiks = new double[12]; + double[] nonMVLiks = new double[15]; + for ( int i = 0; i < 12; i ++ ) { + mvLiks[i] = logLikAssignments[mvOffsets[i]]; + } + + for ( int i = 0; i < 15; i++) { + nonMVLiks[i] = logLikAssignments[nonMVOffsets[i]]; + } + + return MathUtils.log10sumLog10(mvLiks) - MathUtils.log10sumLog10(nonMVLiks); + } + }