diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadNameFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadNameFilter.java new file mode 100755 index 000000000..a56af56d1 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadNameFilter.java @@ -0,0 +1,23 @@ +package org.broadinstitute.sting.gatk.filters; + +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Argument; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 9/19/11 + * Time: 4:09 PM + * To change this template use File | Settings | File Templates. + */ +public class ReadNameFilter extends ReadFilter { + @Argument(fullName = "readName", shortName = "rn", doc="Filter out all reads except those with this read name", required=true) + private String readName; + + public boolean filterOut(final SAMRecord rec) { + return ! rec.getReadName().equals(readName); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java new file mode 100755 index 000000000..e0a2329f8 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java @@ -0,0 +1,58 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.MendelianViolation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFilterHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 9/14/11 + * Time: 12:24 PM + * To change this template use File | Settings | File Templates. + */ +public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation { + + private MendelianViolation mendelianViolation = null; + + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + if ( mendelianViolation == null ) { + if ( walker instanceof VariantAnnotator ) { + mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).familyStr, ((VariantAnnotator)walker).minGenotypeQualityP ); + } + else { + throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator"); + } + } + + Map toRet = new HashMap(1); + boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) && + vc.hasGenotype(mendelianViolation.getSampleDad()) && + vc.hasGenotype(mendelianViolation.getSampleMom()); + if ( hasAppropriateGenotypes ) + toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc)); + + return toRet; + } + + // return the descriptions used for the VCF INFO meta field + public List getKeyNames() { return Arrays.asList("MVLR"); } + + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java index e62a7e512..cf45dab79 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java +++ b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java @@ -26,6 +26,9 @@ public class MendelianViolation { double minGenotypeQuality; + static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 }; + static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 }; + private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)"); public String getSampleMom() { @@ -134,4 +137,43 @@ public class MendelianViolation { return false; return true; } + + /** + * @return the likelihood ratio for a mendelian violation + */ + public double violationLikelihoodRatio(VariantContext vc) { + double[] logLikAssignments = new double[27]; + // the matrix to set up is + // MOM DAD CHILD + // |- AA + // AA AA | AB + // |- BB + // |- AA + // AA AB | AB + // |- BB + // etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs + double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector(); + double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector(); + double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector(); + int offset = 0; + for ( int oMom = 0; oMom < 3; oMom++ ) { + for ( int oDad = 0; oDad < 3; oDad++ ) { + for ( int oChild = 0; oChild < 3; oChild ++ ) { + logLikAssignments[offset++] = momGL[oMom] + dadGL[oDad] + childGL[oChild]; + } + } + } + double[] mvLiks = new double[12]; + double[] nonMVLiks = new double[15]; + for ( int i = 0; i < 12; i ++ ) { + mvLiks[i] = logLikAssignments[mvOffsets[i]]; + } + + for ( int i = 0; i < 15; i++) { + nonMVLiks[i] = logLikAssignments[nonMVOffsets[i]]; + } + + return MathUtils.log10sumLog10(mvLiks) - MathUtils.log10sumLog10(nonMVLiks); + } + } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index 7ab3f81f0..e2e44e2b9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -108,14 +108,19 @@ public class Genotype { /** * @return the ploidy of this genotype */ - public int getPloidy() { return alleles.size(); } + public int getPloidy() { + if ( alleles == null ) + throw new ReviewedStingException("Requesting ploidy for an UNAVAILABLE genotype"); + return alleles.size(); + } public enum Type { NO_CALL, HOM_REF, HET, HOM_VAR, - UNAVAILABLE + UNAVAILABLE, + MIXED // no-call and call in the same genotype } public Type getType() { @@ -129,36 +134,68 @@ public class Genotype { if ( alleles == null ) return Type.UNAVAILABLE; - Allele firstAllele = alleles.get(0); + boolean sawNoCall = false, sawMultipleAlleles = false; + Allele observedAllele = null; - if ( firstAllele.isNoCall() ) { - return Type.NO_CALL; + for ( Allele allele : alleles ) { + if ( allele.isNoCall() ) + sawNoCall = true; + else if ( observedAllele == null ) + observedAllele = allele; + else if ( !allele.equals(observedAllele) ) + sawMultipleAlleles = true; } - for (Allele a : alleles) { - if ( ! firstAllele.equals(a) ) - return Type.HET; + if ( sawNoCall ) { + if ( observedAllele == null ) + return Type.NO_CALL; + return Type.MIXED; } - return firstAllele.isReference() ? Type.HOM_REF : Type.HOM_VAR; + + if ( observedAllele == null ) + throw new ReviewedStingException("BUG: there are no alleles present in this genotype but the alleles list is not null"); + + return sawMultipleAlleles ? Type.HET : observedAllele.isReference() ? Type.HOM_REF : Type.HOM_VAR; } /** - * @return true if all observed alleles are the same (regardless of whether they are ref or alt) + * @return true if all observed alleles are the same (regardless of whether they are ref or alt); if any alleles are no-calls, this method will return false. */ public boolean isHom() { return isHomRef() || isHomVar(); } + + /** + * @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false. + */ public boolean isHomRef() { return getType() == Type.HOM_REF; } + + /** + * @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false. + */ public boolean isHomVar() { return getType() == Type.HOM_VAR; } /** - * @return true if we're het (observed alleles differ) + * @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false. */ public boolean isHet() { return getType() == Type.HET; } /** - * @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF) + * @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false. */ public boolean isNoCall() { return getType() == Type.NO_CALL; } + + /** + * @return true if this genotype is comprised of any alleles that are not no-calls (even if some are). + */ public boolean isCalled() { return getType() != Type.NO_CALL && getType() != Type.UNAVAILABLE; } + + /** + * @return true if this genotype is comprised of both calls and no-calls. + */ + public boolean isMixed() { return getType() == Type.MIXED; } + + /** + * @return true if the type of this genotype is set. + */ public boolean isAvailable() { return getType() != Type.UNAVAILABLE; } // @@ -197,14 +234,16 @@ public class Genotype { if ( alleles == null ) return; if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0"); - int nNoCalls = 0; + // int nNoCalls = 0; for ( Allele allele : alleles ) { if ( allele == null ) throw new IllegalArgumentException("BUG: allele cannot be null in Genotype"); - nNoCalls += allele.isNoCall() ? 1 : 0; + // nNoCalls += allele.isNoCall() ? 1 : 0; } - if ( nNoCalls > 0 && nNoCalls != alleles.size() ) - throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this); + + // Technically, the spec does allow for the below case so this is not an illegal state + //if ( nNoCalls > 0 && nNoCalls != alleles.size() ) + // throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this); } public String getGenotypeString() { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableGenotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableGenotype.java index 0cd684cb6..14419a2a0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableGenotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableGenotype.java @@ -40,19 +40,7 @@ public class MutableGenotype extends Genotype { */ public void setAlleles(List alleles) { this.alleles = new ArrayList(alleles); - - // todo -- add validation checking here - - if ( alleles == null ) throw new IllegalArgumentException("BUG: alleles cannot be null in setAlleles"); - if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0 in setAlleles"); - - int nNoCalls = 0; - for ( Allele allele : alleles ) { nNoCalls += allele.isNoCall() ? 1 : 0; } - if ( nNoCalls > 0 && nNoCalls != alleles.size() ) - throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this); - - for ( Allele allele : alleles ) - if ( allele == null ) throw new IllegalArgumentException("BUG: Cannot add a null allele to a genotype"); + validate(); } public void setPhase(boolean isPhased) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index eac6d70b5..f52a7087b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -998,7 +998,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati else if ( g.isHomVar() ) genotypeCounts[Genotype.Type.HOM_VAR.ordinal()]++; else - throw new IllegalStateException("Genotype of unknown type: " + g); + genotypeCounts[Genotype.Type.MIXED.ordinal()]++; } } } @@ -1042,6 +1042,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati return genotypeCounts[Genotype.Type.HOM_VAR.ordinal()]; } + /** + * Genotype-specific functions -- how many mixed calls are there in the genotypes? + * + * @return number of mixed calls + */ + public int getMixedCount() { + return genotypeCounts[Genotype.Type.MIXED.ordinal()]; + } + // --------------------------------------------------------------------------------------------------------- // // validation: extra-strict validation routines for paranoid users @@ -1357,10 +1366,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati throw new IllegalArgumentException("Duplicate allele added to VariantContext: " + a); } - // deal with the case where the first allele isn't the reference + // deal with the case where the first allele isn't the reference if ( a.isReference() ) { if ( sawRef ) - throw new IllegalArgumentException("Alleles for a VariantContext must contain a single reference allele: " + alleles); + throw new IllegalArgumentException("Alleles for a VariantContext must contain at most one reference allele: " + alleles); alleleList.add(0, a); sawRef = true; } @@ -1372,7 +1381,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati throw new IllegalArgumentException("Cannot create a VariantContext with an empty allele list"); if ( alleleList.get(0).isNonReference() ) - throw new IllegalArgumentException("Alleles for a VariantContext must contain a single reference allele: " + alleles); + throw new IllegalArgumentException("Alleles for a VariantContext must contain at least one reference allele: " + alleles); return alleleList; } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java index 9026f09d6..a4d78b637 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java @@ -252,6 +252,29 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertEquals(vc.getSampleNames().size(), 0); } + @Test + public void testCreatingPartiallyCalledGenotype() { + List alleles = Arrays.asList(Aref, C); + Genotype g = new Genotype("foo", Arrays.asList(C, Allele.NO_CALL), 10); + VariantContext vc = new VariantContext("test", snpLoc, snpLocStart, snpLocStop, alleles, Arrays.asList(g)); + + Assert.assertTrue(vc.isSNP()); + Assert.assertEquals(vc.getNAlleles(), 2); + Assert.assertTrue(vc.hasGenotypes()); + Assert.assertFalse(vc.isMonomorphic()); + Assert.assertTrue(vc.isPolymorphic()); + Assert.assertEquals(vc.getGenotype("foo"), g); + Assert.assertEquals(vc.getChromosomeCount(), 2); // we know that there are 2 chromosomes, even though one isn't called + Assert.assertEquals(vc.getChromosomeCount(Aref), 0); + Assert.assertEquals(vc.getChromosomeCount(C), 1); + Assert.assertFalse(vc.getGenotype("foo").isHet()); + Assert.assertFalse(vc.getGenotype("foo").isHom()); + Assert.assertFalse(vc.getGenotype("foo").isNoCall()); + Assert.assertFalse(vc.getGenotype("foo").isHom()); + Assert.assertTrue(vc.getGenotype("foo").isMixed()); + Assert.assertEquals(vc.getGenotype("foo").getType(), Genotype.Type.MIXED); + } + @Test (expectedExceptions = IllegalArgumentException.class) public void testBadConstructorArgs1() { new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATCref));