Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Mauricio Carneiro 2011-10-20 16:23:32 -04:00
commit 558a7a81f0
7 changed files with 215 additions and 33 deletions

View File

@ -0,0 +1,23 @@
package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
/**
 * Read filter that keeps only the reads carrying one specific read name.
 *
 * The name to keep is supplied through the required command-line argument
 * {@code readName} (short name {@code rn}); every read whose name is not
 * exactly equal to that value is filtered out.
 *
 * User: chartl
 * Date: 9/19/11
 */
public class ReadNameFilter extends ReadFilter {
    @Argument(fullName = "readName", shortName = "rn", doc="Filter out all reads except those with this read name", required=true)
    private String readName;

    /**
     * Decides whether a read should be removed.
     *
     * @param rec the read to test
     * @return true (i.e. filter the read out) when the read's name is not equal to the requested read name
     */
    public boolean filterOut(final SAMRecord rec) {
        final String observedName = rec.getReadName();
        if ( observedName.equals(readName) )
            return false;   // exact match: keep this read
        return true;
    }
}

View File

@ -0,0 +1,58 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFilterHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Experimental INFO-field annotation that reports, under the key MVLR, the
 * Mendelian violation likelihood ratio computed by {@link MendelianViolation}
 * for the trio configured on the Variant Annotator.
 *
 * The annotation is only emitted when the child, the father and the mother all
 * have a genotype with likelihoods in the variant context; otherwise an empty
 * map is returned.
 *
 * User: chartl
 * Date: 9/14/11
 */
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation {

    // Lazily created on the first call to annotate(), from the walker's family string and minimum genotype quality
    private MendelianViolation mendelianViolation = null;

    /**
     * Computes the MVLR annotation for one variant context.
     *
     * @param tracker            not used by this annotation
     * @param walker             the calling walker; must be a VariantAnnotator, which supplies the family definition
     * @param ref                not used by this annotation
     * @param stratifiedContexts not used by this annotation
     * @param vc                 the variant context holding the trio genotypes
     * @return a map holding the single entry MVLR, or an empty map when any trio member
     *         is missing from vc or has no genotype likelihoods
     * @throws UserException if the walker is not a VariantAnnotator
     */
    public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
        if ( mendelianViolation == null ) {
            if ( walker instanceof VariantAnnotator ) {
                mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).familyStr, ((VariantAnnotator)walker).minGenotypeQualityP );
            }
            else {
                throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator");
            }
        }

        Map<String,Object> toRet = new HashMap<String,Object>(1);

        // The ratio is built from the genotype likelihoods of all three samples, so a sample that is
        // present but carries no likelihoods must be treated the same as a missing sample.
        boolean hasAppropriateGenotypes = hasGenotypeWithLikelihoods(vc, mendelianViolation.getSampleChild()) &&
                hasGenotypeWithLikelihoods(vc, mendelianViolation.getSampleDad()) &&
                hasGenotypeWithLikelihoods(vc, mendelianViolation.getSampleMom());

        if ( hasAppropriateGenotypes )
            toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc));

        return toRet;
    }

    /**
     * @return true if the sample has a genotype in vc and that genotype carries likelihoods
     */
    private static boolean hasGenotypeWithLikelihoods(final VariantContext vc, final String sample) {
        return vc.hasGenotype(sample) && vc.getGenotype(sample).hasLikelihoods();
    }

    // return the descriptions used for the VCF INFO meta field
    public List<String> getKeyNames() { return Arrays.asList("MVLR"); }

    public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
}

View File

@ -26,6 +26,9 @@ public class MendelianViolation {
double minGenotypeQuality;
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
public String getSampleMom() {
@ -134,4 +137,43 @@ public class MendelianViolation {
return false;
return true;
}
/**
 * Combines the genotype likelihoods of mother, father and child into one value per
 * (mom, dad, child) genotype assignment, splits those 27 values into the assignments
 * listed in mvOffsets and those listed in nonMVOffsets, and returns the difference of
 * MathUtils.log10sumLog10 over the two groups.
 *
 * Only entries 0..2 of each sample's likelihood vector are read.
 * NOTE(review): this presumably assumes a bi-allelic site (AA, AB, BB) -- confirm against callers.
 *
 * @param vc variant context containing genotypes with likelihoods for sampleMom, sampleDad and sampleChild
 * @return the likelihood ratio for a mendelian violation
 */
public double violationLikelihoodRatio(VariantContext vc) {
    // the matrix to set up is
    // MOM DAD CHILD
    //           |- AA
    // AA  AA    |  AB
    //           |- BB
    //           |- AA
    // AA  AB    |  AB
    //           |- BB
    // etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
    final double[] motherGLs = vc.getGenotype(sampleMom).getLikelihoods().getAsVector();
    final double[] fatherGLs = vc.getGenotype(sampleDad).getLikelihoods().getAsVector();
    final double[] offspringGLs = vc.getGenotype(sampleChild).getLikelihoods().getAsVector();

    // Leaf index is 9*mom + 3*dad + child: mother varies slowest, child fastest.
    final double[] jointLogLiks = new double[27];
    for ( int leaf = 0; leaf < jointLogLiks.length; leaf++ ) {
        final int momIdx = leaf / 9;
        final int dadIdx = (leaf / 3) % 3;
        final int childIdx = leaf % 3;
        jointLogLiks[leaf] = motherGLs[momIdx] + fatherGLs[dadIdx] + offspringGLs[childIdx];
    }

    // Gather the leaves that are mendelian violations ...
    final double[] violationLiks = new double[mvOffsets.length];
    for ( int k = 0; k < violationLiks.length; k++ ) {
        violationLiks[k] = jointLogLiks[mvOffsets[k]];
    }

    // ... and the leaves that are consistent with mendelian inheritance.
    final double[] consistentLiks = new double[nonMVOffsets.length];
    for ( int k = 0; k < consistentLiks.length; k++ ) {
        consistentLiks[k] = jointLogLiks[nonMVOffsets[k]];
    }

    final double violationTotal = MathUtils.log10sumLog10(violationLiks);
    final double consistentTotal = MathUtils.log10sumLog10(consistentLiks);
    return violationTotal - consistentTotal;
}
}

View File

@ -108,14 +108,19 @@ public class Genotype {
/**
* @return the ploidy of this genotype
*/
public int getPloidy() { return alleles.size(); }
public int getPloidy() {
    // Ploidy is the number of alleles held by this genotype
    if ( alleles != null )
        return alleles.size();

    // A null allele list marks an UNAVAILABLE genotype, for which ploidy is undefined
    throw new ReviewedStingException("Requesting ploidy for an UNAVAILABLE genotype");
}
public enum Type {
NO_CALL,
HOM_REF,
HET,
HOM_VAR,
UNAVAILABLE
UNAVAILABLE,
MIXED // no-call and call in the same genotype
}
public Type getType() {
@ -129,36 +134,68 @@ public class Genotype {
if ( alleles == null )
return Type.UNAVAILABLE;
Allele firstAllele = alleles.get(0);
boolean sawNoCall = false, sawMultipleAlleles = false;
Allele observedAllele = null;
if ( firstAllele.isNoCall() ) {
return Type.NO_CALL;
for ( Allele allele : alleles ) {
if ( allele.isNoCall() )
sawNoCall = true;
else if ( observedAllele == null )
observedAllele = allele;
else if ( !allele.equals(observedAllele) )
sawMultipleAlleles = true;
}
for (Allele a : alleles) {
if ( ! firstAllele.equals(a) )
return Type.HET;
if ( sawNoCall ) {
if ( observedAllele == null )
return Type.NO_CALL;
return Type.MIXED;
}
return firstAllele.isReference() ? Type.HOM_REF : Type.HOM_VAR;
if ( observedAllele == null )
throw new ReviewedStingException("BUG: there are no alleles present in this genotype but the alleles list is not null");
return sawMultipleAlleles ? Type.HET : observedAllele.isReference() ? Type.HOM_REF : Type.HOM_VAR;
}
/**
* @return true if all observed alleles are the same (regardless of whether they are ref or alt)
* @return true if all observed alleles are the same (regardless of whether they are ref or alt); if any alleles are no-calls, this method will return false.
*/
public boolean isHom() { return isHomRef() || isHomVar(); }
/**
* @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false.
*/
public boolean isHomRef() { return getType() == Type.HOM_REF; }
/**
* @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false.
*/
public boolean isHomVar() { return getType() == Type.HOM_VAR; }
/**
* @return true if we're het (observed alleles differ)
* @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false.
*/
public boolean isHet() { return getType() == Type.HET; }
/**
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF)
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false.
*/
public boolean isNoCall() { return getType() == Type.NO_CALL; }
/**
 * @return true if the type of this genotype is neither NO_CALL nor UNAVAILABLE, i.e. at least one
 *         of its alleles is an actual call (a MIXED genotype therefore counts as called).
 */
public boolean isCalled() {
    final Type type = getType();
    return !(type == Type.NO_CALL || type == Type.UNAVAILABLE);
}
/**
* @return true if this genotype is comprised of both calls and no-calls.
*/
public boolean isMixed() { return getType() == Type.MIXED; }
/**
* @return true if the type of this genotype is set.
*/
public boolean isAvailable() { return getType() != Type.UNAVAILABLE; }
//
@ -197,14 +234,16 @@ public class Genotype {
if ( alleles == null ) return;
if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0");
int nNoCalls = 0;
// int nNoCalls = 0;
for ( Allele allele : alleles ) {
if ( allele == null )
throw new IllegalArgumentException("BUG: allele cannot be null in Genotype");
nNoCalls += allele.isNoCall() ? 1 : 0;
// nNoCalls += allele.isNoCall() ? 1 : 0;
}
if ( nNoCalls > 0 && nNoCalls != alleles.size() )
throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this);
// Technically, the spec does allow for the below case so this is not an illegal state
//if ( nNoCalls > 0 && nNoCalls != alleles.size() )
// throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this);
}
public String getGenotypeString() {

View File

@ -40,19 +40,7 @@ public class MutableGenotype extends Genotype {
*/
public void setAlleles(List<Allele> alleles) {
this.alleles = new ArrayList<Allele>(alleles);
// todo -- add validation checking here
if ( alleles == null ) throw new IllegalArgumentException("BUG: alleles cannot be null in setAlleles");
if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0 in setAlleles");
int nNoCalls = 0;
for ( Allele allele : alleles ) { nNoCalls += allele.isNoCall() ? 1 : 0; }
if ( nNoCalls > 0 && nNoCalls != alleles.size() )
throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this);
for ( Allele allele : alleles )
if ( allele == null ) throw new IllegalArgumentException("BUG: Cannot add a null allele to a genotype");
validate();
}
public void setPhase(boolean isPhased) {

View File

@ -998,7 +998,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
else if ( g.isHomVar() )
genotypeCounts[Genotype.Type.HOM_VAR.ordinal()]++;
else
throw new IllegalStateException("Genotype of unknown type: " + g);
genotypeCounts[Genotype.Type.MIXED.ordinal()]++;
}
}
}
@ -1042,6 +1042,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati
return genotypeCounts[Genotype.Type.HOM_VAR.ordinal()];
}
/**
 * Genotype-specific functions -- how many mixed calls are there in the genotypes?
 *
 * Reads the tally stored in genotypeCounts at the position of Genotype.Type.MIXED.
 * NOTE(review): this reads genotypeCounts directly -- confirm that the counts are
 * guaranteed to have been computed before this accessor is called.
 *
 * @return number of mixed calls
 */
public int getMixedCount() {
    final int mixedSlot = Genotype.Type.MIXED.ordinal();
    return genotypeCounts[mixedSlot];
}
// ---------------------------------------------------------------------------------------------------------
//
// validation: extra-strict validation routines for paranoid users
@ -1357,10 +1366,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati
throw new IllegalArgumentException("Duplicate allele added to VariantContext: " + a);
}
// deal with the case where the first allele isn't the reference
// deal with the case where the first allele isn't the reference
if ( a.isReference() ) {
if ( sawRef )
throw new IllegalArgumentException("Alleles for a VariantContext must contain a single reference allele: " + alleles);
throw new IllegalArgumentException("Alleles for a VariantContext must contain at most one reference allele: " + alleles);
alleleList.add(0, a);
sawRef = true;
}
@ -1372,7 +1381,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
throw new IllegalArgumentException("Cannot create a VariantContext with an empty allele list");
if ( alleleList.get(0).isNonReference() )
throw new IllegalArgumentException("Alleles for a VariantContext must contain a single reference allele: " + alleles);
throw new IllegalArgumentException("Alleles for a VariantContext must contain at least one reference allele: " + alleles);
return alleleList;
}

View File

@ -252,6 +252,29 @@ public class VariantContextUnitTest extends BaseTest {
Assert.assertEquals(vc.getSampleNames().size(), 0);
}
/**
 * A genotype made of one called allele and one no-call (e.g. "C/.") must be accepted
 * and reported as MIXED: it is neither het, hom nor a no-call, but it is called and available.
 */
@Test
public void testCreatingPartiallyCalledGenotype() {
    List<Allele> alleles = Arrays.asList(Aref, C);
    Genotype g = new Genotype("foo", Arrays.asList(C, Allele.NO_CALL), 10);
    VariantContext vc = new VariantContext("test", snpLoc, snpLocStart, snpLocStop, alleles, Arrays.asList(g));

    // site-level properties
    Assert.assertTrue(vc.isSNP());
    Assert.assertEquals(vc.getNAlleles(), 2);
    Assert.assertTrue(vc.hasGenotypes());
    Assert.assertFalse(vc.isMonomorphic());
    Assert.assertTrue(vc.isPolymorphic());
    Assert.assertEquals(vc.getGenotype("foo"), g);
    Assert.assertEquals(vc.getChromosomeCount(), 2); // we know that there are 2 chromosomes, even though one isn't called
    Assert.assertEquals(vc.getChromosomeCount(Aref), 0);
    Assert.assertEquals(vc.getChromosomeCount(C), 1);

    // genotype-level classification
    final Genotype foo = vc.getGenotype("foo");
    Assert.assertFalse(foo.isHet());
    Assert.assertFalse(foo.isHom());
    Assert.assertFalse(foo.isNoCall());
    Assert.assertTrue(foo.isCalled());      // a mixed genotype still counts as called
    Assert.assertTrue(foo.isAvailable());
    Assert.assertTrue(foo.isMixed());
    Assert.assertEquals(foo.getType(), Genotype.Type.MIXED);
}
@Test (expectedExceptions = IllegalArgumentException.class)
public void testBadConstructorArgs1() {
new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATCref));