Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
558a7a81f0
|
|
@ -0,0 +1,23 @@
|
|||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 9/19/11
|
||||
* Time: 4:09 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReadNameFilter extends ReadFilter {
|
||||
@Argument(fullName = "readName", shortName = "rn", doc="Filter out all reads except those with this read name", required=true)
|
||||
private String readName;
|
||||
|
||||
public boolean filterOut(final SAMRecord rec) {
|
||||
return ! rec.getReadName().equals(readName);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFilterHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 9/14/11
|
||||
* Time: 12:24 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
private MendelianViolation mendelianViolation = null;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( mendelianViolation == null ) {
|
||||
if ( walker instanceof VariantAnnotator ) {
|
||||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).familyStr, ((VariantAnnotator)walker).minGenotypeQualityP );
|
||||
}
|
||||
else {
|
||||
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator");
|
||||
}
|
||||
}
|
||||
|
||||
Map<String,Object> toRet = new HashMap<String,Object>(1);
|
||||
boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) &&
|
||||
vc.hasGenotype(mendelianViolation.getSampleDad()) &&
|
||||
vc.hasGenotype(mendelianViolation.getSampleMom());
|
||||
if ( hasAppropriateGenotypes )
|
||||
toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc));
|
||||
|
||||
return toRet;
|
||||
}
|
||||
|
||||
// return the descriptions used for the VCF INFO meta field
|
||||
public List<String> getKeyNames() { return Arrays.asList("MVLR"); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
|
||||
}
|
||||
|
|
@ -26,6 +26,9 @@ public class MendelianViolation {
|
|||
|
||||
double minGenotypeQuality;
|
||||
|
||||
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
|
||||
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
|
||||
|
||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||
|
||||
public String getSampleMom() {
|
||||
|
|
@ -134,4 +137,43 @@ public class MendelianViolation {
|
|||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the likelihood ratio for a mendelian violation
|
||||
*/
|
||||
public double violationLikelihoodRatio(VariantContext vc) {
|
||||
double[] logLikAssignments = new double[27];
|
||||
// the matrix to set up is
|
||||
// MOM DAD CHILD
|
||||
// |- AA
|
||||
// AA AA | AB
|
||||
// |- BB
|
||||
// |- AA
|
||||
// AA AB | AB
|
||||
// |- BB
|
||||
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
|
||||
double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector();
|
||||
double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector();
|
||||
double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector();
|
||||
int offset = 0;
|
||||
for ( int oMom = 0; oMom < 3; oMom++ ) {
|
||||
for ( int oDad = 0; oDad < 3; oDad++ ) {
|
||||
for ( int oChild = 0; oChild < 3; oChild ++ ) {
|
||||
logLikAssignments[offset++] = momGL[oMom] + dadGL[oDad] + childGL[oChild];
|
||||
}
|
||||
}
|
||||
}
|
||||
double[] mvLiks = new double[12];
|
||||
double[] nonMVLiks = new double[15];
|
||||
for ( int i = 0; i < 12; i ++ ) {
|
||||
mvLiks[i] = logLikAssignments[mvOffsets[i]];
|
||||
}
|
||||
|
||||
for ( int i = 0; i < 15; i++) {
|
||||
nonMVLiks[i] = logLikAssignments[nonMVOffsets[i]];
|
||||
}
|
||||
|
||||
return MathUtils.log10sumLog10(mvLiks) - MathUtils.log10sumLog10(nonMVLiks);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -108,14 +108,19 @@ public class Genotype {
|
|||
/**
|
||||
* @return the ploidy of this genotype
|
||||
*/
|
||||
public int getPloidy() { return alleles.size(); }
|
||||
public int getPloidy() {
|
||||
if ( alleles == null )
|
||||
throw new ReviewedStingException("Requesting ploidy for an UNAVAILABLE genotype");
|
||||
return alleles.size();
|
||||
}
|
||||
|
||||
public enum Type {
|
||||
NO_CALL,
|
||||
HOM_REF,
|
||||
HET,
|
||||
HOM_VAR,
|
||||
UNAVAILABLE
|
||||
UNAVAILABLE,
|
||||
MIXED // no-call and call in the same genotype
|
||||
}
|
||||
|
||||
public Type getType() {
|
||||
|
|
@ -129,36 +134,68 @@ public class Genotype {
|
|||
if ( alleles == null )
|
||||
return Type.UNAVAILABLE;
|
||||
|
||||
Allele firstAllele = alleles.get(0);
|
||||
boolean sawNoCall = false, sawMultipleAlleles = false;
|
||||
Allele observedAllele = null;
|
||||
|
||||
if ( firstAllele.isNoCall() ) {
|
||||
return Type.NO_CALL;
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( allele.isNoCall() )
|
||||
sawNoCall = true;
|
||||
else if ( observedAllele == null )
|
||||
observedAllele = allele;
|
||||
else if ( !allele.equals(observedAllele) )
|
||||
sawMultipleAlleles = true;
|
||||
}
|
||||
|
||||
for (Allele a : alleles) {
|
||||
if ( ! firstAllele.equals(a) )
|
||||
return Type.HET;
|
||||
if ( sawNoCall ) {
|
||||
if ( observedAllele == null )
|
||||
return Type.NO_CALL;
|
||||
return Type.MIXED;
|
||||
}
|
||||
return firstAllele.isReference() ? Type.HOM_REF : Type.HOM_VAR;
|
||||
|
||||
if ( observedAllele == null )
|
||||
throw new ReviewedStingException("BUG: there are no alleles present in this genotype but the alleles list is not null");
|
||||
|
||||
return sawMultipleAlleles ? Type.HET : observedAllele.isReference() ? Type.HOM_REF : Type.HOM_VAR;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if all observed alleles are the same (regardless of whether they are ref or alt)
|
||||
* @return true if all observed alleles are the same (regardless of whether they are ref or alt); if any alleles are no-calls, this method will return false.
|
||||
*/
|
||||
public boolean isHom() { return isHomRef() || isHomVar(); }
|
||||
|
||||
/**
|
||||
* @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false.
|
||||
*/
|
||||
public boolean isHomRef() { return getType() == Type.HOM_REF; }
|
||||
|
||||
/**
|
||||
* @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false.
|
||||
*/
|
||||
public boolean isHomVar() { return getType() == Type.HOM_VAR; }
|
||||
|
||||
/**
|
||||
* @return true if we're het (observed alleles differ)
|
||||
* @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false.
|
||||
*/
|
||||
public boolean isHet() { return getType() == Type.HET; }
|
||||
|
||||
/**
|
||||
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF)
|
||||
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false.
|
||||
*/
|
||||
public boolean isNoCall() { return getType() == Type.NO_CALL; }
|
||||
|
||||
/**
|
||||
* @return true if this genotype is comprised of any alleles that are not no-calls (even if some are).
|
||||
*/
|
||||
public boolean isCalled() { return getType() != Type.NO_CALL && getType() != Type.UNAVAILABLE; }
|
||||
|
||||
/**
|
||||
* @return true if this genotype is comprised of both calls and no-calls.
|
||||
*/
|
||||
public boolean isMixed() { return getType() == Type.MIXED; }
|
||||
|
||||
/**
|
||||
* @return true if the type of this genotype is set.
|
||||
*/
|
||||
public boolean isAvailable() { return getType() != Type.UNAVAILABLE; }
|
||||
|
||||
//
|
||||
|
|
@ -197,14 +234,16 @@ public class Genotype {
|
|||
if ( alleles == null ) return;
|
||||
if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0");
|
||||
|
||||
int nNoCalls = 0;
|
||||
// int nNoCalls = 0;
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( allele == null )
|
||||
throw new IllegalArgumentException("BUG: allele cannot be null in Genotype");
|
||||
nNoCalls += allele.isNoCall() ? 1 : 0;
|
||||
// nNoCalls += allele.isNoCall() ? 1 : 0;
|
||||
}
|
||||
if ( nNoCalls > 0 && nNoCalls != alleles.size() )
|
||||
throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this);
|
||||
|
||||
// Technically, the spec does allow for the below case so this is not an illegal state
|
||||
//if ( nNoCalls > 0 && nNoCalls != alleles.size() )
|
||||
// throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this);
|
||||
}
|
||||
|
||||
public String getGenotypeString() {
|
||||
|
|
|
|||
|
|
@ -40,19 +40,7 @@ public class MutableGenotype extends Genotype {
|
|||
*/
|
||||
public void setAlleles(List<Allele> alleles) {
|
||||
this.alleles = new ArrayList<Allele>(alleles);
|
||||
|
||||
// todo -- add validation checking here
|
||||
|
||||
if ( alleles == null ) throw new IllegalArgumentException("BUG: alleles cannot be null in setAlleles");
|
||||
if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0 in setAlleles");
|
||||
|
||||
int nNoCalls = 0;
|
||||
for ( Allele allele : alleles ) { nNoCalls += allele.isNoCall() ? 1 : 0; }
|
||||
if ( nNoCalls > 0 && nNoCalls != alleles.size() )
|
||||
throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this);
|
||||
|
||||
for ( Allele allele : alleles )
|
||||
if ( allele == null ) throw new IllegalArgumentException("BUG: Cannot add a null allele to a genotype");
|
||||
validate();
|
||||
}
|
||||
|
||||
public void setPhase(boolean isPhased) {
|
||||
|
|
|
|||
|
|
@ -998,7 +998,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
else if ( g.isHomVar() )
|
||||
genotypeCounts[Genotype.Type.HOM_VAR.ordinal()]++;
|
||||
else
|
||||
throw new IllegalStateException("Genotype of unknown type: " + g);
|
||||
genotypeCounts[Genotype.Type.MIXED.ordinal()]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1042,6 +1042,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
return genotypeCounts[Genotype.Type.HOM_VAR.ordinal()];
|
||||
}
|
||||
|
||||
/**
|
||||
* Genotype-specific functions -- how many mixed calls are there in the genotypes?
|
||||
*
|
||||
* @return number of mixed calls
|
||||
*/
|
||||
public int getMixedCount() {
|
||||
return genotypeCounts[Genotype.Type.MIXED.ordinal()];
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// validation: extra-strict validation routines for paranoid users
|
||||
|
|
@ -1357,10 +1366,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
throw new IllegalArgumentException("Duplicate allele added to VariantContext: " + a);
|
||||
}
|
||||
|
||||
// deal with the case where the first allele isn't the reference
|
||||
// deal with the case where the first allele isn't the reference
|
||||
if ( a.isReference() ) {
|
||||
if ( sawRef )
|
||||
throw new IllegalArgumentException("Alleles for a VariantContext must contain a single reference allele: " + alleles);
|
||||
throw new IllegalArgumentException("Alleles for a VariantContext must contain at most one reference allele: " + alleles);
|
||||
alleleList.add(0, a);
|
||||
sawRef = true;
|
||||
}
|
||||
|
|
@ -1372,7 +1381,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
throw new IllegalArgumentException("Cannot create a VariantContext with an empty allele list");
|
||||
|
||||
if ( alleleList.get(0).isNonReference() )
|
||||
throw new IllegalArgumentException("Alleles for a VariantContext must contain a single reference allele: " + alleles);
|
||||
throw new IllegalArgumentException("Alleles for a VariantContext must contain at least one reference allele: " + alleles);
|
||||
|
||||
return alleleList;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -252,6 +252,29 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
Assert.assertEquals(vc.getSampleNames().size(), 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreatingPartiallyCalledGenotype() {
|
||||
List<Allele> alleles = Arrays.asList(Aref, C);
|
||||
Genotype g = new Genotype("foo", Arrays.asList(C, Allele.NO_CALL), 10);
|
||||
VariantContext vc = new VariantContext("test", snpLoc, snpLocStart, snpLocStop, alleles, Arrays.asList(g));
|
||||
|
||||
Assert.assertTrue(vc.isSNP());
|
||||
Assert.assertEquals(vc.getNAlleles(), 2);
|
||||
Assert.assertTrue(vc.hasGenotypes());
|
||||
Assert.assertFalse(vc.isMonomorphic());
|
||||
Assert.assertTrue(vc.isPolymorphic());
|
||||
Assert.assertEquals(vc.getGenotype("foo"), g);
|
||||
Assert.assertEquals(vc.getChromosomeCount(), 2); // we know that there are 2 chromosomes, even though one isn't called
|
||||
Assert.assertEquals(vc.getChromosomeCount(Aref), 0);
|
||||
Assert.assertEquals(vc.getChromosomeCount(C), 1);
|
||||
Assert.assertFalse(vc.getGenotype("foo").isHet());
|
||||
Assert.assertFalse(vc.getGenotype("foo").isHom());
|
||||
Assert.assertFalse(vc.getGenotype("foo").isNoCall());
|
||||
Assert.assertFalse(vc.getGenotype("foo").isHom());
|
||||
Assert.assertTrue(vc.getGenotype("foo").isMixed());
|
||||
Assert.assertEquals(vc.getGenotype("foo").getType(), Genotype.Type.MIXED);
|
||||
}
|
||||
|
||||
@Test (expectedExceptions = IllegalArgumentException.class)
|
||||
public void testBadConstructorArgs1() {
|
||||
new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATCref));
|
||||
|
|
|
|||
Loading…
Reference in New Issue