Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
558a7a81f0
|
|
@ -0,0 +1,23 @@
|
||||||
|
package org.broadinstitute.sting.gatk.filters;
|
||||||
|
|
||||||
|
import net.sf.samtools.Cigar;
|
||||||
|
import net.sf.samtools.CigarElement;
|
||||||
|
import net.sf.samtools.CigarOperator;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: chartl
|
||||||
|
* Date: 9/19/11
|
||||||
|
* Time: 4:09 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public class ReadNameFilter extends ReadFilter {
|
||||||
|
@Argument(fullName = "readName", shortName = "rn", doc="Filter out all reads except those with this read name", required=true)
|
||||||
|
private String readName;
|
||||||
|
|
||||||
|
public boolean filterOut(final SAMRecord rec) {
|
||||||
|
return ! rec.getReadName().equals(readName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,58 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFFilterHeaderLine;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: chartl
|
||||||
|
* Date: 9/14/11
|
||||||
|
* Time: 12:24 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||||
|
|
||||||
|
private MendelianViolation mendelianViolation = null;
|
||||||
|
|
||||||
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
if ( mendelianViolation == null ) {
|
||||||
|
if ( walker instanceof VariantAnnotator ) {
|
||||||
|
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).familyStr, ((VariantAnnotator)walker).minGenotypeQualityP );
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<String,Object> toRet = new HashMap<String,Object>(1);
|
||||||
|
boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) &&
|
||||||
|
vc.hasGenotype(mendelianViolation.getSampleDad()) &&
|
||||||
|
vc.hasGenotype(mendelianViolation.getSampleMom());
|
||||||
|
if ( hasAppropriateGenotypes )
|
||||||
|
toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc));
|
||||||
|
|
||||||
|
return toRet;
|
||||||
|
}
|
||||||
|
|
||||||
|
// return the descriptions used for the VCF INFO meta field
|
||||||
|
public List<String> getKeyNames() { return Arrays.asList("MVLR"); }
|
||||||
|
|
||||||
|
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
|
||||||
|
}
|
||||||
|
|
@ -26,6 +26,9 @@ public class MendelianViolation {
|
||||||
|
|
||||||
double minGenotypeQuality;
|
double minGenotypeQuality;
|
||||||
|
|
||||||
|
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
|
||||||
|
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
|
||||||
|
|
||||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||||
|
|
||||||
public String getSampleMom() {
|
public String getSampleMom() {
|
||||||
|
|
@ -134,4 +137,43 @@ public class MendelianViolation {
|
||||||
return false;
|
return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the likelihood ratio for a mendelian violation
|
||||||
|
*/
|
||||||
|
public double violationLikelihoodRatio(VariantContext vc) {
|
||||||
|
double[] logLikAssignments = new double[27];
|
||||||
|
// the matrix to set up is
|
||||||
|
// MOM DAD CHILD
|
||||||
|
// |- AA
|
||||||
|
// AA AA | AB
|
||||||
|
// |- BB
|
||||||
|
// |- AA
|
||||||
|
// AA AB | AB
|
||||||
|
// |- BB
|
||||||
|
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
|
||||||
|
double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector();
|
||||||
|
double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector();
|
||||||
|
double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector();
|
||||||
|
int offset = 0;
|
||||||
|
for ( int oMom = 0; oMom < 3; oMom++ ) {
|
||||||
|
for ( int oDad = 0; oDad < 3; oDad++ ) {
|
||||||
|
for ( int oChild = 0; oChild < 3; oChild ++ ) {
|
||||||
|
logLikAssignments[offset++] = momGL[oMom] + dadGL[oDad] + childGL[oChild];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
double[] mvLiks = new double[12];
|
||||||
|
double[] nonMVLiks = new double[15];
|
||||||
|
for ( int i = 0; i < 12; i ++ ) {
|
||||||
|
mvLiks[i] = logLikAssignments[mvOffsets[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( int i = 0; i < 15; i++) {
|
||||||
|
nonMVLiks[i] = logLikAssignments[nonMVOffsets[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
return MathUtils.log10sumLog10(mvLiks) - MathUtils.log10sumLog10(nonMVLiks);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -108,14 +108,19 @@ public class Genotype {
|
||||||
/**
|
/**
|
||||||
* @return the ploidy of this genotype
|
* @return the ploidy of this genotype
|
||||||
*/
|
*/
|
||||||
public int getPloidy() { return alleles.size(); }
|
public int getPloidy() {
|
||||||
|
if ( alleles == null )
|
||||||
|
throw new ReviewedStingException("Requesting ploidy for an UNAVAILABLE genotype");
|
||||||
|
return alleles.size();
|
||||||
|
}
|
||||||
|
|
||||||
public enum Type {
|
public enum Type {
|
||||||
NO_CALL,
|
NO_CALL,
|
||||||
HOM_REF,
|
HOM_REF,
|
||||||
HET,
|
HET,
|
||||||
HOM_VAR,
|
HOM_VAR,
|
||||||
UNAVAILABLE
|
UNAVAILABLE,
|
||||||
|
MIXED // no-call and call in the same genotype
|
||||||
}
|
}
|
||||||
|
|
||||||
public Type getType() {
|
public Type getType() {
|
||||||
|
|
@ -129,36 +134,68 @@ public class Genotype {
|
||||||
if ( alleles == null )
|
if ( alleles == null )
|
||||||
return Type.UNAVAILABLE;
|
return Type.UNAVAILABLE;
|
||||||
|
|
||||||
Allele firstAllele = alleles.get(0);
|
boolean sawNoCall = false, sawMultipleAlleles = false;
|
||||||
|
Allele observedAllele = null;
|
||||||
|
|
||||||
if ( firstAllele.isNoCall() ) {
|
for ( Allele allele : alleles ) {
|
||||||
return Type.NO_CALL;
|
if ( allele.isNoCall() )
|
||||||
|
sawNoCall = true;
|
||||||
|
else if ( observedAllele == null )
|
||||||
|
observedAllele = allele;
|
||||||
|
else if ( !allele.equals(observedAllele) )
|
||||||
|
sawMultipleAlleles = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (Allele a : alleles) {
|
if ( sawNoCall ) {
|
||||||
if ( ! firstAllele.equals(a) )
|
if ( observedAllele == null )
|
||||||
return Type.HET;
|
return Type.NO_CALL;
|
||||||
|
return Type.MIXED;
|
||||||
}
|
}
|
||||||
return firstAllele.isReference() ? Type.HOM_REF : Type.HOM_VAR;
|
|
||||||
|
if ( observedAllele == null )
|
||||||
|
throw new ReviewedStingException("BUG: there are no alleles present in this genotype but the alleles list is not null");
|
||||||
|
|
||||||
|
return sawMultipleAlleles ? Type.HET : observedAllele.isReference() ? Type.HOM_REF : Type.HOM_VAR;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return true if all observed alleles are the same (regardless of whether they are ref or alt)
|
* @return true if all observed alleles are the same (regardless of whether they are ref or alt); if any alleles are no-calls, this method will return false.
|
||||||
*/
|
*/
|
||||||
public boolean isHom() { return isHomRef() || isHomVar(); }
|
public boolean isHom() { return isHomRef() || isHomVar(); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false.
|
||||||
|
*/
|
||||||
public boolean isHomRef() { return getType() == Type.HOM_REF; }
|
public boolean isHomRef() { return getType() == Type.HOM_REF; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false.
|
||||||
|
*/
|
||||||
public boolean isHomVar() { return getType() == Type.HOM_VAR; }
|
public boolean isHomVar() { return getType() == Type.HOM_VAR; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return true if we're het (observed alleles differ)
|
* @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false.
|
||||||
*/
|
*/
|
||||||
public boolean isHet() { return getType() == Type.HET; }
|
public boolean isHet() { return getType() == Type.HET; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF)
|
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false.
|
||||||
*/
|
*/
|
||||||
public boolean isNoCall() { return getType() == Type.NO_CALL; }
|
public boolean isNoCall() { return getType() == Type.NO_CALL; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return true if this genotype is comprised of any alleles that are not no-calls (even if some are).
|
||||||
|
*/
|
||||||
public boolean isCalled() { return getType() != Type.NO_CALL && getType() != Type.UNAVAILABLE; }
|
public boolean isCalled() { return getType() != Type.NO_CALL && getType() != Type.UNAVAILABLE; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return true if this genotype is comprised of both calls and no-calls.
|
||||||
|
*/
|
||||||
|
public boolean isMixed() { return getType() == Type.MIXED; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return true if the type of this genotype is set.
|
||||||
|
*/
|
||||||
public boolean isAvailable() { return getType() != Type.UNAVAILABLE; }
|
public boolean isAvailable() { return getType() != Type.UNAVAILABLE; }
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
@ -197,14 +234,16 @@ public class Genotype {
|
||||||
if ( alleles == null ) return;
|
if ( alleles == null ) return;
|
||||||
if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0");
|
if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0");
|
||||||
|
|
||||||
int nNoCalls = 0;
|
// int nNoCalls = 0;
|
||||||
for ( Allele allele : alleles ) {
|
for ( Allele allele : alleles ) {
|
||||||
if ( allele == null )
|
if ( allele == null )
|
||||||
throw new IllegalArgumentException("BUG: allele cannot be null in Genotype");
|
throw new IllegalArgumentException("BUG: allele cannot be null in Genotype");
|
||||||
nNoCalls += allele.isNoCall() ? 1 : 0;
|
// nNoCalls += allele.isNoCall() ? 1 : 0;
|
||||||
}
|
}
|
||||||
if ( nNoCalls > 0 && nNoCalls != alleles.size() )
|
|
||||||
throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this);
|
// Technically, the spec does allow for the below case so this is not an illegal state
|
||||||
|
//if ( nNoCalls > 0 && nNoCalls != alleles.size() )
|
||||||
|
// throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getGenotypeString() {
|
public String getGenotypeString() {
|
||||||
|
|
|
||||||
|
|
@ -40,19 +40,7 @@ public class MutableGenotype extends Genotype {
|
||||||
*/
|
*/
|
||||||
public void setAlleles(List<Allele> alleles) {
|
public void setAlleles(List<Allele> alleles) {
|
||||||
this.alleles = new ArrayList<Allele>(alleles);
|
this.alleles = new ArrayList<Allele>(alleles);
|
||||||
|
validate();
|
||||||
// todo -- add validation checking here
|
|
||||||
|
|
||||||
if ( alleles == null ) throw new IllegalArgumentException("BUG: alleles cannot be null in setAlleles");
|
|
||||||
if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0 in setAlleles");
|
|
||||||
|
|
||||||
int nNoCalls = 0;
|
|
||||||
for ( Allele allele : alleles ) { nNoCalls += allele.isNoCall() ? 1 : 0; }
|
|
||||||
if ( nNoCalls > 0 && nNoCalls != alleles.size() )
|
|
||||||
throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this);
|
|
||||||
|
|
||||||
for ( Allele allele : alleles )
|
|
||||||
if ( allele == null ) throw new IllegalArgumentException("BUG: Cannot add a null allele to a genotype");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setPhase(boolean isPhased) {
|
public void setPhase(boolean isPhased) {
|
||||||
|
|
|
||||||
|
|
@ -998,7 +998,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
else if ( g.isHomVar() )
|
else if ( g.isHomVar() )
|
||||||
genotypeCounts[Genotype.Type.HOM_VAR.ordinal()]++;
|
genotypeCounts[Genotype.Type.HOM_VAR.ordinal()]++;
|
||||||
else
|
else
|
||||||
throw new IllegalStateException("Genotype of unknown type: " + g);
|
genotypeCounts[Genotype.Type.MIXED.ordinal()]++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1042,6 +1042,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
return genotypeCounts[Genotype.Type.HOM_VAR.ordinal()];
|
return genotypeCounts[Genotype.Type.HOM_VAR.ordinal()];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Genotype-specific functions -- how many mixed calls are there in the genotypes?
|
||||||
|
*
|
||||||
|
* @return number of mixed calls
|
||||||
|
*/
|
||||||
|
public int getMixedCount() {
|
||||||
|
return genotypeCounts[Genotype.Type.MIXED.ordinal()];
|
||||||
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// validation: extra-strict validation routines for paranoid users
|
// validation: extra-strict validation routines for paranoid users
|
||||||
|
|
@ -1357,10 +1366,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
throw new IllegalArgumentException("Duplicate allele added to VariantContext: " + a);
|
throw new IllegalArgumentException("Duplicate allele added to VariantContext: " + a);
|
||||||
}
|
}
|
||||||
|
|
||||||
// deal with the case where the first allele isn't the reference
|
// deal with the case where the first allele isn't the reference
|
||||||
if ( a.isReference() ) {
|
if ( a.isReference() ) {
|
||||||
if ( sawRef )
|
if ( sawRef )
|
||||||
throw new IllegalArgumentException("Alleles for a VariantContext must contain a single reference allele: " + alleles);
|
throw new IllegalArgumentException("Alleles for a VariantContext must contain at most one reference allele: " + alleles);
|
||||||
alleleList.add(0, a);
|
alleleList.add(0, a);
|
||||||
sawRef = true;
|
sawRef = true;
|
||||||
}
|
}
|
||||||
|
|
@ -1372,7 +1381,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
throw new IllegalArgumentException("Cannot create a VariantContext with an empty allele list");
|
throw new IllegalArgumentException("Cannot create a VariantContext with an empty allele list");
|
||||||
|
|
||||||
if ( alleleList.get(0).isNonReference() )
|
if ( alleleList.get(0).isNonReference() )
|
||||||
throw new IllegalArgumentException("Alleles for a VariantContext must contain a single reference allele: " + alleles);
|
throw new IllegalArgumentException("Alleles for a VariantContext must contain at least one reference allele: " + alleles);
|
||||||
|
|
||||||
return alleleList;
|
return alleleList;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -252,6 +252,29 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
Assert.assertEquals(vc.getSampleNames().size(), 0);
|
Assert.assertEquals(vc.getSampleNames().size(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCreatingPartiallyCalledGenotype() {
|
||||||
|
List<Allele> alleles = Arrays.asList(Aref, C);
|
||||||
|
Genotype g = new Genotype("foo", Arrays.asList(C, Allele.NO_CALL), 10);
|
||||||
|
VariantContext vc = new VariantContext("test", snpLoc, snpLocStart, snpLocStop, alleles, Arrays.asList(g));
|
||||||
|
|
||||||
|
Assert.assertTrue(vc.isSNP());
|
||||||
|
Assert.assertEquals(vc.getNAlleles(), 2);
|
||||||
|
Assert.assertTrue(vc.hasGenotypes());
|
||||||
|
Assert.assertFalse(vc.isMonomorphic());
|
||||||
|
Assert.assertTrue(vc.isPolymorphic());
|
||||||
|
Assert.assertEquals(vc.getGenotype("foo"), g);
|
||||||
|
Assert.assertEquals(vc.getChromosomeCount(), 2); // we know that there are 2 chromosomes, even though one isn't called
|
||||||
|
Assert.assertEquals(vc.getChromosomeCount(Aref), 0);
|
||||||
|
Assert.assertEquals(vc.getChromosomeCount(C), 1);
|
||||||
|
Assert.assertFalse(vc.getGenotype("foo").isHet());
|
||||||
|
Assert.assertFalse(vc.getGenotype("foo").isHom());
|
||||||
|
Assert.assertFalse(vc.getGenotype("foo").isNoCall());
|
||||||
|
Assert.assertFalse(vc.getGenotype("foo").isHom());
|
||||||
|
Assert.assertTrue(vc.getGenotype("foo").isMixed());
|
||||||
|
Assert.assertEquals(vc.getGenotype("foo").getType(), Genotype.Type.MIXED);
|
||||||
|
}
|
||||||
|
|
||||||
@Test (expectedExceptions = IllegalArgumentException.class)
|
@Test (expectedExceptions = IllegalArgumentException.class)
|
||||||
public void testBadConstructorArgs1() {
|
public void testBadConstructorArgs1() {
|
||||||
new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATCref));
|
new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATCref));
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue