You know how the wiki has a big section on commiting local changes to BRANCHES of the repository you clone it from? Yeah. It sucks if you don't do that.
This commit contains: - IntronLossGenotyper is brought into its current incarnation - A couple of simple new filters (ReadName is super useful for debugging, MateUnmapped is useful for selecting out reads that may have a relevant unaligned mate) - RFA now matches my current local repository. It's in flux since I'm transitioning to the new traversal type. + the triggering read stash pilot required me to change the scope of some of the variables in the ReadClipping code, private -> protected. Those are all the changes there. - MendelianViolation restored to its former glory (and an annotator module that uses the likelihood calculation has been added) + use this rather than a hard GQ threshold if you're doing MV analyses. - Some miscellaneous QScripts
This commit is contained in:
parent
d8d73fe4f2
commit
cd8a6d62bb
|
|
@ -0,0 +1,23 @@
|
|||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 9/19/11
|
||||
* Time: 4:09 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReadNameFilter extends ReadFilter {
|
||||
@Argument(fullName = "readName", shortName = "rn", doc="Filter out all reads except those with this read name", required=true)
|
||||
private String readName;
|
||||
|
||||
public boolean filterOut(final SAMRecord rec) {
|
||||
return ! rec.getReadName().equals(readName);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFilterHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 9/14/11
|
||||
* Time: 12:24 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
private MendelianViolation mendelianViolation = null;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( mendelianViolation == null ) {
|
||||
if ( walker instanceof VariantAnnotator ) {
|
||||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).familyStr, ((VariantAnnotator)walker).minGenotypeQualityP );
|
||||
}
|
||||
else {
|
||||
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator");
|
||||
}
|
||||
}
|
||||
|
||||
Map<String,Object> toRet = new HashMap<String,Object>(1);
|
||||
boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) &&
|
||||
vc.hasGenotype(mendelianViolation.getSampleDad()) &&
|
||||
vc.hasGenotype(mendelianViolation.getSampleMom());
|
||||
if ( hasAppropriateGenotypes )
|
||||
toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc));
|
||||
|
||||
return toRet;
|
||||
}
|
||||
|
||||
// return the descriptions used for the VCF INFO meta field
|
||||
public List<String> getKeyNames() { return Arrays.asList("MVLR"); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
|
||||
}
|
||||
|
|
@ -26,6 +26,9 @@ public class MendelianViolation {
|
|||
|
||||
double minGenotypeQuality;
|
||||
|
||||
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
|
||||
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
|
||||
|
||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||
|
||||
public String getSampleMom() {
|
||||
|
|
@ -134,4 +137,43 @@ public class MendelianViolation {
|
|||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the likelihood ratio for a mendelian violation
|
||||
*/
|
||||
public double violationLikelihoodRatio(VariantContext vc) {
|
||||
double[] logLikAssignments = new double[27];
|
||||
// the matrix to set up is
|
||||
// MOM DAD CHILD
|
||||
// |- AA
|
||||
// AA AA | AB
|
||||
// |- BB
|
||||
// |- AA
|
||||
// AA AB | AB
|
||||
// |- BB
|
||||
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
|
||||
double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector();
|
||||
double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector();
|
||||
double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector();
|
||||
int offset = 0;
|
||||
for ( int oMom = 0; oMom < 3; oMom++ ) {
|
||||
for ( int oDad = 0; oDad < 3; oDad++ ) {
|
||||
for ( int oChild = 0; oChild < 3; oChild ++ ) {
|
||||
logLikAssignments[offset++] = momGL[oMom] + dadGL[oDad] + childGL[oChild];
|
||||
}
|
||||
}
|
||||
}
|
||||
double[] mvLiks = new double[12];
|
||||
double[] nonMVLiks = new double[15];
|
||||
for ( int i = 0; i < 12; i ++ ) {
|
||||
mvLiks[i] = logLikAssignments[mvOffsets[i]];
|
||||
}
|
||||
|
||||
for ( int i = 0; i < 15; i++) {
|
||||
nonMVLiks[i] = logLikAssignments[nonMVOffsets[i]];
|
||||
}
|
||||
|
||||
return MathUtils.log10sumLog10(mvLiks) - MathUtils.log10sumLog10(nonMVLiks);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue