improvements to ve2, including more meaningful mendelian violation counting. Support for VCF emitted interesting sites, annotated according to the evaluations themselves. Basic intergration test for VE2 started
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2819 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3de72daa88
commit
c66861746a
|
|
@ -247,11 +247,18 @@ public class RefMetaDataTracker {
|
|||
* @return
|
||||
*/
|
||||
public Collection<VariantContext> getVariantContexts(String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
RODRecordList<ReferenceOrderedDatum> rodList = getTrackData(name, null);
|
||||
return getVariantContexts(Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly);
|
||||
}
|
||||
|
||||
public Collection<VariantContext> getVariantContexts(Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
Collection<VariantContext> contexts = new ArrayList<VariantContext>();
|
||||
|
||||
if ( rodList != null )
|
||||
addVariantContexts(contexts, rodList, allowedTypes, curLocation, requireStartHere, takeFirstOnly );
|
||||
for ( String name : names ) {
|
||||
RODRecordList<ReferenceOrderedDatum> rodList = getTrackData(name, null);
|
||||
|
||||
if ( rodList != null )
|
||||
addVariantContexts(contexts, rodList, allowedTypes, curLocation, requireStartHere, takeFirstOnly );
|
||||
}
|
||||
|
||||
return contexts;
|
||||
}
|
||||
|
|
@ -271,8 +278,10 @@ public class RefMetaDataTracker {
|
|||
|
||||
if ( contexts.size() > 1 )
|
||||
throw new StingException("Requested a single VariantContext object for track " + name + " but multiple variants were present at position " + curLocation);
|
||||
|
||||
return contexts.iterator().next();
|
||||
else if ( contexts.size() == 0 )
|
||||
return null;
|
||||
else
|
||||
return contexts.iterator().next();
|
||||
}
|
||||
|
||||
private void addVariantContexts(Collection<VariantContext> contexts, RODRecordList<ReferenceOrderedDatum> rodList, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
|
|
|
|||
|
|
@ -43,11 +43,11 @@ public class CountVariants extends VariantEvaluator {
|
|||
return 1; // we only need to see each eval track
|
||||
}
|
||||
|
||||
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
nProcessedLoci += context.getSkippedBases() + 1;
|
||||
}
|
||||
|
||||
public void update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
//nProcessedLoci++;
|
||||
nCalledLoci++;
|
||||
|
||||
|
|
@ -70,6 +70,8 @@ public class CountVariants extends VariantEvaluator {
|
|||
default: throw new StingException("BUG: Unexpected genotype type: " + g);
|
||||
}
|
||||
}
|
||||
|
||||
return null; // we don't capture any intersting sites
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
|
|
|
|||
|
|
@ -91,7 +91,7 @@ public class DbSNPPercentage extends VariantEvaluator {
|
|||
public double dbSNPRate() { return rate(nSNPsAtdbSNPs(), nEvalSNPs()); }
|
||||
public double concordanceRate() { return rate(nConcordant(), nSNPsAtdbSNPs()); }
|
||||
|
||||
public void update2(VariantContext eval, VariantContext dbsnp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
public String update2(VariantContext eval, VariantContext dbsnp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
boolean dbSNPIsGood = dbsnp != null && dbsnp.isSNP() && dbsnp.isNotFiltered();
|
||||
boolean evalIsGood = eval != null && eval.isSNP();
|
||||
|
||||
|
|
@ -104,5 +104,7 @@ public class DbSNPPercentage extends VariantEvaluator {
|
|||
if ( ! discordantP(eval, dbsnp) ) // count whether we're concordant or not with the dbSNP value
|
||||
nConcordant++;
|
||||
}
|
||||
|
||||
return null; // we don't capture any interesting sites
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,10 +40,12 @@ import java.util.regex.Matcher;
|
|||
* Check that the child is one of these.
|
||||
*/
|
||||
public class MendelianViolationEvaluator extends VariantEvaluator {
|
||||
long nVariants, nViolations, nOverCalls, nUnderCalls;
|
||||
long nVariants, nViolations;
|
||||
TrioStructure trio;
|
||||
VariantEval2Walker parent;
|
||||
|
||||
long KidHomRef_ParentHomVar, KidHet_ParentsHomRef, KidHet_ParentsHomVar, KidHomVar_ParentHomRef;
|
||||
|
||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||
|
||||
public static class TrioStructure {
|
||||
|
|
@ -90,7 +92,7 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
return 1; // we only need to see each eval track
|
||||
}
|
||||
|
||||
public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( vc.isBiallelic() && vc.hasGenotypes() ) { // todo -- currently limited to biallelic loci
|
||||
nVariants++;
|
||||
|
||||
|
|
@ -105,61 +107,34 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
nViolations++;
|
||||
|
||||
String label = null;
|
||||
switch ( getViolationType( vc, momG, dadG, childG ) ) {
|
||||
case UNDER_CALL:
|
||||
nUnderCalls++;
|
||||
label = "under_called";
|
||||
break;
|
||||
case OVER_CALL:
|
||||
nOverCalls++;
|
||||
label = "over_called";
|
||||
break;
|
||||
default:
|
||||
throw new StingException("BUG: unexpected violation type at " + vc);
|
||||
|
||||
if ( childG.isHomRef() && (momG.isHomVar() || dadG.isHomVar() )) {
|
||||
label = "KidHomRef_ParentHomVar";
|
||||
KidHomRef_ParentHomVar++;
|
||||
} else if (childG.isHet() && (momG.isHomRef() && dadG.isHomRef()) ) {
|
||||
label = "KidHet_ParentsHomRef";
|
||||
KidHet_ParentsHomRef++;
|
||||
} else if (childG.isHet() && (momG.isHomVar() && dadG.isHomVar()) ) {
|
||||
label = "KidHet_ParentsHomVar";
|
||||
KidHet_ParentsHomVar++;
|
||||
} else if (childG.isHomVar() && (momG.isHomRef() || dadG.isHomRef())) {
|
||||
label = "KidHomVar_ParentHomRef";
|
||||
KidHomVar_ParentHomRef++;
|
||||
} else {
|
||||
throw new StingException("BUG: unexpected child genotype class " + childG);
|
||||
}
|
||||
|
||||
String why = String.format("Mendelian violation %s: at %s m=%s d=%s c=%s", label, vc.getLocation(), momG.toBriefString(), dadG.toBriefString(), childG.toBriefString());
|
||||
addInterestingSite(why , vc);
|
||||
return label;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Are we under or over calling?
|
||||
*
|
||||
* Assuming this is a bialleic locus, we then have 2 alleles A and B. There are really two types of violations:
|
||||
*
|
||||
* Undercall: where the child is A/A but parent genotypes imply that child must carry at least one B allele
|
||||
* Overall: where the child carries a B allele but this B allele couldn't have been inherited from either parent
|
||||
*
|
||||
* The way to determine this is to look at mom and dad separately. If the child doesn't carry at least one
|
||||
* allele from each parent, it's an under calls. Otherwise it's an overcall.
|
||||
*/
|
||||
public ViolationType getViolationType(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG ) {
|
||||
switch ( childG.getType() ) {
|
||||
case HOM_REF:
|
||||
return ViolationType.UNDER_CALL; // if you have to undercalled as a hom ref child
|
||||
case HET:
|
||||
// the only two violations of a het is where both parents are hom. If they are hom ref, you overcalled,
|
||||
// otherwise you undercalled
|
||||
return momG.isHomRef() ? ViolationType.OVER_CALL : ViolationType.UNDER_CALL;
|
||||
case HOM_VAR:
|
||||
return ViolationType.OVER_CALL; // if you have to overcalled as a hom var child
|
||||
default:
|
||||
throw new StingException("BUG: unexpected child genotype class " + childG);
|
||||
}
|
||||
return null; // we don't capture any intersting sites
|
||||
}
|
||||
|
||||
private boolean includeGenotype(Genotype g) {
|
||||
return g.getNegLog10PError() > getQThreshold() && g.isCalled();
|
||||
}
|
||||
|
||||
private enum ViolationType {
|
||||
UNDER_CALL, OVER_CALL
|
||||
}
|
||||
|
||||
public static boolean isViolation(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG ) {
|
||||
//VariantContext momVC = vc.subContextFromGenotypes(momG);
|
||||
//VariantContext dadVC = vc.subContextFromGenotypes(dadG);
|
||||
|
|
@ -183,17 +158,11 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
}
|
||||
|
||||
private String summaryLine() {
|
||||
return String.format("%d %d %.2e %d %.2e %.2f %d %.2e %.2f",
|
||||
nVariants, nViolations, rate(nViolations, nVariants),
|
||||
nOverCalls, rate(nOverCalls, nVariants), ratio(nOverCalls, nViolations),
|
||||
nUnderCalls, rate(nUnderCalls, nVariants), ratio(nUnderCalls, nViolations));
|
||||
return String.format("%d %d %d %d %d %d", nVariants, nViolations, KidHomRef_ParentHomVar, KidHet_ParentsHomRef, KidHet_ParentsHomVar, KidHomVar_ParentHomRef);
|
||||
}
|
||||
|
||||
private static List<String> HEADER =
|
||||
Arrays.asList("nVariants",
|
||||
"nViolations", "violationRate",
|
||||
"nOverCalls", "overCallRate", "overCallFraction",
|
||||
"nUnderCalls", "underCallRate", "underCallFraction");
|
||||
Arrays.asList("nVariants", "nViolations", "KidHomRef_ParentHomVar", "KidHet_ParentsHomRef", "KidHet_ParentsHomVar", "KidHomVar_ParentHomRef");
|
||||
|
||||
// making it a table
|
||||
public List<String> getTableHeader() {
|
||||
|
|
@ -203,4 +172,4 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
public List<List<String>> getTableRows() {
|
||||
return Arrays.asList(Arrays.asList(summaryLine().split(" ")));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,9 +36,11 @@ public class TiTvVariantEvaluator extends VariantEvaluator {
|
|||
}
|
||||
}
|
||||
|
||||
public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( vc1 != null ) updateTiTv(vc1, false);
|
||||
if ( vc2 != null ) updateTiTv(vc2, true);
|
||||
|
||||
return null; // we don't capture any intersting sites
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
|
|
|
|||
|
|
@ -4,17 +4,25 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableVariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.util.*;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.io.File;
|
||||
|
||||
// todo -- evalations should support comment lines
|
||||
// todo -- add Mendelian variable explanations (nDeNovo and nMissingTransmissions)
|
||||
|
||||
// todo -- interesting sites should support VCF generation, so that FN, FP, DeNovo, etc calls get put into a single VCF and
|
||||
// todo -- an explanation added to the INFO field as to why it showed up there.
|
||||
|
||||
//
|
||||
// todo -- write a simple column table system and have the evaluators return this instead of the list<list<string>> objects
|
||||
|
|
@ -28,6 +36,12 @@ import java.lang.reflect.InvocationTargetException;
|
|||
// todo -- create JEXL context implementing object that simply looks up values for JEXL evaluations. Throws error for unknown fields
|
||||
//
|
||||
|
||||
// todo -- port over SNP density evaluator.
|
||||
|
||||
// todo -- add subgroup of known variants as to those at hapmap sites [it's in the dbSNP record]
|
||||
|
||||
// todo -- deal with performance issues with variant contexts
|
||||
|
||||
//
|
||||
// Todo -- should really include argument parsing @annotations from subclass in this walker. Very
|
||||
// todo -- useful general capability. Right now you need to add arguments to VariantEval2 to handle new
|
||||
|
|
@ -36,7 +50,8 @@ import java.lang.reflect.InvocationTargetException;
|
|||
|
||||
//
|
||||
// todo -- the whole organization only supports a single eval x comp evaluation. We need to instantiate
|
||||
// todo -- new contexts for each comparison object too!
|
||||
// todo -- new contexts for each comparison object too! The output table should be clear as to what the "comp"
|
||||
// todo -- variable is in the analysis
|
||||
//
|
||||
|
||||
//
|
||||
|
|
@ -73,12 +88,9 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
@Argument(shortName="MVQ", fullName="MendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
|
||||
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
|
||||
|
||||
@Argument(shortName="PI", fullName="PrintInterestingSites", doc="If provided, interesting sites in the unselected, called set will be printed", required=false)
|
||||
protected boolean PRINT_INTERESTING_SITES = false;
|
||||
@Argument(shortName="outputVCF", fullName="InterestingSitesVCF", doc="If provided, interesting sites emitted to this vcf and the INFO field annotated as to why they are interesting", required=false)
|
||||
protected String outputVCF = null;
|
||||
|
||||
// @Argument(shortName="PIA", fullName="PrintAllInterestingSites", doc="If provided, interesting sites will be printed for all sets. Verbose", required=false)
|
||||
// protected boolean PRINT_ALL_INTERESTING_SITES = false;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// private walker data
|
||||
|
|
@ -89,12 +101,14 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
private class EvaluationContext extends HashMap<String, Set<VariantEvaluator>> {
|
||||
// useful for typing
|
||||
public String trackName, contextName;
|
||||
public boolean enableInterestingSiteCaptures = false;
|
||||
VariantContextUtils.JexlVCMatchExp selectExp;
|
||||
|
||||
public EvaluationContext(String trackName, String contextName, VariantContextUtils.JexlVCMatchExp selectExp) {
|
||||
public EvaluationContext(String trackName, String contextName, VariantContextUtils.JexlVCMatchExp selectExp, boolean enableInterestingSiteCaptures) {
|
||||
this.trackName = trackName;
|
||||
this.contextName = contextName;
|
||||
this.selectExp = selectExp;
|
||||
this.enableInterestingSiteCaptures = enableInterestingSiteCaptures;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -112,6 +126,10 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
private List<String> variantEvaluationNames = new ArrayList<String>();
|
||||
private List<Class<? extends VariantEvaluator>> evaluationClasses = null;
|
||||
|
||||
/** output writer for interesting sites */
|
||||
private VCFWriter writer = null;
|
||||
private boolean wroteHeader = false;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// initialize
|
||||
|
|
@ -136,11 +154,14 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
determineContextNamePartSizes();
|
||||
|
||||
if ( outputVCF != null )
|
||||
writer = new VCFWriter(new File(outputVCF));
|
||||
}
|
||||
|
||||
private void determineAllEvalations() {
|
||||
evaluationClasses = PackageUtils.getClassesImplementingInterface(VariantEvaluator.class);
|
||||
for ( VariantEvaluator e : instantiateEvalationsSet(false, null) ) {
|
||||
for ( VariantEvaluator e : instantiateEvalationsSet() ) {
|
||||
// for collecting purposes
|
||||
variantEvaluationNames.add(e.getName());
|
||||
logger.debug("Including VariantEvaluator " + e.getName() + " of class " + e.getClass());
|
||||
|
|
@ -149,7 +170,7 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
Collections.sort(variantEvaluationNames);
|
||||
}
|
||||
|
||||
private Set<VariantEvaluator> instantiateEvalationsSet(boolean baseline, String name) {
|
||||
private Set<VariantEvaluator> instantiateEvalationsSet() {
|
||||
Set<VariantEvaluator> evals = new HashSet<VariantEvaluator>();
|
||||
Object[] args = new Object[]{this};
|
||||
Class[] argTypes = new Class[]{this.getClass()};
|
||||
|
|
@ -158,7 +179,6 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
try {
|
||||
Constructor constructor = c.getConstructor(argTypes);
|
||||
VariantEvaluator eval = (VariantEvaluator)constructor.newInstance(args);
|
||||
if ( baseline ) eval.printInterestingSites(name);
|
||||
evals.add(eval);
|
||||
} catch (InstantiationException e) {
|
||||
throw new StingException(String.format("Cannot instantiate class '%s': must be concrete class", c.getSimpleName()));
|
||||
|
|
@ -175,18 +195,25 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
private void addNewContext(String trackName, String contextName, VariantContextUtils.JexlVCMatchExp selectExp) {
|
||||
EvaluationContext group = new EvaluationContext(trackName, contextName, selectExp);
|
||||
EvaluationContext group = new EvaluationContext(trackName, contextName, selectExp, selectExp == null);
|
||||
|
||||
for ( String filteredName : Arrays.asList(RAW_SET_NAME, RETAINED_SET_NAME, FILTERED_SET_NAME) ) {
|
||||
for ( String subname : Arrays.asList(ALL_SET_NAME, KNOWN_SET_NAME, NOVEL_SET_NAME) ) {
|
||||
String name = subname + "." + filteredName;
|
||||
group.put(name, instantiateEvalationsSet(subname == ALL_SET_NAME && filteredName == RETAINED_SET_NAME, trackName + "." + (selectExp == null ? "all" : selectExp.name) + "." + name));
|
||||
//System.out.printf("Creating group name: " + name);
|
||||
group.put(name, instantiateEvalationsSet());
|
||||
//group.put(name, instantiateEvalationsSet(subname == ALL_SET_NAME && filteredName == RETAINED_SET_NAME, trackName + "." + (selectExp == null ? "all" : selectExp.name) + "." + name));
|
||||
}
|
||||
}
|
||||
|
||||
contexts.put(contextName, group);
|
||||
}
|
||||
|
||||
private boolean captureInterestingSitesOfEvalSet(String name) {
|
||||
//System.out.printf("checking %s%n", name);
|
||||
return name.contains(ALL_SET_NAME + "." + RETAINED_SET_NAME);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// map
|
||||
|
|
@ -199,17 +226,19 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
if ( ref == null )
|
||||
return 0;
|
||||
|
||||
Map<String, VariantContext> comps = getCompVariantContexts(tracker, context);
|
||||
Collection<VariantContext> comps = getCompVariantContexts(tracker, context);
|
||||
|
||||
// to enable walking over pairs where eval or comps have no elements
|
||||
for ( EvaluationContext group : contexts.values() ) {
|
||||
VariantContext vc = getVariantContext(group.trackName, tracker, context);
|
||||
VariantContext vc = getEvalContext(group.trackName, tracker, context);
|
||||
|
||||
//logger.debug(String.format("Updating %s of %s with variant", group.name, vc));
|
||||
|
||||
for ( Map.Entry<String, Set<VariantEvaluator>> namedEvaluations : group.entrySet() ) {
|
||||
String evaluationName = namedEvaluations.getKey();
|
||||
Set<VariantEvaluator> evaluations = namedEvaluations.getValue();
|
||||
boolean evalWantsVC = applyVCtoEvaluation(evaluationName, vc, comps, group);
|
||||
List<String> interestingReasons = new ArrayList<String>();
|
||||
|
||||
for ( VariantEvaluator evaluation : evaluations ) {
|
||||
if ( evaluation.enabled() ) {
|
||||
|
|
@ -219,12 +248,15 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
// now call the single or paired update function
|
||||
switch ( evaluation.getComparisonOrder() ) {
|
||||
case 1:
|
||||
if ( evalWantsVC && vc != null )
|
||||
evaluation.update1(vc, tracker, ref, context);
|
||||
if ( evalWantsVC && vc != null ) {
|
||||
String interesting = evaluation.update1(vc, tracker, ref, context);
|
||||
if ( interesting != null ) interestingReasons.add(interesting);
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for ( VariantContext comp : comps.values() ) {
|
||||
evaluation.update2( evalWantsVC ? vc : null, comp, tracker, ref, context);
|
||||
for ( VariantContext comp : comps ) {
|
||||
String interesting = evaluation.update2( evalWantsVC ? vc : null, comp, tracker, ref, context);
|
||||
if ( interesting != null ) interestingReasons.add(interesting);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
|
@ -232,16 +264,53 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( group.enableInterestingSiteCaptures && captureInterestingSitesOfEvalSet(evaluationName) )
|
||||
writeInterestingSite(interestingReasons, vc);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
private boolean applyVCtoEvaluation(String evaluationName, VariantContext vc, Map<String, VariantContext> comps, EvaluationContext group) {
|
||||
private void writeInterestingSite(List<String> interestingReasons, VariantContext vc) {
|
||||
if ( writer != null && interestingReasons.size() > 0 ) {
|
||||
MutableVariantContext mvc = new MutableVariantContext(vc);
|
||||
|
||||
for ( String why : interestingReasons ) {
|
||||
String key, value;
|
||||
String[] parts = why.split("=");
|
||||
|
||||
switch ( parts.length ) {
|
||||
case 1:
|
||||
key = parts[0];
|
||||
value = "1";
|
||||
break;
|
||||
case 2:
|
||||
key = parts[0];
|
||||
value = parts[1];
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("BUG: saw a interesting site reason sting with multiple = signs " + why);
|
||||
}
|
||||
|
||||
mvc.putAttribute(key, value);
|
||||
}
|
||||
|
||||
if ( ! wroteHeader ) {
|
||||
writer.writeHeader(VariantContextAdaptors.createVCFHeader(null, vc));
|
||||
wroteHeader = true;
|
||||
}
|
||||
|
||||
writer.addRecord(VariantContextAdaptors.toVCF(mvc));
|
||||
//interestingReasons.clear();
|
||||
}
|
||||
}
|
||||
|
||||
private boolean applyVCtoEvaluation(String evaluationName, VariantContext vc, Collection<VariantContext> comps, EvaluationContext group) {
|
||||
if ( vc == null )
|
||||
return true;
|
||||
|
||||
|
||||
if ( evaluationName.contains(FILTERED_SET_NAME) && vc.isNotFiltered() )
|
||||
return false;
|
||||
|
||||
|
|
@ -261,11 +330,15 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
return true;
|
||||
}
|
||||
|
||||
private boolean vcIsKnown(VariantContext vc, Map<String, VariantContext> comps, String[] knownNames ) {
|
||||
for ( String knownName : knownNames ) {
|
||||
VariantContext known = comps.get(knownName);
|
||||
if ( known != null && known.isNotFiltered() && known.getType() == vc.getType() )
|
||||
return true;
|
||||
private boolean vcIsKnown(VariantContext vc, Collection<VariantContext> comps, String[] knownNames ) {
|
||||
for ( VariantContext comp : comps ) {
|
||||
if ( comp.isNotFiltered() && comp.getType() == vc.getType() ) {
|
||||
for ( String knownName : knownNames ) {
|
||||
if ( comp.getName().equals(knownName) ) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
|
|
@ -278,20 +351,23 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
//
|
||||
//logger.info(String.format("Ignore second+ events at locus %s in rod %s => rec is %s", context.getLocation(), rodList.getName(), rec));
|
||||
|
||||
private Map<String, VariantContext> getCompVariantContexts(RefMetaDataTracker tracker, AlignmentContext context) {
|
||||
Map<String, VariantContext> comps = new HashMap<String, VariantContext>();
|
||||
|
||||
for ( String compName : compNames ) {
|
||||
comps.put(compName, getVariantContext(compName, tracker, context));
|
||||
}
|
||||
private Collection<VariantContext> getCompVariantContexts(RefMetaDataTracker tracker, AlignmentContext context) {
|
||||
// todo -- we need to deal with dbSNP where there can be multiple records at the same start site. A potential solution is to
|
||||
// todo -- allow the variant evaluation to specify the type of variants it wants to see and only take the first such record at a site
|
||||
Collection<VariantContext> comps = tracker.getVariantContexts(compNames, null, context.getLocation(), true, true);
|
||||
|
||||
// todo -- remove me when the loop works correctly for comparisons of eval x comp for each comp
|
||||
if ( comps.size() > 1 ) throw new StingException("VariantEval2 currently only supports comparisons of N eval tracks vs. a single comparison track. Yes, I know...");
|
||||
return comps;
|
||||
}
|
||||
|
||||
private VariantContext getVariantContext(String name, RefMetaDataTracker tracker, AlignmentContext context) {
|
||||
return tracker.getVariantContext(name, null, context.getLocation(), true);
|
||||
private VariantContext getEvalContext(String name, RefMetaDataTracker tracker, AlignmentContext context) {
|
||||
Collection<VariantContext> contexts = tracker.getVariantContexts(name, null, context.getLocation(), true, false);
|
||||
|
||||
if ( context.size() > 1 )
|
||||
throw new StingException("Found multiple variant contexts at " + context.getLocation());
|
||||
|
||||
return contexts.size() == 1 ? contexts.iterator().next() : null;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -338,7 +414,7 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private String formatKeyword(String keyWord) {
|
||||
//System.out.printf("keyword %s%n", keyWord);
|
||||
|
||||
|
|
|
|||
|
|
@ -16,24 +16,25 @@ import java.util.ArrayList;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
abstract class VariantEvaluator {
|
||||
protected boolean accumulateInterestingSites = false, printInterestingSites = false;
|
||||
protected String interestingSitePrefix = null;
|
||||
// protected boolean accumulateInterestingSites = false, printInterestingSites = false;
|
||||
// protected String interestingSitePrefix = null;
|
||||
protected boolean processedASite = false;
|
||||
protected List<VariantContext> interestingSites = new ArrayList<VariantContext>();
|
||||
// protected List<VariantContext> interestingSites = new ArrayList<VariantContext>();
|
||||
|
||||
public abstract String getName();
|
||||
|
||||
// do we want to keep track of things that are interesting
|
||||
public void accumulateInterestingSites(boolean enable) { accumulateInterestingSites = enable; }
|
||||
public void printInterestingSites(String prefix) { printInterestingSites = true; interestingSitePrefix = prefix; }
|
||||
public boolean isAccumulatingInterestingSites() { return accumulateInterestingSites; }
|
||||
public List<VariantContext> getInterestingSites() { return interestingSites; }
|
||||
protected void addInterestingSite(String why, VariantContext vc) {
|
||||
if ( accumulateInterestingSites )
|
||||
interestingSites.add(vc);
|
||||
if ( printInterestingSites )
|
||||
System.out.printf("%40s %s%n", interestingSitePrefix, why);
|
||||
}
|
||||
// public void accumulateInterestingSites(boolean enable) { accumulateInterestingSites = enable; }
|
||||
// public void printInterestingSites(String prefix) { printInterestingSites = true; interestingSitePrefix = prefix; }
|
||||
// public boolean isAccumulatingInterestingSites() { return accumulateInterestingSites; }
|
||||
// public List<VariantContext> getInterestingSites() { return interestingSites; }
|
||||
|
||||
// protected void addInterestingSite(String why, VariantContext vc) {
|
||||
// if ( accumulateInterestingSites )
|
||||
// interestingSites.add(vc);
|
||||
// if ( printInterestingSites )
|
||||
// System.out.printf("%40s %s%n", interestingSitePrefix, why);
|
||||
// }
|
||||
|
||||
public abstract boolean enabled();
|
||||
//public boolean processedAnySites() { return processedASite; }
|
||||
|
|
@ -43,9 +44,17 @@ abstract class VariantEvaluator {
|
|||
public abstract int getComparisonOrder();
|
||||
|
||||
/** called at all sites, regardless of eval context itself; useful for counting processed bases */
|
||||
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { }
|
||||
public void update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { }
|
||||
public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { }
|
||||
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
||||
}
|
||||
|
||||
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public void finalize() {}
|
||||
|
||||
|
|
|
|||
|
|
@ -35,6 +35,8 @@ public class BeagleTrioToVCFWalker extends RodWalker<VariantContext, Long> {
|
|||
@Argument(shortName="eth", fullName="excludeTripleHets", doc="If provide, sites that are triple hets calls will not be phased, regardless of Beagle's value", required=false)
|
||||
protected boolean dontPhaseTripleHets = false;
|
||||
|
||||
int nTripletHets = 0;
|
||||
|
||||
private MendelianViolationEvaluator.TrioStructure trio = null;
|
||||
|
||||
private VCFWriter writer;
|
||||
|
|
@ -81,8 +83,10 @@ public class BeagleTrioToVCFWalker extends RodWalker<VariantContext, Long> {
|
|||
Genotype unphasedDad = unphased.getGenotype(trio.dad);
|
||||
Genotype unphasedKid = unphased.getGenotype(trio.child);
|
||||
|
||||
if ( dontPhaseTripleHets && unphasedMom.isHet() && unphasedDad.isHet() && unphasedKid.isHet() )
|
||||
if ( dontPhaseTripleHets && unphasedMom.isHet() && unphasedDad.isHet() && unphasedKid.isHet() ) {
|
||||
nTripletHets++;
|
||||
return unphased;
|
||||
}
|
||||
else {
|
||||
Allele momTrans = unphased.getAllele(momBgl.get(0));
|
||||
Allele momUntrans = unphased.getAllele(momBgl.get(1));
|
||||
|
|
@ -111,8 +115,8 @@ public class BeagleTrioToVCFWalker extends RodWalker<VariantContext, Long> {
|
|||
return sum;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
//logger.info(String.format("Saw %d raw SNPs", result.nVariants));
|
||||
public void onTraversalDone(Long result) {
|
||||
logger.info(String.format("Ignored phasing of %d het/het/het genotypes", nTripletHets));
|
||||
//logger.info(String.format("Converted %d (%.2f%%) of these sites", result.nConverted, (100.0 * result.nConverted) / result.nVariants));
|
||||
}
|
||||
|
||||
|
|
@ -126,4 +130,4 @@ public class BeagleTrioToVCFWalker extends RodWalker<VariantContext, Long> {
|
|||
|
||||
return prevReduce;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,48 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval2;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.io.File;
|
||||
|
||||
public class VariantEval2IntegrationTest extends WalkerTest {
|
||||
private static String cmdRoot = "-T VariantEval2" +
|
||||
" -R " + oneKGLocation + "reference/human_b36_both.fasta";
|
||||
|
||||
private static String root = cmdRoot +
|
||||
" -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
|
||||
" -B eval,VCF,/humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf";
|
||||
|
||||
static HashMap<String, String> expectations = new HashMap<String, String>();
|
||||
static {
|
||||
expectations.put("-L 1:1-10,000,000", "e7dba09b1856b9be86816939596a5062");
|
||||
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "c170c672ca2ef86068cc5dee9aaac022");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVE2Simple() {
|
||||
|
||||
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
||||
String extraArgs = entry.getKey();
|
||||
String md5 = entry.getValue();
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s",
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
executeTest("testVE2Simple", spec);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVE2WriteVCF() {
|
||||
String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30";
|
||||
WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s",
|
||||
2,
|
||||
Arrays.asList("c53d7638df2d7440dee1fd274d1f6384", "9ec81f7389c0971e44e4b8d2d4af3008"));
|
||||
executeTest("testVE2WriteVCF", spec);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue