improvements to ve2, including more meaningful mendelian violation counting. Support for VCF emitted interesting sites, annotated according to the evaluations themselves. Basic intergration test for VE2 started

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2819 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-02-10 16:12:29 +00:00
parent 3de72daa88
commit c66861746a
9 changed files with 238 additions and 117 deletions

View File

@ -247,11 +247,18 @@ public class RefMetaDataTracker {
* @return
*/
public Collection<VariantContext> getVariantContexts(String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
RODRecordList<ReferenceOrderedDatum> rodList = getTrackData(name, null);
return getVariantContexts(Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly);
}
public Collection<VariantContext> getVariantContexts(Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
Collection<VariantContext> contexts = new ArrayList<VariantContext>();
if ( rodList != null )
addVariantContexts(contexts, rodList, allowedTypes, curLocation, requireStartHere, takeFirstOnly );
for ( String name : names ) {
RODRecordList<ReferenceOrderedDatum> rodList = getTrackData(name, null);
if ( rodList != null )
addVariantContexts(contexts, rodList, allowedTypes, curLocation, requireStartHere, takeFirstOnly );
}
return contexts;
}
@ -271,8 +278,10 @@ public class RefMetaDataTracker {
if ( contexts.size() > 1 )
throw new StingException("Requested a single VariantContext object for track " + name + " but multiple variants were present at position " + curLocation);
return contexts.iterator().next();
else if ( contexts.size() == 0 )
return null;
else
return contexts.iterator().next();
}
private void addVariantContexts(Collection<VariantContext> contexts, RODRecordList<ReferenceOrderedDatum> rodList, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {

View File

@ -43,11 +43,11 @@ public class CountVariants extends VariantEvaluator {
return 1; // we only need to see each eval track
}
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
nProcessedLoci += context.getSkippedBases() + 1;
}
public void update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
//nProcessedLoci++;
nCalledLoci++;
@ -70,6 +70,8 @@ public class CountVariants extends VariantEvaluator {
default: throw new StingException("BUG: Unexpected genotype type: " + g);
}
}
return null; // we don't capture any intersting sites
}
public String toString() {

View File

@ -91,7 +91,7 @@ public class DbSNPPercentage extends VariantEvaluator {
public double dbSNPRate() { return rate(nSNPsAtdbSNPs(), nEvalSNPs()); }
public double concordanceRate() { return rate(nConcordant(), nSNPsAtdbSNPs()); }
public void update2(VariantContext eval, VariantContext dbsnp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
public String update2(VariantContext eval, VariantContext dbsnp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
boolean dbSNPIsGood = dbsnp != null && dbsnp.isSNP() && dbsnp.isNotFiltered();
boolean evalIsGood = eval != null && eval.isSNP();
@ -104,5 +104,7 @@ public class DbSNPPercentage extends VariantEvaluator {
if ( ! discordantP(eval, dbsnp) ) // count whether we're concordant or not with the dbSNP value
nConcordant++;
}
return null; // we don't capture any interesting sites
}
}

View File

@ -40,10 +40,12 @@ import java.util.regex.Matcher;
* Check that the child is one of these.
*/
public class MendelianViolationEvaluator extends VariantEvaluator {
long nVariants, nViolations, nOverCalls, nUnderCalls;
long nVariants, nViolations;
TrioStructure trio;
VariantEval2Walker parent;
long KidHomRef_ParentHomVar, KidHet_ParentsHomRef, KidHet_ParentsHomVar, KidHomVar_ParentHomRef;
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
public static class TrioStructure {
@ -90,7 +92,7 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
return 1; // we only need to see each eval track
}
public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( vc.isBiallelic() && vc.hasGenotypes() ) { // todo -- currently limited to biallelic loci
nVariants++;
@ -105,61 +107,34 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
nViolations++;
String label = null;
switch ( getViolationType( vc, momG, dadG, childG ) ) {
case UNDER_CALL:
nUnderCalls++;
label = "under_called";
break;
case OVER_CALL:
nOverCalls++;
label = "over_called";
break;
default:
throw new StingException("BUG: unexpected violation type at " + vc);
if ( childG.isHomRef() && (momG.isHomVar() || dadG.isHomVar() )) {
label = "KidHomRef_ParentHomVar";
KidHomRef_ParentHomVar++;
} else if (childG.isHet() && (momG.isHomRef() && dadG.isHomRef()) ) {
label = "KidHet_ParentsHomRef";
KidHet_ParentsHomRef++;
} else if (childG.isHet() && (momG.isHomVar() && dadG.isHomVar()) ) {
label = "KidHet_ParentsHomVar";
KidHet_ParentsHomVar++;
} else if (childG.isHomVar() && (momG.isHomRef() || dadG.isHomRef())) {
label = "KidHomVar_ParentHomRef";
KidHomVar_ParentHomRef++;
} else {
throw new StingException("BUG: unexpected child genotype class " + childG);
}
String why = String.format("Mendelian violation %s: at %s m=%s d=%s c=%s", label, vc.getLocation(), momG.toBriefString(), dadG.toBriefString(), childG.toBriefString());
addInterestingSite(why , vc);
return label;
}
}
}
}
/**
* Are we under or over calling?
*
* Assuming this is a bialleic locus, we then have 2 alleles A and B. There are really two types of violations:
*
* Undercall: where the child is A/A but parent genotypes imply that child must carry at least one B allele
* Overall: where the child carries a B allele but this B allele couldn't have been inherited from either parent
*
* The way to determine this is to look at mom and dad separately. If the child doesn't carry at least one
* allele from each parent, it's an under calls. Otherwise it's an overcall.
*/
public ViolationType getViolationType(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG ) {
switch ( childG.getType() ) {
case HOM_REF:
return ViolationType.UNDER_CALL; // if you have to undercalled as a hom ref child
case HET:
// the only two violations of a het is where both parents are hom. If they are hom ref, you overcalled,
// otherwise you undercalled
return momG.isHomRef() ? ViolationType.OVER_CALL : ViolationType.UNDER_CALL;
case HOM_VAR:
return ViolationType.OVER_CALL; // if you have to overcalled as a hom var child
default:
throw new StingException("BUG: unexpected child genotype class " + childG);
}
return null; // we don't capture any intersting sites
}
private boolean includeGenotype(Genotype g) {
return g.getNegLog10PError() > getQThreshold() && g.isCalled();
}
private enum ViolationType {
UNDER_CALL, OVER_CALL
}
public static boolean isViolation(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG ) {
//VariantContext momVC = vc.subContextFromGenotypes(momG);
//VariantContext dadVC = vc.subContextFromGenotypes(dadG);
@ -183,17 +158,11 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
}
private String summaryLine() {
return String.format("%d %d %.2e %d %.2e %.2f %d %.2e %.2f",
nVariants, nViolations, rate(nViolations, nVariants),
nOverCalls, rate(nOverCalls, nVariants), ratio(nOverCalls, nViolations),
nUnderCalls, rate(nUnderCalls, nVariants), ratio(nUnderCalls, nViolations));
return String.format("%d %d %d %d %d %d", nVariants, nViolations, KidHomRef_ParentHomVar, KidHet_ParentsHomRef, KidHet_ParentsHomVar, KidHomVar_ParentHomRef);
}
private static List<String> HEADER =
Arrays.asList("nVariants",
"nViolations", "violationRate",
"nOverCalls", "overCallRate", "overCallFraction",
"nUnderCalls", "underCallRate", "underCallFraction");
Arrays.asList("nVariants", "nViolations", "KidHomRef_ParentHomVar", "KidHet_ParentsHomRef", "KidHet_ParentsHomVar", "KidHomVar_ParentHomRef");
// making it a table
public List<String> getTableHeader() {
@ -203,4 +172,4 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
public List<List<String>> getTableRows() {
return Arrays.asList(Arrays.asList(summaryLine().split(" ")));
}
}
}

View File

@ -36,9 +36,11 @@ public class TiTvVariantEvaluator extends VariantEvaluator {
}
}
public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( vc1 != null ) updateTiTv(vc1, false);
if ( vc2 != null ) updateTiTv(vc2, true);
return null; // we don't capture any intersting sites
}
public String toString() {

View File

@ -4,17 +4,25 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableVariantContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.apache.log4j.Logger;
import java.util.*;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.io.File;
// todo -- evalations should support comment lines
// todo -- add Mendelian variable explanations (nDeNovo and nMissingTransmissions)
// todo -- interesting sites should support VCF generation, so that FN, FP, DeNovo, etc calls get put into a single VCF and
// todo -- an explanation added to the INFO field as to why it showed up there.
//
// todo -- write a simple column table system and have the evaluators return this instead of the list<list<string>> objects
@ -28,6 +36,12 @@ import java.lang.reflect.InvocationTargetException;
// todo -- create JEXL context implementing object that simply looks up values for JEXL evaluations. Throws error for unknown fields
//
// todo -- port over SNP density evaluator.
// todo -- add subgroup of known variants as to those at hapmap sites [it's in the dbSNP record]
// todo -- deal with performance issues with variant contexts
//
// Todo -- should really include argument parsing @annotations from subclass in this walker. Very
// todo -- useful general capability. Right now you need to add arguments to VariantEval2 to handle new
@ -36,7 +50,8 @@ import java.lang.reflect.InvocationTargetException;
//
// todo -- the whole organization only supports a single eval x comp evaluation. We need to instantiate
// todo -- new contexts for each comparison object too!
// todo -- new contexts for each comparison object too! The output table should be clear as to what the "comp"
// todo -- variable is in the analysis
//
//
@ -73,12 +88,9 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
@Argument(shortName="MVQ", fullName="MendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
@Argument(shortName="PI", fullName="PrintInterestingSites", doc="If provided, interesting sites in the unselected, called set will be printed", required=false)
protected boolean PRINT_INTERESTING_SITES = false;
@Argument(shortName="outputVCF", fullName="InterestingSitesVCF", doc="If provided, interesting sites emitted to this vcf and the INFO field annotated as to why they are interesting", required=false)
protected String outputVCF = null;
// @Argument(shortName="PIA", fullName="PrintAllInterestingSites", doc="If provided, interesting sites will be printed for all sets. Verbose", required=false)
// protected boolean PRINT_ALL_INTERESTING_SITES = false;
// --------------------------------------------------------------------------------------------------------------
//
// private walker data
@ -89,12 +101,14 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
private class EvaluationContext extends HashMap<String, Set<VariantEvaluator>> {
// useful for typing
public String trackName, contextName;
public boolean enableInterestingSiteCaptures = false;
VariantContextUtils.JexlVCMatchExp selectExp;
public EvaluationContext(String trackName, String contextName, VariantContextUtils.JexlVCMatchExp selectExp) {
public EvaluationContext(String trackName, String contextName, VariantContextUtils.JexlVCMatchExp selectExp, boolean enableInterestingSiteCaptures) {
this.trackName = trackName;
this.contextName = contextName;
this.selectExp = selectExp;
this.enableInterestingSiteCaptures = enableInterestingSiteCaptures;
}
}
@ -112,6 +126,10 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
private List<String> variantEvaluationNames = new ArrayList<String>();
private List<Class<? extends VariantEvaluator>> evaluationClasses = null;
/** output writer for interesting sites */
private VCFWriter writer = null;
private boolean wroteHeader = false;
// --------------------------------------------------------------------------------------------------------------
//
// initialize
@ -136,11 +154,14 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
}
determineContextNamePartSizes();
if ( outputVCF != null )
writer = new VCFWriter(new File(outputVCF));
}
private void determineAllEvalations() {
evaluationClasses = PackageUtils.getClassesImplementingInterface(VariantEvaluator.class);
for ( VariantEvaluator e : instantiateEvalationsSet(false, null) ) {
for ( VariantEvaluator e : instantiateEvalationsSet() ) {
// for collecting purposes
variantEvaluationNames.add(e.getName());
logger.debug("Including VariantEvaluator " + e.getName() + " of class " + e.getClass());
@ -149,7 +170,7 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
Collections.sort(variantEvaluationNames);
}
private Set<VariantEvaluator> instantiateEvalationsSet(boolean baseline, String name) {
private Set<VariantEvaluator> instantiateEvalationsSet() {
Set<VariantEvaluator> evals = new HashSet<VariantEvaluator>();
Object[] args = new Object[]{this};
Class[] argTypes = new Class[]{this.getClass()};
@ -158,7 +179,6 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
try {
Constructor constructor = c.getConstructor(argTypes);
VariantEvaluator eval = (VariantEvaluator)constructor.newInstance(args);
if ( baseline ) eval.printInterestingSites(name);
evals.add(eval);
} catch (InstantiationException e) {
throw new StingException(String.format("Cannot instantiate class '%s': must be concrete class", c.getSimpleName()));
@ -175,18 +195,25 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
}
private void addNewContext(String trackName, String contextName, VariantContextUtils.JexlVCMatchExp selectExp) {
EvaluationContext group = new EvaluationContext(trackName, contextName, selectExp);
EvaluationContext group = new EvaluationContext(trackName, contextName, selectExp, selectExp == null);
for ( String filteredName : Arrays.asList(RAW_SET_NAME, RETAINED_SET_NAME, FILTERED_SET_NAME) ) {
for ( String subname : Arrays.asList(ALL_SET_NAME, KNOWN_SET_NAME, NOVEL_SET_NAME) ) {
String name = subname + "." + filteredName;
group.put(name, instantiateEvalationsSet(subname == ALL_SET_NAME && filteredName == RETAINED_SET_NAME, trackName + "." + (selectExp == null ? "all" : selectExp.name) + "." + name));
//System.out.printf("Creating group name: " + name);
group.put(name, instantiateEvalationsSet());
//group.put(name, instantiateEvalationsSet(subname == ALL_SET_NAME && filteredName == RETAINED_SET_NAME, trackName + "." + (selectExp == null ? "all" : selectExp.name) + "." + name));
}
}
contexts.put(contextName, group);
}
private boolean captureInterestingSitesOfEvalSet(String name) {
//System.out.printf("checking %s%n", name);
return name.contains(ALL_SET_NAME + "." + RETAINED_SET_NAME);
}
// --------------------------------------------------------------------------------------------------------------
//
// map
@ -199,17 +226,19 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
if ( ref == null )
return 0;
Map<String, VariantContext> comps = getCompVariantContexts(tracker, context);
Collection<VariantContext> comps = getCompVariantContexts(tracker, context);
// to enable walking over pairs where eval or comps have no elements
for ( EvaluationContext group : contexts.values() ) {
VariantContext vc = getVariantContext(group.trackName, tracker, context);
VariantContext vc = getEvalContext(group.trackName, tracker, context);
//logger.debug(String.format("Updating %s of %s with variant", group.name, vc));
for ( Map.Entry<String, Set<VariantEvaluator>> namedEvaluations : group.entrySet() ) {
String evaluationName = namedEvaluations.getKey();
Set<VariantEvaluator> evaluations = namedEvaluations.getValue();
boolean evalWantsVC = applyVCtoEvaluation(evaluationName, vc, comps, group);
List<String> interestingReasons = new ArrayList<String>();
for ( VariantEvaluator evaluation : evaluations ) {
if ( evaluation.enabled() ) {
@ -219,12 +248,15 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
// now call the single or paired update function
switch ( evaluation.getComparisonOrder() ) {
case 1:
if ( evalWantsVC && vc != null )
evaluation.update1(vc, tracker, ref, context);
if ( evalWantsVC && vc != null ) {
String interesting = evaluation.update1(vc, tracker, ref, context);
if ( interesting != null ) interestingReasons.add(interesting);
}
break;
case 2:
for ( VariantContext comp : comps.values() ) {
evaluation.update2( evalWantsVC ? vc : null, comp, tracker, ref, context);
for ( VariantContext comp : comps ) {
String interesting = evaluation.update2( evalWantsVC ? vc : null, comp, tracker, ref, context);
if ( interesting != null ) interestingReasons.add(interesting);
}
break;
default:
@ -232,16 +264,53 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
}
}
}
if ( group.enableInterestingSiteCaptures && captureInterestingSitesOfEvalSet(evaluationName) )
writeInterestingSite(interestingReasons, vc);
}
}
return 0;
}
private boolean applyVCtoEvaluation(String evaluationName, VariantContext vc, Map<String, VariantContext> comps, EvaluationContext group) {
private void writeInterestingSite(List<String> interestingReasons, VariantContext vc) {
if ( writer != null && interestingReasons.size() > 0 ) {
MutableVariantContext mvc = new MutableVariantContext(vc);
for ( String why : interestingReasons ) {
String key, value;
String[] parts = why.split("=");
switch ( parts.length ) {
case 1:
key = parts[0];
value = "1";
break;
case 2:
key = parts[0];
value = parts[1];
break;
default:
throw new IllegalStateException("BUG: saw a interesting site reason sting with multiple = signs " + why);
}
mvc.putAttribute(key, value);
}
if ( ! wroteHeader ) {
writer.writeHeader(VariantContextAdaptors.createVCFHeader(null, vc));
wroteHeader = true;
}
writer.addRecord(VariantContextAdaptors.toVCF(mvc));
//interestingReasons.clear();
}
}
private boolean applyVCtoEvaluation(String evaluationName, VariantContext vc, Collection<VariantContext> comps, EvaluationContext group) {
if ( vc == null )
return true;
if ( evaluationName.contains(FILTERED_SET_NAME) && vc.isNotFiltered() )
return false;
@ -261,11 +330,15 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
return true;
}
private boolean vcIsKnown(VariantContext vc, Map<String, VariantContext> comps, String[] knownNames ) {
for ( String knownName : knownNames ) {
VariantContext known = comps.get(knownName);
if ( known != null && known.isNotFiltered() && known.getType() == vc.getType() )
return true;
private boolean vcIsKnown(VariantContext vc, Collection<VariantContext> comps, String[] knownNames ) {
for ( VariantContext comp : comps ) {
if ( comp.isNotFiltered() && comp.getType() == vc.getType() ) {
for ( String knownName : knownNames ) {
if ( comp.getName().equals(knownName) ) {
return true;
}
}
}
}
return false;
@ -278,20 +351,23 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
//
//logger.info(String.format("Ignore second+ events at locus %s in rod %s => rec is %s", context.getLocation(), rodList.getName(), rec));
private Map<String, VariantContext> getCompVariantContexts(RefMetaDataTracker tracker, AlignmentContext context) {
Map<String, VariantContext> comps = new HashMap<String, VariantContext>();
for ( String compName : compNames ) {
comps.put(compName, getVariantContext(compName, tracker, context));
}
private Collection<VariantContext> getCompVariantContexts(RefMetaDataTracker tracker, AlignmentContext context) {
// todo -- we need to deal with dbSNP where there can be multiple records at the same start site. A potential solution is to
// todo -- allow the variant evaluation to specify the type of variants it wants to see and only take the first such record at a site
Collection<VariantContext> comps = tracker.getVariantContexts(compNames, null, context.getLocation(), true, true);
// todo -- remove me when the loop works correctly for comparisons of eval x comp for each comp
if ( comps.size() > 1 ) throw new StingException("VariantEval2 currently only supports comparisons of N eval tracks vs. a single comparison track. Yes, I know...");
return comps;
}
private VariantContext getVariantContext(String name, RefMetaDataTracker tracker, AlignmentContext context) {
return tracker.getVariantContext(name, null, context.getLocation(), true);
private VariantContext getEvalContext(String name, RefMetaDataTracker tracker, AlignmentContext context) {
Collection<VariantContext> contexts = tracker.getVariantContexts(name, null, context.getLocation(), true, false);
if ( context.size() > 1 )
throw new StingException("Found multiple variant contexts at " + context.getLocation());
return contexts.size() == 1 ? contexts.iterator().next() : null;
}
// --------------------------------------------------------------------------------------------------------------
@ -338,7 +414,7 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
}
}
}
private String formatKeyword(String keyWord) {
//System.out.printf("keyword %s%n", keyWord);

View File

@ -16,24 +16,25 @@ import java.util.ArrayList;
* To change this template use File | Settings | File Templates.
*/
abstract class VariantEvaluator {
protected boolean accumulateInterestingSites = false, printInterestingSites = false;
protected String interestingSitePrefix = null;
// protected boolean accumulateInterestingSites = false, printInterestingSites = false;
// protected String interestingSitePrefix = null;
protected boolean processedASite = false;
protected List<VariantContext> interestingSites = new ArrayList<VariantContext>();
// protected List<VariantContext> interestingSites = new ArrayList<VariantContext>();
public abstract String getName();
// do we want to keep track of things that are interesting
public void accumulateInterestingSites(boolean enable) { accumulateInterestingSites = enable; }
public void printInterestingSites(String prefix) { printInterestingSites = true; interestingSitePrefix = prefix; }
public boolean isAccumulatingInterestingSites() { return accumulateInterestingSites; }
public List<VariantContext> getInterestingSites() { return interestingSites; }
protected void addInterestingSite(String why, VariantContext vc) {
if ( accumulateInterestingSites )
interestingSites.add(vc);
if ( printInterestingSites )
System.out.printf("%40s %s%n", interestingSitePrefix, why);
}
// public void accumulateInterestingSites(boolean enable) { accumulateInterestingSites = enable; }
// public void printInterestingSites(String prefix) { printInterestingSites = true; interestingSitePrefix = prefix; }
// public boolean isAccumulatingInterestingSites() { return accumulateInterestingSites; }
// public List<VariantContext> getInterestingSites() { return interestingSites; }
// protected void addInterestingSite(String why, VariantContext vc) {
// if ( accumulateInterestingSites )
// interestingSites.add(vc);
// if ( printInterestingSites )
// System.out.printf("%40s %s%n", interestingSitePrefix, why);
// }
public abstract boolean enabled();
//public boolean processedAnySites() { return processedASite; }
@ -43,9 +44,17 @@ abstract class VariantEvaluator {
public abstract int getComparisonOrder();
/** called at all sites, regardless of eval context itself; useful for counting processed bases */
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { }
public void update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { }
public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { }
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
}
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
return null;
}
public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
return null;
}
public void finalize() {}

View File

@ -35,6 +35,8 @@ public class BeagleTrioToVCFWalker extends RodWalker<VariantContext, Long> {
@Argument(shortName="eth", fullName="excludeTripleHets", doc="If provide, sites that are triple hets calls will not be phased, regardless of Beagle's value", required=false)
protected boolean dontPhaseTripleHets = false;
int nTripletHets = 0;
private MendelianViolationEvaluator.TrioStructure trio = null;
private VCFWriter writer;
@ -81,8 +83,10 @@ public class BeagleTrioToVCFWalker extends RodWalker<VariantContext, Long> {
Genotype unphasedDad = unphased.getGenotype(trio.dad);
Genotype unphasedKid = unphased.getGenotype(trio.child);
if ( dontPhaseTripleHets && unphasedMom.isHet() && unphasedDad.isHet() && unphasedKid.isHet() )
if ( dontPhaseTripleHets && unphasedMom.isHet() && unphasedDad.isHet() && unphasedKid.isHet() ) {
nTripletHets++;
return unphased;
}
else {
Allele momTrans = unphased.getAllele(momBgl.get(0));
Allele momUntrans = unphased.getAllele(momBgl.get(1));
@ -111,8 +115,8 @@ public class BeagleTrioToVCFWalker extends RodWalker<VariantContext, Long> {
return sum;
}
public void onTraversalDone(Integer result) {
//logger.info(String.format("Saw %d raw SNPs", result.nVariants));
public void onTraversalDone(Long result) {
logger.info(String.format("Ignored phasing of %d het/het/het genotypes", nTripletHets));
//logger.info(String.format("Converted %d (%.2f%%) of these sites", result.nConverted, (100.0 * result.nConverted) / result.nVariants));
}
@ -126,4 +130,4 @@ public class BeagleTrioToVCFWalker extends RodWalker<VariantContext, Long> {
return prevReduce;
}
}
}

View File

@ -0,0 +1,48 @@
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval2;
import org.broadinstitute.sting.WalkerTest;
import org.junit.Test;
import java.util.HashMap;
import java.util.Map;
import java.util.Arrays;
import java.util.List;
import java.io.File;
public class VariantEval2IntegrationTest extends WalkerTest {
private static String cmdRoot = "-T VariantEval2" +
" -R " + oneKGLocation + "reference/human_b36_both.fasta";
private static String root = cmdRoot +
" -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
" -B eval,VCF,/humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf";
static HashMap<String, String> expectations = new HashMap<String, String>();
static {
expectations.put("-L 1:1-10,000,000", "e7dba09b1856b9be86816939596a5062");
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "c170c672ca2ef86068cc5dee9aaac022");
}
@Test
public void testVE2Simple() {
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
String extraArgs = entry.getKey();
String md5 = entry.getValue();
WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s",
1, // just one output file
Arrays.asList(md5));
executeTest("testVE2Simple", spec);
}
}
@Test
public void testVE2WriteVCF() {
String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30";
WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s",
2,
Arrays.asList("c53d7638df2d7440dee1fd274d1f6384", "9ec81f7389c0971e44e4b8d2d4af3008"));
executeTest("testVE2WriteVCF", spec);
}
}