Parallelized VariantEval. Refactored output to support parallel output style. Minor improvements to testing framework to enable easy executeTestParallel to run -nt 1 and -nt 4 by default.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4574 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-10-26 20:21:38 +00:00
parent 8211cee0b2
commit b085648141
4 changed files with 84 additions and 72 deletions

View File

@ -278,7 +278,7 @@ public class GenotypeConcordance extends VariantEvaluator implements StandardEva
// todo -- method that gets the header (or samples) for the first eval sites?
if (missedValidationData.size() > MAX_MISSED_VALIDATION_DATA) {
if (!warnedAboutValidationData) {
logger.warn("Too many genotype sites missed before eval site appeared; ignoring");
//logger.warn("Too many genotype sites missed before eval site appeared; ignoring");
warnedAboutValidationData = true;
}
} else {

View File

@ -30,6 +30,8 @@ import org.broad.tribble.util.variantcontext.MutableVariantContext;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.VCFConstants;
import org.broad.tribble.vcf.VCFWriter;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
@ -40,6 +42,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.ApplyVariantCuts;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.report.ReportMarshaller;
@ -49,6 +52,7 @@ import org.broadinstitute.sting.utils.report.utils.Node;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.classloader.PackageUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
@ -112,7 +116,7 @@ import java.util.*;
* General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ts/Tv ratios, and a lot more)
*/
@Reference(window=@Window(start=-50,stop=50))
public class VariantEvalWalker extends RodWalker<Integer, Integer> {
public class VariantEvalWalker extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
@Output
protected PrintStream out;
@ -162,7 +166,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
@Argument(shortName="MVQ", fullName="MendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
@Argument(shortName="outputVCF", fullName="InterestingSitesVCF", doc="If provided, interesting sites emitted to this vcf and the INFO field annotated as to why they are interesting", required=false)
@Output(shortName="outputVCF", fullName="InterestingSitesVCF", doc="If provided, interesting sites emitted to this vcf and the INFO field annotated as to why they are interesting", required=false)
protected VCFWriter writer = null;
@Argument(shortName="gcLog", fullName="GenotypeCocordanceLog", doc="If provided, sites with genotype concordance problems (e.g., FP and FNs) will be emitted ot this file", required=false)
@ -291,9 +295,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
// Dynamically determined variantEvaluation classes
private Set<Class<? extends VariantEvaluator>> evaluationClasses = null;
/** output writer for interesting sites */
private boolean wroteHeader = false;
// --------------------------------------------------------------------------------------------------------------
//
// initialize
@ -353,6 +354,12 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
throw new IllegalArgumentException("rsIDFile " + rsIDFile + " was given but associated max RSID build parameter wasn't available");
rsIDsToExclude = getrsIDsToExclude(new File(rsIDFile), maxRsIDBuild);
}
if ( writer != null ) {
Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), evalNames);
final VCFHeader vcfHeader = new VCFHeader(new HashSet<VCFHeaderLine>(), samples);
writer.writeHeader(vcfHeader);
}
}
private void listModulesAndExit() {
@ -514,48 +521,50 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
List<String> interestingReasons = new ArrayList<String>();
for ( VariantEvaluator evaluation : evaluations ) {
if ( evaluation.enabled() ) {
// we always call update0 in case the evaluation tracks things like number of bases covered
evaluation.update0(tracker, ref, context);
synchronized ( evaluation ) {
if ( evaluation.enabled() ) {
// we always call update0 in case the evaluation tracks things like number of bases covered
evaluation.update0(tracker, ref, context);
// the other updateN methods don't see a null context
if ( tracker == null )
continue;
// now call the single or paired update function
switch ( evaluation.getComparisonOrder() ) {
case 1:
if ( evalWantsVC && vc != null ) {
String interesting = evaluation.update1(vc, tracker, ref, context, group);
if ( interesting != null ) interestingReasons.add(interesting);
}
break;
case 2:
VariantContext comp = vcs.get(group.compTrackName);
if ( comp != null &&
minCompQualScore != NO_MIN_QUAL_SCORE &&
comp.hasNegLog10PError() &&
comp.getNegLog10PError() < (minCompQualScore / 10.0) )
comp = null;
String interesting = evaluation.update2( evalWantsVC ? vc : null, comp, tracker, ref, context, group );
/** TODO
-- for Eric: Fix me (current implementation causes GenotypeConcordance
to treat sites that don't match JEXL as no-calls)
String interesting = null;
if (evalWantsVC)
{
interesting = evaluation.update2( evalWantsVC ? vc : null, comp, tracker, ref, context, group );
}
**/
// the other updateN methods don't see a null context
if ( tracker == null )
continue;
// now call the single or paired update function
switch ( evaluation.getComparisonOrder() ) {
case 1:
if ( evalWantsVC && vc != null ) {
String interesting = evaluation.update1(vc, tracker, ref, context, group);
if ( interesting != null ) interestingReasons.add(interesting);
}
break;
case 2:
VariantContext comp = vcs.get(group.compTrackName);
if ( comp != null &&
minCompQualScore != NO_MIN_QUAL_SCORE &&
comp.hasNegLog10PError() &&
comp.getNegLog10PError() < (minCompQualScore / 10.0) )
comp = null;
String interesting = evaluation.update2( evalWantsVC ? vc : null, comp, tracker, ref, context, group );
/** TODO
-- for Eric: Fix me (current implementation causes GenotypeConcordance
to treat sites that don't match JEXL as no-calls)
String interesting = null;
if (evalWantsVC)
{
interesting = evaluation.update2( evalWantsVC ? vc : null, comp, tracker, ref, context, group );
}
**/
if ( interesting != null ) interestingReasons.add(interesting);
break;
default:
throw new ReviewedStingException("BUG: Unexpected evaluation order " + evaluation);
break;
default:
throw new ReviewedStingException("BUG: Unexpected evaluation order " + evaluation);
}
}
}
}
@ -592,10 +601,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
mvc.putAttribute(key, value);
}
if ( ! wroteHeader ) {
writer.writeHeader(VariantContextAdaptors.createVCFHeader(null, vc));
wroteHeader = true;
}
writer.add(mvc, ref);
//interestingReasons.clear();
@ -715,6 +720,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
return point + sum;
}
public Integer treeReduce(Integer point, Integer sum) {
return point + sum;
}
public VariantEvaluator getEvalByName(String name, Set<VariantEvaluator> s) {
for ( VariantEvaluator e : s )
if ( e.getName().equals(name) )
@ -734,20 +743,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
}
}
// private String formatKeyword(String keyWord) {
// //System.out.printf("keyword %s%n", keyWord);
//
// StringBuilder s = new StringBuilder();
// int i = 0;
// for ( String part : keyWord.split("\\.") ) {
// //System.out.printf("part %s %d%n", part, nameSizes[i]);
// s.append(String.format("%" + nameSizes[i] + "s ", part));
// i++;
// }
//
// return s.toString();
// }
public void onTraversalDone(Integer result) {
// our report mashaller
ReportMarshaller marshaller;

View File

@ -246,6 +246,23 @@ public class WalkerTest extends BaseTest {
return false;
}
protected Pair<List<File>, List<String>> executeTestParallel(final String name, WalkerTestSpec spec) {
return executeTest(name, spec, Arrays.asList(1, 4));
}
protected Pair<List<File>, List<String>> executeTest(final String name, WalkerTestSpec spec, List<Integer> parallelThreads) {
String originalArgs = spec.args;
Pair<List<File>, List<String>> results = null;
for ( int nt : parallelThreads ) {
String extra = nt == 1 ? "" : (" -nt " + nt);
spec.args = originalArgs + extra;
results = executeTest(name + "-nt-" + nt, spec);
}
return results;
}
protected Pair<List<File>, List<String>> executeTest(final String name, WalkerTestSpec spec) {
ensureMd5DbDirectory(); // ensure the md5 directory exists

View File

@ -30,7 +30,7 @@ public class
for (String tests : testsEnumerations) {
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -o %s",
1, Arrays.asList("158ac8e6d32eb2ea1bbeebfa512965de"));
executeTest("testSelect1", spec);
executeTestParallel("testSelect1", spec);
}
}
@ -39,7 +39,7 @@ public class
String extraArgs = "-L 1:1-10,000,000";
WalkerTestSpec spec = new WalkerTestSpec( withSelect(withSelect(root, "DP < 50", "DP50"), "set==\"Intersection\"", "intersection") + " " + extraArgs + " -o %s",
1, Arrays.asList("cee96f61ffa1d042fe0c63550c508ec9"));
executeTest("testSelect2", spec);
executeTestParallel("testSelect2", spec);
}
@Test
@ -49,7 +49,7 @@ public class
WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -B:eval,VCF " + validationDataLocation + vcfFile + " -B:comp,VCF " + validationDataLocation + "GenotypeConcordanceComp.vcf -noStandard -E GenotypeConcordance -reportType CSV -o %s",
1,
Arrays.asList("7e9ce1b26cdeaa50705f5de163847638"));
executeTest("testVEGenotypeConcordance" + vcfFile, spec);
executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec);
}
}
@ -67,7 +67,7 @@ public class
WalkerTestSpec spec = new WalkerTestSpec( tests + " " + extraArgs + " -o %s",
1, // just one output file
Arrays.asList(md5));
executeTest("testVESimple", spec);
executeTestParallel("testVESimple", spec);
}
}
}
@ -92,7 +92,7 @@ public class
WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs1 + extraArgs2 + " -o %s",
1, // just one output file
Arrays.asList(md5));
executeTest("testVEComplex", spec);
executeTestParallel("testVEComplex", spec);
}
}
}
@ -109,17 +109,17 @@ public class
String md5 = "d41d8cd98f00b204e9800998ecf8427e";
WalkerTestSpec spec = new WalkerTestSpec(vecmd, 1, Arrays.asList(md5));
executeTest("testVEGenomicallyAnnotated", spec);
executeTestParallel("testVEGenomicallyAnnotated", spec);
}
@Test
public void testVEWriteVCF() {
String extraArgs = "-L 1:1-10,000,000 -NO_HEADER -family NA19238+NA19239=NA19240 -MVQ 30 -E MendelianViolationEvaluator";
for (String tests : testsEnumerations) {
WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs + " -o %s -outputVCF %s",
WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs + " -o %s -outputVCF %s -NO_HEADER",
2,
Arrays.asList("6b97a019402b3984fead9a4e8b7c7c2a", "989bc30dea6c8a4cf771cd1b9fdab488"));
executeTest("testVEWriteVCF", spec);
executeTestParallel("testVEWriteVCF", spec);
}
}
@ -127,7 +127,7 @@ public class
public void testCompVsEvalAC() {
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -E GenotypeConcordance -B:evalYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.fake.genotypes.ac.test.vcf -reportType CSV";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("25a681855cb26e7380fbf1a93de0a41f"));
executeTest("testACDiscordanceAtAC1EvalAC2Comp",spec);
executeTestParallel("testACDiscordanceAtAC1EvalAC2Comp",spec);
}
private static String withSelect(String cmd, String select, String name) {