Parallelized VariantEval. Refactored output to support parallel output style. Minor improvements to testing framework to enable easy executeTestParallel to run -nt 1 and -nt 4 by default.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4574 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8211cee0b2
commit
b085648141
|
|
@ -278,7 +278,7 @@ public class GenotypeConcordance extends VariantEvaluator implements StandardEva
|
||||||
// todo -- method that gets the header (or samples) for the first eval sites?
|
// todo -- method that gets the header (or samples) for the first eval sites?
|
||||||
if (missedValidationData.size() > MAX_MISSED_VALIDATION_DATA) {
|
if (missedValidationData.size() > MAX_MISSED_VALIDATION_DATA) {
|
||||||
if (!warnedAboutValidationData) {
|
if (!warnedAboutValidationData) {
|
||||||
logger.warn("Too many genotype sites missed before eval site appeared; ignoring");
|
//logger.warn("Too many genotype sites missed before eval site appeared; ignoring");
|
||||||
warnedAboutValidationData = true;
|
warnedAboutValidationData = true;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,8 @@ import org.broad.tribble.util.variantcontext.MutableVariantContext;
|
||||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
import org.broad.tribble.vcf.VCFConstants;
|
import org.broad.tribble.vcf.VCFConstants;
|
||||||
import org.broad.tribble.vcf.VCFWriter;
|
import org.broad.tribble.vcf.VCFWriter;
|
||||||
|
import org.broad.tribble.vcf.VCFHeader;
|
||||||
|
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||||
|
|
@ -40,6 +42,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Reference;
|
import org.broadinstitute.sting.gatk.walkers.Reference;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.ApplyVariantCuts;
|
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.ApplyVariantCuts;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
import org.broadinstitute.sting.utils.report.ReportMarshaller;
|
import org.broadinstitute.sting.utils.report.ReportMarshaller;
|
||||||
|
|
@ -49,6 +52,7 @@ import org.broadinstitute.sting.utils.report.utils.Node;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.classloader.PackageUtils;
|
import org.broadinstitute.sting.utils.classloader.PackageUtils;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||||
|
|
@ -112,7 +116,7 @@ import java.util.*;
|
||||||
* General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ts/Tv ratios, and a lot more)
|
* General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ts/Tv ratios, and a lot more)
|
||||||
*/
|
*/
|
||||||
@Reference(window=@Window(start=-50,stop=50))
|
@Reference(window=@Window(start=-50,stop=50))
|
||||||
public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
public class VariantEvalWalker extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
|
||||||
@Output
|
@Output
|
||||||
protected PrintStream out;
|
protected PrintStream out;
|
||||||
|
|
||||||
|
|
@ -162,7 +166,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
||||||
@Argument(shortName="MVQ", fullName="MendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
|
@Argument(shortName="MVQ", fullName="MendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
|
||||||
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
|
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
|
||||||
|
|
||||||
@Argument(shortName="outputVCF", fullName="InterestingSitesVCF", doc="If provided, interesting sites emitted to this vcf and the INFO field annotated as to why they are interesting", required=false)
|
@Output(shortName="outputVCF", fullName="InterestingSitesVCF", doc="If provided, interesting sites emitted to this vcf and the INFO field annotated as to why they are interesting", required=false)
|
||||||
protected VCFWriter writer = null;
|
protected VCFWriter writer = null;
|
||||||
|
|
||||||
@Argument(shortName="gcLog", fullName="GenotypeCocordanceLog", doc="If provided, sites with genotype concordance problems (e.g., FP and FNs) will be emitted ot this file", required=false)
|
@Argument(shortName="gcLog", fullName="GenotypeCocordanceLog", doc="If provided, sites with genotype concordance problems (e.g., FP and FNs) will be emitted ot this file", required=false)
|
||||||
|
|
@ -291,9 +295,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
||||||
// Dynamically determined variantEvaluation classes
|
// Dynamically determined variantEvaluation classes
|
||||||
private Set<Class<? extends VariantEvaluator>> evaluationClasses = null;
|
private Set<Class<? extends VariantEvaluator>> evaluationClasses = null;
|
||||||
|
|
||||||
/** output writer for interesting sites */
|
|
||||||
private boolean wroteHeader = false;
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// initialize
|
// initialize
|
||||||
|
|
@ -353,6 +354,12 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
||||||
throw new IllegalArgumentException("rsIDFile " + rsIDFile + " was given but associated max RSID build parameter wasn't available");
|
throw new IllegalArgumentException("rsIDFile " + rsIDFile + " was given but associated max RSID build parameter wasn't available");
|
||||||
rsIDsToExclude = getrsIDsToExclude(new File(rsIDFile), maxRsIDBuild);
|
rsIDsToExclude = getrsIDsToExclude(new File(rsIDFile), maxRsIDBuild);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( writer != null ) {
|
||||||
|
Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), evalNames);
|
||||||
|
final VCFHeader vcfHeader = new VCFHeader(new HashSet<VCFHeaderLine>(), samples);
|
||||||
|
writer.writeHeader(vcfHeader);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void listModulesAndExit() {
|
private void listModulesAndExit() {
|
||||||
|
|
@ -514,6 +521,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
||||||
List<String> interestingReasons = new ArrayList<String>();
|
List<String> interestingReasons = new ArrayList<String>();
|
||||||
|
|
||||||
for ( VariantEvaluator evaluation : evaluations ) {
|
for ( VariantEvaluator evaluation : evaluations ) {
|
||||||
|
synchronized ( evaluation ) {
|
||||||
if ( evaluation.enabled() ) {
|
if ( evaluation.enabled() ) {
|
||||||
// we always call update0 in case the evaluation tracks things like number of bases covered
|
// we always call update0 in case the evaluation tracks things like number of bases covered
|
||||||
evaluation.update0(tracker, ref, context);
|
evaluation.update0(tracker, ref, context);
|
||||||
|
|
@ -559,6 +567,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if ( tracker != null && group.enableInterestingSiteCaptures && captureInterestingSitesOfEvalSet(group) )
|
if ( tracker != null && group.enableInterestingSiteCaptures && captureInterestingSitesOfEvalSet(group) )
|
||||||
writeInterestingSite(interestingReasons, vc, ref.getBase());
|
writeInterestingSite(interestingReasons, vc, ref.getBase());
|
||||||
|
|
@ -592,10 +601,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
||||||
mvc.putAttribute(key, value);
|
mvc.putAttribute(key, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( ! wroteHeader ) {
|
|
||||||
writer.writeHeader(VariantContextAdaptors.createVCFHeader(null, vc));
|
|
||||||
wroteHeader = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
writer.add(mvc, ref);
|
writer.add(mvc, ref);
|
||||||
//interestingReasons.clear();
|
//interestingReasons.clear();
|
||||||
|
|
@ -715,6 +720,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
||||||
return point + sum;
|
return point + sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Integer treeReduce(Integer point, Integer sum) {
|
||||||
|
return point + sum;
|
||||||
|
}
|
||||||
|
|
||||||
public VariantEvaluator getEvalByName(String name, Set<VariantEvaluator> s) {
|
public VariantEvaluator getEvalByName(String name, Set<VariantEvaluator> s) {
|
||||||
for ( VariantEvaluator e : s )
|
for ( VariantEvaluator e : s )
|
||||||
if ( e.getName().equals(name) )
|
if ( e.getName().equals(name) )
|
||||||
|
|
@ -734,20 +743,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// private String formatKeyword(String keyWord) {
|
|
||||||
// //System.out.printf("keyword %s%n", keyWord);
|
|
||||||
//
|
|
||||||
// StringBuilder s = new StringBuilder();
|
|
||||||
// int i = 0;
|
|
||||||
// for ( String part : keyWord.split("\\.") ) {
|
|
||||||
// //System.out.printf("part %s %d%n", part, nameSizes[i]);
|
|
||||||
// s.append(String.format("%" + nameSizes[i] + "s ", part));
|
|
||||||
// i++;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// return s.toString();
|
|
||||||
// }
|
|
||||||
|
|
||||||
public void onTraversalDone(Integer result) {
|
public void onTraversalDone(Integer result) {
|
||||||
// our report mashaller
|
// our report mashaller
|
||||||
ReportMarshaller marshaller;
|
ReportMarshaller marshaller;
|
||||||
|
|
|
||||||
|
|
@ -246,6 +246,23 @@ public class WalkerTest extends BaseTest {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected Pair<List<File>, List<String>> executeTestParallel(final String name, WalkerTestSpec spec) {
|
||||||
|
return executeTest(name, spec, Arrays.asList(1, 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Pair<List<File>, List<String>> executeTest(final String name, WalkerTestSpec spec, List<Integer> parallelThreads) {
|
||||||
|
String originalArgs = spec.args;
|
||||||
|
Pair<List<File>, List<String>> results = null;
|
||||||
|
|
||||||
|
for ( int nt : parallelThreads ) {
|
||||||
|
String extra = nt == 1 ? "" : (" -nt " + nt);
|
||||||
|
spec.args = originalArgs + extra;
|
||||||
|
results = executeTest(name + "-nt-" + nt, spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
protected Pair<List<File>, List<String>> executeTest(final String name, WalkerTestSpec spec) {
|
protected Pair<List<File>, List<String>> executeTest(final String name, WalkerTestSpec spec) {
|
||||||
ensureMd5DbDirectory(); // ensure the md5 directory exists
|
ensureMd5DbDirectory(); // ensure the md5 directory exists
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ public class
|
||||||
for (String tests : testsEnumerations) {
|
for (String tests : testsEnumerations) {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -o %s",
|
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -o %s",
|
||||||
1, Arrays.asList("158ac8e6d32eb2ea1bbeebfa512965de"));
|
1, Arrays.asList("158ac8e6d32eb2ea1bbeebfa512965de"));
|
||||||
executeTest("testSelect1", spec);
|
executeTestParallel("testSelect1", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -39,7 +39,7 @@ public class
|
||||||
String extraArgs = "-L 1:1-10,000,000";
|
String extraArgs = "-L 1:1-10,000,000";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec( withSelect(withSelect(root, "DP < 50", "DP50"), "set==\"Intersection\"", "intersection") + " " + extraArgs + " -o %s",
|
WalkerTestSpec spec = new WalkerTestSpec( withSelect(withSelect(root, "DP < 50", "DP50"), "set==\"Intersection\"", "intersection") + " " + extraArgs + " -o %s",
|
||||||
1, Arrays.asList("cee96f61ffa1d042fe0c63550c508ec9"));
|
1, Arrays.asList("cee96f61ffa1d042fe0c63550c508ec9"));
|
||||||
executeTest("testSelect2", spec);
|
executeTestParallel("testSelect2", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -49,7 +49,7 @@ public class
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -B:eval,VCF " + validationDataLocation + vcfFile + " -B:comp,VCF " + validationDataLocation + "GenotypeConcordanceComp.vcf -noStandard -E GenotypeConcordance -reportType CSV -o %s",
|
WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -B:eval,VCF " + validationDataLocation + vcfFile + " -B:comp,VCF " + validationDataLocation + "GenotypeConcordanceComp.vcf -noStandard -E GenotypeConcordance -reportType CSV -o %s",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("7e9ce1b26cdeaa50705f5de163847638"));
|
Arrays.asList("7e9ce1b26cdeaa50705f5de163847638"));
|
||||||
executeTest("testVEGenotypeConcordance" + vcfFile, spec);
|
executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -67,7 +67,7 @@ public class
|
||||||
WalkerTestSpec spec = new WalkerTestSpec( tests + " " + extraArgs + " -o %s",
|
WalkerTestSpec spec = new WalkerTestSpec( tests + " " + extraArgs + " -o %s",
|
||||||
1, // just one output file
|
1, // just one output file
|
||||||
Arrays.asList(md5));
|
Arrays.asList(md5));
|
||||||
executeTest("testVESimple", spec);
|
executeTestParallel("testVESimple", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -92,7 +92,7 @@ public class
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs1 + extraArgs2 + " -o %s",
|
WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs1 + extraArgs2 + " -o %s",
|
||||||
1, // just one output file
|
1, // just one output file
|
||||||
Arrays.asList(md5));
|
Arrays.asList(md5));
|
||||||
executeTest("testVEComplex", spec);
|
executeTestParallel("testVEComplex", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -109,17 +109,17 @@ public class
|
||||||
String md5 = "d41d8cd98f00b204e9800998ecf8427e";
|
String md5 = "d41d8cd98f00b204e9800998ecf8427e";
|
||||||
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(vecmd, 1, Arrays.asList(md5));
|
WalkerTestSpec spec = new WalkerTestSpec(vecmd, 1, Arrays.asList(md5));
|
||||||
executeTest("testVEGenomicallyAnnotated", spec);
|
executeTestParallel("testVEGenomicallyAnnotated", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testVEWriteVCF() {
|
public void testVEWriteVCF() {
|
||||||
String extraArgs = "-L 1:1-10,000,000 -NO_HEADER -family NA19238+NA19239=NA19240 -MVQ 30 -E MendelianViolationEvaluator";
|
String extraArgs = "-L 1:1-10,000,000 -NO_HEADER -family NA19238+NA19239=NA19240 -MVQ 30 -E MendelianViolationEvaluator";
|
||||||
for (String tests : testsEnumerations) {
|
for (String tests : testsEnumerations) {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs + " -o %s -outputVCF %s",
|
WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs + " -o %s -outputVCF %s -NO_HEADER",
|
||||||
2,
|
2,
|
||||||
Arrays.asList("6b97a019402b3984fead9a4e8b7c7c2a", "989bc30dea6c8a4cf771cd1b9fdab488"));
|
Arrays.asList("6b97a019402b3984fead9a4e8b7c7c2a", "989bc30dea6c8a4cf771cd1b9fdab488"));
|
||||||
executeTest("testVEWriteVCF", spec);
|
executeTestParallel("testVEWriteVCF", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -127,7 +127,7 @@ public class
|
||||||
public void testCompVsEvalAC() {
|
public void testCompVsEvalAC() {
|
||||||
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -E GenotypeConcordance -B:evalYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.fake.genotypes.ac.test.vcf -reportType CSV";
|
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -E GenotypeConcordance -B:evalYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.fake.genotypes.ac.test.vcf -reportType CSV";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("25a681855cb26e7380fbf1a93de0a41f"));
|
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("25a681855cb26e7380fbf1a93de0a41f"));
|
||||||
executeTest("testACDiscordanceAtAC1EvalAC2Comp",spec);
|
executeTestParallel("testACDiscordanceAtAC1EvalAC2Comp",spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String withSelect(String cmd, String select, String name) {
|
private static String withSelect(String cmd, String select, String name) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue