Bug fix in variant eval 2. Preliminary (slow and buggy) support for -XL exclude lists.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2991 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1eb5f97255
commit
b39b5edca8
|
|
@ -158,10 +158,21 @@ public class GenomeAnalysisEngine {
|
|||
// create the output streams
|
||||
initializeOutputStreams(my_walker, microScheduler.getOutputTracker());
|
||||
|
||||
// todo -- call createSetFromList for -XL argument, and unify this with intervals, if provided
|
||||
GenomeLocSortedSet excludeIntervals = null;
|
||||
if (argCollection.excludeIntervals != null && argCollection.intervalMerging.check()) {
|
||||
excludeIntervals = GenomeLocSortedSet.createSetFromList(parseIntervalRegion(argCollection.excludeIntervals, IntervalMergingRule.ALL));
|
||||
}
|
||||
|
||||
if (argCollection.intervals != null && argCollection.intervalMerging.check()) {
|
||||
intervals = GenomeLocSortedSet.createSetFromList(parseIntervalRegion(argCollection.intervals));
|
||||
}
|
||||
|
||||
if ( excludeIntervals != null ) {
|
||||
GenomeLocSortedSet toPrune = intervals == null ? GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getSequenceDictionary()) : intervals;
|
||||
intervals = pruneIntervals( toPrune, excludeIntervals );
|
||||
}
|
||||
|
||||
ShardStrategy shardStrategy = getShardStrategy(my_walker,
|
||||
microScheduler.getReference(),
|
||||
intervals,
|
||||
|
|
@ -294,6 +305,15 @@ public class GenomeAnalysisEngine {
|
|||
return microScheduler;
|
||||
}
|
||||
|
||||
private GenomeLocSortedSet pruneIntervals( GenomeLocSortedSet toPrune, GenomeLocSortedSet toExclude) {
|
||||
logger.info(String.format("pruning intervals from %d against %d", toPrune.size(), toExclude.size()));
|
||||
for ( GenomeLoc exclude : toExclude )
|
||||
toPrune.removeRegion(exclude);
|
||||
logger.info(String.format("done pruning intervals == now have %d", toPrune.size()));
|
||||
|
||||
return toPrune;
|
||||
}
|
||||
|
||||
/**
|
||||
* set up the interval regions, from either the interval file or the genome region string
|
||||
*
|
||||
|
|
|
|||
|
|
@ -64,6 +64,10 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false)
|
||||
public List<String> intervals = null;
|
||||
|
||||
@ElementList(required = false)
|
||||
@Argument(fullName = "excludeIntervals", shortName = "XL", doc = "A list of genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file.", required = false)
|
||||
public List<String> excludeIntervals = null;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
|
||||
public File referenceFile = null;
|
||||
|
|
@ -261,6 +265,9 @@ public class GATKArgumentCollection {
|
|||
if (!other.intervals.equals(this.intervals)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.excludeIntervals.equals(this.excludeIntervals)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.DBSNPFile.equals(this.DBSNPFile)) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -490,7 +490,7 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
|
||||
VariantContext vc = contexts.size() == 1 ? contexts.iterator().next() : null;
|
||||
|
||||
if ( vc != null && vc.hasGenotypes(SAMPLES_LIST) ) {
|
||||
if ( vc != null && vc.hasGenotypes(SAMPLES_LIST) && SAMPLES_LIST.size() > 0 ) {
|
||||
//if ( ! name.equals("eval") ) logger.info(String.format("subsetting VC %s", vc));
|
||||
vc = vc.subContextFromGenotypes(vc.getGenotypes(SAMPLES_LIST).values());
|
||||
//if ( ! name.equals("eval") ) logger.info(String.format(" => VC %s", vc));
|
||||
|
|
|
|||
|
|
@ -91,6 +91,7 @@ public class GATKArgumentCollectionTest extends BaseTest {
|
|||
collect.downsampleCoverage = null;
|
||||
collect.intervals = new ArrayList<String>();
|
||||
collect.intervals.add("intervals".toLowerCase());
|
||||
collect.excludeIntervals = new ArrayList<String>();
|
||||
collect.disableThreading = false;
|
||||
collect.outFileName = "outFileName".toLowerCase();
|
||||
collect.errFileName = "errFileName".toLowerCase();
|
||||
|
|
|
|||
|
|
@ -20,8 +20,8 @@ public class VariantEval2IntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testVE2Simple() {
|
||||
HashMap<String, String> expectations = new HashMap<String, String>();
|
||||
expectations.put("-L 1:1-10,000,000", "32b2e9758078b66e6d50d140acb37947");
|
||||
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "5ee420ebf7c2d3c2e3827c0114a6706d");
|
||||
expectations.put("-L 1:1-10,000,000", "8f6d7d4ded62c4558b4c72053ca2f3d5");
|
||||
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "3adaab00a5475504ede7bb13b2c8736f");
|
||||
|
||||
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
||||
String extraArgs = entry.getKey();
|
||||
|
|
@ -41,10 +41,10 @@ public class VariantEval2IntegrationTest extends WalkerTest {
|
|||
" -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" +
|
||||
" -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf";
|
||||
|
||||
String eqMD5s = "ba021a4c963200191710a220a5577753"; // next two examples should be the same!
|
||||
String eqMD5s = "5b51146d4282a236f2b6b73fe585a305"; // next two examples should be the same!
|
||||
expectations.put("", eqMD5s);
|
||||
expectations.put(" -known comp_hapmap -known dbsnp", eqMD5s);
|
||||
expectations.put(" -known comp_hapmap", "5ce16165f4242d77b4e82c704273c11d");
|
||||
expectations.put(" -known comp_hapmap", "bcb832f75afd63e2f66bc5490f89cac3");
|
||||
|
||||
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
||||
String extraArgs2 = entry.getKey();
|
||||
|
|
@ -62,7 +62,7 @@ public class VariantEval2IntegrationTest extends WalkerTest {
|
|||
String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30";
|
||||
WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s",
|
||||
2,
|
||||
Arrays.asList("0b29285da3ca778b9c8b7f62e99aa72d", "d41d8cd98f00b204e9800998ecf8427e"));
|
||||
Arrays.asList("2c9e03fe3d1c9aa32fbdbf74a5758e85", "a3ce1d70d8ae3874807e9d61994d42af"));
|
||||
executeTest("testVE2WriteVCF", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue