Merge pull request #219 from broadinstitute/eb_rr_multisample_fix
Fix bug in Reduce Reads that arises in multi-sample mode.
This commit is contained in:
commit
111e8cef0f
|
|
@ -64,6 +64,7 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -236,6 +237,15 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
|
|||
@Argument(fullName = "downsample_coverage", shortName = "ds", doc = "", required = false)
|
||||
public int downsampleCoverage = 250;
|
||||
|
||||
/**
|
||||
* Generally, this tool is not meant to be run for more than 1 sample at a time. The one valid exception
|
||||
* brought to our attention by colleagues is the specific case of tumor/normal pairs in cancer analysis.
|
||||
* To prevent users from unintentionally running the tool in a less than ideal manner, we require them
|
||||
* to explicitly enable multi-sample analysis with this argument.
|
||||
*/
|
||||
@Argument(fullName = "cancer_mode", shortName = "cancer_mode", doc = "enable multi-samples reduction for cancer analysis", required = false)
|
||||
public boolean ALLOW_MULTIPLE_SAMPLES = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "nwayout", shortName = "nw", doc = "", required = false)
|
||||
public boolean nwayout = false;
|
||||
|
|
@ -294,6 +304,9 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
|
|||
if ( minAltProportionToTriggerVariant < 0.0 || minAltProportionToTriggerVariant > 1.0 )
|
||||
throw new UserException.BadArgumentValue("--minimum_alt_proportion_to_trigger_variant", "must be a value between 0 and 1 (inclusive)");
|
||||
|
||||
if ( SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()).size() > 1 && !ALLOW_MULTIPLE_SAMPLES )
|
||||
throw new UserException.BadInput("Reduce Reads is not meant to be run for more than 1 sample at a time except for the specific case of tumor/normal pairs in cancer analysis");
|
||||
|
||||
if ( known.isEmpty() )
|
||||
knownSnpPositions = null;
|
||||
else
|
||||
|
|
|
|||
|
|
@ -877,6 +877,10 @@ public class SlidingWindow {
|
|||
final int start = region.getStart() - windowHeaderStart;
|
||||
int stop = region.getStop() - windowHeaderStart;
|
||||
|
||||
// make sure the bitset is complete given the region (it might not be in multi-sample mode)
|
||||
if ( region.getStop() > markedSites.getStartLocation() + markedSites.getVariantSiteBitSet().length )
|
||||
markSites(region.getStop());
|
||||
|
||||
CloseVariantRegionResult closeVariantRegionResult = closeVariantRegion(start, stop, knownSnpPositions);
|
||||
allReads.addAll(closeVariantRegionResult.reads);
|
||||
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ import org.testng.annotations.Test;
|
|||
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||
|
|
@ -221,13 +222,13 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testCoReduction() {
|
||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
|
||||
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
|
||||
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("5f4d2c1d9c010dfd6865aeba7d0336fe")), COREDUCTION_QUALS_TEST_MD5);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testCoReductionWithKnowns() {
|
||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s ";
|
||||
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s ";
|
||||
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("ca48dd972bf57595c691972c0f887cb4")), COREDUCTION_QUALS_TEST_MD5);
|
||||
}
|
||||
|
||||
|
|
@ -281,5 +282,24 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
" -o %s --downsample_coverage 250 -dcov 50 ";
|
||||
executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("7e7b358443827ca239db3b98f299aec6")), "2af063d1bd3c322b03405dbb3ecf59a9");
|
||||
}
|
||||
|
||||
/**
|
||||
* Regression test: confirm that this multi-sample bam does not fail when multi-sample mode (--cancer_mode) is enabled.
|
||||
* The provided example is tricky and used to trigger an exception in the code. The command writes its output to /dev/null.
|
||||
*/
|
||||
@Test(enabled = true)
|
||||
public void testMultiSampleDoesNotFailWithFlag() {
|
||||
String cmd = "-T ReduceReads --cancer_mode -npt -R " + b37KGReference + " -I " + privateTestDir + "rr_multisample.bam -o /dev/null";
|
||||
executeTestWithoutAdditionalRRTests("testMultiSampleDoesNotFailWithFlag", new WalkerTestSpec(cmd, 0, Collections.<String>emptyList()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Confirm that this multi-sample bam fails with UserException.BadInput when multi-sample mode (--cancer_mode) is not enabled.
|
||||
*/
|
||||
@Test(enabled = true)
|
||||
public void testMultiSampleFailsWithoutFlag() {
|
||||
String cmd = "-T ReduceReads -npt -R " + b37KGReference + " -I " + privateTestDir + "rr_multisample.bam -o /dev/null";
|
||||
executeTestWithoutAdditionalRRTests("testMultiSampleFailsWithoutFlag", new WalkerTestSpec(cmd, 0, UserException.BadInput.class));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue