Merge pull request #219 from broadinstitute/eb_rr_multisample_fix

Fix bug in Reduce Reads that arises in multi-sample mode.
This commit is contained in:
Mark DePristo 2013-05-09 15:31:14 -07:00
commit 111e8cef0f
3 changed files with 39 additions and 2 deletions

View File

@ -64,6 +64,7 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -236,6 +237,15 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
@Argument(fullName = "downsample_coverage", shortName = "ds", doc = "", required = false)
public int downsampleCoverage = 250;
/**
* Generally, this tool is not meant to be run for more than 1 sample at a time. The one valid exception
* brought to our attention by colleagues is the specific case of tumor/normal pairs in cancer analysis.
* To prevent users from unintentionally running the tool in a less than ideal manner, we require them
* to explicitly enable multi-sample analysis with this argument.
*/
@Argument(fullName = "cancer_mode", shortName = "cancer_mode", doc = "enable multi-samples reduction for cancer analysis", required = false)
public boolean ALLOW_MULTIPLE_SAMPLES = false;
@Hidden
@Argument(fullName = "nwayout", shortName = "nw", doc = "", required = false)
public boolean nwayout = false;
@ -294,6 +304,9 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
if ( minAltProportionToTriggerVariant < 0.0 || minAltProportionToTriggerVariant > 1.0 )
throw new UserException.BadArgumentValue("--minimum_alt_proportion_to_trigger_variant", "must be a value between 0 and 1 (inclusive)");
if ( SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()).size() > 1 && !ALLOW_MULTIPLE_SAMPLES )
throw new UserException.BadInput("Reduce Reads is not meant to be run for more than 1 sample at a time except for the specific case of tumor/normal pairs in cancer analysis");
if ( known.isEmpty() )
knownSnpPositions = null;
else

View File

@ -877,6 +877,10 @@ public class SlidingWindow {
final int start = region.getStart() - windowHeaderStart;
int stop = region.getStop() - windowHeaderStart;
// make sure the bitset is complete given the region (it might not be in multi-sample mode)
if ( region.getStop() > markedSites.getStartLocation() + markedSites.getVariantSiteBitSet().length )
markSites(region.getStop());
CloseVariantRegionResult closeVariantRegionResult = closeVariantRegion(start, stop, knownSnpPositions);
allReads.addAll(closeVariantRegionResult.reads);

View File

@ -53,6 +53,7 @@ import org.testng.annotations.Test;
import java.io.File;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
public class ReduceReadsIntegrationTest extends WalkerTest {
@ -221,13 +222,13 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
/**
 * Integration test that runs ReduceReads over two input BAMs (COREDUCTION_BAM_A and COREDUCTION_BAM_B)
 * in a single invocation, restricted by COREDUCTION_L against reference REF, and hands the resulting
 * spec to executeTest together with COREDUCTION_QUALS_TEST_MD5.
 */
@Test(enabled = true)
public void testCoReduction() {
// NOTE(review): the two consecutive declarations of `base` look like the removed/added pair of a diff
// hunk whose +/- markers were lost; presumably only the second one (with --cancer_mode) is live - confirm.
String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
// The trailing " -o %s " is left unformatted here; presumably the test harness substitutes the output file - verify.
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("5f4d2c1d9c010dfd6865aeba7d0336fe")), COREDUCTION_QUALS_TEST_MD5);
}
/**
 * Same two-BAM ReduceReads run as testCoReduction, but additionally passes DBSNP through the
 * -known argument before handing the spec to executeTest with COREDUCTION_QUALS_TEST_MD5.
 */
@Test(enabled = true)
public void testCoReductionWithKnowns() {
// NOTE(review): the two consecutive declarations of `base` look like the removed/added pair of a diff
// hunk whose +/- markers were lost; presumably only the second one (with --cancer_mode) is live - confirm.
String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s ";
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s ";
// The trailing " -o %s " is left unformatted here; presumably the test harness substitutes the output file - verify.
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("ca48dd972bf57595c691972c0f887cb4")), COREDUCTION_QUALS_TEST_MD5);
}
@ -281,5 +282,24 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
" -o %s --downsample_coverage 250 -dcov 50 ";
executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("7e7b358443827ca239db3b98f299aec6")), "2af063d1bd3c322b03405dbb3ecf59a9");
}
/**
 * Regression test for multi-sample input: runs ReduceReads with --cancer_mode on rr_multisample.bam
 * and expects the run to complete. This particular BAM is a tricky case that previously made the
 * tool throw an exception.
 */
@Test(enabled = true)
public void testMultiSampleDoesNotFailWithFlag() {
    // Output goes to /dev/null: only successful completion matters, not the reduced BAM itself.
    final String commandLine = "-T ReduceReads --cancer_mode -npt -R " + b37KGReference + " -I " + privateTestDir + "rr_multisample.bam -o /dev/null";
    final List<String> emptyList = Collections.<String>emptyList();
    final WalkerTestSpec spec = new WalkerTestSpec(commandLine, 0, emptyList);
    executeTestWithoutAdditionalRRTests("testMultiSampleDoesNotFailWithFlag", spec);
}
/**
 * Confirm that the multi-sample BAM (rr_multisample.bam) is rejected when multi-sample mode is not
 * enabled, i.e. when --cancer_mode is absent from the command line. The spec is built with
 * UserException.BadInput as the expected exception type.
 */
@Test(enabled = true)
public void testMultiSampleFailsWithoutFlag() {
    String cmd = "-T ReduceReads -npt -R " + b37KGReference + " -I " + privateTestDir + "rr_multisample.bam -o /dev/null";
    // Report under this test's own name; it was previously copy-pasted from the sibling
    // "does not fail" test, which made failures here show up under the wrong test.
    executeTestWithoutAdditionalRRTests("testMultiSampleFailsWithoutFlag", new WalkerTestSpec(cmd, 0, UserException.BadInput.class));
}
}