Make sure to fail in all cases where the BAM being used was created by ReduceReads.

In some cases, the program records were being removed from the BAM headers by the GATK engine
before we applied the check for reduced reads (so we did not fail appropriately).  Pushed up the
check to happen before the PG tags are modified and added a unit test to ensure it stays that way.
It turns out that some UG tests still used reduced bams so I switched to use different ones.

Based on reviewer feedback, made it more generic so that it's easy to add new unsupported tools.
This commit is contained in:
Eric Banks 2014-04-02 21:08:06 -04:00
parent 890f4e8873
commit a3d55b3341
3 changed files with 44 additions and 9 deletions

View File

@ -168,14 +168,15 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
final static String assessMinIndelFraction = baseCommandIndelsb37 + " -I " + validationDataLocation
+ "978604.bam -L 1:978,586-978,626 -o %s --sites_only -rf Sample -goodSM 7377 -goodSM 22-0022 -goodSM 134 -goodSM 344029-53 -goodSM 14030";
final static String assessMinIndelFraction = "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R "
+ b36KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 -I "
+ validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,500,000 -o %s";
@Test
public void testMinIndelFraction0() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
Arrays.asList("af0b881d0a931f0789706f0289b72a64"));
Arrays.asList("0d9a3129f680c4a4f41b08154fd836a4"));
executeTest("test minIndelFraction 0.0", spec);
}
@ -183,7 +184,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
public void testMinIndelFraction25() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
Arrays.asList("aa97a7941a861d57a3b746b3f6301eb6"));
Arrays.asList("e910304c25a277b63d8fa8167d4a8b88"));
executeTest("test minIndelFraction 0.25", spec);
}
@ -191,7 +192,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
public void testMinIndelFraction100() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 1", 1,
Arrays.asList("3f07efb768e08650a7ce333edd4f9a52"));
Arrays.asList("83e72fd13291cec00fa5a468a2656c94"));
executeTest("test minIndelFraction 1.0", spec);
}

View File

@ -154,6 +154,15 @@ public class SAMDataSource {
*/
private final ThreadAllocation threadAllocation;
/**
* Static set of unsupported programs that create bam files.
* The key is the PG record ID and the value is the name of the tool that created it
*/
private static Map<String, String> unsupportedPGs = new HashMap<>();
static {
unsupportedPGs.put("GATK ReduceReads", "ReduceReads");
}
/**
* Create a new SAM data source given the supplied read metadata.
*
@ -324,7 +333,6 @@ public class SAMDataSource {
// and read group id (merged) -> read group id (original) mappings.
for(SAMReaderID id: readerIDs) {
SAMFileReader reader = readers.getReader(id);
checkForReducedBamFile(reader.getFileHeader());
ReadGroupMapping mappingToMerged = new ReadGroupMapping();
@ -356,9 +364,11 @@ public class SAMDataSource {
* @param header the SAM header for a given file
* @throws UserException if the header is from a reduced bam
*/
private void checkForReducedBamFile(final SAMFileHeader header) {
if ( header.getProgramRecord("GATK ReduceReads") != null )
throw new UserException("The GATK no longer supports running off of BAMs produced by ReduceReads");
private void checkForUnsupportedBamFile(final SAMFileHeader header) {
for ( final SAMProgramRecord PGrecord : header.getProgramRecords() ) {
if ( unsupportedPGs.containsKey(PGrecord.getId()) )
throw new UserException("The GATK no longer supports running off of BAMs produced by " + unsupportedPGs.get(PGrecord.getId()));
}
}
public void close() {
@ -837,6 +847,8 @@ public class SAMDataSource {
for(final SAMReaderID readerID: readerIDs) {
final ReaderInitializer init = new ReaderInitializer(readerID).call();
checkForUnsupportedBamFile(init.reader.getFileHeader());
if (removeProgramRecords) {
init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>());
}

View File

@ -227,4 +227,26 @@ public class SAMDataSourceUnitTest extends BaseTest {
new ArrayList<ReadFilter>(),
false);
}
@Test(expectedExceptions = UserException.class)
public void testFailOnReducedReadsRemovingProgramRecords() {
readers.add(new SAMReaderID(new File(privateTestDir + "old.reduced.bam"), new Tags()));
SAMDataSource data = new SAMDataSource(readers,
new ThreadAllocation(),
null,
genomeLocParser,
false,
SAMFileReader.ValidationStringency.SILENT,
null,
null,
new ValidationExclusion(),
new ArrayList<ReadFilter>(),
Collections.<ReadTransformer>emptyList(),
false,
(byte) -1,
true,
false,
null);
}
}