From a3d55b334170e6129ee7f36b5803b858810c8f39 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 2 Apr 2014 21:08:06 -0400 Subject: [PATCH] Make sure to fail in all cases where the BAM being used was created by ReduceReads. In some cases, the program records were being removed from the BAM headers by the GATK engine before we applied the check for reduced reads (so we did not fail appropriately). Pushed up the check to happen before the PG tags are modified and added a unit test to ensure it stays that way. It turns out that some UG tests still used reduced bams so I switched to use different ones. Based on reviewer feedback, made it more generic so that it's easy to add new unsupported tools. --- ...dGenotyperIndelCallingIntegrationTest.java | 11 +++++----- .../gatk/datasources/reads/SAMDataSource.java | 20 +++++++++++++---- .../reads/SAMDataSourceUnitTest.java | 22 +++++++++++++++++++ 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java index 8b8c82ea6..3aee7a5b8 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java @@ -168,14 +168,15 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - final static String assessMinIndelFraction = baseCommandIndelsb37 + " -I " + validationDataLocation - + "978604.bam -L 1:978,586-978,626 -o %s --sites_only -rf Sample -goodSM 7377 -goodSM 22-0022 -goodSM 134 -goodSM 344029-53 -goodSM 14030"; + final static String assessMinIndelFraction = "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + + b36KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 -I " + + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,500,000 -o %s"; @Test public void testMinIndelFraction0() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.0", 1, - Arrays.asList("af0b881d0a931f0789706f0289b72a64")); + Arrays.asList("0d9a3129f680c4a4f41b08154fd836a4")); executeTest("test minIndelFraction 0.0", spec); } @@ -183,7 +184,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { public void testMinIndelFraction25() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.25", 1, - Arrays.asList("aa97a7941a861d57a3b746b3f6301eb6")); + Arrays.asList("e910304c25a277b63d8fa8167d4a8b88")); executeTest("test minIndelFraction 0.25", spec); } @@ -191,7 +192,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { public void testMinIndelFraction100() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 1", 1, - Arrays.asList("3f07efb768e08650a7ce333edd4f9a52")); + Arrays.asList("83e72fd13291cec00fa5a468a2656c94")); executeTest("test minIndelFraction 1.0", spec); } diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 4f680ffc3..3f49d4759 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -154,6 +154,15 @@ public class SAMDataSource { */ private final ThreadAllocation threadAllocation; + /** + * Static set of unsupported programs that create bam files. + * The key is the PG record ID and the value is the name of the tool that created it + */ + private static Map unsupportedPGs = new HashMap<>(); + static { + unsupportedPGs.put("GATK ReduceReads", "ReduceReads"); + } + /** * Create a new SAM data source given the supplied read metadata. * @@ -324,7 +333,6 @@ public class SAMDataSource { // and read group id (merged) -> read group id (original) mappings. for(SAMReaderID id: readerIDs) { SAMFileReader reader = readers.getReader(id); - checkForReducedBamFile(reader.getFileHeader()); ReadGroupMapping mappingToMerged = new ReadGroupMapping(); @@ -356,9 +364,11 @@ public class SAMDataSource { * @param header the SAM header for a given file * @throws UserException if the header is from a reduced bam */ - private void checkForReducedBamFile(final SAMFileHeader header) { - if ( header.getProgramRecord("GATK ReduceReads") != null ) - throw new UserException("The GATK no longer supports running off of BAMs produced by ReduceReads"); + private void checkForUnsupportedBamFile(final SAMFileHeader header) { + for ( final SAMProgramRecord PGrecord : header.getProgramRecords() ) { + if ( unsupportedPGs.containsKey(PGrecord.getId()) ) + throw new UserException("The GATK no longer supports running off of BAMs produced by " + unsupportedPGs.get(PGrecord.getId())); + } } public void close() { @@ -837,6 +847,8 @@ public class SAMDataSource { for(final SAMReaderID readerID: readerIDs) { final ReaderInitializer init = new ReaderInitializer(readerID).call(); + checkForUnsupportedBamFile(init.reader.getFileHeader()); + if (removeProgramRecords) { init.reader.getFileHeader().setProgramRecords(new ArrayList()); } diff --git a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java index 0c53de307..280e48679 100644 --- a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java +++ b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java @@ -227,4 +227,26 @@ public class SAMDataSourceUnitTest extends BaseTest { new ArrayList(), false); } + + @Test(expectedExceptions = UserException.class) + public void testFailOnReducedReadsRemovingProgramRecords() { + readers.add(new SAMReaderID(new File(privateTestDir + "old.reduced.bam"), new Tags())); + + SAMDataSource data = new SAMDataSource(readers, + new ThreadAllocation(), + null, + genomeLocParser, + false, + SAMFileReader.ValidationStringency.SILENT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + Collections.emptyList(), + false, + (byte) -1, + true, + false, + null); + } }