diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index 354e508c2..e54af01dd 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -214,6 +214,7 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche } initializeRecalibrationEngine(); + RecalUtils.checkForInvalidRecalBams(getToolkit().getSAMFileHeaders(), getToolkit().getArguments().ALLOW_BQSR_ON_REDUCED_BAMS); minimumQToUse = getToolkit().getArguments().PRESERVE_QSCORES_LESS_THAN; referenceReader = getToolkit().getReferenceDataSource().getReference(); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java index f2e04c013..cd3255a78 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java @@ -242,11 +242,10 @@ public class ReduceReads extends ReadWalker, ReduceRea HashMap readNameHash; // This hash will keep the name of the original read the new compressed name (a number). Long nextReadNumber = 1L; // The next number to use for the compressed read name. - CompressionStash compressionStash = new CompressionStash(); - SortedSet intervalList; - - private static final String PROGRAM_RECORD_NAME = "GATK ReduceReads"; // The name that will go in the @PG tag + + // IMPORTANT: DO NOT CHANGE THE VALUE OF THIS CONSTANT VARIABLE; IT IS NOW PERMANENTLY THE @PG NAME THAT EXTERNAL TOOLS LOOK FOR IN THE BAM HEADER + public static final String PROGRAM_RECORD_NAME = "GATK ReduceReads"; // The name that will go in the @PG tag private static final String PROGRAM_FILENAME_EXTENSION = ".reduced.bam"; /** diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/BQSRReadTransformer.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/BQSRReadTransformer.java index c85072fa2..113ea2222 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/BQSRReadTransformer.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/BQSRReadTransformer.java @@ -66,6 +66,13 @@ public class BQSRReadTransformer extends ReadTransformer { public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) { this.enabled = engine.hasBQSRArgumentSet(); if ( enabled ) { + // TODO -- See important note below about applying BQSR to a reduced BAM file: + // If it is important to make sure that BQSR is not applied (as opposed to having the covariates computed) against a reduced bam file, + // we need to figure out how to make this work. The problem is that the ReadTransformers are initialized before the ReadDataSource + // inside the GenomeAnalysisEngine, so we generate a NPE when trying to retrieve the SAMFileHeaders. Ultimately, I don't think this is + // a necessary check anyways since we disallow running BaseRecalibrator on reduced bams (so we can't generate the recal tables to use here). + // Although we could add this check to the apply() method below, it's kind of ugly and inefficient. + // The call here would be: RecalUtils.checkForInvalidRecalBams(engine.getSAMFileHeaders(), engine.getArguments().ALLOW_BQSR_ON_REDUCED_BAMS); final BQSRArgumentSet args = engine.getBQSRArgumentSet(); this.bqsr = new BaseRecalibration(args.getRecalFile(), args.getQuantizationLevels(), args.shouldDisableIndelQuals(), args.getPreserveQscoresLessThan(), args.shouldEmitOriginalQuals(), args.getGlobalQScorePrior()); } diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java index f7c3440e4..6d98803c9 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java @@ -46,10 +46,12 @@ package org.broadinstitute.sting.utils.recalibration; +import net.sf.samtools.SAMFileHeader; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.gatk.walkers.compression.reducereads.ReduceReads; import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.recalibration.covariates.*; import org.broadinstitute.sting.utils.BaseUtils; @@ -847,7 +849,6 @@ public class RecalUtils { } } - /** * creates a datum object with one observation and one or zero error * @@ -858,4 +859,20 @@ public class RecalUtils { private static RecalDatum createDatumObject(final byte reportedQual, final double isError) { return new RecalDatum(1, isError, reportedQual); } + + /** + * Checks for invalid BAMs that are being used with BQSR and fails with a UserException if it finds one + * + * @param headers sam file headers being passed into the GATK engine + * @param allowBqsrOnReducedBams should we allow BQSR on reduced bams? + */ + public static void checkForInvalidRecalBams(final List headers, final boolean allowBqsrOnReducedBams) { + // for now, the only check we make is against reduced bams + if ( !allowBqsrOnReducedBams ) { + for ( final SAMFileHeader header : headers ) { + if ( header.getProgramRecord(ReduceReads.PROGRAM_RECORD_NAME) != null ) + throw new UserException.BadInput("base quality score recalibration should absolutely not be run on reduced BAM files! Please run ReduceReads only after BQSR has been performed"); + } + } + } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index 825bb8f51..577569e4e 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -170,6 +170,19 @@ public class BQSRIntegrationTest extends WalkerTest { executeTest("testBQSRFailWithSolidNoCall", spec); } + @Test + public void testBQSRFailWithReducedBam() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + " -T BaseRecalibrator" + + " -R " + b37KGReference + + " -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam" + + " -L 1:67,225,396-67,288,518" + + " -o /dev/null", + 0, + UserException.class); + executeTest("testBQSRFailWithReducedBam", spec); + } + private static class PRTest { final String args; final String md5; @@ -241,5 +254,4 @@ public class BQSRIntegrationTest extends WalkerTest { UserException.class); executeTest("testPRFailWithLowMaxCycle", spec); } - } diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index c9f48dc01..070898654 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -884,10 +884,10 @@ public class GenomeAnalysisEngine { /** * Returns the unmerged SAM file header for an individual reader. * @param reader The reader. - * @return Header for that reader. + * @return Header for that reader or null if not available. */ public SAMFileHeader getSAMFileHeader(SAMReaderID reader) { - return readsDataSource.getHeader(reader); + return readsDataSource == null ? null : readsDataSource.getHeader(reader); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index bcf3e7044..a3e19b944 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -176,7 +176,7 @@ public class GATKArgumentCollection { @Argument(fullName = "fix_misencoded_quality_scores", shortName="fixMisencodedQuals", doc="Fix mis-encoded base quality scores", required = false) public boolean FIX_MISENCODED_QUALS = false; - @Argument(fullName = "allow_potentially_misencoded_quality_scores", shortName="allowPotentiallyMisencodedQuals", doc="Do not fail when encountered base qualities that are too high and seemingly indicate a problem with the base quality encoding of the BAM file", required = false) + @Argument(fullName = "allow_potentially_misencoded_quality_scores", shortName="allowPotentiallyMisencodedQuals", doc="Do not fail when encountering base qualities that are too high and that seemingly indicate a problem with the base quality encoding of the BAM file", required = false) public boolean ALLOW_POTENTIALLY_MISENCODED_QUALS = false; // -------------------------------------------------------------------------------------------------------------- @@ -245,6 +245,14 @@ public class GATKArgumentCollection { @Argument(fullName = "globalQScorePrior", shortName = "globalQScorePrior", doc = "The global Qscore Bayesian prior to use in the BQSR. If specified, this value will be used as the prior for all mismatch quality scores instead of the actual reported quality score", required = false) public double globalQScorePrior = -1.0; + /** + * For the sake of your data, please only use this option if you know what you are doing. It is absolutely not recommended practice + * to run base quality score recalibration on reduced BAM files. + */ + @Advanced + @Argument(fullName = "allow_bqsr_on_reduced_bams_despite_repeated_warnings", shortName="allowBqsrOnReducedBams", doc="Do not fail when running base quality score recalibration on a reduced BAM file even though we highly recommend against it", required = false) + public boolean ALLOW_BQSR_ON_REDUCED_BAMS = false; + // -------------------------------------------------------------------------------------------------------------- // // Other utility arguments