From 3de98263f545b8e69265d36b7b896fa7cb474c24 Mon Sep 17 00:00:00 2001 From: Ron Levine Date: Tue, 30 May 2017 15:51:57 -0400 Subject: [PATCH] Copy Program Records (PG) from input BAM --- .../haplotypeBAMWriter/ReadDestination.java | 1 + .../cancer/m2/MuTect2IntegrationTest.java | 2 +- .../HaplotypeCallerIntegrationTest.java | 38 +++++++++++++-- .../gatk/engine/GenomeAnalysisEngine.java | 5 +- .../engine/walkers/ActiveRegionWalker.java | 1 - .../gatk/engine/walkers/LocusWalker.java | 1 - .../engine/walkers/RemoveProgramRecords.java | 46 ------------------- 7 files changed, 36 insertions(+), 58 deletions(-) delete mode 100644 public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/RemoveProgramRecords.java diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/haplotypeBAMWriter/ReadDestination.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/haplotypeBAMWriter/ReadDestination.java index 9eab1e239..000224472 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/haplotypeBAMWriter/ReadDestination.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/haplotypeBAMWriter/ReadDestination.java @@ -83,6 +83,7 @@ public abstract class ReadDestination { bamHeader = new SAMFileHeader(); bamHeader.setSequenceDictionary(header.getSequenceDictionary()); bamHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate); + bamHeader.setProgramRecords(header.getProgramRecords()); // include the original read groups plus an artificial one for haplotypes final List readGroups = new ArrayList<>(header.getReadGroups()); diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2IntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2IntegrationTest.java index 1c99946db..feedfeb35 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2IntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2IntegrationTest.java @@ -138,7 +138,7 @@ public class MuTect2IntegrationTest extends WalkerTest { public void testTruePositivesDream3TrackedDropped() { M2TestWithDroppedReads(DREAM3_TUMOR_BAM, DREAM3_NORMAL_BAM, "21:10935369", "", "48a446d47bb10434cb7f0ee726d15721", - "b536e76870326b4be01b8d6b83c1cf1c"); + "6ecaeb74893249dfa5723b2266c957e2"); } /** diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index d2d35c5d8..f7dde74c5 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -51,6 +51,10 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller; +import htsjdk.samtools.SAMProgramRecord; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; +import htsjdk.samtools.ValidationStringency; import htsjdk.samtools.reference.IndexedFastaSequenceFile; import htsjdk.samtools.reference.ReferenceSequenceFile; import htsjdk.tribble.AbstractFeatureReader; @@ -64,6 +68,7 @@ import org.apache.commons.io.FileUtils; import org.broadinstitute.gatk.engine.walkers.WalkerTest; import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.GenomeLocParser; +import org.broadinstitute.gatk.utils.MD5DB; import org.broadinstitute.gatk.utils.collections.Pair; import org.broadinstitute.gatk.engine.GATKVCFUtils; import org.broadinstitute.gatk.utils.exceptions.UserException; @@ -107,9 +112,30 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { executeTest("testHaplotypeCallerBamout", spec); } + /** + * Check that the bamout program records (@PG) contain all of the program records forwarded from the input BAMs + */ + private void validateForwardedProgramRecords(final List bamInFiles, final String bamOutMd5) throws FileNotFoundException { + final List bamInProgramRecords = new ArrayList<>(); + for (final File file : bamInFiles) { + final SamReader bamInReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(file); + bamInProgramRecords.addAll(bamInReader.getFileHeader().getProgramRecords()); + } + final String bamOutFilePath = new MD5DB().getMD5FilePath(bamOutMd5, null); + if (bamOutFilePath == null) { + throw new FileNotFoundException("Could not find " + bamOutMd5 + " in the MD5 DB"); + } + final SamReader bamOutReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT). + open(new File(bamOutFilePath)); + final List bamOutProgramRecords = bamOutReader.getFileHeader().getProgramRecords(); + Assert.assertTrue(bamOutProgramRecords.containsAll(bamInProgramRecords)); + } + @Test public void testHaplotypeBAMOutFlags() throws IOException { - HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "6588123afd06ff6acc9f10ea25250f54", "9d6bd79cdae3e3222fa93f542fbca153"); + final String md5BAMOut = "69aae17f8cd384666ec7c3c1f3d3eb57"; + HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "6588123afd06ff6acc9f10ea25250f54", md5BAMOut); + validateForwardedProgramRecords(new ArrayList<>(Arrays.asList(new File(NA12878_BAM))), md5BAMOut); } @Test @@ -301,14 +327,15 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { private static final String LEFT_ALIGNMENT_BAMOUT_TEST_OUTPUT = privateTestDir + "/bamout-indel-left-align-bugfix-expected-output.bam"; @Test - public void testLeftAlignmentBamOutBugFix() { + public void testLeftAlignmentBamOutBugFix() throws FileNotFoundException { + final String md5BAMOut = "27e729df3b166c81792a62a5b57ef7b3"; final String base = String.format("-T HaplotypeCaller -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, LEFT_ALIGNMENT_BAMOUT_TEST_INPUT) + " --no_cmdline_in_header -bamout %s -o /dev/null -L 1:11740000-11740700 --allowNonUniqueKmersInRef"; - final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList("01deba68f7a7d562b0e466f6858d42e3")); + final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(md5BAMOut)); executeTest("LeftAlignmentBamOutBugFix", spec); + validateForwardedProgramRecords(new ArrayList<>(Arrays.asList(new File(LEFT_ALIGNMENT_BAMOUT_TEST_INPUT))), md5BAMOut); } - // -------------------------------------------------------------------------------------------------------------- // // test dbSNP annotation @@ -513,11 +540,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testHaplotypeCallerReadPosRankSum() throws IOException { final File testBAM = new File(privateTestDir + "testReadPos.snippet.bam"); final String md5Variants = "03b3c464f22a3572f7d66890c18bdda4"; - final String md5BAMOut = "2e0843f6e8e90c407825e9c47ce4a32d"; + final String md5BAMOut = "3ef35732e49980093ad445e3ac5731fa"; final String base = String.format("-T HaplotypeCaller -R %s -I %s -L 1:3753063 -ip 100 ", REF, testBAM) + " --no_cmdline_in_header -o %s -bamout %s"; final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList(md5Variants, md5BAMOut)); executeTest("testHaplotypeCallerReadPosRankSum", spec); + validateForwardedProgramRecords(new ArrayList(Arrays.asList(testBAM)), md5BAMOut); } @Test diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java index 6a8479645..57570ed87 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java @@ -944,10 +944,7 @@ public class GenomeAnalysisEngine { if (argCollection.removeProgramRecords && argCollection.keepProgramRecords) throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options"); - boolean removeProgramRecords = argCollection.removeProgramRecords || walker.getClass().isAnnotationPresent(RemoveProgramRecords.class); - - if (argCollection.keepProgramRecords) - removeProgramRecords = false; + final boolean removeProgramRecords = argCollection.keepProgramRecords ? false : argCollection.removeProgramRecords; final boolean keepReadsInLIBS = walker instanceof ActiveRegionWalker; diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/ActiveRegionWalker.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/ActiveRegionWalker.java index 21af352f4..053c482bb 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/ActiveRegionWalker.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/ActiveRegionWalker.java @@ -60,7 +60,6 @@ import java.util.*; @ActiveRegionTraversalParameters(extension=50,maxRegion=1500) @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class}) @Downsample(by = DownsampleType.BY_SAMPLE, toCoverage = 1000) -@RemoveProgramRecords public abstract class ActiveRegionWalker extends Walker { /** * If provided, this walker will write out its activity profile (per bp probabilities of being active) diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/LocusWalker.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/LocusWalker.java index 90ff64f33..d1bace4b5 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/LocusWalker.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/LocusWalker.java @@ -46,7 +46,6 @@ import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker; @PartitionBy(PartitionType.LOCUS) @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class}) @Downsample(by = DownsampleType.BY_SAMPLE, toCoverage = 1000) -@RemoveProgramRecords public abstract class LocusWalker extends Walker { // Do we actually want to operate on the context? public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/RemoveProgramRecords.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/RemoveProgramRecords.java deleted file mode 100644 index af7df6363..000000000 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/walkers/RemoveProgramRecords.java +++ /dev/null @@ -1,46 +0,0 @@ -/* -* Copyright 2012-2016 Broad Institute, Inc. -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -package org.broadinstitute.gatk.engine.walkers; - -/** - * Created with IntelliJ IDEA. - * User: thibault - * Date: 8/2/12 - * Time: 1:58 PM - * To change this template use File | Settings | File Templates. - */ - -import java.lang.annotation.*; - -/** - * Indicates that program records should be removed from SAM headers by default for this walker - */ -@Documented -@Inherited -@Retention(RetentionPolicy.RUNTIME) -@Target(ElementType.TYPE) -public @interface RemoveProgramRecords { -}