Copy Program Records (PG) from input BAM

This commit is contained in:
Ron Levine 2017-05-30 15:51:57 -04:00
parent 1d6756fbd0
commit 3de98263f5
7 changed files with 36 additions and 58 deletions

View File

@ -83,6 +83,7 @@ public abstract class ReadDestination {
bamHeader = new SAMFileHeader();
bamHeader.setSequenceDictionary(header.getSequenceDictionary());
bamHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
bamHeader.setProgramRecords(header.getProgramRecords());
// include the original read groups plus an artificial one for haplotypes
final List<SAMReadGroupRecord> readGroups = new ArrayList<>(header.getReadGroups());

View File

@ -138,7 +138,7 @@ public class MuTect2IntegrationTest extends WalkerTest {
public void testTruePositivesDream3TrackedDropped() {
M2TestWithDroppedReads(DREAM3_TUMOR_BAM, DREAM3_NORMAL_BAM, "21:10935369", "",
"48a446d47bb10434cb7f0ee726d15721",
"b536e76870326b4be01b8d6b83c1cf1c");
"6ecaeb74893249dfa5723b2266c957e2");
}
/**

View File

@ -51,6 +51,10 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import htsjdk.samtools.SAMProgramRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.tribble.AbstractFeatureReader;
@ -64,6 +68,7 @@ import org.apache.commons.io.FileUtils;
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.MD5DB;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.engine.GATKVCFUtils;
import org.broadinstitute.gatk.utils.exceptions.UserException;
@ -107,9 +112,30 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
executeTest("testHaplotypeCallerBamout", spec);
}
/**
* Check that the bamout program records (@PG) contain all of the program records forwarded from the input BAMs
*/
private void validateForwardedProgramRecords(final List<File> bamInFiles, final String bamOutMd5) throws FileNotFoundException {
final List<SAMProgramRecord> bamInProgramRecords = new ArrayList<>();
for (final File file : bamInFiles) {
final SamReader bamInReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(file);
bamInProgramRecords.addAll(bamInReader.getFileHeader().getProgramRecords());
}
final String bamOutFilePath = new MD5DB().getMD5FilePath(bamOutMd5, null);
if (bamOutFilePath == null) {
throw new FileNotFoundException("Could not find " + bamOutMd5 + " in the MD5 DB");
}
final SamReader bamOutReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).
open(new File(bamOutFilePath));
final List<SAMProgramRecord> bamOutProgramRecords = bamOutReader.getFileHeader().getProgramRecords();
Assert.assertTrue(bamOutProgramRecords.containsAll(bamInProgramRecords));
}
@Test
public void testHaplotypeBAMOutFlags() throws IOException {
HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "6588123afd06ff6acc9f10ea25250f54", "9d6bd79cdae3e3222fa93f542fbca153");
final String md5BAMOut = "69aae17f8cd384666ec7c3c1f3d3eb57";
HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "6588123afd06ff6acc9f10ea25250f54", md5BAMOut);
validateForwardedProgramRecords(new ArrayList<>(Arrays.asList(new File(NA12878_BAM))), md5BAMOut);
}
@Test
@ -301,14 +327,15 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
private static final String LEFT_ALIGNMENT_BAMOUT_TEST_OUTPUT = privateTestDir + "/bamout-indel-left-align-bugfix-expected-output.bam";
@Test
public void testLeftAlignmentBamOutBugFix() {
public void testLeftAlignmentBamOutBugFix() throws FileNotFoundException {
final String md5BAMOut = "27e729df3b166c81792a62a5b57ef7b3";
final String base = String.format("-T HaplotypeCaller -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, LEFT_ALIGNMENT_BAMOUT_TEST_INPUT)
+ " --no_cmdline_in_header -bamout %s -o /dev/null -L 1:11740000-11740700 --allowNonUniqueKmersInRef";
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList("01deba68f7a7d562b0e466f6858d42e3"));
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(md5BAMOut));
executeTest("LeftAlignmentBamOutBugFix", spec);
validateForwardedProgramRecords(new ArrayList<>(Arrays.asList(new File(LEFT_ALIGNMENT_BAMOUT_TEST_INPUT))), md5BAMOut);
}
// --------------------------------------------------------------------------------------------------------------
//
// test dbSNP annotation
@ -513,11 +540,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testHaplotypeCallerReadPosRankSum() throws IOException {
final File testBAM = new File(privateTestDir + "testReadPos.snippet.bam");
final String md5Variants = "03b3c464f22a3572f7d66890c18bdda4";
final String md5BAMOut = "2e0843f6e8e90c407825e9c47ce4a32d";
final String md5BAMOut = "3ef35732e49980093ad445e3ac5731fa";
final String base = String.format("-T HaplotypeCaller -R %s -I %s -L 1:3753063 -ip 100 ", REF, testBAM) +
" --no_cmdline_in_header -o %s -bamout %s";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList(md5Variants, md5BAMOut));
executeTest("testHaplotypeCallerReadPosRankSum", spec);
validateForwardedProgramRecords(new ArrayList<File>(Arrays.asList(testBAM)), md5BAMOut);
}
@Test

View File

@ -944,10 +944,7 @@ public class GenomeAnalysisEngine {
if (argCollection.removeProgramRecords && argCollection.keepProgramRecords)
throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options");
boolean removeProgramRecords = argCollection.removeProgramRecords || walker.getClass().isAnnotationPresent(RemoveProgramRecords.class);
if (argCollection.keepProgramRecords)
removeProgramRecords = false;
final boolean removeProgramRecords = argCollection.keepProgramRecords ? false : argCollection.removeProgramRecords;
final boolean keepReadsInLIBS = walker instanceof ActiveRegionWalker;

View File

@ -60,7 +60,6 @@ import java.util.*;
@ActiveRegionTraversalParameters(extension=50,maxRegion=1500)
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
@Downsample(by = DownsampleType.BY_SAMPLE, toCoverage = 1000)
@RemoveProgramRecords
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
/**
* If provided, this walker will write out its activity profile (per bp probabilities of being active)

View File

@ -46,7 +46,6 @@ import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
@PartitionBy(PartitionType.LOCUS)
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
@Downsample(by = DownsampleType.BY_SAMPLE, toCoverage = 1000)
@RemoveProgramRecords
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context?
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {

View File

@ -1,46 +0,0 @@
/*
* Copyright 2012-2016 Broad Institute, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.gatk.engine.walkers;
/**
* Created with IntelliJ IDEA.
* User: thibault
* Date: 8/2/12
* Time: 1:58 PM
* To change this template use File | Settings | File Templates.
*/
import java.lang.annotation.*;
/**
* Indicates that program records should be removed from SAM headers by default for this walker
*/
@Documented
@Inherited
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface RemoveProgramRecords {
}