Copy Program Records (PG) from input BAM
This commit is contained in:
parent
1d6756fbd0
commit
3de98263f5
|
|
@ -83,6 +83,7 @@ public abstract class ReadDestination {
|
||||||
bamHeader = new SAMFileHeader();
|
bamHeader = new SAMFileHeader();
|
||||||
bamHeader.setSequenceDictionary(header.getSequenceDictionary());
|
bamHeader.setSequenceDictionary(header.getSequenceDictionary());
|
||||||
bamHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
|
bamHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
|
||||||
|
bamHeader.setProgramRecords(header.getProgramRecords());
|
||||||
|
|
||||||
// include the original read groups plus an artificial one for haplotypes
|
// include the original read groups plus an artificial one for haplotypes
|
||||||
final List<SAMReadGroupRecord> readGroups = new ArrayList<>(header.getReadGroups());
|
final List<SAMReadGroupRecord> readGroups = new ArrayList<>(header.getReadGroups());
|
||||||
|
|
|
||||||
|
|
@ -138,7 +138,7 @@ public class MuTect2IntegrationTest extends WalkerTest {
|
||||||
public void testTruePositivesDream3TrackedDropped() {
|
public void testTruePositivesDream3TrackedDropped() {
|
||||||
M2TestWithDroppedReads(DREAM3_TUMOR_BAM, DREAM3_NORMAL_BAM, "21:10935369", "",
|
M2TestWithDroppedReads(DREAM3_TUMOR_BAM, DREAM3_NORMAL_BAM, "21:10935369", "",
|
||||||
"48a446d47bb10434cb7f0ee726d15721",
|
"48a446d47bb10434cb7f0ee726d15721",
|
||||||
"b536e76870326b4be01b8d6b83c1cf1c");
|
"6ecaeb74893249dfa5723b2266c957e2");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,10 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
||||||
|
|
||||||
|
import htsjdk.samtools.SAMProgramRecord;
|
||||||
|
import htsjdk.samtools.SamReader;
|
||||||
|
import htsjdk.samtools.SamReaderFactory;
|
||||||
|
import htsjdk.samtools.ValidationStringency;
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import htsjdk.tribble.AbstractFeatureReader;
|
import htsjdk.tribble.AbstractFeatureReader;
|
||||||
|
|
@ -64,6 +68,7 @@ import org.apache.commons.io.FileUtils;
|
||||||
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.gatk.utils.MD5DB;
|
||||||
import org.broadinstitute.gatk.utils.collections.Pair;
|
import org.broadinstitute.gatk.utils.collections.Pair;
|
||||||
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||||
|
|
@ -107,9 +112,30 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
executeTest("testHaplotypeCallerBamout", spec);
|
executeTest("testHaplotypeCallerBamout", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check that the bamout program records (@PG) contain all of the program records forwarded from the input BAMs
|
||||||
|
*/
|
||||||
|
private void validateForwardedProgramRecords(final List<File> bamInFiles, final String bamOutMd5) throws FileNotFoundException {
|
||||||
|
final List<SAMProgramRecord> bamInProgramRecords = new ArrayList<>();
|
||||||
|
for (final File file : bamInFiles) {
|
||||||
|
final SamReader bamInReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(file);
|
||||||
|
bamInProgramRecords.addAll(bamInReader.getFileHeader().getProgramRecords());
|
||||||
|
}
|
||||||
|
final String bamOutFilePath = new MD5DB().getMD5FilePath(bamOutMd5, null);
|
||||||
|
if (bamOutFilePath == null) {
|
||||||
|
throw new FileNotFoundException("Could not find " + bamOutMd5 + " in the MD5 DB");
|
||||||
|
}
|
||||||
|
final SamReader bamOutReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).
|
||||||
|
open(new File(bamOutFilePath));
|
||||||
|
final List<SAMProgramRecord> bamOutProgramRecords = bamOutReader.getFileHeader().getProgramRecords();
|
||||||
|
Assert.assertTrue(bamOutProgramRecords.containsAll(bamInProgramRecords));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeBAMOutFlags() throws IOException {
|
public void testHaplotypeBAMOutFlags() throws IOException {
|
||||||
HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "6588123afd06ff6acc9f10ea25250f54", "9d6bd79cdae3e3222fa93f542fbca153");
|
final String md5BAMOut = "69aae17f8cd384666ec7c3c1f3d3eb57";
|
||||||
|
HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "6588123afd06ff6acc9f10ea25250f54", md5BAMOut);
|
||||||
|
validateForwardedProgramRecords(new ArrayList<>(Arrays.asList(new File(NA12878_BAM))), md5BAMOut);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -301,14 +327,15 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
private static final String LEFT_ALIGNMENT_BAMOUT_TEST_OUTPUT = privateTestDir + "/bamout-indel-left-align-bugfix-expected-output.bam";
|
private static final String LEFT_ALIGNMENT_BAMOUT_TEST_OUTPUT = privateTestDir + "/bamout-indel-left-align-bugfix-expected-output.bam";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testLeftAlignmentBamOutBugFix() {
|
public void testLeftAlignmentBamOutBugFix() throws FileNotFoundException {
|
||||||
|
final String md5BAMOut = "27e729df3b166c81792a62a5b57ef7b3";
|
||||||
final String base = String.format("-T HaplotypeCaller -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, LEFT_ALIGNMENT_BAMOUT_TEST_INPUT)
|
final String base = String.format("-T HaplotypeCaller -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, LEFT_ALIGNMENT_BAMOUT_TEST_INPUT)
|
||||||
+ " --no_cmdline_in_header -bamout %s -o /dev/null -L 1:11740000-11740700 --allowNonUniqueKmersInRef";
|
+ " --no_cmdline_in_header -bamout %s -o /dev/null -L 1:11740000-11740700 --allowNonUniqueKmersInRef";
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList("01deba68f7a7d562b0e466f6858d42e3"));
|
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(md5BAMOut));
|
||||||
executeTest("LeftAlignmentBamOutBugFix", spec);
|
executeTest("LeftAlignmentBamOutBugFix", spec);
|
||||||
|
validateForwardedProgramRecords(new ArrayList<>(Arrays.asList(new File(LEFT_ALIGNMENT_BAMOUT_TEST_INPUT))), md5BAMOut);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// test dbSNP annotation
|
// test dbSNP annotation
|
||||||
|
|
@ -513,11 +540,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
public void testHaplotypeCallerReadPosRankSum() throws IOException {
|
public void testHaplotypeCallerReadPosRankSum() throws IOException {
|
||||||
final File testBAM = new File(privateTestDir + "testReadPos.snippet.bam");
|
final File testBAM = new File(privateTestDir + "testReadPos.snippet.bam");
|
||||||
final String md5Variants = "03b3c464f22a3572f7d66890c18bdda4";
|
final String md5Variants = "03b3c464f22a3572f7d66890c18bdda4";
|
||||||
final String md5BAMOut = "2e0843f6e8e90c407825e9c47ce4a32d";
|
final String md5BAMOut = "3ef35732e49980093ad445e3ac5731fa";
|
||||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s -L 1:3753063 -ip 100 ", REF, testBAM) +
|
final String base = String.format("-T HaplotypeCaller -R %s -I %s -L 1:3753063 -ip 100 ", REF, testBAM) +
|
||||||
" --no_cmdline_in_header -o %s -bamout %s";
|
" --no_cmdline_in_header -o %s -bamout %s";
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList(md5Variants, md5BAMOut));
|
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList(md5Variants, md5BAMOut));
|
||||||
executeTest("testHaplotypeCallerReadPosRankSum", spec);
|
executeTest("testHaplotypeCallerReadPosRankSum", spec);
|
||||||
|
validateForwardedProgramRecords(new ArrayList<File>(Arrays.asList(testBAM)), md5BAMOut);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
||||||
|
|
@ -944,10 +944,7 @@ public class GenomeAnalysisEngine {
|
||||||
if (argCollection.removeProgramRecords && argCollection.keepProgramRecords)
|
if (argCollection.removeProgramRecords && argCollection.keepProgramRecords)
|
||||||
throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options");
|
throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options");
|
||||||
|
|
||||||
boolean removeProgramRecords = argCollection.removeProgramRecords || walker.getClass().isAnnotationPresent(RemoveProgramRecords.class);
|
final boolean removeProgramRecords = argCollection.keepProgramRecords ? false : argCollection.removeProgramRecords;
|
||||||
|
|
||||||
if (argCollection.keepProgramRecords)
|
|
||||||
removeProgramRecords = false;
|
|
||||||
|
|
||||||
final boolean keepReadsInLIBS = walker instanceof ActiveRegionWalker;
|
final boolean keepReadsInLIBS = walker instanceof ActiveRegionWalker;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -60,7 +60,6 @@ import java.util.*;
|
||||||
@ActiveRegionTraversalParameters(extension=50,maxRegion=1500)
|
@ActiveRegionTraversalParameters(extension=50,maxRegion=1500)
|
||||||
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
|
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
|
||||||
@Downsample(by = DownsampleType.BY_SAMPLE, toCoverage = 1000)
|
@Downsample(by = DownsampleType.BY_SAMPLE, toCoverage = 1000)
|
||||||
@RemoveProgramRecords
|
|
||||||
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||||
/**
|
/**
|
||||||
* If provided, this walker will write out its activity profile (per bp probabilities of being active)
|
* If provided, this walker will write out its activity profile (per bp probabilities of being active)
|
||||||
|
|
|
||||||
|
|
@ -46,7 +46,6 @@ import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||||
@PartitionBy(PartitionType.LOCUS)
|
@PartitionBy(PartitionType.LOCUS)
|
||||||
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
|
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
|
||||||
@Downsample(by = DownsampleType.BY_SAMPLE, toCoverage = 1000)
|
@Downsample(by = DownsampleType.BY_SAMPLE, toCoverage = 1000)
|
||||||
@RemoveProgramRecords
|
|
||||||
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||||
// Do we actually want to operate on the context?
|
// Do we actually want to operate on the context?
|
||||||
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
|
|
||||||
|
|
@ -1,46 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright 2012-2016 Broad Institute, Inc.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.gatk.engine.walkers;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created with IntelliJ IDEA.
|
|
||||||
* User: thibault
|
|
||||||
* Date: 8/2/12
|
|
||||||
* Time: 1:58 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.lang.annotation.*;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Indicates that program records should be removed from SAM headers by default for this walker
|
|
||||||
*/
|
|
||||||
@Documented
|
|
||||||
@Inherited
|
|
||||||
@Retention(RetentionPolicy.RUNTIME)
|
|
||||||
@Target(ElementType.TYPE)
|
|
||||||
public @interface RemoveProgramRecords {
|
|
||||||
}
|
|
||||||
Loading…
Reference in New Issue