From 9dff578706c5f99dd1b8ff157e7a9d80a86cebd1 Mon Sep 17 00:00:00 2001 From: ebanks Date: Fri, 30 Apr 2010 17:30:30 +0000 Subject: [PATCH] Added PG tag to bam header to let people know it's been cleaned. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3284 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/indels/IndelRealigner.java | 40 +++++++++++++------ .../indels/IndelRealignerIntegrationTest.java | 6 +-- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index fbc1d2d0b..353e89761 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -36,6 +36,7 @@ import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.text.TextFormattingUtils; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.collections.Pair; @@ -110,9 +111,11 @@ public class IndelRealigner extends ReadWalker { protected int MAX_RECORDS_IN_RAM = 500000; @Argument(fullName="writerWindowSize", shortName="writerWindowSize", doc="the window over which the writer will store reads when --sortInMemory is enabled", required=false) - protected int SORTING_WRITER_WINDOW = 100; + protected int SORTING_WRITER_WINDOW = 300; + + @Argument(fullName="no_pg_tag", shortName="noPG", required=false, doc="Don't output the usual PG tag in the realigned bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.") + protected boolean NO_PG_TAG = false; - // the intervals input by the user private Iterator intervals = null; @@ -171,22 +174,16 @@ public class IndelRealigner extends ReadWalker { for ( SAMReaderID id: ids ) { File file = getToolkit().getDataSource().getSAMFile(id); SAMFileHeader header = getToolkit().getSAMFileHeader(id); - if ( SORTING_STRATEGY == RealignerSortingStrategy.NO_SORT ) - header.setSortOrder(SAMFileHeader.SortOrder.unsorted); String newFileName = file.getName().substring(0, file.getName().length()-3) + outputSuffix + ".bam"; - SAMFileWriter writer = factory.makeBAMWriter(header, SORTING_STRATEGY == RealignerSortingStrategy.IN_MEMORY, new File(baseWriterFilename, newFileName), compressionLevel); - if ( SORTING_STRATEGY == RealignerSortingStrategy.IN_MEMORY ) - writer = new SortingSAMFileWriter(writer, SORTING_WRITER_WINDOW); + File newFile = new File(baseWriterFilename, newFileName); + SAMFileWriter writer = makeWriter(factory, header, newFile); for ( String rg : readGroupMap.get(file) ) writers.put(rg, writer); } } else { SAMFileHeader header = getToolkit().getSAMFileHeader(); - if ( SORTING_STRATEGY == RealignerSortingStrategy.NO_SORT ) - header.setSortOrder(SAMFileHeader.SortOrder.unsorted); - SAMFileWriter writer = factory.makeBAMWriter(header, SORTING_STRATEGY == RealignerSortingStrategy.IN_MEMORY, new File(baseWriterFilename), compressionLevel); - if ( SORTING_STRATEGY == RealignerSortingStrategy.IN_MEMORY ) - writer = new SortingSAMFileWriter(writer, SORTING_WRITER_WINDOW); + File file = new File(baseWriterFilename); + SAMFileWriter writer = makeWriter(factory, header, file); for ( Set set : readGroupMap.values() ) { for ( String rg : set ) writers.put(rg, writer); @@ -223,6 +220,25 @@ public class IndelRealigner extends ReadWalker { } } + private SAMFileWriter makeWriter(SAMFileWriterFactory factory, SAMFileHeader header, File file) { + if ( SORTING_STRATEGY == RealignerSortingStrategy.NO_SORT ) + header.setSortOrder(SAMFileHeader.SortOrder.unsorted); + + if ( !NO_PG_TAG ) { + final SAMProgramRecord programRecord = new SAMProgramRecord("GATK IndelRealigner"); + final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText"); + programRecord.setProgramVersion(headerInfo.getString("org.broadinstitute.sting.gatk.version")); + header.addProgramRecord( programRecord ); + } + + SAMFileWriter writer = factory.makeBAMWriter(header, SORTING_STRATEGY == RealignerSortingStrategy.IN_MEMORY, file, compressionLevel); + + if ( SORTING_STRATEGY == RealignerSortingStrategy.IN_MEMORY ) + writer = new SortingSAMFileWriter(writer, SORTING_WRITER_WINDOW); + + return writer; + } + private void emit(final SAMRecord read) { if ( writers != null ) { SAMReadGroupRecord readGroup = read.getReadGroup(); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java index 66c8440be..c612878ac 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java @@ -12,14 +12,14 @@ public class IndelRealignerIntegrationTest extends WalkerTest { String[] md5lod5 = {"67c3fc25e9d192cc5fbfd48ade0efc84", "86778f92b0fa6aa7c26e651c8c1eb320"}; WalkerTestSpec spec1 = new WalkerTestSpec( - "-T IndelRealigner -LOD 5 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10023800-10332350 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O %s -snps %s", + "-T IndelRealigner -noPG -LOD 5 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10023800-10332350 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O %s -snps %s", 2, Arrays.asList(md5lod5)); executeTest("test Lod5", spec1); String[] md5lod200 = {"96edef86cea95f312ee8295b38227eb8", "d4d8ff567b614729ab8c52bd7d6bef48"}; WalkerTestSpec spec2 = new WalkerTestSpec( - "-T IndelRealigner -LOD 200 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10023800-10332350 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O %s -snps %s", + "-T IndelRealigner -noPG -LOD 200 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10023800-10332350 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O %s -snps %s", 2, Arrays.asList(md5lod200)); executeTest("test Lod200", spec2); @@ -27,7 +27,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest { String filename1 = "NA12878.chrom1.SLX.SRP000032.2009_06"; String filename2 = "low_coverage_CEU.chr1.10k-11k"; WalkerTestSpec spec3 = new WalkerTestSpec( - "-T IndelRealigner -nway -LOD 5 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + filename1 + ".bam -I " + validationDataLocation + filename2 + ".bam -L 1:10023900-10024000 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O /tmp -snps %s", + "-T IndelRealigner -nway -noPG -LOD 5 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + filename1 + ".bam -I " + validationDataLocation + filename2 + ".bam -L 1:10023900-10024000 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O /tmp -snps %s", 1, Arrays.asList("bd42a4fa66d7ec7a480c2b94313a78d3")); File file1 = new File("/tmp/" + filename1 + ".cleaned.bam");