Added a PG tag to the BAM header to let people know that the file has been cleaned.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3284 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
28f746b76a
commit
9dff578706
|
|
@ -36,6 +36,7 @@ import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
|||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
|
@ -110,9 +111,11 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
protected int MAX_RECORDS_IN_RAM = 500000;
|
||||
|
||||
@Argument(fullName="writerWindowSize", shortName="writerWindowSize", doc="the window over which the writer will store reads when --sortInMemory is enabled", required=false)
|
||||
protected int SORTING_WRITER_WINDOW = 100;
|
||||
protected int SORTING_WRITER_WINDOW = 300;
|
||||
|
||||
@Argument(fullName="no_pg_tag", shortName="noPG", required=false, doc="Don't output the usual PG tag in the realigned bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
|
||||
protected boolean NO_PG_TAG = false;
|
||||
|
||||
|
||||
// the intervals input by the user
|
||||
private Iterator<GenomeLoc> intervals = null;
|
||||
|
||||
|
|
@ -171,22 +174,16 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
for ( SAMReaderID id: ids ) {
|
||||
File file = getToolkit().getDataSource().getSAMFile(id);
|
||||
SAMFileHeader header = getToolkit().getSAMFileHeader(id);
|
||||
if ( SORTING_STRATEGY == RealignerSortingStrategy.NO_SORT )
|
||||
header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
|
||||
String newFileName = file.getName().substring(0, file.getName().length()-3) + outputSuffix + ".bam";
|
||||
SAMFileWriter writer = factory.makeBAMWriter(header, SORTING_STRATEGY == RealignerSortingStrategy.IN_MEMORY, new File(baseWriterFilename, newFileName), compressionLevel);
|
||||
if ( SORTING_STRATEGY == RealignerSortingStrategy.IN_MEMORY )
|
||||
writer = new SortingSAMFileWriter(writer, SORTING_WRITER_WINDOW);
|
||||
File newFile = new File(baseWriterFilename, newFileName);
|
||||
SAMFileWriter writer = makeWriter(factory, header, newFile);
|
||||
for ( String rg : readGroupMap.get(file) )
|
||||
writers.put(rg, writer);
|
||||
}
|
||||
} else {
|
||||
SAMFileHeader header = getToolkit().getSAMFileHeader();
|
||||
if ( SORTING_STRATEGY == RealignerSortingStrategy.NO_SORT )
|
||||
header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
|
||||
SAMFileWriter writer = factory.makeBAMWriter(header, SORTING_STRATEGY == RealignerSortingStrategy.IN_MEMORY, new File(baseWriterFilename), compressionLevel);
|
||||
if ( SORTING_STRATEGY == RealignerSortingStrategy.IN_MEMORY )
|
||||
writer = new SortingSAMFileWriter(writer, SORTING_WRITER_WINDOW);
|
||||
File file = new File(baseWriterFilename);
|
||||
SAMFileWriter writer = makeWriter(factory, header, file);
|
||||
for ( Set<String> set : readGroupMap.values() ) {
|
||||
for ( String rg : set )
|
||||
writers.put(rg, writer);
|
||||
|
|
@ -223,6 +220,25 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
private SAMFileWriter makeWriter(SAMFileWriterFactory factory, SAMFileHeader header, File file) {
|
||||
if ( SORTING_STRATEGY == RealignerSortingStrategy.NO_SORT )
|
||||
header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
|
||||
|
||||
if ( !NO_PG_TAG ) {
|
||||
final SAMProgramRecord programRecord = new SAMProgramRecord("GATK IndelRealigner");
|
||||
final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText");
|
||||
programRecord.setProgramVersion(headerInfo.getString("org.broadinstitute.sting.gatk.version"));
|
||||
header.addProgramRecord( programRecord );
|
||||
}
|
||||
|
||||
SAMFileWriter writer = factory.makeBAMWriter(header, SORTING_STRATEGY == RealignerSortingStrategy.IN_MEMORY, file, compressionLevel);
|
||||
|
||||
if ( SORTING_STRATEGY == RealignerSortingStrategy.IN_MEMORY )
|
||||
writer = new SortingSAMFileWriter(writer, SORTING_WRITER_WINDOW);
|
||||
|
||||
return writer;
|
||||
}
|
||||
|
||||
private void emit(final SAMRecord read) {
|
||||
if ( writers != null ) {
|
||||
SAMReadGroupRecord readGroup = read.getReadGroup();
|
||||
|
|
|
|||
|
|
@ -12,14 +12,14 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
|
|||
|
||||
String[] md5lod5 = {"67c3fc25e9d192cc5fbfd48ade0efc84", "86778f92b0fa6aa7c26e651c8c1eb320"};
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
"-T IndelRealigner -LOD 5 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10023800-10332350 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O %s -snps %s",
|
||||
"-T IndelRealigner -noPG -LOD 5 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10023800-10332350 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O %s -snps %s",
|
||||
2,
|
||||
Arrays.asList(md5lod5));
|
||||
executeTest("test Lod5", spec1);
|
||||
|
||||
String[] md5lod200 = {"96edef86cea95f312ee8295b38227eb8", "d4d8ff567b614729ab8c52bd7d6bef48"};
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
"-T IndelRealigner -LOD 200 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10023800-10332350 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O %s -snps %s",
|
||||
"-T IndelRealigner -noPG -LOD 200 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10023800-10332350 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O %s -snps %s",
|
||||
2,
|
||||
Arrays.asList(md5lod200));
|
||||
executeTest("test Lod200", spec2);
|
||||
|
|
@ -27,7 +27,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
|
|||
String filename1 = "NA12878.chrom1.SLX.SRP000032.2009_06";
|
||||
String filename2 = "low_coverage_CEU.chr1.10k-11k";
|
||||
WalkerTestSpec spec3 = new WalkerTestSpec(
|
||||
"-T IndelRealigner -nway -LOD 5 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + filename1 + ".bam -I " + validationDataLocation + filename2 + ".bam -L 1:10023900-10024000 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O /tmp -snps %s",
|
||||
"-T IndelRealigner -nway -noPG -LOD 5 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + filename1 + ".bam -I " + validationDataLocation + filename2 + ".bam -L 1:10023900-10024000 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O /tmp -snps %s",
|
||||
1,
|
||||
Arrays.asList("bd42a4fa66d7ec7a480c2b94313a78d3"));
|
||||
File file1 = new File("/tmp/" + filename1 + ".cleaned.bam");
|
||||
|
|
|
|||
Loading…
Reference in New Issue