From bed0acaed4fe166bfc644012a3650964406ea2b2 Mon Sep 17 00:00:00 2001 From: Andrey Sivachenko Date: Mon, 31 Oct 2011 12:28:28 -0400 Subject: [PATCH] nWayOut now adds PG tag to the header as it should. Also, additional hidden option added: keepPGTags. If invoked, IndelRealigner PG tags from previous runs (if any) are kept in the header and the new PG tag is simply added, instead of overriding them --- .../gatk/walkers/indels/IndelRealigner.java | 37 +++++++--- .../sting/utils/sam/NWaySAMFileWriter.java | 69 ++++++++++++++----- 2 files changed, 76 insertions(+), 30 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 36e4db1c5..8a2917145 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -263,6 +263,12 @@ public class IndelRealigner extends ReadWalker { doc="Don't output the usual PG tag in the realigned bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.") protected boolean NO_PG_TAG = false; + @Hidden + @Argument(fullName="keepPGTags", shortName="keepPG", required=false, + doc="Keep older PG tags left in the bam header by previous runs of this tool (by default, all these "+ + "historical tags will be replaced by the latest tag generated in the current run).") + protected boolean KEEP_ALL_PG_RECORDS = false; + @Hidden @Output(fullName="indelsFileForDebugging", shortName="indels", required=false, doc="Output file (text) for the indels found; FOR DEBUGGING PURPOSES ONLY") protected String OUT_INDELS = null; @@ -404,11 +410,12 @@ public class IndelRealigner extends ReadWalker { // if ( args.containsKey("disable_bam_indexing") ) { System.out.println("NO INDEXING!!"); System.exit(1); createIndex = false; } if ( N_WAY_OUT.toUpperCase().endsWith(".MAP") ) { - writerToUse = new NWaySAMFileWriter(getToolkit(),loadFileNameMap(N_WAY_OUT),SAMFileHeader.SortOrder.coordinate,true, createIndex, generateMD5s); + writerToUse = new NWaySAMFileWriter(getToolkit(),loadFileNameMap(N_WAY_OUT), + SAMFileHeader.SortOrder.coordinate,true, createIndex, generateMD5s,createProgramRecord(),KEEP_ALL_PG_RECORDS); } else { - writerToUse = new NWaySAMFileWriter(getToolkit(),N_WAY_OUT,SAMFileHeader.SortOrder.coordinate,true, createIndex, generateMD5s); + writerToUse = new NWaySAMFileWriter(getToolkit(),N_WAY_OUT,SAMFileHeader.SortOrder.coordinate,true, + createIndex, generateMD5s,createProgramRecord(),KEEP_ALL_PG_RECORDS); } - } else { // set up the output writer @@ -448,18 +455,12 @@ public class IndelRealigner extends ReadWalker { private void setupWriter(SAMFileHeader header) { if ( !NO_PG_TAG ) { - final SAMProgramRecord programRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME); - final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText"); - try { - final String version = headerInfo.getString("org.broadinstitute.sting.gatk.version"); - programRecord.setProgramVersion(version); - } catch (MissingResourceException e) {} - programRecord.setCommandLine(getToolkit().createApproximateCommandLineArgumentString(getToolkit(), this)); + final SAMProgramRecord programRecord = createProgramRecord(); List oldRecords = header.getProgramRecords(); List newRecords = new ArrayList(oldRecords.size()+1); for ( SAMProgramRecord record : oldRecords ) { - if ( !record.getId().startsWith(PROGRAM_RECORD_NAME) ) + if ( !record.getId().startsWith(PROGRAM_RECORD_NAME) || KEEP_ALL_PG_RECORDS ) newRecords.add(record); } newRecords.add(programRecord); @@ -470,6 +471,20 @@ public class IndelRealigner extends ReadWalker { writer.setPresorted(true); } + + private SAMProgramRecord createProgramRecord() { + if ( NO_PG_TAG ) return null; + + final SAMProgramRecord programRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME); + final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText"); + try { + final String version = headerInfo.getString("org.broadinstitute.sting.gatk.version"); + programRecord.setProgramVersion(version); + } catch (MissingResourceException e) {} + programRecord.setCommandLine(getToolkit().createApproximateCommandLineArgumentString(getToolkit(), this)); + return programRecord; + } + private void emit(final SAMRecord read) { // check to see whether the read was modified by looking at the temporary tag diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/NWaySAMFileWriter.java b/public/java/src/org/broadinstitute/sting/utils/sam/NWaySAMFileWriter.java index 07bfc52c7..fa07523f3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/NWaySAMFileWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/NWaySAMFileWriter.java @@ -25,18 +25,16 @@ package org.broadinstitute.sting.utils.sam; -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMFileWriter; -import net.sf.samtools.SAMFileWriterFactory; -import net.sf.samtools.SAMRecord; +import net.sf.samtools.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; +import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.text.TextFormattingUtils; import java.io.File; -import java.util.HashMap; -import java.util.Map; +import java.util.*; /** * Created by IntelliJ IDEA. @@ -50,21 +48,35 @@ public class NWaySAMFileWriter implements SAMFileWriter { private Map writerMap = null; private boolean presorted ; GenomeAnalysisEngine toolkit; + boolean KEEP_ALL_PG_RECORDS = false; - public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map in2out, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) { + public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map in2out, SAMFileHeader.SortOrder order, + boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) { this.presorted = presorted; this.toolkit = toolkit; + this.KEEP_ALL_PG_RECORDS = keep_records; writerMap = new HashMap(); - setupByReader(toolkit,in2out,order, presorted, indexOnTheFly, generateMD5); + setupByReader(toolkit,in2out,order, presorted, indexOnTheFly, generateMD5, pRecord); } - public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly , boolean generateMD5) { + public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, + boolean presorted, boolean indexOnTheFly , boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) { this.presorted = presorted; this.toolkit = toolkit; + this.KEEP_ALL_PG_RECORDS = keep_records; writerMap = new HashMap(); - setupByReader(toolkit,ext,order, presorted, indexOnTheFly, generateMD5); + setupByReader(toolkit,ext,order, presorted, indexOnTheFly, generateMD5, pRecord); } + public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map in2out, SAMFileHeader.SortOrder order, + boolean presorted, boolean indexOnTheFly, boolean generateMD5) { + this(toolkit, in2out, order, presorted, indexOnTheFly, generateMD5, null,false); + } + + public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, + boolean presorted, boolean indexOnTheFly , boolean generateMD5) { + this(toolkit, ext, order, presorted, indexOnTheFly, generateMD5, null,false); + } /** * Instantiates multiple underlying SAM writes, one per input SAM reader registered with GATK engine (those will be retrieved @@ -73,7 +85,8 @@ public class NWaySAMFileWriter implements SAMFileWriter { * @param toolkit * @param in2out */ - public void setupByReader(GenomeAnalysisEngine toolkit, Map in2out, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) { + public void setupByReader(GenomeAnalysisEngine toolkit, Map in2out, SAMFileHeader.SortOrder order, + boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord) { if ( in2out==null ) throw new StingException("input-output bam filename map for n-way-out writing is NULL"); for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) { @@ -85,9 +98,10 @@ public class NWaySAMFileWriter implements SAMFileWriter { outName = in2out.get(fName); if ( writerMap.containsKey( rid ) ) - throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered"); + throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered; "+ + "map file likely contains multiple entries for this input file"); - addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5); + addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5, pRecord); } } @@ -100,7 +114,8 @@ public class NWaySAMFileWriter implements SAMFileWriter { * @param toolkit * @param ext */ - public void setupByReader(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) { + public void setupByReader(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, + boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord) { for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) { String fName = toolkit.getReadsDataSource().getSAMFile(rid).getName(); @@ -117,16 +132,29 @@ public class NWaySAMFileWriter implements SAMFileWriter { if ( writerMap.containsKey( rid ) ) throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered"); - - addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5); + addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5, pRecord); } } - private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) { + private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted, + boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord programRecord) { File f = new File(outName); SAMFileHeader header = toolkit.getSAMFileHeader(id).clone(); header.setSortOrder(order); + + if ( programRecord != null ) { + // --->> add program record + List oldRecords = header.getProgramRecords(); + List newRecords = new ArrayList(oldRecords.size()+1); + for ( SAMProgramRecord record : oldRecords ) { + if ( !record.getId().startsWith(programRecord.getId()) || KEEP_ALL_PG_RECORDS ) + newRecords.add(record); + } + newRecords.add(programRecord); + header.setProgramRecords(newRecords); + // <-- add program record ends here + } SAMFileWriterFactory factory = new SAMFileWriterFactory(); factory.setCreateIndex(indexOnTheFly); factory.setCreateMd5File(generateMD5); @@ -134,7 +162,10 @@ public class NWaySAMFileWriter implements SAMFileWriter { writerMap.put(id,sw); } - + public Collection getWriters() { + return writerMap.values(); + } + public void addAlignment(SAMRecord samRecord) { final SAMReaderID id = toolkit.getReaderIDForRead(samRecord); String rg = samRecord.getStringAttribute("RG"); @@ -146,7 +177,7 @@ public class NWaySAMFileWriter implements SAMFileWriter { } public SAMFileHeader getFileHeader() { - return toolkit.getSAMFileHeader(); + return toolkit.getSAMFileHeader(); } public void close() {