nWayOut mode now adds the PG tag to the output bam headers, as it should. Also added a hidden option, keepPGTags: if it is set, IndelRealigner PG tags left by previous runs (if any) are kept in the header and the new PG tag is simply appended, instead of replacing them.

This commit is contained in:
Andrey Sivachenko 2011-10-31 12:28:28 -04:00
parent 41ddc7bce7
commit bed0acaed4
2 changed files with 76 additions and 30 deletions

View File

@ -263,6 +263,12 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
doc="Don't output the usual PG tag in the realigned bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.") doc="Don't output the usual PG tag in the realigned bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
protected boolean NO_PG_TAG = false; protected boolean NO_PG_TAG = false;
// When true, PG records already present in the header whose id starts with PROGRAM_RECORD_NAME
// are kept next to the record generated for this run instead of being filtered out; the flag is
// also passed on to NWaySAMFileWriter when nWayOut output is used.
@Hidden
@Argument(fullName="keepPGTags", shortName="keepPG", required=false,
doc="Keep older PG tags left in the bam header by previous runs of this tool (by default, all these "+
"historical tags will be replaced by the latest tag generated in the current run).")
protected boolean KEEP_ALL_PG_RECORDS = false;
@Hidden @Hidden
@Output(fullName="indelsFileForDebugging", shortName="indels", required=false, doc="Output file (text) for the indels found; FOR DEBUGGING PURPOSES ONLY") @Output(fullName="indelsFileForDebugging", shortName="indels", required=false, doc="Output file (text) for the indels found; FOR DEBUGGING PURPOSES ONLY")
protected String OUT_INDELS = null; protected String OUT_INDELS = null;
@ -404,11 +410,12 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// if ( args.containsKey("disable_bam_indexing") ) { System.out.println("NO INDEXING!!"); System.exit(1); createIndex = false; } // if ( args.containsKey("disable_bam_indexing") ) { System.out.println("NO INDEXING!!"); System.exit(1); createIndex = false; }
if ( N_WAY_OUT.toUpperCase().endsWith(".MAP") ) { if ( N_WAY_OUT.toUpperCase().endsWith(".MAP") ) {
writerToUse = new NWaySAMFileWriter(getToolkit(),loadFileNameMap(N_WAY_OUT),SAMFileHeader.SortOrder.coordinate,true, createIndex, generateMD5s); writerToUse = new NWaySAMFileWriter(getToolkit(),loadFileNameMap(N_WAY_OUT),
SAMFileHeader.SortOrder.coordinate,true, createIndex, generateMD5s,createProgramRecord(),KEEP_ALL_PG_RECORDS);
} else { } else {
writerToUse = new NWaySAMFileWriter(getToolkit(),N_WAY_OUT,SAMFileHeader.SortOrder.coordinate,true, createIndex, generateMD5s); writerToUse = new NWaySAMFileWriter(getToolkit(),N_WAY_OUT,SAMFileHeader.SortOrder.coordinate,true,
createIndex, generateMD5s,createProgramRecord(),KEEP_ALL_PG_RECORDS);
} }
} else { } else {
// set up the output writer // set up the output writer
@ -448,18 +455,12 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
private void setupWriter(SAMFileHeader header) { private void setupWriter(SAMFileHeader header) {
if ( !NO_PG_TAG ) { if ( !NO_PG_TAG ) {
final SAMProgramRecord programRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME); final SAMProgramRecord programRecord = createProgramRecord();
final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText");
try {
final String version = headerInfo.getString("org.broadinstitute.sting.gatk.version");
programRecord.setProgramVersion(version);
} catch (MissingResourceException e) {}
programRecord.setCommandLine(getToolkit().createApproximateCommandLineArgumentString(getToolkit(), this));
List<SAMProgramRecord> oldRecords = header.getProgramRecords(); List<SAMProgramRecord> oldRecords = header.getProgramRecords();
List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1); List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
for ( SAMProgramRecord record : oldRecords ) { for ( SAMProgramRecord record : oldRecords ) {
if ( !record.getId().startsWith(PROGRAM_RECORD_NAME) ) if ( !record.getId().startsWith(PROGRAM_RECORD_NAME) || KEEP_ALL_PG_RECORDS )
newRecords.add(record); newRecords.add(record);
} }
newRecords.add(programRecord); newRecords.add(programRecord);
@ -470,6 +471,20 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
writer.setPresorted(true); writer.setPresorted(true);
} }
/**
 * Builds the PG (program) header record describing the current run.
 *
 * The record is constructed from PROGRAM_RECORD_NAME, gets the GATK version read from the
 * "StingText" resource bundle (when that key is present) and the approximate command line
 * reported by the toolkit.
 *
 * @return the newly built program record, or null when NO_PG_TAG is set
 */
private SAMProgramRecord createProgramRecord() {
    if ( NO_PG_TAG ) {
        return null;
    }

    final SAMProgramRecord pgRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME);

    final ResourceBundle stingText = TextFormattingUtils.loadResourceBundle("StingText");
    try {
        pgRecord.setProgramVersion(stingText.getString("org.broadinstitute.sting.gatk.version"));
    } catch (MissingResourceException ignored) {
        // no version entry in the bundle: the record is simply left without a program version
    }

    final String commandLine = getToolkit().createApproximateCommandLineArgumentString(getToolkit(), this);
    pgRecord.setCommandLine(commandLine);
    return pgRecord;
}
private void emit(final SAMRecord read) { private void emit(final SAMRecord read) {
// check to see whether the read was modified by looking at the temporary tag // check to see whether the read was modified by looking at the temporary tag

View File

@ -25,18 +25,16 @@
package org.broadinstitute.sting.utils.sam; package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.*;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMFileWriterFactory;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.io.File; import java.io.File;
import java.util.HashMap; import java.util.*;
import java.util.Map;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
@ -50,21 +48,35 @@ public class NWaySAMFileWriter implements SAMFileWriter {
private Map<SAMReaderID,SAMFileWriter> writerMap = null; private Map<SAMReaderID,SAMFileWriter> writerMap = null;
private boolean presorted ; private boolean presorted ;
GenomeAnalysisEngine toolkit; GenomeAnalysisEngine toolkit;
boolean KEEP_ALL_PG_RECORDS = false;
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) { public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) {
this.presorted = presorted; this.presorted = presorted;
this.toolkit = toolkit; this.toolkit = toolkit;
this.KEEP_ALL_PG_RECORDS = keep_records;
writerMap = new HashMap<SAMReaderID,SAMFileWriter>(); writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
setupByReader(toolkit,in2out,order, presorted, indexOnTheFly, generateMD5); setupByReader(toolkit,in2out,order, presorted, indexOnTheFly, generateMD5, pRecord);
} }
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly , boolean generateMD5) { public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
boolean presorted, boolean indexOnTheFly , boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) {
this.presorted = presorted; this.presorted = presorted;
this.toolkit = toolkit; this.toolkit = toolkit;
this.KEEP_ALL_PG_RECORDS = keep_records;
writerMap = new HashMap<SAMReaderID,SAMFileWriter>(); writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
setupByReader(toolkit,ext,order, presorted, indexOnTheFly, generateMD5); setupByReader(toolkit,ext,order, presorted, indexOnTheFly, generateMD5, pRecord);
} }
/**
 * Convenience constructor for callers that do not supply a program record: delegates to the
 * full constructor with a null program record and keep_records set to false. With a null
 * record, addWriter leaves the program records of each output header untouched.
 *
 * @param toolkit engine used to look up the registered input readers and their headers
 * @param in2out map from input bam file name to the output file name to write for it
 * @param order sort order set on every output header
 * @param presorted passed through to the full constructor
 * @param indexOnTheFly whether the writer factory should create an index while writing
 * @param generateMD5 whether the writer factory should create an MD5 file
 */
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
this(toolkit, in2out, order, presorted, indexOnTheFly, generateMD5, null,false);
}
/**
 * Convenience constructor for callers that do not supply a program record: delegates to the
 * full constructor with a null program record and keep_records set to false. With a null
 * record, addWriter leaves the program records of each output header untouched.
 *
 * @param toolkit engine used to look up the registered input readers and their headers
 * @param ext handed to setupByReader; presumably the extension used to derive each output
 *            file name from its input file name -- TODO confirm against setupByReader
 * @param order sort order set on every output header
 * @param presorted passed through to the full constructor
 * @param indexOnTheFly whether the writer factory should create an index while writing
 * @param generateMD5 whether the writer factory should create an MD5 file
 */
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
boolean presorted, boolean indexOnTheFly , boolean generateMD5) {
this(toolkit, ext, order, presorted, indexOnTheFly, generateMD5, null,false);
}
/** /**
* Instantiates multiple underlying SAM writes, one per input SAM reader registered with GATK engine (those will be retrieved * Instantiates multiple underlying SAM writes, one per input SAM reader registered with GATK engine (those will be retrieved
@ -73,7 +85,8 @@ public class NWaySAMFileWriter implements SAMFileWriter {
* @param toolkit * @param toolkit
* @param in2out * @param in2out
*/ */
public void setupByReader(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) { public void setupByReader(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord) {
if ( in2out==null ) throw new StingException("input-output bam filename map for n-way-out writing is NULL"); if ( in2out==null ) throw new StingException("input-output bam filename map for n-way-out writing is NULL");
for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) { for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
@ -85,9 +98,10 @@ public class NWaySAMFileWriter implements SAMFileWriter {
outName = in2out.get(fName); outName = in2out.get(fName);
if ( writerMap.containsKey( rid ) ) if ( writerMap.containsKey( rid ) )
throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered"); throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered; "+
"map file likely contains multiple entries for this input file");
addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5); addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5, pRecord);
} }
} }
@ -100,7 +114,8 @@ public class NWaySAMFileWriter implements SAMFileWriter {
* @param toolkit * @param toolkit
* @param ext * @param ext
*/ */
public void setupByReader(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) { public void setupByReader(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord) {
for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) { for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
String fName = toolkit.getReadsDataSource().getSAMFile(rid).getName(); String fName = toolkit.getReadsDataSource().getSAMFile(rid).getName();
@ -117,16 +132,29 @@ public class NWaySAMFileWriter implements SAMFileWriter {
if ( writerMap.containsKey( rid ) ) if ( writerMap.containsKey( rid ) )
throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered"); throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered");
addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5, pRecord);
addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5);
} }
} }
private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) { private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted,
boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord programRecord) {
File f = new File(outName); File f = new File(outName);
SAMFileHeader header = toolkit.getSAMFileHeader(id).clone(); SAMFileHeader header = toolkit.getSAMFileHeader(id).clone();
header.setSortOrder(order); header.setSortOrder(order);
if ( programRecord != null ) {
// --->> add program record
List<SAMProgramRecord> oldRecords = header.getProgramRecords();
List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
for ( SAMProgramRecord record : oldRecords ) {
if ( !record.getId().startsWith(programRecord.getId()) || KEEP_ALL_PG_RECORDS )
newRecords.add(record);
}
newRecords.add(programRecord);
header.setProgramRecords(newRecords);
// <-- add program record ends here
}
SAMFileWriterFactory factory = new SAMFileWriterFactory(); SAMFileWriterFactory factory = new SAMFileWriterFactory();
factory.setCreateIndex(indexOnTheFly); factory.setCreateIndex(indexOnTheFly);
factory.setCreateMd5File(generateMD5); factory.setCreateMd5File(generateMD5);
@ -134,6 +162,9 @@ public class NWaySAMFileWriter implements SAMFileWriter {
writerMap.put(id,sw); writerMap.put(id,sw);
} }
/**
 * Gives access to the underlying writers, one per reader id registered in writerMap.
 *
 * @return the values() collection of writerMap itself (a view of the map, not a copy)
 */
public Collection<SAMFileWriter> getWriters() {
    final Collection<SAMFileWriter> writers = writerMap.values();
    return writers;
}
public void addAlignment(SAMRecord samRecord) { public void addAlignment(SAMRecord samRecord) {
final SAMReaderID id = toolkit.getReaderIDForRead(samRecord); final SAMReaderID id = toolkit.getReaderIDForRead(samRecord);