nWayOut now adds the PG tag to the header, as it should. Also adds a hidden option, keepPGTags: when it is set, IndelRealigner PG tags left in the header by previous runs (if any) are kept and the new PG tag is simply appended, instead of replacing them.
This commit is contained in:
parent
41ddc7bce7
commit
bed0acaed4
|
|
@ -263,6 +263,12 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
doc="Don't output the usual PG tag in the realigned bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
|
doc="Don't output the usual PG tag in the realigned bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
|
||||||
protected boolean NO_PG_TAG = false;
|
protected boolean NO_PG_TAG = false;
|
||||||
|
|
||||||
|
@Hidden
|
||||||
|
@Argument(fullName="keepPGTags", shortName="keepPG", required=false,
|
||||||
|
doc="Keep older PG tags left in the bam header by previous runs of this tool (by default, all these "+
|
||||||
|
"historical tags will be replaced by the latest tag generated in the current run).")
|
||||||
|
protected boolean KEEP_ALL_PG_RECORDS = false;
|
||||||
|
|
||||||
@Hidden
|
@Hidden
|
||||||
@Output(fullName="indelsFileForDebugging", shortName="indels", required=false, doc="Output file (text) for the indels found; FOR DEBUGGING PURPOSES ONLY")
|
@Output(fullName="indelsFileForDebugging", shortName="indels", required=false, doc="Output file (text) for the indels found; FOR DEBUGGING PURPOSES ONLY")
|
||||||
protected String OUT_INDELS = null;
|
protected String OUT_INDELS = null;
|
||||||
|
|
@ -404,11 +410,12 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
// if ( args.containsKey("disable_bam_indexing") ) { System.out.println("NO INDEXING!!"); System.exit(1); createIndex = false; }
|
// if ( args.containsKey("disable_bam_indexing") ) { System.out.println("NO INDEXING!!"); System.exit(1); createIndex = false; }
|
||||||
|
|
||||||
if ( N_WAY_OUT.toUpperCase().endsWith(".MAP") ) {
|
if ( N_WAY_OUT.toUpperCase().endsWith(".MAP") ) {
|
||||||
writerToUse = new NWaySAMFileWriter(getToolkit(),loadFileNameMap(N_WAY_OUT),SAMFileHeader.SortOrder.coordinate,true, createIndex, generateMD5s);
|
writerToUse = new NWaySAMFileWriter(getToolkit(),loadFileNameMap(N_WAY_OUT),
|
||||||
|
SAMFileHeader.SortOrder.coordinate,true, createIndex, generateMD5s,createProgramRecord(),KEEP_ALL_PG_RECORDS);
|
||||||
} else {
|
} else {
|
||||||
writerToUse = new NWaySAMFileWriter(getToolkit(),N_WAY_OUT,SAMFileHeader.SortOrder.coordinate,true, createIndex, generateMD5s);
|
writerToUse = new NWaySAMFileWriter(getToolkit(),N_WAY_OUT,SAMFileHeader.SortOrder.coordinate,true,
|
||||||
|
createIndex, generateMD5s,createProgramRecord(),KEEP_ALL_PG_RECORDS);
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
// set up the output writer
|
// set up the output writer
|
||||||
|
|
@ -448,18 +455,12 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
private void setupWriter(SAMFileHeader header) {
|
private void setupWriter(SAMFileHeader header) {
|
||||||
|
|
||||||
if ( !NO_PG_TAG ) {
|
if ( !NO_PG_TAG ) {
|
||||||
final SAMProgramRecord programRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME);
|
final SAMProgramRecord programRecord = createProgramRecord();
|
||||||
final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText");
|
|
||||||
try {
|
|
||||||
final String version = headerInfo.getString("org.broadinstitute.sting.gatk.version");
|
|
||||||
programRecord.setProgramVersion(version);
|
|
||||||
} catch (MissingResourceException e) {}
|
|
||||||
programRecord.setCommandLine(getToolkit().createApproximateCommandLineArgumentString(getToolkit(), this));
|
|
||||||
|
|
||||||
List<SAMProgramRecord> oldRecords = header.getProgramRecords();
|
List<SAMProgramRecord> oldRecords = header.getProgramRecords();
|
||||||
List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
|
List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
|
||||||
for ( SAMProgramRecord record : oldRecords ) {
|
for ( SAMProgramRecord record : oldRecords ) {
|
||||||
if ( !record.getId().startsWith(PROGRAM_RECORD_NAME) )
|
if ( !record.getId().startsWith(PROGRAM_RECORD_NAME) || KEEP_ALL_PG_RECORDS )
|
||||||
newRecords.add(record);
|
newRecords.add(record);
|
||||||
}
|
}
|
||||||
newRecords.add(programRecord);
|
newRecords.add(programRecord);
|
||||||
|
|
@ -470,6 +471,20 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
writer.setPresorted(true);
|
writer.setPresorted(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private SAMProgramRecord createProgramRecord() {
|
||||||
|
if ( NO_PG_TAG ) return null;
|
||||||
|
|
||||||
|
final SAMProgramRecord programRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME);
|
||||||
|
final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText");
|
||||||
|
try {
|
||||||
|
final String version = headerInfo.getString("org.broadinstitute.sting.gatk.version");
|
||||||
|
programRecord.setProgramVersion(version);
|
||||||
|
} catch (MissingResourceException e) {}
|
||||||
|
programRecord.setCommandLine(getToolkit().createApproximateCommandLineArgumentString(getToolkit(), this));
|
||||||
|
return programRecord;
|
||||||
|
}
|
||||||
|
|
||||||
private void emit(final SAMRecord read) {
|
private void emit(final SAMRecord read) {
|
||||||
|
|
||||||
// check to see whether the read was modified by looking at the temporary tag
|
// check to see whether the read was modified by looking at the temporary tag
|
||||||
|
|
|
||||||
|
|
@ -25,18 +25,16 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.sam;
|
package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.*;
|
||||||
import net.sf.samtools.SAMFileWriter;
|
|
||||||
import net.sf.samtools.SAMFileWriterFactory;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||||
|
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.HashMap;
|
import java.util.*;
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -50,21 +48,35 @@ public class NWaySAMFileWriter implements SAMFileWriter {
|
||||||
private Map<SAMReaderID,SAMFileWriter> writerMap = null;
|
private Map<SAMReaderID,SAMFileWriter> writerMap = null;
|
||||||
private boolean presorted ;
|
private boolean presorted ;
|
||||||
GenomeAnalysisEngine toolkit;
|
GenomeAnalysisEngine toolkit;
|
||||||
|
boolean KEEP_ALL_PG_RECORDS = false;
|
||||||
|
|
||||||
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
|
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
|
||||||
|
boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) {
|
||||||
this.presorted = presorted;
|
this.presorted = presorted;
|
||||||
this.toolkit = toolkit;
|
this.toolkit = toolkit;
|
||||||
|
this.KEEP_ALL_PG_RECORDS = keep_records;
|
||||||
writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
|
writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
|
||||||
setupByReader(toolkit,in2out,order, presorted, indexOnTheFly, generateMD5);
|
setupByReader(toolkit,in2out,order, presorted, indexOnTheFly, generateMD5, pRecord);
|
||||||
}
|
}
|
||||||
|
|
||||||
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly , boolean generateMD5) {
|
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
|
||||||
|
boolean presorted, boolean indexOnTheFly , boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) {
|
||||||
this.presorted = presorted;
|
this.presorted = presorted;
|
||||||
this.toolkit = toolkit;
|
this.toolkit = toolkit;
|
||||||
|
this.KEEP_ALL_PG_RECORDS = keep_records;
|
||||||
writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
|
writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
|
||||||
setupByReader(toolkit,ext,order, presorted, indexOnTheFly, generateMD5);
|
setupByReader(toolkit,ext,order, presorted, indexOnTheFly, generateMD5, pRecord);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
|
||||||
|
boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
|
||||||
|
this(toolkit, in2out, order, presorted, indexOnTheFly, generateMD5, null,false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
|
||||||
|
boolean presorted, boolean indexOnTheFly , boolean generateMD5) {
|
||||||
|
this(toolkit, ext, order, presorted, indexOnTheFly, generateMD5, null,false);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instantiates multiple underlying SAM writes, one per input SAM reader registered with GATK engine (those will be retrieved
|
* Instantiates multiple underlying SAM writes, one per input SAM reader registered with GATK engine (those will be retrieved
|
||||||
|
|
@ -73,7 +85,8 @@ public class NWaySAMFileWriter implements SAMFileWriter {
|
||||||
* @param toolkit
|
* @param toolkit
|
||||||
* @param in2out
|
* @param in2out
|
||||||
*/
|
*/
|
||||||
public void setupByReader(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
|
public void setupByReader(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
|
||||||
|
boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord) {
|
||||||
if ( in2out==null ) throw new StingException("input-output bam filename map for n-way-out writing is NULL");
|
if ( in2out==null ) throw new StingException("input-output bam filename map for n-way-out writing is NULL");
|
||||||
for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
|
for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
|
||||||
|
|
||||||
|
|
@ -85,9 +98,10 @@ public class NWaySAMFileWriter implements SAMFileWriter {
|
||||||
outName = in2out.get(fName);
|
outName = in2out.get(fName);
|
||||||
|
|
||||||
if ( writerMap.containsKey( rid ) )
|
if ( writerMap.containsKey( rid ) )
|
||||||
throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered");
|
throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered; "+
|
||||||
|
"map file likely contains multiple entries for this input file");
|
||||||
|
|
||||||
addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5);
|
addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5, pRecord);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -100,7 +114,8 @@ public class NWaySAMFileWriter implements SAMFileWriter {
|
||||||
* @param toolkit
|
* @param toolkit
|
||||||
* @param ext
|
* @param ext
|
||||||
*/
|
*/
|
||||||
public void setupByReader(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
|
public void setupByReader(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
|
||||||
|
boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord) {
|
||||||
for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
|
for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
|
||||||
|
|
||||||
String fName = toolkit.getReadsDataSource().getSAMFile(rid).getName();
|
String fName = toolkit.getReadsDataSource().getSAMFile(rid).getName();
|
||||||
|
|
@ -117,16 +132,29 @@ public class NWaySAMFileWriter implements SAMFileWriter {
|
||||||
|
|
||||||
if ( writerMap.containsKey( rid ) )
|
if ( writerMap.containsKey( rid ) )
|
||||||
throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered");
|
throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered");
|
||||||
|
addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5, pRecord);
|
||||||
addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
|
private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted,
|
||||||
|
boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord programRecord) {
|
||||||
File f = new File(outName);
|
File f = new File(outName);
|
||||||
SAMFileHeader header = toolkit.getSAMFileHeader(id).clone();
|
SAMFileHeader header = toolkit.getSAMFileHeader(id).clone();
|
||||||
header.setSortOrder(order);
|
header.setSortOrder(order);
|
||||||
|
|
||||||
|
if ( programRecord != null ) {
|
||||||
|
// --->> add program record
|
||||||
|
List<SAMProgramRecord> oldRecords = header.getProgramRecords();
|
||||||
|
List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
|
||||||
|
for ( SAMProgramRecord record : oldRecords ) {
|
||||||
|
if ( !record.getId().startsWith(programRecord.getId()) || KEEP_ALL_PG_RECORDS )
|
||||||
|
newRecords.add(record);
|
||||||
|
}
|
||||||
|
newRecords.add(programRecord);
|
||||||
|
header.setProgramRecords(newRecords);
|
||||||
|
// <-- add program record ends here
|
||||||
|
}
|
||||||
SAMFileWriterFactory factory = new SAMFileWriterFactory();
|
SAMFileWriterFactory factory = new SAMFileWriterFactory();
|
||||||
factory.setCreateIndex(indexOnTheFly);
|
factory.setCreateIndex(indexOnTheFly);
|
||||||
factory.setCreateMd5File(generateMD5);
|
factory.setCreateMd5File(generateMD5);
|
||||||
|
|
@ -134,7 +162,10 @@ public class NWaySAMFileWriter implements SAMFileWriter {
|
||||||
writerMap.put(id,sw);
|
writerMap.put(id,sw);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Collection<SAMFileWriter> getWriters() {
|
||||||
|
return writerMap.values();
|
||||||
|
}
|
||||||
|
|
||||||
public void addAlignment(SAMRecord samRecord) {
|
public void addAlignment(SAMRecord samRecord) {
|
||||||
final SAMReaderID id = toolkit.getReaderIDForRead(samRecord);
|
final SAMReaderID id = toolkit.getReaderIDForRead(samRecord);
|
||||||
String rg = samRecord.getStringAttribute("RG");
|
String rg = samRecord.getStringAttribute("RG");
|
||||||
|
|
@ -146,7 +177,7 @@ public class NWaySAMFileWriter implements SAMFileWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
public SAMFileHeader getFileHeader() {
|
public SAMFileHeader getFileHeader() {
|
||||||
return toolkit.getSAMFileHeader();
|
return toolkit.getSAMFileHeader();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() {
|
public void close() {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue