Breaking the utility to write @PG tags for SAMFileWriters and StingSAMFileWriters

This commit is contained in:
Mauricio Carneiro 2012-11-13 15:21:57 -05:00
parent 95a4ba57bf
commit cab8ba7c75
3 changed files with 59 additions and 24 deletions

View File

@ -256,7 +256,6 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
// todo -- rework the whole NO_PG_TAG thing // todo -- rework the whole NO_PG_TAG thing
final boolean preSorted = true; final boolean preSorted = true;
final boolean indexOnTheFly = true; final boolean indexOnTheFly = true;
final boolean generateMD5 = true;
final boolean keep_records = true; final boolean keep_records = true;
final SAMFileHeader.SortOrder sortOrder = SAMFileHeader.SortOrder.coordinate; final SAMFileHeader.SortOrder sortOrder = SAMFileHeader.SortOrder.coordinate;
if (nwayout) { if (nwayout) {
@ -267,7 +266,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
writerToUse = out; writerToUse = out;
out.setPresorted(false); out.setPresorted(false);
if (!NO_PG_TAG) { if (!NO_PG_TAG) {
Utils.setupWriter(out, toolkit, !preSorted, keep_records, this, PROGRAM_RECORD_NAME); Utils.setupWriter(out, toolkit, toolkit.getSAMFileHeader(), !preSorted, keep_records, this, PROGRAM_RECORD_NAME);
} }
} }
} }

View File

@ -687,23 +687,69 @@ public class Utils {
array[i] = value; array[i] = value;
} }
public static void setupWriter(StingSAMFileWriter writer, GenomeAnalysisEngine toolkit, boolean preSorted, boolean KEEP_ALL_PG_RECORDS, Object walker, String PROGRAM_RECORD_NAME) { /**
final SAMProgramRecord programRecord = createProgramRecord(toolkit, walker, PROGRAM_RECORD_NAME); * Adds the given program record to the list of program records (@PG tags) in a clone of the original header and
* returns that clone, ready to be handed to a BAM writer.
SAMFileHeader header = toolkit.getSAMFileHeader(); *
* @param toolkit the engine
* @param originalHeader original header (it is cloned before the program records are changed)
* @param KEEP_ALL_PG_RECORDS whether or not to keep all the other program records already existing in this BAM file
* @param programRecord the program record for this program
* @return the cloned header with the program record added
*/
public static SAMFileHeader setupWriter(GenomeAnalysisEngine toolkit, SAMFileHeader originalHeader, boolean KEEP_ALL_PG_RECORDS, SAMProgramRecord programRecord) {
SAMFileHeader header = originalHeader.clone();
List<SAMProgramRecord> oldRecords = header.getProgramRecords(); List<SAMProgramRecord> oldRecords = header.getProgramRecords();
List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1); List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
for ( SAMProgramRecord record : oldRecords ) for ( SAMProgramRecord record : oldRecords )
if ( !record.getId().startsWith(PROGRAM_RECORD_NAME) || KEEP_ALL_PG_RECORDS ) if ( !record.getId().startsWith(programRecord.getId()) || KEEP_ALL_PG_RECORDS )
newRecords.add(record); newRecords.add(record);
newRecords.add(programRecord); newRecords.add(programRecord);
header.setProgramRecords(newRecords); header.setProgramRecords(newRecords);
return header;
}
/**
 * Builds the program record (@PG tag) for this tool from the engine and the walker, then delegates to the
 * overload that accepts a ready-made program record in order to obtain the header for the BAM writer.
 *
 * @param toolkit             the engine
 * @param originalHeader      the header the returned header is derived from
 * @param KEEP_ALL_PG_RECORDS whether or not to keep all the other program records already existing in this BAM file
 * @param walker              the walker object (so we can extract the command line)
 * @param PROGRAM_RECORD_NAME the name for the PG tag
 * @return a pre-filled header for the bam writer
 */
public static SAMFileHeader setupWriter(GenomeAnalysisEngine toolkit, SAMFileHeader originalHeader, boolean KEEP_ALL_PG_RECORDS, Object walker, String PROGRAM_RECORD_NAME) {
    // The record is created first and passed straight through; no other state is touched here.
    return setupWriter(
            toolkit,
            originalHeader,
            KEEP_ALL_PG_RECORDS,
            createProgramRecord(toolkit, walker, PROGRAM_RECORD_NAME));
}
/**
* Creates a program record for the program, adds it to the list of program records (@PG tags) in the bam file and sets
* up the writer with the header and presorted status.
*
* @param writer BAM file writer
* @param toolkit the engine
* @param originalHeader the header from which the header written to the writer is built
* @param preSorted whether or not the writer can assume that the reads to be added are already sorted
* @param KEEP_ALL_PG_RECORDS whether or not to keep all the other program records already existing in this BAM file
* @param walker the walker object (so we can extract the command line)
* @param PROGRAM_RECORD_NAME the name for the PG tag
*/
public static void setupWriter(StingSAMFileWriter writer, GenomeAnalysisEngine toolkit, SAMFileHeader originalHeader, boolean preSorted, boolean KEEP_ALL_PG_RECORDS, Object walker, String PROGRAM_RECORD_NAME) {
SAMFileHeader header = setupWriter(toolkit, originalHeader, KEEP_ALL_PG_RECORDS, walker, PROGRAM_RECORD_NAME);
writer.writeHeader(header); writer.writeHeader(header);
writer.setPresorted(preSorted); writer.setPresorted(preSorted);
} }
/**
* Creates a program record (@PG) tag
*
* @param toolkit the engine
* @param walker the walker object (so we can extract the command line)
* @param PROGRAM_RECORD_NAME the name for the PG tag
* @return a program record for the tool
*/
public static SAMProgramRecord createProgramRecord(GenomeAnalysisEngine toolkit, Object walker, String PROGRAM_RECORD_NAME) { public static SAMProgramRecord createProgramRecord(GenomeAnalysisEngine toolkit, Object walker, String PROGRAM_RECORD_NAME) {
final SAMProgramRecord programRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME); final SAMProgramRecord programRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME);
final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText"); final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText");
@ -858,4 +904,5 @@ public class Utils {
} }
return subLists; return subLists;
} }
} }

View File

@ -28,11 +28,14 @@ package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.*; import net.sf.samtools.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File; import java.io.File;
import java.util.*; import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
@ -138,21 +141,7 @@ public class NWaySAMFileWriter implements SAMFileWriter {
private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted, private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted,
boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord programRecord) { boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord programRecord) {
File f = new File(outName); File f = new File(outName);
SAMFileHeader header = toolkit.getSAMFileHeader(id).clone(); SAMFileHeader header = Utils.setupWriter(toolkit, toolkit.getSAMFileHeader(id), KEEP_ALL_PG_RECORDS, programRecord);
header.setSortOrder(order);
if ( programRecord != null ) {
// --->> add program record
List<SAMProgramRecord> oldRecords = header.getProgramRecords();
List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
for ( SAMProgramRecord record : oldRecords ) {
if ( !record.getId().startsWith(programRecord.getId()) || KEEP_ALL_PG_RECORDS )
newRecords.add(record);
}
newRecords.add(programRecord);
header.setProgramRecords(newRecords);
// <-- add program record ends here
}
SAMFileWriterFactory factory = new SAMFileWriterFactory(); SAMFileWriterFactory factory = new SAMFileWriterFactory();
factory.setCreateIndex(indexOnTheFly); factory.setCreateIndex(indexOnTheFly);
factory.setCreateMd5File(generateMD5); factory.setCreateMd5File(generateMD5);