Enhance the I/O management system to support custom headers and set the presorted flag

from the initialize() method (or at any time before the first SAM record is written).


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2560 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-01-12 18:21:42 +00:00
parent 3c5f5177b1
commit 58999a8e9d
5 changed files with 79 additions and 33 deletions

View File

@ -0,0 +1,25 @@
package org.broadinstitute.sting.gatk.io;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMFileHeader;
/**
 * A SAM file writer that allows unsorted BAM files to be written
 * and sorted on-the-fly. On top of the plain {@link SAMFileWriter}
 * contract it accepts a custom header and a presorted flag.
 *
 * @author mhanna
 * @version 0.1
 */
public interface StingSAMFileWriter extends SAMFileWriter {

    /**
     * Writes the given custom header to the SAM file output.
     *
     * @param header the header to write.
     */
    void writeHeader(SAMFileHeader header);

    /**
     * Sets whether the BAM file to create is actually presorted.
     *
     * @param presorted true if the BAM file is presorted; false otherwise.
     */
    void setPresorted(boolean presorted);
}

View File

@ -49,19 +49,19 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
public SAMFileWriterStorage( SAMFileWriterStub stub, File file ) {
this.file = file;
if( stub.getCompressionLevel() != null )
this.writer = new SAMFileWriterFactory().makeBAMWriter( stub.getSAMFileHeader(), true, file, stub.getCompressionLevel() );
this.writer = new SAMFileWriterFactory().makeBAMWriter( stub.getFileHeader(), stub.isPresorted(), file, stub.getCompressionLevel() );
else
this.writer = new SAMFileWriterFactory().makeBAMWriter( stub.getSAMFileHeader(), true, file );
}
public void addAlignment( SAMRecord read ) {
writer.addAlignment(read);
this.writer = new SAMFileWriterFactory().makeBAMWriter( stub.getFileHeader(), stub.isPresorted(), file );
}
/**
 * Returns the file header reported by the wrapped writer.
 * @return the header obtained from the underlying writer.
 */
public SAMFileHeader getFileHeader() {
    return this.writer.getFileHeader();
}
/**
 * Forwards a single read to the wrapped writer.
 * @param read the record to hand to the underlying writer.
 */
public void addAlignment( final SAMRecord read ) {
    this.writer.addAlignment(read);
}
/**
 * Closes the wrapped writer.
 */
public void close() {
    this.writer.close();
}

View File

@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.io.stubs;
import org.broadinstitute.sting.utils.cmdLine.*;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import net.sf.samtools.SAMFileWriter;
import java.util.List;
@ -60,7 +61,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
@Override
public boolean supports( Class type ) {
return SAMFileWriter.class.isAssignableFrom(type);
return SAMFileWriter.class.equals(type) || StingSAMFileWriter.class.equals(type);
}
@Override

View File

@ -32,6 +32,7 @@ import net.sf.samtools.SAMFileHeader;
import java.io.File;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
/**
@ -40,12 +41,17 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
* @author mhanna
* @version 0.1
*/
public class SAMFileWriterStub implements Stub<SAMFileWriter>, SAMFileWriter {
public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWriter {
/**
* Engine to use for collecting attributes for the output SAM file.
*/
private final GenomeAnalysisEngine engine;
/**
* A header supplied by the user that overrides the merged header from the input BAM.
*/
private SAMFileHeader headerOverride = null;
/**
* The sam file that this stub should write to. Should be passed along to
* whatever happens to create the StreamConnector.
@ -57,6 +63,11 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, SAMFileWriter {
*/
private Integer compressionLevel = null;
/**
* Should this BAM be presorted?
*/
private boolean presorted = true;
/**
* Connects this stub with an external stream capable of serving the
* requests of the consumer of this stub.
@ -66,7 +77,7 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, SAMFileWriter {
/**
* Create a new stub given the requested SAM file and compression level.
* @param engine source of header data, maybe other data about input files.
* @param samFile SAM file to (ultimately) cerate.
* @param samFile SAM file to (ultimately) create.
*/
public SAMFileWriterStub( GenomeAnalysisEngine engine, File samFile ) {
this.engine = engine;
@ -85,12 +96,8 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, SAMFileWriter {
* Retrieves the header to use when creating the new SAM file.
* @return header to use when creating the new SAM file.
*/
public SAMFileHeader getSAMFileHeader() {
return engine.getSAMFileHeader();
}
public SAMFileHeader getFileHeader() {
return getSAMFileHeader();
return headerOverride != null ? headerOverride : engine.getSAMFileHeader();
}
/**
@ -109,6 +116,22 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, SAMFileWriter {
this.compressionLevel = compressionLevel;
}
/**
 * Reports the presorted flag currently held by this stub.
 * @return true if the BAM file to create is presorted; false otherwise.
 */
public boolean isPresorted() {
    return presorted;
}
/**
 * Sets whether the BAM file to create is actually presorted.
 * The stored value is what {@code isPresorted()} subsequently returns.
 * @param presorted true if the BAM file is presorted; false otherwise.
 */
public void setPresorted(final boolean presorted) {
    this.presorted = presorted;
}
/**
* Registers the given streamConnector with this stub.
* @param outputTracker The connector used to provide an appropriate stream.
@ -117,6 +140,14 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, SAMFileWriter {
this.outputTracker = outputTracker;
}
/**
 * Installs the given header as the override header for this writer.
 * This call only stores the header in the stub; it performs no output itself.
 * @param header the header to use as the target for this writer.
 */
public void writeHeader(final SAMFileHeader header) {
    headerOverride = header;
}
/**
* {@inheritDoc}
*/

View File

@ -8,6 +8,7 @@ import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import java.util.*;
import java.util.regex.Pattern;
@ -26,7 +27,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
* an optional argument to dump the reads out to a BAM file
*/
@Argument(fullName = "outputBam", shortName = "ob", doc = "Write output to this BAM filename instead of STDOUT", required = false)
String outputBamFile = null;
StingSAMFileWriter outputBam = null;
@Argument(fullName = "qTrimmingThreshold", shortName = "QT", doc = "", required = false)
int qTrimmingThreshold = -1;
@ -117,6 +118,8 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
}
}
}
// outputBam is an optional argument that defaults to null, so only
// configure the presorted flag when an output BAM was actually supplied.
if (outputBam != null) {
    outputBam.setPresorted(clippingRepresentation != ClippingRepresentation.SOFTCLIP_BASES);
}
}
/**
@ -277,23 +280,15 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
* @return
*/
public ClippingData reduceInit() {
SAMFileWriter outputBam = null;
if ( outputBamFile != null ) {
SAMFileHeader header = this.getToolkit().getSAMFileHeader();
boolean maintainsSort = clippingRepresentation != ClippingRepresentation.SOFTCLIP_BASES;
outputBam = Utils.createSAMFileWriterWithCompression(header, maintainsSort, outputBamFile, 5);
}
return new ClippingData(outputBam, sequencesToClip);
return new ClippingData(sequencesToClip);
}
public ClippingData reduce(ReadClipper clipper, ClippingData data) {
if ( clipper == null )
return data;
if (data.output != null) {
data.output.addAlignment(clipper.clipRead(clippingRepresentation));
if (outputBam != null) {
outputBam.addAlignment(clipper.clipRead(clippingRepresentation));
} else {
out.println(clipper.clipRead(clippingRepresentation).format());
}
@ -321,9 +316,6 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
}
/**
 * Finishes the traversal: closes the writer carried by the accumulator, if
 * one is present, and prints the accumulator's summary to {@code out}.
 * @param data the accumulated clipping statistics to report.
 */
public void onTraversalDone(ClippingData data) {
    if (data.output != null)
        data.output.close();
    // print, not printf: the summary is data, not a format string, so any
    // '%' it contains must be emitted verbatim rather than interpreted.
    out.print(data.toString());
}
@ -522,8 +514,6 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
}
public class ClippingData {
public SAMFileWriter output = null;
public long nTotalReads = 0;
public long nTotalBases = 0;
public long nClippedReads = 0;
@ -534,8 +524,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
HashMap<String, Long> seqClipCounts = new HashMap<String, Long>();
public ClippingData(SAMFileWriter output, List<SeqToClip> clipSeqs) {
this.output = output;
public ClippingData(List<SeqToClip> clipSeqs) {
for (SeqToClip clipSeq : clipSeqs) {
seqClipCounts.put(clipSeq.seq, 0L);
}