BAQ calculation refactoring in the GATK. Single -baq argument can be NONE, CALCULATE_AS_NECESSARY, and RECALCULATE. Walkers can control bia the @BAQMode annotation how the BAQ calculation is applied. Can either be as a tag, by overwriting the qualities scores, or by only returning the baq-capped qualities scores. Additionally, walkers can be set up to have the BAQ applied to the incoming reads (ON_INPUT, the default), to output reads (ON_OUTPUT), or HANDLED_BY_WALKER, which means that calling into the BAQ system is the responsibility of the individual walker.

SAMFileWriterStub now supports BAQ writing as an internal feature.  Several walkers have the @BAQMode applied to this, with parameters that I think are reasonable.  Please look if you own these walkers, though

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4798 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-12-06 20:55:52 +00:00
parent 02de9a9764
commit c91712bd59
19 changed files with 162 additions and 49 deletions

View File

@ -50,6 +50,7 @@ import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
@ -544,6 +545,9 @@ public abstract class AbstractGenomeAnalysisEngine {
private SAMDataSource createReadsDataSource(GenomeLocParser genomeLocParser, IndexedFastaSequenceFile refReader) {
DownsamplingMethod method = getDownsamplingMethod();
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.NONE )
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
return new SAMDataSource(
unpackBAMFileList(argCollection.samFiles),
genomeLocParser,
@ -555,9 +559,18 @@ public abstract class AbstractGenomeAnalysisEngine {
filters,
includeReadsWithDeletionAtLoci(),
generateExtendedEvents(),
argCollection.BAQMode, refReader);
getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_INPUT ? argCollection.BAQMode : BAQ.CalculationMode.NONE,
getWalkerBAQQualityMode(),
refReader);
}
/**
* Overloaded to break abstraction. Thanks Khalid :-(
* @return
*/
public BAQ.QualityMode getWalkerBAQQualityMode() { return BAQ.QualityMode.DONT_MODIFY; }
public BAQ.ApplicationTime getWalkerBAQApplicationTime() { return BAQ.ApplicationTime.ON_INPUT; }
/**
* Opens a reference sequence file paired with an index.
*

View File

@ -47,6 +47,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -181,6 +182,9 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
return method;
}
public BAQ.QualityMode getWalkerBAQQualityMode() { return WalkerManager.getBAQQualityMode(walker); }
public BAQ.ApplicationTime getWalkerBAQApplicationTime() { return WalkerManager.getBAQApplicationTime(walker); }
@Override
protected boolean generateExtendedEvents() {
return walker.generateExtendedEvents();

View File

@ -38,7 +38,8 @@ public class ReadProperties {
private boolean includeReadsWithDeletionAtLoci = false;
private boolean useOriginalBaseQualities = false;
private boolean generateExtendedEvents = false;
private BAQ.Mode mode = BAQ.Mode.NONE;
private BAQ.CalculationMode cmode = BAQ.CalculationMode.NONE;
private BAQ.QualityMode qmode = BAQ.QualityMode.DONT_MODIFY;
IndexedFastaSequenceFile refReader = null; // read for BAQ, if desired
// do we want to generate additional piles of "extended" events (indels)
@ -127,9 +128,8 @@ public class ReadProperties {
}
public BAQ.Mode getBAQMode() {
return mode;
}
public BAQ.QualityMode getBAQQualityMode() { return qmode; }
public BAQ.CalculationMode getBAQCalculationMode() { return cmode; }
public IndexedFastaSequenceFile getRefReader() {
return refReader;
@ -153,7 +153,8 @@ public class ReadProperties {
* @param includeReadsWithDeletionAtLoci if 'true', the base pileups sent to the walker's map() method
* will explicitly list reads with deletion over the current reference base; otherwise, only observed
* bases will be seen in the pileups, and the deletions will be skipped silently.
* @param mode How should we apply the BAQ calculation to the reads?
* @param cmode How should we apply the BAQ calculation to the reads?
* @param qmode How should we apply the BAQ calculation to the reads?
* @param refReader if applyBAQ is true, must be a valid pointer to a indexed fasta file reads so we can get the ref bases for BAQ calculation
*/
public ReadProperties( List<SAMReaderID> samFiles,
@ -166,7 +167,8 @@ public class ReadProperties {
Collection<SamRecordFilter> supplementalFilters,
boolean includeReadsWithDeletionAtLoci,
boolean generateExtendedEvents,
BAQ.Mode mode,
BAQ.CalculationMode cmode,
BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader ) {
this.readers = samFiles;
this.header = header;
@ -178,7 +180,8 @@ public class ReadProperties {
this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
this.generateExtendedEvents = generateExtendedEvents;
this.useOriginalBaseQualities = useOriginalBaseQualities;
this.mode = mode;
this.cmode = cmode;
this.qmode = qmode;
this.refReader = refReader;
}
}

View File

@ -36,6 +36,7 @@ import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import org.broadinstitute.sting.utils.help.DescriptionTaglet;
import org.broadinstitute.sting.utils.help.DisplayNameTaglet;
import org.broadinstitute.sting.utils.help.SummaryTaglet;
import org.broadinstitute.sting.utils.baq.BAQ;
import java.util.*;
@ -359,6 +360,14 @@ public class WalkerManager extends PluginManager<Walker> {
return downsamplingMethod;
}
public static BAQ.QualityMode getBAQQualityMode(Walker walker) {
return walker.getClass().getAnnotation(BAQMode.class).QualityMode();
}
public static BAQ.ApplicationTime getBAQApplicationTime(Walker walker) {
return walker.getClass().getAnnotation(BAQMode.class).ApplicationTime();
}
/**
* Gets the type of downsampling method requested by the walker. If an alternative
* downsampling method is specified on the command-line, the command-line version will

View File

@ -152,7 +152,7 @@ public class GATKArgumentCollection {
@Element(required = false)
@Argument(fullName = "baq", shortName="baq", doc="Type of BAQ calculation to apply in the engine", required = false)
public BAQ.Mode BAQMode = BAQ.Mode.NONE;
public BAQ.CalculationMode BAQMode = BAQ.CalculationMode.NONE;
/**
* Gets the default downsampling method, returned if the user didn't specify any downsampling

View File

@ -168,7 +168,7 @@ public class SAMDataSource implements SimpleDataSource {
supplementalFilters,
includeReadsWithDeletionAtLoci,
generateExtendedEvents,
BAQ.Mode.NONE, null // no BAQ
BAQ.CalculationMode.NONE, BAQ.QualityMode.DONT_MODIFY, null // no BAQ
);
}
@ -199,7 +199,8 @@ public class SAMDataSource implements SimpleDataSource {
Collection<SamRecordFilter> supplementalFilters,
boolean includeReadsWithDeletionAtLoci,
boolean generateExtendedEvents,
BAQ.Mode Mode,
BAQ.CalculationMode cmode,
BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader
) {
this.readMetrics = new ReadMetrics();
@ -247,7 +248,7 @@ public class SAMDataSource implements SimpleDataSource {
supplementalFilters,
includeReadsWithDeletionAtLoci,
generateExtendedEvents,
Mode, refReader );
cmode, qmode, refReader );
// cache the read group id (original) -> read group id (merged)
// and read group id (merged) -> read group id (original) mappings.
@ -517,7 +518,7 @@ public class SAMDataSource implements SimpleDataSource {
readProperties.getDownsamplingMethod().toFraction,
readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
readProperties.getSupplementalFilters(),
readProperties.getBAQMode(), readProperties.getRefReader());
readProperties.getBAQCalculationMode(), readProperties.getBAQQualityMode(), readProperties.getRefReader());
}
/**
@ -541,7 +542,7 @@ public class SAMDataSource implements SimpleDataSource {
readProperties.getDownsamplingMethod().toFraction,
readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
readProperties.getSupplementalFilters(),
readProperties.getBAQMode(), readProperties.getRefReader());
readProperties.getBAQCalculationMode(), readProperties.getBAQQualityMode(), readProperties.getRefReader());
}
/**
@ -575,7 +576,7 @@ public class SAMDataSource implements SimpleDataSource {
Double downsamplingFraction,
Boolean noValidationOfReadOrder,
Collection<SamRecordFilter> supplementalFilters,
BAQ.Mode mode, IndexedFastaSequenceFile refReader ) {
BAQ.CalculationMode cmode, BAQ.QualityMode qmode, IndexedFastaSequenceFile refReader ) {
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities);
// NOTE: this (and other filtering) should be done before on-the-fly sorting
@ -588,8 +589,8 @@ public class SAMDataSource implements SimpleDataSource {
if (!noValidationOfReadOrder && enableVerification)
wrappedIterator = new VerifyingSamIterator(genomeLocParser,wrappedIterator);
if (mode != BAQ.Mode.NONE)
wrappedIterator = new BAQSamIterator(refReader, wrappedIterator, mode);
if (cmode != BAQ.CalculationMode.NONE)
wrappedIterator = new BAQSamIterator(refReader, wrappedIterator, cmode, qmode);
wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));

View File

@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.baq.BAQ;
/**
* A stub for routing and management of SAM file reading and writing.
@ -100,6 +101,11 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
*/
private boolean writeStarted = false;
/**
* HMM for BAQ, if needed
*/
BAQ baqHMM = new BAQ();
/**
* Create a new stub given the requested SAM file and compression level.
* @param engine source of header data, maybe other data about input files.
@ -236,6 +242,11 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
* @{inheritDoc}
*/
public void addAlignment( SAMRecord alignment ) {
if ( engine.getArguments().BAQMode != BAQ.CalculationMode.NONE && engine.getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_OUTPUT ) {
System.out.printf("Writing BAQ at OUTPUT TIME%n");
baqHMM.baqRead(alignment, engine.getReferenceDataSource().getReference(), engine.getArguments().BAQMode, engine.getWalkerBAQQualityMode());
}
writeStarted = true;
outputTracker.getStorage(this).addAlignment(alignment);
}

View File

@ -0,0 +1,34 @@
package org.broadinstitute.sting.gatk.walkers;
import java.lang.annotation.Documented;
import java.lang.annotation.Inherited;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import java.lang.annotation.ElementType;
/**
* User: hanna
* Date: May 14, 2009
* Time: 1:51:22 PM
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
* Software and documentation are copyright 2005 by the Broad Institute.
* All rights are reserved.
*
* Users acknowledge that this software is supplied without any warranty or support.
* The Broad Institute is not responsible for its use, misuse, or
* functionality.
*/
/**
* Allows the walker to indicate what type of data it wants to consume.
*/
@Documented
@Inherited
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface BAQMode {
public abstract org.broadinstitute.sting.utils.baq.BAQ.QualityMode QualityMode() default org.broadinstitute.sting.utils.baq.BAQ.QualityMode.OVERWRITE_QUALS;
public abstract org.broadinstitute.sting.utils.baq.BAQ.ApplicationTime ApplicationTime() default org.broadinstitute.sting.utils.baq.BAQ.ApplicationTime.ON_INPUT;
}

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.utils.baq.BAQ;
import java.io.PrintStream;

View File

@ -30,6 +30,7 @@ import java.util.List;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.collections.Pair;
import org.apache.log4j.Logger;
@ -41,6 +42,7 @@ import org.apache.log4j.Logger;
* To change this template use File | Settings | File Templates.
*/
@ReadFilters(MalformedReadFilter.class)
@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
public abstract class Walker<MapType, ReduceType> {
final protected static Logger logger = Logger.getLogger(Walker.class);
private GenomeAnalysisEngine toolkit;

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
@ -45,6 +46,9 @@ import java.io.PrintStream;
* A variant caller which unifies the approaches of several disparate callers. Works for single-sample and
* multi-sample data. The user can choose from several different incorporated calculation models.
*/
// todo -- change when UG is generalized to do BAQ as necessary
//@BAQMode(QualityMode = BAQ.QualityMode.DONT_MODIFY, ApplicationTime = BAQ.ApplicationTime.HANDLED_IN_WALKER)
@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
@Reference(window=@Window(start=-200,stop=200))
@By(DataSource.REFERENCE)
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)

View File

@ -41,10 +41,12 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.BAQMode;
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator;
import org.broadinstitute.sting.utils.interval.NwayIntervalMergingIterator;
@ -65,6 +67,7 @@ import java.util.*;
* appropriate alternate reference (i.e. indel) exists and updates SAMRecords accordingly.
*/
//Reference(window=@Window(start=-30,stop=30))
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
public class IndelRealigner extends ReadWalker<Integer, Integer> {
public static final String ORIGINAL_CIGAR_TAG = "OC";

View File

@ -20,6 +20,7 @@ import java.io.PrintStream;
* Can also count the number of reads matching a given criterion using read filters (see the
* --read-filter command line argument). Simplest example of a read-backed analysis.
*/
@BAQMode(QualityMode = BAQ.QualityMode.DONT_MODIFY, ApplicationTime = BAQ.ApplicationTime.HANDLED_IN_WALKER)
@Reference(window=@Window(start=-5,stop=5))
@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES})
public class ValidateBAQWalker extends ReadWalker<Integer, Integer> {

View File

@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.sting.commandline.Argument;
@ -70,6 +71,7 @@ import java.util.Map;
* @help.summary First pass of the recalibration. Generates recalibration table based on various user-specified covariates (such as reported quality score, cycle, and dinucleotide).
*/
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
@By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file
@ReadFilters( {ZeroMappingQualityReadFilter.class} ) // Filter out all reads with zero mapping quality
@Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta

View File

@ -37,10 +37,7 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrde
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.collections.NestedHashMap;
@ -49,6 +46,7 @@ import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -68,6 +66,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
* @help.summary Second pass of the recalibration. Uses the table generated by CountCovariates to update the base quality scores of the input bam file using a sequential table calculation making the base quality scores more accurately reflect the actual quality of the bases as mesaured by reference mismatch rate.
*/
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
@WalkerName("TableRecalibration")
@Requires({ DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES }) // This walker requires -I input.bam, it also requires -R reference.fasta
public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWriter> {

View File

@ -36,12 +36,24 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
public class BAQ {
private final static boolean DEBUG = false;
public enum Mode {
public enum CalculationMode {
NONE, // don't apply a BAQ at all, the default
USE_TAG_ONLY, // only use the TAG, if present, in the reads
ADD_TAG_ONLY, // calculate the BAQ, but write it into the reads as the BAQ tag, leaving QUAL field alone
CALCULATE_AS_NECESSARY, // do HMM BAQ calculation on the fly, as necessary, if there's no tag
RECALCULATE_ALWAYS // do HMM BAQ calculation on the fly, regardless of whether there's a tag present
RECALCULATE // do HMM BAQ calculation on the fly, regardless of whether there's a tag present
}
/** these are features that only the walker can override */
public enum QualityMode {
ADD_TAG, // calculate the BAQ, but write it into the reads as the BAQ tag, leaving QUAL field alone
OVERWRITE_QUALS, // overwrite the quality field directly
DONT_MODIFY // do the BAQ, but don't modify the quality scores themselves, just return them in the function.
}
public enum ApplicationTime {
FORBIDDEN, // Walker does not tolerate BAQ input
ON_INPUT, // apply the BAQ calculation to the incoming reads, the default
ON_OUTPUT, // apply the BAQ calculation to outgoing read streams
HANDLED_IN_WALKER // the walker will deal with the BAQ calculation status itself
}
public static final String BAQ_TAG = "BQ";
@ -487,35 +499,36 @@ public class BAQ {
* @param read
* @param refReader
* @param calculationType
* @return
* @return BQ qualities for use, in case qmode is DONT_MODIFY
*/
public SAMRecord baqRead(SAMRecord read, IndexedFastaSequenceFile refReader, Mode calculationType ) {
public byte[] baqRead(SAMRecord read, IndexedFastaSequenceFile refReader, CalculationMode calculationType, QualityMode qmode ) {
if ( DEBUG ) System.out.printf("BAQ %s read %s%n", calculationType, read.getReadName());
if ( calculationType == Mode.NONE ) { // we don't want to do anything
byte[] BAQQuals = read.getBaseQualities(); // in general we are overwriting quals, so just get a pointer to them
if ( calculationType == CalculationMode.NONE ) { // we don't want to do anything
; // just fall though
} else if ( excludeReadFromBAQ(read) ) {
; // just fall through
} else if ( calculationType == Mode.USE_TAG_ONLY ) {
calcBAQFromTag(read, true, true);
} else {
if ( calculationType == Mode.RECALCULATE_ALWAYS || ! hasBAQTag(read) || calculationType == Mode.ADD_TAG_ONLY ) {
if ( calculationType == CalculationMode.RECALCULATE || ! hasBAQTag(read) ) {
if ( DEBUG ) System.out.printf(" Calculating BAQ on the fly%n");
BAQCalculationResult hmmResult = calcBAQFromHMM(read, refReader);
if ( hmmResult != null ) {
if ( calculationType == Mode.ADD_TAG_ONLY )
addBAQTag(read, hmmResult.bq);
else
// modify QUALs directly
System.arraycopy(hmmResult.bq, 0, read.getBaseQualities(), 0, hmmResult.bq.length);
switch ( qmode ) {
case ADD_TAG: addBAQTag(read, hmmResult.bq); break;
case OVERWRITE_QUALS: System.arraycopy(hmmResult.bq, 0, read.getBaseQualities(), 0, hmmResult.bq.length); break;
case DONT_MODIFY: BAQQuals = hmmResult.bq; break;
default: throw new ReviewedStingException("BUG: unexpected qmode " + qmode);
}
}
} else {
} else if ( qmode == QualityMode.OVERWRITE_QUALS ) { // only makes sense if we are overwriting quals
if ( DEBUG ) System.out.printf(" Taking BAQ from tag%n");
// this overwrites the original qualities
calcBAQFromTag(read, true, false);
}
}
return read;
return BAQQuals;
}
/**

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.baq;
import net.sf.samtools.SAMRecord;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import java.util.Iterator;
@ -14,7 +15,8 @@ public class BAQSamIterator implements StingSAMIterator {
StingSAMIterator it;
BAQ baqHMM = new BAQ(); // creates a BAQ creator with default parameters
IndexedFastaSequenceFile refReader = null;
BAQ.Mode mode;
BAQ.CalculationMode cmode;
BAQ.QualityMode qmode;
/**
* Creates a new BAMSamIterator using the reference getter refReader and applies the BAM to the reads coming
@ -22,17 +24,28 @@ public class BAQSamIterator implements StingSAMIterator {
*
* @param refReader
* @param it
* @param mode
* @param cmode
* @param qmode
*/
public BAQSamIterator(IndexedFastaSequenceFile refReader, StingSAMIterator it, BAQ.Mode mode) {
this.refReader =refReader;
public BAQSamIterator(IndexedFastaSequenceFile refReader, StingSAMIterator it, BAQ.CalculationMode cmode, BAQ.QualityMode qmode) {
if ( cmode == BAQ.CalculationMode.NONE ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with calculation mode NONE");
if ( qmode == BAQ.QualityMode.DONT_MODIFY ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with quailty mode DONT_MODIFY");
this.refReader = refReader;
this.it = it;
this.mode = mode;
this.cmode = cmode;
this.qmode = qmode;
}
public SAMRecord next() { return baqHMM.baqRead(it.next(), refReader, mode); }
public boolean hasNext() { return this.it.hasNext(); }
public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); }
public void close() { it.close(); }
public SAMRecord next() {
System.out.printf("BAQing during input%n");
SAMRecord read = it.next();
baqHMM.baqRead(read, refReader, cmode, qmode);
return read;
}
public boolean hasNext() { return this.it.hasNext(); }
public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); }
public void close() { it.close(); }
public Iterator<SAMRecord> iterator() { return this; }
}

View File

@ -124,6 +124,7 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
cacheMisses++;
result = super.getSubsequenceAt(contig, start, stop);
} else {
// todo -- potential optimization is to check if contig.name == contig, as this in generally will be true
SAMSequenceRecord contigInfo = super.getSequenceDictionary().getSequence(contig);
if (stop > contigInfo.getSequenceLength())
@ -132,7 +133,6 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
synchronized (lock) { // access to shared cache information must be protected
if ( start < cacheStart || stop > cacheStop || cache == null || cache.getContigIndex() != contigInfo.getSequenceIndex() ) {
cacheMisses++;
cacheStart = Math.max(start - cacheMissBackup, 0);
cacheStop = Math.min(cacheStart + cacheSize, contigInfo.getSequenceLength());
cache = super.getSubsequenceAt(contig, cacheStart, cacheStop);

View File

@ -148,7 +148,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
new ArrayList<SamRecordFilter>(),
false,
false,
BAQ.Mode.NONE, null // no BAQ
BAQ.CalculationMode.NONE, BAQ.QualityMode.DONT_MODIFY, null // no BAQ
);
}
}