BAQ calculation refactoring in the GATK. Single -baq argument can be NONE, CALCULATE_AS_NECESSARY, and RECALCULATE. Walkers can control bia the @BAQMode annotation how the BAQ calculation is applied. Can either be as a tag, by overwriting the qualities scores, or by only returning the baq-capped qualities scores. Additionally, walkers can be set up to have the BAQ applied to the incoming reads (ON_INPUT, the default), to output reads (ON_OUTPUT), or HANDLED_BY_WALKER, which means that calling into the BAQ system is the responsibility of the individual walker.
SAMFileWriterStub now supports BAQ writing as an internal feature. Several walkers have the @BAQMode applied to this, with parameters that I think are reasonable. Please look if you own these walkers, though git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4798 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
02de9a9764
commit
c91712bd59
|
|
@ -50,6 +50,7 @@ import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
|||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
|
|
@ -544,6 +545,9 @@ public abstract class AbstractGenomeAnalysisEngine {
|
|||
private SAMDataSource createReadsDataSource(GenomeLocParser genomeLocParser, IndexedFastaSequenceFile refReader) {
|
||||
DownsamplingMethod method = getDownsamplingMethod();
|
||||
|
||||
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.NONE )
|
||||
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
|
||||
|
||||
return new SAMDataSource(
|
||||
unpackBAMFileList(argCollection.samFiles),
|
||||
genomeLocParser,
|
||||
|
|
@ -555,9 +559,18 @@ public abstract class AbstractGenomeAnalysisEngine {
|
|||
filters,
|
||||
includeReadsWithDeletionAtLoci(),
|
||||
generateExtendedEvents(),
|
||||
argCollection.BAQMode, refReader);
|
||||
getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_INPUT ? argCollection.BAQMode : BAQ.CalculationMode.NONE,
|
||||
getWalkerBAQQualityMode(),
|
||||
refReader);
|
||||
}
|
||||
|
||||
/**
|
||||
* Overloaded to break abstraction. Thanks Khalid :-(
|
||||
* @return
|
||||
*/
|
||||
public BAQ.QualityMode getWalkerBAQQualityMode() { return BAQ.QualityMode.DONT_MODIFY; }
|
||||
public BAQ.ApplicationTime getWalkerBAQApplicationTime() { return BAQ.ApplicationTime.ON_INPUT; }
|
||||
|
||||
/**
|
||||
* Opens a reference sequence file paired with an index.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
|
|
@ -181,6 +182,9 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
|
|||
return method;
|
||||
}
|
||||
|
||||
public BAQ.QualityMode getWalkerBAQQualityMode() { return WalkerManager.getBAQQualityMode(walker); }
|
||||
public BAQ.ApplicationTime getWalkerBAQApplicationTime() { return WalkerManager.getBAQApplicationTime(walker); }
|
||||
|
||||
@Override
|
||||
protected boolean generateExtendedEvents() {
|
||||
return walker.generateExtendedEvents();
|
||||
|
|
|
|||
|
|
@ -38,7 +38,8 @@ public class ReadProperties {
|
|||
private boolean includeReadsWithDeletionAtLoci = false;
|
||||
private boolean useOriginalBaseQualities = false;
|
||||
private boolean generateExtendedEvents = false;
|
||||
private BAQ.Mode mode = BAQ.Mode.NONE;
|
||||
private BAQ.CalculationMode cmode = BAQ.CalculationMode.NONE;
|
||||
private BAQ.QualityMode qmode = BAQ.QualityMode.DONT_MODIFY;
|
||||
IndexedFastaSequenceFile refReader = null; // read for BAQ, if desired
|
||||
|
||||
// do we want to generate additional piles of "extended" events (indels)
|
||||
|
|
@ -127,9 +128,8 @@ public class ReadProperties {
|
|||
}
|
||||
|
||||
|
||||
public BAQ.Mode getBAQMode() {
|
||||
return mode;
|
||||
}
|
||||
public BAQ.QualityMode getBAQQualityMode() { return qmode; }
|
||||
public BAQ.CalculationMode getBAQCalculationMode() { return cmode; }
|
||||
|
||||
public IndexedFastaSequenceFile getRefReader() {
|
||||
return refReader;
|
||||
|
|
@ -153,7 +153,8 @@ public class ReadProperties {
|
|||
* @param includeReadsWithDeletionAtLoci if 'true', the base pileups sent to the walker's map() method
|
||||
* will explicitly list reads with deletion over the current reference base; otherwise, only observed
|
||||
* bases will be seen in the pileups, and the deletions will be skipped silently.
|
||||
* @param mode How should we apply the BAQ calculation to the reads?
|
||||
* @param cmode How should we apply the BAQ calculation to the reads?
|
||||
* @param qmode How should we apply the BAQ calculation to the reads?
|
||||
* @param refReader if applyBAQ is true, must be a valid pointer to a indexed fasta file reads so we can get the ref bases for BAQ calculation
|
||||
*/
|
||||
public ReadProperties( List<SAMReaderID> samFiles,
|
||||
|
|
@ -166,7 +167,8 @@ public class ReadProperties {
|
|||
Collection<SamRecordFilter> supplementalFilters,
|
||||
boolean includeReadsWithDeletionAtLoci,
|
||||
boolean generateExtendedEvents,
|
||||
BAQ.Mode mode,
|
||||
BAQ.CalculationMode cmode,
|
||||
BAQ.QualityMode qmode,
|
||||
IndexedFastaSequenceFile refReader ) {
|
||||
this.readers = samFiles;
|
||||
this.header = header;
|
||||
|
|
@ -178,7 +180,8 @@ public class ReadProperties {
|
|||
this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
|
||||
this.generateExtendedEvents = generateExtendedEvents;
|
||||
this.useOriginalBaseQualities = useOriginalBaseQualities;
|
||||
this.mode = mode;
|
||||
this.cmode = cmode;
|
||||
this.qmode = qmode;
|
||||
this.refReader = refReader;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
|||
import org.broadinstitute.sting.utils.help.DescriptionTaglet;
|
||||
import org.broadinstitute.sting.utils.help.DisplayNameTaglet;
|
||||
import org.broadinstitute.sting.utils.help.SummaryTaglet;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -359,6 +360,14 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
return downsamplingMethod;
|
||||
}
|
||||
|
||||
public static BAQ.QualityMode getBAQQualityMode(Walker walker) {
|
||||
return walker.getClass().getAnnotation(BAQMode.class).QualityMode();
|
||||
}
|
||||
|
||||
public static BAQ.ApplicationTime getBAQApplicationTime(Walker walker) {
|
||||
return walker.getClass().getAnnotation(BAQMode.class).ApplicationTime();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the type of downsampling method requested by the walker. If an alternative
|
||||
* downsampling method is specified on the command-line, the command-line version will
|
||||
|
|
|
|||
|
|
@ -152,7 +152,7 @@ public class GATKArgumentCollection {
|
|||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "baq", shortName="baq", doc="Type of BAQ calculation to apply in the engine", required = false)
|
||||
public BAQ.Mode BAQMode = BAQ.Mode.NONE;
|
||||
public BAQ.CalculationMode BAQMode = BAQ.CalculationMode.NONE;
|
||||
|
||||
/**
|
||||
* Gets the default downsampling method, returned if the user didn't specify any downsampling
|
||||
|
|
|
|||
|
|
@ -168,7 +168,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
supplementalFilters,
|
||||
includeReadsWithDeletionAtLoci,
|
||||
generateExtendedEvents,
|
||||
BAQ.Mode.NONE, null // no BAQ
|
||||
BAQ.CalculationMode.NONE, BAQ.QualityMode.DONT_MODIFY, null // no BAQ
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -199,7 +199,8 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
Collection<SamRecordFilter> supplementalFilters,
|
||||
boolean includeReadsWithDeletionAtLoci,
|
||||
boolean generateExtendedEvents,
|
||||
BAQ.Mode Mode,
|
||||
BAQ.CalculationMode cmode,
|
||||
BAQ.QualityMode qmode,
|
||||
IndexedFastaSequenceFile refReader
|
||||
) {
|
||||
this.readMetrics = new ReadMetrics();
|
||||
|
|
@ -247,7 +248,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
supplementalFilters,
|
||||
includeReadsWithDeletionAtLoci,
|
||||
generateExtendedEvents,
|
||||
Mode, refReader );
|
||||
cmode, qmode, refReader );
|
||||
|
||||
// cache the read group id (original) -> read group id (merged)
|
||||
// and read group id (merged) -> read group id (original) mappings.
|
||||
|
|
@ -517,7 +518,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
readProperties.getDownsamplingMethod().toFraction,
|
||||
readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||
readProperties.getSupplementalFilters(),
|
||||
readProperties.getBAQMode(), readProperties.getRefReader());
|
||||
readProperties.getBAQCalculationMode(), readProperties.getBAQQualityMode(), readProperties.getRefReader());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -541,7 +542,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
readProperties.getDownsamplingMethod().toFraction,
|
||||
readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||
readProperties.getSupplementalFilters(),
|
||||
readProperties.getBAQMode(), readProperties.getRefReader());
|
||||
readProperties.getBAQCalculationMode(), readProperties.getBAQQualityMode(), readProperties.getRefReader());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -575,7 +576,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
Double downsamplingFraction,
|
||||
Boolean noValidationOfReadOrder,
|
||||
Collection<SamRecordFilter> supplementalFilters,
|
||||
BAQ.Mode mode, IndexedFastaSequenceFile refReader ) {
|
||||
BAQ.CalculationMode cmode, BAQ.QualityMode qmode, IndexedFastaSequenceFile refReader ) {
|
||||
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities);
|
||||
|
||||
// NOTE: this (and other filtering) should be done before on-the-fly sorting
|
||||
|
|
@ -588,8 +589,8 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
if (!noValidationOfReadOrder && enableVerification)
|
||||
wrappedIterator = new VerifyingSamIterator(genomeLocParser,wrappedIterator);
|
||||
|
||||
if (mode != BAQ.Mode.NONE)
|
||||
wrappedIterator = new BAQSamIterator(refReader, wrappedIterator, mode);
|
||||
if (cmode != BAQ.CalculationMode.NONE)
|
||||
wrappedIterator = new BAQSamIterator(refReader, wrappedIterator, cmode, qmode);
|
||||
|
||||
wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));
|
||||
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
|||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
|
||||
/**
|
||||
* A stub for routing and management of SAM file reading and writing.
|
||||
|
|
@ -100,6 +101,11 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
|
|||
*/
|
||||
private boolean writeStarted = false;
|
||||
|
||||
/**
|
||||
* HMM for BAQ, if needed
|
||||
*/
|
||||
BAQ baqHMM = new BAQ();
|
||||
|
||||
/**
|
||||
* Create a new stub given the requested SAM file and compression level.
|
||||
* @param engine source of header data, maybe other data about input files.
|
||||
|
|
@ -236,6 +242,11 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
|
|||
* @{inheritDoc}
|
||||
*/
|
||||
public void addAlignment( SAMRecord alignment ) {
|
||||
if ( engine.getArguments().BAQMode != BAQ.CalculationMode.NONE && engine.getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_OUTPUT ) {
|
||||
System.out.printf("Writing BAQ at OUTPUT TIME%n");
|
||||
baqHMM.baqRead(alignment, engine.getReferenceDataSource().getReference(), engine.getArguments().BAQMode, engine.getWalkerBAQQualityMode());
|
||||
}
|
||||
|
||||
writeStarted = true;
|
||||
outputTracker.getStorage(this).addAlignment(alignment);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,34 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import java.lang.annotation.Documented;
|
||||
import java.lang.annotation.Inherited;
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
import java.lang.annotation.Target;
|
||||
import java.lang.annotation.ElementType;
|
||||
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 14, 2009
|
||||
* Time: 1:51:22 PM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Allows the walker to indicate what type of data it wants to consume.
|
||||
*/
|
||||
|
||||
@Documented
|
||||
@Inherited
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.TYPE)
|
||||
public @interface BAQMode {
|
||||
public abstract org.broadinstitute.sting.utils.baq.BAQ.QualityMode QualityMode() default org.broadinstitute.sting.utils.baq.BAQ.QualityMode.OVERWRITE_QUALS;
|
||||
public abstract org.broadinstitute.sting.utils.baq.BAQ.ApplicationTime ApplicationTime() default org.broadinstitute.sting.utils.baq.BAQ.ApplicationTime.ON_INPUT;
|
||||
}
|
||||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import java.util.List;
|
|||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
|
|
@ -41,6 +42,7 @@ import org.apache.log4j.Logger;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
@ReadFilters(MalformedReadFilter.class)
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
|
||||
public abstract class Walker<MapType, ReduceType> {
|
||||
final protected static Logger logger = Logger.getLogger(Walker.class);
|
||||
private GenomeAnalysisEngine toolkit;
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
|||
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
|
||||
|
|
@ -45,6 +46,9 @@ import java.io.PrintStream;
|
|||
* A variant caller which unifies the approaches of several disparate callers. Works for single-sample and
|
||||
* multi-sample data. The user can choose from several different incorporated calculation models.
|
||||
*/
|
||||
// todo -- change when UG is generalized to do BAQ as necessary
|
||||
//@BAQMode(QualityMode = BAQ.QualityMode.DONT_MODIFY, ApplicationTime = BAQ.ApplicationTime.HANDLED_IN_WALKER)
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
|
||||
@Reference(window=@Window(start=-200,stop=200))
|
||||
@By(DataSource.REFERENCE)
|
||||
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)
|
||||
|
|
|
|||
|
|
@ -41,10 +41,12 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.BAQMode;
|
||||
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator;
|
||||
import org.broadinstitute.sting.utils.interval.NwayIntervalMergingIterator;
|
||||
|
|
@ -65,6 +67,7 @@ import java.util.*;
|
|||
* appropriate alternate reference (i.e. indel) exists and updates SAMRecords accordingly.
|
||||
*/
|
||||
//Reference(window=@Window(start=-30,stop=30))
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
|
||||
public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||
|
||||
public static final String ORIGINAL_CIGAR_TAG = "OC";
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ import java.io.PrintStream;
|
|||
* Can also count the number of reads matching a given criterion using read filters (see the
|
||||
* --read-filter command line argument). Simplest example of a read-backed analysis.
|
||||
*/
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.DONT_MODIFY, ApplicationTime = BAQ.ApplicationTime.HANDLED_IN_WALKER)
|
||||
@Reference(window=@Window(start=-5,stop=5))
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES})
|
||||
public class ValidateBAQWalker extends ReadWalker<Integer, Integer> {
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
|
@ -70,6 +71,7 @@ import java.util.Map;
|
|||
* @help.summary First pass of the recalibration. Generates recalibration table based on various user-specified covariates (such as reported quality score, cycle, and dinucleotide).
|
||||
*/
|
||||
|
||||
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
||||
@By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file
|
||||
@ReadFilters( {ZeroMappingQualityReadFilter.class} ) // Filter out all reads with zero mapping quality
|
||||
@Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta
|
||||
|
|
|
|||
|
|
@ -37,10 +37,7 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrde
|
|||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
|
|
@ -49,6 +46,7 @@ import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -68,6 +66,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
* @help.summary Second pass of the recalibration. Uses the table generated by CountCovariates to update the base quality scores of the input bam file using a sequential table calculation making the base quality scores more accurately reflect the actual quality of the bases as mesaured by reference mismatch rate.
|
||||
*/
|
||||
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
|
||||
@WalkerName("TableRecalibration")
|
||||
@Requires({ DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES }) // This walker requires -I input.bam, it also requires -R reference.fasta
|
||||
public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||
|
|
|
|||
|
|
@ -36,12 +36,24 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
public class BAQ {
|
||||
private final static boolean DEBUG = false;
|
||||
|
||||
public enum Mode {
|
||||
public enum CalculationMode {
|
||||
NONE, // don't apply a BAQ at all, the default
|
||||
USE_TAG_ONLY, // only use the TAG, if present, in the reads
|
||||
ADD_TAG_ONLY, // calculate the BAQ, but write it into the reads as the BAQ tag, leaving QUAL field alone
|
||||
CALCULATE_AS_NECESSARY, // do HMM BAQ calculation on the fly, as necessary, if there's no tag
|
||||
RECALCULATE_ALWAYS // do HMM BAQ calculation on the fly, regardless of whether there's a tag present
|
||||
RECALCULATE // do HMM BAQ calculation on the fly, regardless of whether there's a tag present
|
||||
}
|
||||
|
||||
/** these are features that only the walker can override */
|
||||
public enum QualityMode {
|
||||
ADD_TAG, // calculate the BAQ, but write it into the reads as the BAQ tag, leaving QUAL field alone
|
||||
OVERWRITE_QUALS, // overwrite the quality field directly
|
||||
DONT_MODIFY // do the BAQ, but don't modify the quality scores themselves, just return them in the function.
|
||||
}
|
||||
|
||||
public enum ApplicationTime {
|
||||
FORBIDDEN, // Walker does not tolerate BAQ input
|
||||
ON_INPUT, // apply the BAQ calculation to the incoming reads, the default
|
||||
ON_OUTPUT, // apply the BAQ calculation to outgoing read streams
|
||||
HANDLED_IN_WALKER // the walker will deal with the BAQ calculation status itself
|
||||
}
|
||||
|
||||
public static final String BAQ_TAG = "BQ";
|
||||
|
|
@ -487,35 +499,36 @@ public class BAQ {
|
|||
* @param read
|
||||
* @param refReader
|
||||
* @param calculationType
|
||||
* @return
|
||||
* @return BQ qualities for use, in case qmode is DONT_MODIFY
|
||||
*/
|
||||
public SAMRecord baqRead(SAMRecord read, IndexedFastaSequenceFile refReader, Mode calculationType ) {
|
||||
public byte[] baqRead(SAMRecord read, IndexedFastaSequenceFile refReader, CalculationMode calculationType, QualityMode qmode ) {
|
||||
if ( DEBUG ) System.out.printf("BAQ %s read %s%n", calculationType, read.getReadName());
|
||||
if ( calculationType == Mode.NONE ) { // we don't want to do anything
|
||||
|
||||
byte[] BAQQuals = read.getBaseQualities(); // in general we are overwriting quals, so just get a pointer to them
|
||||
if ( calculationType == CalculationMode.NONE ) { // we don't want to do anything
|
||||
; // just fall though
|
||||
} else if ( excludeReadFromBAQ(read) ) {
|
||||
; // just fall through
|
||||
} else if ( calculationType == Mode.USE_TAG_ONLY ) {
|
||||
calcBAQFromTag(read, true, true);
|
||||
} else {
|
||||
if ( calculationType == Mode.RECALCULATE_ALWAYS || ! hasBAQTag(read) || calculationType == Mode.ADD_TAG_ONLY ) {
|
||||
if ( calculationType == CalculationMode.RECALCULATE || ! hasBAQTag(read) ) {
|
||||
if ( DEBUG ) System.out.printf(" Calculating BAQ on the fly%n");
|
||||
BAQCalculationResult hmmResult = calcBAQFromHMM(read, refReader);
|
||||
if ( hmmResult != null ) {
|
||||
if ( calculationType == Mode.ADD_TAG_ONLY )
|
||||
addBAQTag(read, hmmResult.bq);
|
||||
else
|
||||
// modify QUALs directly
|
||||
System.arraycopy(hmmResult.bq, 0, read.getBaseQualities(), 0, hmmResult.bq.length);
|
||||
switch ( qmode ) {
|
||||
case ADD_TAG: addBAQTag(read, hmmResult.bq); break;
|
||||
case OVERWRITE_QUALS: System.arraycopy(hmmResult.bq, 0, read.getBaseQualities(), 0, hmmResult.bq.length); break;
|
||||
case DONT_MODIFY: BAQQuals = hmmResult.bq; break;
|
||||
default: throw new ReviewedStingException("BUG: unexpected qmode " + qmode);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
} else if ( qmode == QualityMode.OVERWRITE_QUALS ) { // only makes sense if we are overwriting quals
|
||||
if ( DEBUG ) System.out.printf(" Taking BAQ from tag%n");
|
||||
// this overwrites the original qualities
|
||||
calcBAQFromTag(read, true, false);
|
||||
}
|
||||
}
|
||||
|
||||
return read;
|
||||
return BAQQuals;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.baq;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
|
@ -14,7 +15,8 @@ public class BAQSamIterator implements StingSAMIterator {
|
|||
StingSAMIterator it;
|
||||
BAQ baqHMM = new BAQ(); // creates a BAQ creator with default parameters
|
||||
IndexedFastaSequenceFile refReader = null;
|
||||
BAQ.Mode mode;
|
||||
BAQ.CalculationMode cmode;
|
||||
BAQ.QualityMode qmode;
|
||||
|
||||
/**
|
||||
* Creates a new BAMSamIterator using the reference getter refReader and applies the BAM to the reads coming
|
||||
|
|
@ -22,17 +24,28 @@ public class BAQSamIterator implements StingSAMIterator {
|
|||
*
|
||||
* @param refReader
|
||||
* @param it
|
||||
* @param mode
|
||||
* @param cmode
|
||||
* @param qmode
|
||||
*/
|
||||
public BAQSamIterator(IndexedFastaSequenceFile refReader, StingSAMIterator it, BAQ.Mode mode) {
|
||||
this.refReader =refReader;
|
||||
public BAQSamIterator(IndexedFastaSequenceFile refReader, StingSAMIterator it, BAQ.CalculationMode cmode, BAQ.QualityMode qmode) {
|
||||
if ( cmode == BAQ.CalculationMode.NONE ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with calculation mode NONE");
|
||||
if ( qmode == BAQ.QualityMode.DONT_MODIFY ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with quailty mode DONT_MODIFY");
|
||||
|
||||
this.refReader = refReader;
|
||||
this.it = it;
|
||||
this.mode = mode;
|
||||
this.cmode = cmode;
|
||||
this.qmode = qmode;
|
||||
}
|
||||
|
||||
public SAMRecord next() { return baqHMM.baqRead(it.next(), refReader, mode); }
|
||||
public boolean hasNext() { return this.it.hasNext(); }
|
||||
public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); }
|
||||
public void close() { it.close(); }
|
||||
public SAMRecord next() {
|
||||
System.out.printf("BAQing during input%n");
|
||||
SAMRecord read = it.next();
|
||||
baqHMM.baqRead(read, refReader, cmode, qmode);
|
||||
return read;
|
||||
}
|
||||
|
||||
public boolean hasNext() { return this.it.hasNext(); }
|
||||
public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); }
|
||||
public void close() { it.close(); }
|
||||
public Iterator<SAMRecord> iterator() { return this; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -124,6 +124,7 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
|||
cacheMisses++;
|
||||
result = super.getSubsequenceAt(contig, start, stop);
|
||||
} else {
|
||||
// todo -- potential optimization is to check if contig.name == contig, as this in generally will be true
|
||||
SAMSequenceRecord contigInfo = super.getSequenceDictionary().getSequence(contig);
|
||||
|
||||
if (stop > contigInfo.getSequenceLength())
|
||||
|
|
@ -132,7 +133,6 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
|||
synchronized (lock) { // access to shared cache information must be protected
|
||||
if ( start < cacheStart || stop > cacheStop || cache == null || cache.getContigIndex() != contigInfo.getSequenceIndex() ) {
|
||||
cacheMisses++;
|
||||
|
||||
cacheStart = Math.max(start - cacheMissBackup, 0);
|
||||
cacheStop = Math.min(cacheStart + cacheSize, contigInfo.getSequenceLength());
|
||||
cache = super.getSubsequenceAt(contig, cacheStart, cacheStop);
|
||||
|
|
|
|||
|
|
@ -148,7 +148,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
new ArrayList<SamRecordFilter>(),
|
||||
false,
|
||||
false,
|
||||
BAQ.Mode.NONE, null // no BAQ
|
||||
BAQ.CalculationMode.NONE, BAQ.QualityMode.DONT_MODIFY, null // no BAQ
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue