General infrastructure for ReadTransformers
-- These are like read filters but can be applied either on input, on output, of handled by the walker
-- Previous example of BAQ now uses the general framework
-- Resulted in massive conceptual cleanup of SAMDataSource and ReadProperties! Yeah!
-- BQSR now uses this framework. We can now do BQSR on input, on output, or within a walker
-- PrintReads now handles all read transformers in the walker in map, enabling us to parallelize PrintReads with BAQ and BQSR
-- Currently BQSR is excepting in parallel, which subsequent commit with fix
-- Removed global variable setting in GenomeAnalysisEngine for BAQ, as command line parameters are cleanly handled by ReadTransformer infrastructure
-- In principle ReadFilters are just a special kind of ReadTransformer, but this refactoring is larger than I can do. It's a JIRA entry
-- Many files touched simply due to the refactoring and renaming of classes
This commit is contained in:
parent
143fbead03
commit
817ece37a2
|
|
@ -27,24 +27,23 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.*;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
|
@ -52,6 +51,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
|
||||
import org.broadinstitute.sting.utils.fragments.FragmentUtils;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -101,7 +101,7 @@ import java.util.*;
|
|||
|
||||
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@PartitionBy(PartitionType.LOCUS)
|
||||
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
||||
@BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN)
|
||||
@ActiveRegionExtension(extension=65, maxRegion=300)
|
||||
public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implements AnnotatorCompatible {
|
||||
|
||||
|
|
|
|||
|
|
@ -42,6 +42,8 @@ import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
|||
import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
|
||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.Stub;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformersMode;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||
|
|
@ -49,8 +51,8 @@ import org.broadinstitute.sting.gatk.samples.SampleDB;
|
|||
import org.broadinstitute.sting.gatk.samples.SampleDBBuilder;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -131,6 +133,11 @@ public class GenomeAnalysisEngine {
|
|||
*/
|
||||
private Collection<ReadFilter> filters;
|
||||
|
||||
/**
|
||||
* Collection of the read transformers applied to the reads
|
||||
*/
|
||||
private List<ReadTransformer> readTransformers;
|
||||
|
||||
/**
|
||||
* Controls the allocation of threads between CPU vs IO.
|
||||
*/
|
||||
|
|
@ -354,6 +361,39 @@ public class GenomeAnalysisEngine {
|
|||
return Collections.unmodifiableList(filters);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of active, initialized read transformers
|
||||
*
|
||||
* @param walker the walker we need to apply read transformers too
|
||||
* @return a non-null list of read transformers
|
||||
*/
|
||||
public void initializeReadTransformers(final Walker walker) {
|
||||
final List<ReadTransformer> activeTransformers = new ArrayList<ReadTransformer>();
|
||||
|
||||
final ReadTransformersMode overrideMode = WalkerManager.getWalkerAnnotation(walker, ReadTransformersMode.class);
|
||||
final ReadTransformer.ApplicationTime overrideTime = overrideMode != null ? overrideMode.ApplicationTime() : null;
|
||||
|
||||
final PluginManager<ReadTransformer> pluginManager = new PluginManager<ReadTransformer>(ReadTransformer.class);
|
||||
|
||||
for ( final ReadTransformer transformer : pluginManager.createAllTypes() ) {
|
||||
transformer.initialize(overrideTime, this, walker);
|
||||
if ( transformer.enabled() )
|
||||
activeTransformers.add(transformer);
|
||||
}
|
||||
|
||||
setReadTransformers(activeTransformers);
|
||||
}
|
||||
|
||||
public List<ReadTransformer> getReadTransformers() {
|
||||
return readTransformers;
|
||||
}
|
||||
|
||||
private void setReadTransformers(final List<ReadTransformer> readTransformers) {
|
||||
if ( readTransformers == null )
|
||||
throw new ReviewedStingException("read transformers cannot be null");
|
||||
this.readTransformers = readTransformers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse out the thread allocation from the given command-line argument.
|
||||
*/
|
||||
|
|
@ -419,9 +459,6 @@ public class GenomeAnalysisEngine {
|
|||
argCollection.setDownsamplingMethod(method);
|
||||
}
|
||||
|
||||
public BAQ.QualityMode getWalkerBAQQualityMode() { return WalkerManager.getBAQQualityMode(walker); }
|
||||
public BAQ.ApplicationTime getWalkerBAQApplicationTime() { return WalkerManager.getBAQApplicationTime(walker); }
|
||||
|
||||
protected boolean includeReadsWithDeletionAtLoci() {
|
||||
return walker.includeReadsWithDeletionAtLoci();
|
||||
}
|
||||
|
|
@ -702,13 +739,12 @@ public class GenomeAnalysisEngine {
|
|||
protected void initializeDataSources() {
|
||||
logger.info("Strictness is " + argCollection.strictnessLevel);
|
||||
|
||||
// TODO -- REMOVE ME
|
||||
BAQ.DEFAULT_GOP = argCollection.BAQGOP;
|
||||
|
||||
validateSuppliedReference();
|
||||
setReferenceDataSource(argCollection.referenceFile);
|
||||
|
||||
validateSuppliedReads();
|
||||
initializeReadTransformers(walker);
|
||||
|
||||
readsDataSource = createReadsDataSource(argCollection,genomeLocParser,referenceDataSource.getReference());
|
||||
|
||||
for (ReadFilter filter : filters)
|
||||
|
|
@ -795,9 +831,6 @@ public class GenomeAnalysisEngine {
|
|||
// interrogating for the downsample method during command line recreation.
|
||||
setDownsamplingMethod(method);
|
||||
|
||||
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
|
||||
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
|
||||
|
||||
if (argCollection.removeProgramRecords && argCollection.keepProgramRecords)
|
||||
throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options");
|
||||
|
||||
|
|
@ -817,11 +850,8 @@ public class GenomeAnalysisEngine {
|
|||
method,
|
||||
new ValidationExclusion(Arrays.asList(argCollection.unsafe)),
|
||||
filters,
|
||||
readTransformers,
|
||||
includeReadsWithDeletionAtLoci(),
|
||||
getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_INPUT ? argCollection.BAQMode : BAQ.CalculationMode.OFF,
|
||||
getWalkerBAQQualityMode(),
|
||||
refReader,
|
||||
getBaseRecalibration(),
|
||||
argCollection.defaultBaseQualities,
|
||||
removeProgramRecords);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,15 +1,14 @@
|
|||
package org.broadinstitute.sting.gatk;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 14, 2009
|
||||
|
|
@ -34,12 +33,9 @@ public class ReadProperties {
|
|||
private final DownsamplingMethod downsamplingMethod;
|
||||
private final ValidationExclusion exclusionList;
|
||||
private final Collection<ReadFilter> supplementalFilters;
|
||||
private final List<ReadTransformer> readTransformers;
|
||||
private final boolean includeReadsWithDeletionAtLoci;
|
||||
private final boolean useOriginalBaseQualities;
|
||||
private final BAQ.CalculationMode cmode;
|
||||
private final BAQ.QualityMode qmode;
|
||||
private final IndexedFastaSequenceFile refReader; // read for BAQ, if desired
|
||||
private final BaseRecalibration bqsrApplier;
|
||||
private final byte defaultBaseQualities;
|
||||
|
||||
/**
|
||||
|
|
@ -95,6 +91,11 @@ public class ReadProperties {
|
|||
return supplementalFilters;
|
||||
}
|
||||
|
||||
|
||||
public List<ReadTransformer> getReadTransformers() {
|
||||
return readTransformers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return whether to use original base qualities.
|
||||
* @return Whether to use original base qualities.
|
||||
|
|
@ -103,16 +104,6 @@ public class ReadProperties {
|
|||
return useOriginalBaseQualities;
|
||||
}
|
||||
|
||||
|
||||
public BAQ.QualityMode getBAQQualityMode() { return qmode; }
|
||||
public BAQ.CalculationMode getBAQCalculationMode() { return cmode; }
|
||||
|
||||
public IndexedFastaSequenceFile getRefReader() {
|
||||
return refReader;
|
||||
}
|
||||
|
||||
public BaseRecalibration getBQSRApplier() { return bqsrApplier; }
|
||||
|
||||
/**
|
||||
* @return Default base quality value to fill reads missing base quality information.
|
||||
*/
|
||||
|
|
@ -134,9 +125,6 @@ public class ReadProperties {
|
|||
* @param includeReadsWithDeletionAtLoci if 'true', the base pileups sent to the walker's map() method
|
||||
* will explicitly list reads with deletion over the current reference base; otherwise, only observed
|
||||
* bases will be seen in the pileups, and the deletions will be skipped silently.
|
||||
* @param cmode How should we apply the BAQ calculation to the reads?
|
||||
* @param qmode How should we apply the BAQ calculation to the reads?
|
||||
* @param refReader if applyBAQ is true, must be a valid pointer to a indexed fasta file reads so we can get the ref bases for BAQ calculation
|
||||
* @param defaultBaseQualities if the reads have incomplete quality scores, set them all to defaultBaseQuality.
|
||||
*/
|
||||
public ReadProperties( Collection<SAMReaderID> samFiles,
|
||||
|
|
@ -146,11 +134,8 @@ public class ReadProperties {
|
|||
DownsamplingMethod downsamplingMethod,
|
||||
ValidationExclusion exclusionList,
|
||||
Collection<ReadFilter> supplementalFilters,
|
||||
List<ReadTransformer> readTransformers,
|
||||
boolean includeReadsWithDeletionAtLoci,
|
||||
BAQ.CalculationMode cmode,
|
||||
BAQ.QualityMode qmode,
|
||||
IndexedFastaSequenceFile refReader,
|
||||
BaseRecalibration bqsrApplier,
|
||||
byte defaultBaseQualities) {
|
||||
this.readers = samFiles;
|
||||
this.header = header;
|
||||
|
|
@ -158,12 +143,9 @@ public class ReadProperties {
|
|||
this.downsamplingMethod = downsamplingMethod == null ? DownsamplingMethod.NONE : downsamplingMethod;
|
||||
this.exclusionList = exclusionList == null ? new ValidationExclusion() : exclusionList;
|
||||
this.supplementalFilters = supplementalFilters;
|
||||
this.readTransformers = readTransformers;
|
||||
this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
|
||||
this.useOriginalBaseQualities = useOriginalBaseQualities;
|
||||
this.cmode = cmode;
|
||||
this.qmode = qmode;
|
||||
this.refReader = refReader;
|
||||
this.bqsrApplier = bqsrApplier;
|
||||
this.defaultBaseQualities = defaultBaseQualities;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,13 +29,14 @@ import org.broadinstitute.sting.commandline.Hidden;
|
|||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.filters.FilterManager;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.help.ResourceBundleExtractorDoclet;
|
||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||
|
||||
import java.lang.annotation.Annotation;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -319,11 +320,11 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
return downsamplingMethod;
|
||||
}
|
||||
|
||||
public static BAQ.QualityMode getBAQQualityMode(Walker walker) {
|
||||
return walker.getClass().getAnnotation(BAQMode.class).QualityMode();
|
||||
public static <T extends Annotation> T getWalkerAnnotation(final Walker walker, final Class<T> clazz) {
|
||||
return walker.getClass().getAnnotation(clazz);
|
||||
}
|
||||
|
||||
public static BAQ.ApplicationTime getBAQApplicationTime(Walker walker) {
|
||||
public static ReadTransformer.ApplicationTime getBAQApplicationTime(Walker walker) {
|
||||
return walker.getClass().getAnnotation(BAQMode.class).ApplicationTime();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.picard.sam.MergingSamRecordIterator;
|
||||
import net.sf.picard.sam.SamFileHeaderMerger;
|
||||
import net.sf.samtools.*;
|
||||
|
|
@ -42,12 +41,9 @@ import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.baq.BAQSamIterator;
|
||||
import org.broadinstitute.sting.utils.baq.ReadTransformingIterator;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.recalibration.BQSRSamIterator;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -200,11 +196,8 @@ public class SAMDataSource {
|
|||
downsamplingMethod,
|
||||
exclusionList,
|
||||
supplementalFilters,
|
||||
Collections.<ReadTransformer>emptyList(),
|
||||
includeReadsWithDeletionAtLoci,
|
||||
BAQ.CalculationMode.OFF,
|
||||
BAQ.QualityMode.DONT_MODIFY,
|
||||
null, // no BAQ
|
||||
null, // no BQSR
|
||||
(byte) -1,
|
||||
false);
|
||||
}
|
||||
|
|
@ -234,11 +227,8 @@ public class SAMDataSource {
|
|||
DownsamplingMethod downsamplingMethod,
|
||||
ValidationExclusion exclusionList,
|
||||
Collection<ReadFilter> supplementalFilters,
|
||||
List<ReadTransformer> readTransformers,
|
||||
boolean includeReadsWithDeletionAtLoci,
|
||||
BAQ.CalculationMode cmode,
|
||||
BAQ.QualityMode qmode,
|
||||
IndexedFastaSequenceFile refReader,
|
||||
BaseRecalibration bqsrApplier,
|
||||
byte defaultBaseQualities,
|
||||
boolean removeProgramRecords) {
|
||||
this.readMetrics = new ReadMetrics();
|
||||
|
|
@ -308,11 +298,8 @@ public class SAMDataSource {
|
|||
downsamplingMethod,
|
||||
exclusionList,
|
||||
supplementalFilters,
|
||||
readTransformers,
|
||||
includeReadsWithDeletionAtLoci,
|
||||
cmode,
|
||||
qmode,
|
||||
refReader,
|
||||
bqsrApplier,
|
||||
defaultBaseQualities);
|
||||
|
||||
// cache the read group id (original) -> read group id (merged)
|
||||
|
|
@ -603,10 +590,7 @@ public class SAMDataSource {
|
|||
readProperties.getDownsamplingMethod().toFraction,
|
||||
readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||
readProperties.getSupplementalFilters(),
|
||||
readProperties.getBAQCalculationMode(),
|
||||
readProperties.getBAQQualityMode(),
|
||||
readProperties.getRefReader(),
|
||||
readProperties.getBQSRApplier(),
|
||||
readProperties.getReadTransformers(),
|
||||
readProperties.defaultBaseQualities());
|
||||
}
|
||||
|
||||
|
|
@ -673,10 +657,7 @@ public class SAMDataSource {
|
|||
Double downsamplingFraction,
|
||||
Boolean noValidationOfReadOrder,
|
||||
Collection<ReadFilter> supplementalFilters,
|
||||
BAQ.CalculationMode cmode,
|
||||
BAQ.QualityMode qmode,
|
||||
IndexedFastaSequenceFile refReader,
|
||||
BaseRecalibration bqsrApplier,
|
||||
List<ReadTransformer> readTransformers,
|
||||
byte defaultBaseQualities) {
|
||||
|
||||
// *********************************************************************************** //
|
||||
|
|
@ -698,11 +679,11 @@ public class SAMDataSource {
|
|||
// only wrap if we are replacing the original qualities or using a default base quality
|
||||
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities);
|
||||
|
||||
if (bqsrApplier != null)
|
||||
wrappedIterator = new BQSRSamIterator(wrappedIterator, bqsrApplier);
|
||||
|
||||
if (cmode != BAQ.CalculationMode.OFF)
|
||||
wrappedIterator = new BAQSamIterator(refReader, wrappedIterator, cmode, qmode);
|
||||
// set up read transformers
|
||||
for ( final ReadTransformer readTransformer : readTransformers ) {
|
||||
if ( readTransformer.enabled() && readTransformer.getApplicationTime() == ReadTransformer.ApplicationTime.ON_INPUT )
|
||||
wrappedIterator = new ReadTransformingIterator(wrappedIterator, readTransformer);
|
||||
}
|
||||
|
||||
return wrappedIterator;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,12 +31,16 @@ import net.sf.samtools.SAMRecord;
|
|||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* A stub for routing and management of SAM file reading and writing.
|
||||
|
|
@ -116,15 +120,15 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
|
|||
*/
|
||||
private boolean simplifyBAM = false;
|
||||
|
||||
private List<ReadTransformer> onOutputReadTransformers = null;
|
||||
|
||||
/**
|
||||
* Create a new stub given the requested SAM file and compression level.
|
||||
* @param engine source of header data, maybe other data about input files.
|
||||
* @param samFile SAM file to (ultimately) create.
|
||||
*/
|
||||
public SAMFileWriterStub( GenomeAnalysisEngine engine, File samFile ) {
|
||||
this.engine = engine;
|
||||
this.samFile = samFile;
|
||||
this.samOutputStream = null;
|
||||
this(engine, samFile, null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -133,8 +137,12 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
|
|||
* @param stream Output stream to which data should be written.
|
||||
*/
|
||||
public SAMFileWriterStub( GenomeAnalysisEngine engine, OutputStream stream ) {
|
||||
this(engine, null, stream);
|
||||
}
|
||||
|
||||
private SAMFileWriterStub(final GenomeAnalysisEngine engine, final File samFile, final OutputStream stream) {
|
||||
this.engine = engine;
|
||||
this.samFile = null;
|
||||
this.samFile = samFile;
|
||||
this.samOutputStream = stream;
|
||||
}
|
||||
|
||||
|
|
@ -274,17 +282,29 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
|
|||
this.headerOverride = header;
|
||||
}
|
||||
|
||||
private void initializeReadTransformers() {
|
||||
this.onOutputReadTransformers = new ArrayList<ReadTransformer>(engine.getReadTransformers().size());
|
||||
for ( final ReadTransformer transformer : engine.getReadTransformers() ) {
|
||||
if ( transformer.getApplicationTime() == ReadTransformer.ApplicationTime.ON_OUTPUT )
|
||||
onOutputReadTransformers.add(transformer);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @{inheritDoc}
|
||||
*/
|
||||
public void addAlignment( SAMRecord alignment ) {
|
||||
if ( engine.getArguments().BAQMode != BAQ.CalculationMode.OFF && engine.getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_OUTPUT ) {
|
||||
//System.out.printf("Writing BAQ at OUTPUT TIME%n");
|
||||
baqHMM.baqRead(alignment, engine.getReferenceDataSource().getReference(), engine.getArguments().BAQMode, engine.getWalkerBAQQualityMode());
|
||||
}
|
||||
public void addAlignment( final SAMRecord readIn ) {
|
||||
if ( onOutputReadTransformers == null )
|
||||
initializeReadTransformers();
|
||||
|
||||
GATKSAMRecord workingRead = (GATKSAMRecord)readIn;
|
||||
|
||||
// run on output read transformers
|
||||
for ( final ReadTransformer transform : onOutputReadTransformers )
|
||||
workingRead = transform.apply(workingRead);
|
||||
|
||||
writeStarted = true;
|
||||
outputTracker.getStorage(this).addAlignment(alignment);
|
||||
outputTracker.getStorage(this).addAlignment(workingRead);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -0,0 +1,144 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* Baseclass used to describe a read transformer like BAQ and BQSR
|
||||
*
|
||||
* Read transformers are plugable infrastructure that modify read state
|
||||
* either on input, on output, or within walkers themselves.
|
||||
*
|
||||
* The function apply() is called on each read seen by the GATK (after passing
|
||||
* all ReadFilters) and it can do as it sees fit (without modifying the alignment)
|
||||
* to the read to change qualities, add tags, etc.
|
||||
*
|
||||
* Initialize is called once right before the GATK traversal begins providing
|
||||
* the ReadTransformer with the ability to collect and initialize data from the
|
||||
* engine.
|
||||
*
|
||||
* Note that all ReadTransformers within the classpath are created and initialized. If one
|
||||
* shouldn't be run it should look at the command line options of the engine and override
|
||||
* the enabled.
|
||||
*
|
||||
* @since 8/31/12
|
||||
* @author depristo
|
||||
*/
|
||||
abstract public class ReadTransformer {
|
||||
/**
|
||||
* When should this read transform be applied?
|
||||
*/
|
||||
private ApplicationTime applicationTime;
|
||||
|
||||
/**
|
||||
* Keep track of whether we've been initialized already, and ensure it's not called more than once.
|
||||
*/
|
||||
private boolean initialized = false;
|
||||
|
||||
protected ReadTransformer() {}
|
||||
|
||||
/**
|
||||
* Master initialization routine. Called to setup a ReadTransform, using it's overloaded initialialSub routine.
|
||||
*
|
||||
* @param overrideTime if not null, we will run this ReadTransform at the time provided, regardless of the timing of this read transformer itself
|
||||
* @param engine the engine, for initializing values
|
||||
* @param walker the walker we intend to run
|
||||
*/
|
||||
@Requires({"initialized == false", "engine != null", "walker == null"})
|
||||
@Ensures("initialized == true")
|
||||
public final void initialize(final ApplicationTime overrideTime, final GenomeAnalysisEngine engine, final Walker walker) {
|
||||
if ( engine == null ) throw new IllegalArgumentException("engine cannot be null");
|
||||
if ( walker == null ) throw new IllegalArgumentException("walker cannot be null");
|
||||
|
||||
this.applicationTime = initializeSub(engine, walker);
|
||||
if ( overrideTime != null ) this.applicationTime = overrideTime;
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Subclasses must override this to initialize themeselves
|
||||
*
|
||||
* @param engine the engine, for initializing values
|
||||
* @param walker the walker we intend to run
|
||||
* @return the point of time we'd like this read transform to be run
|
||||
*/
|
||||
@Requires({"engine != null", "walker != null"})
|
||||
@Ensures("result != null")
|
||||
protected abstract ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker);
|
||||
|
||||
/**
|
||||
* Should this ReadTransformer be activated? Called after initialize, which allows this
|
||||
* read transformer to look at its arguments and decide if it should be active. All
|
||||
* ReadTransformers must override this, as by default they are not enabled.
|
||||
*
|
||||
* @return true if this ReadTransformer should be used on the read stream
|
||||
*/
|
||||
public boolean enabled() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Has this transformer been initialized?
|
||||
*
|
||||
* @return true if it has
|
||||
*/
|
||||
public final boolean isInitialized() {
|
||||
return initialized;
|
||||
}
|
||||
|
||||
/**
|
||||
* When should we apply this read transformer?
|
||||
*
|
||||
* @return true if yes
|
||||
*/
|
||||
public final ApplicationTime getApplicationTime() {
|
||||
return applicationTime;
|
||||
}
|
||||
|
||||
/**
|
||||
* Primary interface function for a read transform to actually do some work
|
||||
*
|
||||
* The function apply() is called on each read seen by the GATK (after passing
|
||||
* all ReadFilters) and it can do as it sees fit (without modifying the alignment)
|
||||
* to the read to change qualities, add tags, etc.
|
||||
*
|
||||
* @param read the read to transform
|
||||
* @return the transformed read
|
||||
*/
|
||||
@Requires("read != null")
|
||||
@Ensures("result != null")
|
||||
abstract public GATKSAMRecord apply(final GATKSAMRecord read);
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getSimpleName();
|
||||
}
|
||||
|
||||
/**
|
||||
* When should a read transformer be applied?
|
||||
*/
|
||||
public static enum ApplicationTime {
|
||||
/**
|
||||
* Walker does not tolerate this read transformer
|
||||
*/
|
||||
FORBIDDEN,
|
||||
|
||||
/**
|
||||
* apply the transformation to the incoming reads, the default
|
||||
*/
|
||||
ON_INPUT,
|
||||
|
||||
/**
|
||||
* apply the transformation to the outgoing read stream
|
||||
*/
|
||||
ON_OUTPUT,
|
||||
|
||||
/**
|
||||
* the walker will deal with the calculation itself
|
||||
*/
|
||||
HANDLED_IN_WALKER
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import java.lang.annotation.*;
|
||||
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 14, 2009
|
||||
* Time: 1:51:22 PM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Allows the walker to indicate what type of data it wants to consume.
|
||||
*/
|
||||
|
||||
@Documented
|
||||
@Inherited
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.TYPE)
|
||||
public @interface ReadTransformersMode {
|
||||
public abstract ReadTransformer.ApplicationTime ApplicationTime() default ReadTransformer.ApplicationTime.ON_INPUT;
|
||||
}
|
||||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
|
||||
import java.lang.annotation.*;
|
||||
|
||||
/**
|
||||
|
|
@ -25,5 +27,5 @@ import java.lang.annotation.*;
|
|||
@Target(ElementType.TYPE)
|
||||
public @interface BAQMode {
|
||||
public abstract org.broadinstitute.sting.utils.baq.BAQ.QualityMode QualityMode() default org.broadinstitute.sting.utils.baq.BAQ.QualityMode.OVERWRITE_QUALS;
|
||||
public abstract org.broadinstitute.sting.utils.baq.BAQ.ApplicationTime ApplicationTime() default org.broadinstitute.sting.utils.baq.BAQ.ApplicationTime.ON_INPUT;
|
||||
public abstract ReadTransformer.ApplicationTime ApplicationTime() default ReadTransformer.ApplicationTime.ON_INPUT;
|
||||
}
|
||||
|
|
@ -32,6 +32,8 @@ import org.broadinstitute.sting.commandline.Output;
|
|||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformersMode;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
|
|
@ -91,7 +93,8 @@ import java.util.TreeSet;
|
|||
*
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
|
||||
@ReadTransformersMode(ApplicationTime = ReadTransformer.ApplicationTime.HANDLED_IN_WALKER)
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = ReadTransformer.ApplicationTime.HANDLED_IN_WALKER)
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> implements ThreadSafeMapReduce {
|
||||
|
||||
|
|
@ -217,11 +220,20 @@ public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> impleme
|
|||
* The reads map function.
|
||||
*
|
||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||
* @param read the read itself, as a GATKSAMRecord
|
||||
* @param readIn the read itself, as a GATKSAMRecord
|
||||
* @return the read itself
|
||||
*/
|
||||
public GATKSAMRecord map( ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker ) {
|
||||
return simplifyReads ? read.simplify() : read;
|
||||
public GATKSAMRecord map( ReferenceContext ref, GATKSAMRecord readIn, RefMetaDataTracker metaDataTracker ) {
|
||||
GATKSAMRecord workingRead = readIn;
|
||||
|
||||
for ( final ReadTransformer transformer : getToolkit().getReadTransformers() ) {
|
||||
if ( logger.isDebugEnabled() ) logger.debug("Applying transformer " + transformer + " to read " + readIn.getReadName());
|
||||
workingRead = transformer.apply(workingRead);
|
||||
}
|
||||
|
||||
if ( simplifyReads ) workingRead = workingRead.simplify();
|
||||
|
||||
return workingRead;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -30,12 +30,14 @@ import org.apache.log4j.Logger;
|
|||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.samples.SampleDB;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.recalibration.BQSRMode;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -48,7 +50,8 @@ import java.util.List;
|
|||
*/
|
||||
@ReadFilters(MalformedReadFilter.class)
|
||||
@PartitionBy(PartitionType.NONE)
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT)
|
||||
@BQSRMode(ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT)
|
||||
@DocumentedGATKFeature(groupName = "Uncategorized", extraDocs = {CommandLineGATK.class})
|
||||
public abstract class Walker<MapType, ReduceType> {
|
||||
final protected static Logger logger = Logger.getLogger(Walker.class);
|
||||
|
|
|
|||
|
|
@ -32,10 +32,9 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -46,6 +45,7 @@ import org.broadinstitute.sting.utils.recalibration.QuantizationInfo;
|
|||
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
|
|
@ -104,7 +104,7 @@ import java.util.ArrayList;
|
|||
*/
|
||||
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
||||
@BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN)
|
||||
@By(DataSource.READS)
|
||||
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||
|
|
@ -117,7 +118,7 @@ import java.util.*;
|
|||
*/
|
||||
|
||||
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT)
|
||||
@ReadFilters( {BadMateFilter.class, MappingQualityUnavailableFilter.class} )
|
||||
@Reference(window=@Window(start=-200,stop=200))
|
||||
@By(DataSource.REFERENCE)
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
|
|||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.BAQMode;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
|
|
@ -111,7 +112,7 @@ import java.util.*;
|
|||
* @author ebanks
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = ReadTransformer.ApplicationTime.ON_OUTPUT)
|
||||
public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||
|
||||
public static final String ORIGINAL_CIGAR_TAG = "OC";
|
||||
|
|
|
|||
|
|
@ -33,10 +33,10 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.*;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -101,7 +101,7 @@ import java.util.TreeSet;
|
|||
@Reference(window=@Window(start=-1,stop=50))
|
||||
@Allows(value={DataSource.READS, DataSource.REFERENCE})
|
||||
@By(DataSource.REFERENCE)
|
||||
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
||||
@BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN)
|
||||
public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Event, RealignerTargetCreator.EventPair> implements TreeReducible<RealignerTargetCreator.EventPair> {
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -52,13 +52,6 @@ public class BAQ {
|
|||
DONT_MODIFY // do the BAQ, but don't modify the quality scores themselves, just return them in the function.
|
||||
}
|
||||
|
||||
public enum ApplicationTime {
|
||||
FORBIDDEN, // Walker does not tolerate BAQ input
|
||||
ON_INPUT, // apply the BAQ calculation to the incoming reads, the default
|
||||
ON_OUTPUT, // apply the BAQ calculation to outgoing read streams
|
||||
HANDLED_IN_WALKER // the walker will deal with the BAQ calculation status itself
|
||||
}
|
||||
|
||||
public static final String BAQ_TAG = "BQ";
|
||||
|
||||
private static double[] qual2prob = new double[256];
|
||||
|
|
@ -68,7 +61,7 @@ public class BAQ {
|
|||
}
|
||||
|
||||
// Phred scaled now (changed 1/10/2011)
|
||||
public static double DEFAULT_GOP = 40;
|
||||
public static final double DEFAULT_GOP = 40;
|
||||
|
||||
/* Takes a Phred Scale quality score and returns the error probability.
|
||||
*
|
||||
|
|
@ -110,10 +103,19 @@ public class BAQ {
|
|||
* Use defaults for everything
|
||||
*/
|
||||
public BAQ() {
|
||||
cd = convertFromPhredScale(DEFAULT_GOP);
|
||||
this(DEFAULT_GOP);
|
||||
}
|
||||
|
||||
/**
|
||||
* Use defaults for everything
|
||||
*/
|
||||
public BAQ(final double gapOpenPenalty) {
|
||||
cd = convertFromPhredScale(gapOpenPenalty);
|
||||
initializeCachedData();
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Create a new HmmGlocal object with specified parameters
|
||||
*
|
||||
|
|
|
|||
|
|
@ -0,0 +1,49 @@
|
|||
package org.broadinstitute.sting.utils.baq;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.walkers.BAQMode;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* Applies Heng's BAQ calculation to a stream of incoming reads
|
||||
*/
|
||||
public class BAQReadTransformer extends ReadTransformer {
|
||||
private BAQ baqHMM;
|
||||
private IndexedFastaSequenceFile refReader;
|
||||
private BAQ.CalculationMode cmode;
|
||||
private BAQ.QualityMode qmode;
|
||||
|
||||
@Override
|
||||
public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) {
|
||||
final BAQMode mode = WalkerManager.getWalkerAnnotation(walker, BAQMode.class);
|
||||
this.refReader = engine.getReferenceDataSource().getReference();
|
||||
this.cmode = engine.getArguments().BAQMode;
|
||||
this.qmode = mode.QualityMode();
|
||||
baqHMM = new BAQ(engine.getArguments().BAQGOP);
|
||||
|
||||
if ( qmode == BAQ.QualityMode.DONT_MODIFY )
|
||||
throw new ReviewedStingException("BUG: shouldn't create BAQ transformer with quality mode DONT_MODIFY");
|
||||
|
||||
if ( mode.ApplicationTime() == ReadTransformer.ApplicationTime.FORBIDDEN && enabled() )
|
||||
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + cmode + " was requested.");
|
||||
|
||||
return mode.ApplicationTime();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean enabled() {
|
||||
return cmode != BAQ.CalculationMode.OFF;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GATKSAMRecord apply(final GATKSAMRecord read) {
|
||||
baqHMM.baqRead(read, refReader, cmode, qmode);
|
||||
return read;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,59 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.baq;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Simple iterator that applies Heng's BAQ calculation to a stream of incoming reads
|
||||
*/
|
||||
public class BAQSamIterator implements StingSAMIterator {
|
||||
private final StingSAMIterator it;
|
||||
private final BAQ baqHMM = new BAQ(); // creates a BAQ creator with default parameters
|
||||
private final IndexedFastaSequenceFile refReader;
|
||||
private final BAQ.CalculationMode cmode;
|
||||
private final BAQ.QualityMode qmode;
|
||||
|
||||
/**
|
||||
* Creates a new BAMSamIterator using the reference getter refReader and applies the BAM to the reads coming
|
||||
* in from it. See BAQ docs for baqType information.
|
||||
*
|
||||
* @param refReader
|
||||
* @param it
|
||||
* @param cmode
|
||||
* @param qmode
|
||||
*/
|
||||
@Requires({
|
||||
"refReader != null",
|
||||
"it != null",
|
||||
"cmode != null" ,
|
||||
"qmode != null"})
|
||||
public BAQSamIterator(IndexedFastaSequenceFile refReader, StingSAMIterator it, BAQ.CalculationMode cmode, BAQ.QualityMode qmode) {
|
||||
if ( cmode == BAQ.CalculationMode.OFF ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with calculation mode OFF");
|
||||
if ( qmode == BAQ.QualityMode.DONT_MODIFY ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with quailty mode DONT_MODIFY");
|
||||
|
||||
this.refReader = refReader;
|
||||
this.it = it;
|
||||
this.cmode = cmode;
|
||||
this.qmode = qmode;
|
||||
}
|
||||
|
||||
@Requires("hasNext()")
|
||||
@Ensures("result != null")
|
||||
public SAMRecord next() {
|
||||
//System.out.printf("BAQing during input%n");
|
||||
SAMRecord read = it.next();
|
||||
baqHMM.baqRead(read, refReader, cmode, qmode);
|
||||
return read;
|
||||
}
|
||||
|
||||
public boolean hasNext() { return this.it.hasNext(); }
|
||||
public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); }
|
||||
public void close() { it.close(); }
|
||||
public Iterator<SAMRecord> iterator() { return this; }
|
||||
}
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
package org.broadinstitute.sting.utils.baq;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Iterator that applies a ReadTransformer to a stream of reads
|
||||
*/
|
||||
public class ReadTransformingIterator implements StingSAMIterator {
|
||||
private final StingSAMIterator it;
|
||||
private final ReadTransformer transformer;
|
||||
|
||||
/**
|
||||
* Creates a new ReadTransforming iterator
|
||||
*/
|
||||
@Requires({"it != null", "engine != null", "transformer != null", "transformer.isInitialized()"})
|
||||
public ReadTransformingIterator(final StingSAMIterator it, final ReadTransformer transformer) {
|
||||
if ( ! transformer.isInitialized() )
|
||||
throw new IllegalStateException("Creating a read transformer stream for an uninitialized read transformer: " + transformer);
|
||||
if ( transformer.getApplicationTime() == ReadTransformer.ApplicationTime.FORBIDDEN )
|
||||
throw new IllegalStateException("Creating a read transformer stream for a forbidden transformer " + transformer);
|
||||
|
||||
this.it = it;
|
||||
this.transformer = transformer;
|
||||
}
|
||||
|
||||
@Requires("hasNext()")
|
||||
@Ensures("result != null")
|
||||
public SAMRecord next() {
|
||||
final GATKSAMRecord read = (GATKSAMRecord)it.next();
|
||||
return transformer.apply(read);
|
||||
}
|
||||
|
||||
public boolean hasNext() { return this.it.hasNext(); }
|
||||
public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); }
|
||||
public void close() { it.close(); }
|
||||
public Iterator<SAMRecord> iterator() { return this; }
|
||||
}
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
|
||||
import java.lang.annotation.*;
|
||||
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 14, 2009
|
||||
* Time: 1:51:22 PM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Allows the walker to indicate what type of data it wants to consume.
|
||||
*/
|
||||
|
||||
@Documented
|
||||
@Inherited
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.TYPE)
|
||||
public @interface BQSRMode {
|
||||
public abstract ReadTransformer.ApplicationTime ApplicationTime() default ReadTransformer.ApplicationTime.ON_INPUT;
|
||||
}
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* A ReadTransformer that applies BQSR on the fly to reads
|
||||
*
|
||||
* User: rpoplin
|
||||
* Date: 2/13/12
|
||||
*/
|
||||
public class BQSRReadTransformer extends ReadTransformer {
|
||||
private boolean enabled;
|
||||
private BaseRecalibration bqsr;
|
||||
|
||||
@Override
|
||||
public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) {
|
||||
this.enabled = engine.hasBaseRecalibration();
|
||||
this.bqsr = engine.getBaseRecalibration();
|
||||
final BQSRMode mode = WalkerManager.getWalkerAnnotation(walker, BQSRMode.class);
|
||||
return mode.ApplicationTime();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean enabled() {
|
||||
return enabled;
|
||||
}
|
||||
|
||||
/**
|
||||
* initialize a new BQSRReadTransformer that applies BQSR on the fly to incoming reads.
|
||||
*/
|
||||
@Override
|
||||
public GATKSAMRecord apply(GATKSAMRecord read) {
|
||||
bqsr.recalibrateRead(read);
|
||||
return read;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,50 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: 2/13/12
|
||||
*/
|
||||
|
||||
public class BQSRSamIterator implements StingSAMIterator {
|
||||
private final StingSAMIterator it;
|
||||
private final BaseRecalibration bqsr;
|
||||
|
||||
/**
|
||||
* Creates a new BQSRSamIterator and applies BQSR on the fly to incoming reads.
|
||||
*
|
||||
* @param it The incoming SamIterator to wrap
|
||||
* @param bqsr The object which holds the BQSR table information and knows how to apply it
|
||||
*/
|
||||
@Requires({
|
||||
"it != null",
|
||||
"bqsr != null"})
|
||||
public BQSRSamIterator(StingSAMIterator it, BaseRecalibration bqsr) {
|
||||
if ( bqsr == null ) throw new ReviewedStingException("BUG: shouldn't create BQSRSamIterator with null recalibration object");
|
||||
|
||||
this.it = it;
|
||||
this.bqsr = bqsr;
|
||||
}
|
||||
|
||||
@Requires("hasNext()")
|
||||
@Ensures("result != null")
|
||||
public SAMRecord next() {
|
||||
SAMRecord read = it.next();
|
||||
bqsr.recalibrateRead((GATKSAMRecord) read);
|
||||
return read;
|
||||
}
|
||||
|
||||
public boolean hasNext() { return this.it.hasNext(); }
|
||||
public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); }
|
||||
public void close() { it.close(); }
|
||||
public Iterator<SAMRecord> iterator() { return this; }
|
||||
}
|
||||
Loading…
Reference in New Issue