Merge branch 'master' of ssh://gsa1.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
c8c06c7753
|
|
@ -107,7 +107,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
}
|
}
|
||||||
|
|
||||||
// If this is the last pileup for this shard calculate the minimum alignment start so that we know
|
// If this is the last pileup for this shard calculate the minimum alignment start so that we know
|
||||||
// which active regions in the work queue are now safe to process
|
// which active regions in the work queue are now safe to process
|
||||||
if( !locusView.hasNext() ) {
|
if( !locusView.hasNext() ) {
|
||||||
for( final PileupElement p : locus.getBasePileup() ) {
|
for( final PileupElement p : locus.getBasePileup() ) {
|
||||||
final GATKSAMRecord read = p.getRead();
|
final GATKSAMRecord read = p.getRead();
|
||||||
|
|
@ -135,7 +135,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Since we've sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them
|
// Since we've traversed sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them
|
||||||
while( workQueue.peek() != null && (workQueue.peek().getExtendedLoc().getStop() < minStart || !workQueue.peek().getExtendedLoc().getContig().equals(dataProvider.getLocus().getContig())) ) {
|
while( workQueue.peek() != null && (workQueue.peek().getExtendedLoc().getStop() < minStart || !workQueue.peek().getExtendedLoc().getContig().equals(dataProvider.getLocus().getContig())) ) {
|
||||||
final ActiveRegion activeRegion = workQueue.remove();
|
final ActiveRegion activeRegion = workQueue.remove();
|
||||||
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
|
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
|
||||||
|
|
@ -190,7 +190,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
|
reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
|
||||||
|
|
||||||
logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLoc());
|
logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLoc());
|
||||||
final M x = walker.map( activeRegion, null ); // BUGBUG: tracker needs to be filled in and passed to the walker
|
final M x = walker.map( activeRegion, null );
|
||||||
return walker.reduce( x, sum );
|
return walker.reduce( x, sum );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,14 +5,12 @@ import org.broad.tribble.Feature;
|
||||||
import org.broadinstitute.sting.commandline.Input;
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
import org.broadinstitute.sting.commandline.IntervalBinding;
|
import org.broadinstitute.sting.commandline.IntervalBinding;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
import org.broadinstitute.sting.commandline.RodBinding;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
|
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
|
||||||
import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter;
|
import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter;
|
||||||
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
|
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
|
||||||
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
@ -77,7 +75,7 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
|
||||||
public abstract double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
public abstract double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
||||||
|
|
||||||
// Map over the ActiveRegion
|
// Map over the ActiveRegion
|
||||||
public abstract MapType map(final ActiveRegion activeRegion, final ReadMetaDataTracker metaDataTracker);
|
public abstract MapType map(final ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker);
|
||||||
|
|
||||||
public final GenomeLocSortedSet extendIntervals( final GenomeLocSortedSet intervals, final GenomeLocParser genomeLocParser, IndexedFastaSequenceFile reference ) {
|
public final GenomeLocSortedSet extendIntervals( final GenomeLocSortedSet intervals, final GenomeLocParser genomeLocParser, IndexedFastaSequenceFile reference ) {
|
||||||
final int activeRegionExtension = this.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
final int activeRegionExtension = this.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
||||||
|
|
|
||||||
|
|
@ -22,8 +22,8 @@ import java.util.*;
|
||||||
* User: chartl
|
* User: chartl
|
||||||
* Date: 9/14/11
|
* Date: 9/14/11
|
||||||
* Time: 12:24 PM
|
* Time: 12:24 PM
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
|
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
|
||||||
|
|
||||||
private MendelianViolation mendelianViolation = null;
|
private MendelianViolation mendelianViolation = null;
|
||||||
|
|
|
||||||
|
|
@ -98,4 +98,9 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||||
|
@Override
|
||||||
|
public final Comparable getValue(final String str) {
|
||||||
|
return str;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,7 @@ public interface Covariate {
|
||||||
*/
|
*/
|
||||||
public CovariateValues getValues(GATKSAMRecord read);
|
public CovariateValues getValues(GATKSAMRecord read);
|
||||||
|
|
||||||
|
public Comparable getValue(String str); // Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RequiredCovariate extends Covariate {}
|
interface RequiredCovariate extends Covariate {}
|
||||||
|
|
|
||||||
|
|
@ -15,18 +15,18 @@ public class CovariateKeySet {
|
||||||
|
|
||||||
private int nextCovariateIndex;
|
private int nextCovariateIndex;
|
||||||
|
|
||||||
private final String mismatchesCovariateName = "M";
|
public final static String mismatchesCovariateName = "M";
|
||||||
private final String insertionsCovariateName = "I";
|
public final static String insertionsCovariateName = "I";
|
||||||
private final String deletionsCovariateName = "D";
|
public final static String deletionsCovariateName = "D";
|
||||||
|
|
||||||
public CovariateKeySet(int readLength, int numberOfCovariates) {
|
public CovariateKeySet(int readLength, int numberOfCovariates) {
|
||||||
numberOfCovariates++; // +1 because we are adding the mismatch covariate (to comply with the molten table format)
|
numberOfCovariates++; // +1 because we are adding the mismatch covariate (to comply with the molten table format)
|
||||||
this.mismatchesKeySet = new Object[readLength][numberOfCovariates];
|
this.mismatchesKeySet = new Object[readLength][numberOfCovariates];
|
||||||
this.insertionsKeySet = new Object[readLength][numberOfCovariates];
|
this.insertionsKeySet = new Object[readLength][numberOfCovariates];
|
||||||
this.deletionsKeySet = new Object[readLength][numberOfCovariates];
|
this.deletionsKeySet = new Object[readLength][numberOfCovariates];
|
||||||
initializeCovariateKeySet(this.mismatchesKeySet, this.mismatchesCovariateName);
|
initializeCovariateKeySet(this.mismatchesKeySet, mismatchesCovariateName);
|
||||||
initializeCovariateKeySet(this.insertionsKeySet, this.insertionsCovariateName);
|
initializeCovariateKeySet(this.insertionsKeySet, insertionsCovariateName);
|
||||||
initializeCovariateKeySet(this.deletionsKeySet, this.deletionsCovariateName);
|
initializeCovariateKeySet(this.deletionsKeySet, deletionsCovariateName);
|
||||||
this.nextCovariateIndex = 0;
|
this.nextCovariateIndex = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -196,4 +196,9 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
return new CovariateValues(cycles, cycles, cycles);
|
return new CovariateValues(cycles, cycles, cycles);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||||
|
@Override
|
||||||
|
public final Comparable getValue(final String str) {
|
||||||
|
return Integer.parseInt(str);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -39,39 +39,35 @@ import java.util.Arrays;
|
||||||
|
|
||||||
public class QualityScoreCovariate implements RequiredCovariate {
|
public class QualityScoreCovariate implements RequiredCovariate {
|
||||||
|
|
||||||
private byte defaultMismatchesQuality; // walker parameter. Must be > 0 to be used, otherwise we use the quality from the read.
|
|
||||||
private byte defaultInsertionsQuality; // walker parameter. Must be > 0 to be used, otherwise we use the quality from the read.
|
|
||||||
private byte defaultDeletionsQuality; // walker parameter. Must be > 0 to be used, otherwise we use the quality from the read.
|
|
||||||
|
|
||||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||||
@Override
|
@Override
|
||||||
public void initialize(final RecalibrationArgumentCollection RAC) {
|
public void initialize(final RecalibrationArgumentCollection RAC) {
|
||||||
defaultMismatchesQuality = RAC.MISMATCHES_DEFAULT_QUALITY;
|
|
||||||
defaultInsertionsQuality = RAC.INSERTIONS_DEFAULT_QUALITY;
|
|
||||||
defaultDeletionsQuality = RAC.DELETIONS_DEFAULT_QUALITY;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||||
int readLength = read.getReadLength();
|
int readLength = read.getReadLength();
|
||||||
|
|
||||||
Byte [] mismatches = new Byte[readLength];
|
Integer [] mismatches = new Integer[readLength];
|
||||||
Byte [] insertions = new Byte[readLength];
|
Integer [] insertions = new Integer[readLength];
|
||||||
Byte [] deletions = new Byte[readLength];
|
Integer [] deletions = new Integer[readLength];
|
||||||
|
|
||||||
byte [] baseQualities = read.getBaseQualities();
|
byte [] baseQualities = read.getBaseQualities();
|
||||||
|
byte [] baseInsertionQualities = read.getBaseInsertionQualities();
|
||||||
|
byte [] baseDeletionQualities = read.getBaseDeletionQualities();
|
||||||
|
|
||||||
if (defaultMismatchesQuality >= 0)
|
for (int i=0; i<baseQualities.length; i++) {
|
||||||
Arrays.fill(mismatches, defaultMismatchesQuality); // if the user decides to override the base qualities in the read, use the flat value
|
mismatches[i] = (int) baseQualities[i];
|
||||||
else {
|
insertions[i] = (int) baseInsertionQualities[i];
|
||||||
for (int i=0; i<baseQualities.length; i++)
|
deletions[i] = (int) baseDeletionQualities[i];
|
||||||
mismatches[i] = baseQualities[i];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Arrays.fill(insertions, defaultInsertionsQuality); // Some day in the future when base insertion and base deletion quals exist the samtools API will
|
|
||||||
Arrays.fill( deletions, defaultDeletionsQuality); // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat value (parameter)
|
|
||||||
|
|
||||||
return new CovariateValues(mismatches, insertions, deletions);
|
return new CovariateValues(mismatches, insertions, deletions);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||||
|
@Override
|
||||||
|
public final Comparable getValue(final String str) {
|
||||||
|
return Integer.parseInt(str);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,12 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
||||||
Arrays.fill(readGroups, readGroupId);
|
Arrays.fill(readGroups, readGroupId);
|
||||||
return new CovariateValues(readGroups, readGroups, readGroups);
|
return new CovariateValues(readGroups, readGroups, readGroups);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||||
|
@Override
|
||||||
|
public final Comparable getValue(final String str) {
|
||||||
|
return str;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
@ -52,20 +53,23 @@ import java.util.Map;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class RecalDataManager {
|
public class RecalDataManager {
|
||||||
public final NestedHashMap nestedHashMap; // The full dataset
|
public final NestedHashMap nestedHashMap; // The full dataset
|
||||||
private final NestedHashMap dataCollapsedReadGroup; // Table where everything except read group has been collapsed
|
private final HashMap<BaseRecalibrationType, NestedHashMap> dataCollapsedReadGroup; // Table where everything except read group has been collapsed
|
||||||
private final NestedHashMap dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
|
private final HashMap<BaseRecalibrationType, NestedHashMap> dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
|
||||||
private final ArrayList<NestedHashMap> dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed
|
private final HashMap<BaseRecalibrationType, ArrayList<NestedHashMap>> dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed
|
||||||
|
|
||||||
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
|
|
||||||
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
|
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
|
||||||
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
|
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
|
||||||
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
|
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
|
||||||
private static boolean warnUserNullReadGroup = false;
|
|
||||||
private static boolean warnUserNullPlatform = false;
|
private static boolean warnUserNullPlatform = false;
|
||||||
|
|
||||||
private static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
|
private static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
|
||||||
|
|
||||||
|
public enum BaseRecalibrationType {
|
||||||
|
BASE_SUBSTITUTION,
|
||||||
|
BASE_INSERTION,
|
||||||
|
BASE_DELETION
|
||||||
|
}
|
||||||
|
|
||||||
public enum SOLID_RECAL_MODE {
|
public enum SOLID_RECAL_MODE {
|
||||||
/**
|
/**
|
||||||
|
|
@ -109,13 +113,18 @@ public class RecalDataManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
public RecalDataManager(final boolean createCollapsedTables, final int numCovariates) {
|
public RecalDataManager(final boolean createCollapsedTables, final int numCovariates) {
|
||||||
if (createCollapsedTables) { // Initialize all the collapsed tables, only used by TableRecalibrationWalker
|
if (createCollapsedTables) { // Initialize all the collapsed tables, only used by on-the-fly recalibration
|
||||||
nestedHashMap = null;
|
nestedHashMap = null;
|
||||||
dataCollapsedReadGroup = new NestedHashMap();
|
dataCollapsedReadGroup = new HashMap<BaseRecalibrationType, NestedHashMap>();
|
||||||
dataCollapsedQualityScore = new NestedHashMap();
|
dataCollapsedQualityScore = new HashMap<BaseRecalibrationType, NestedHashMap>();
|
||||||
dataCollapsedByCovariate = new ArrayList<NestedHashMap>();
|
dataCollapsedByCovariate = new HashMap<BaseRecalibrationType, ArrayList<NestedHashMap>>();
|
||||||
for (int iii = 0; iii < numCovariates - 2; iii++) { // readGroup and QualityScore aren't counted here, their tables are separate
|
for ( final BaseRecalibrationType errorModel : BaseRecalibrationType.values() ) {
|
||||||
dataCollapsedByCovariate.add(new NestedHashMap());
|
dataCollapsedReadGroup.put(errorModel, new NestedHashMap());
|
||||||
|
dataCollapsedQualityScore.put(errorModel, new NestedHashMap());
|
||||||
|
dataCollapsedByCovariate.put(errorModel, new ArrayList<NestedHashMap>());
|
||||||
|
for (int iii = 0; iii < numCovariates - 2; iii++) { // readGroup and QualityScore aren't counted here, their tables are separate
|
||||||
|
dataCollapsedByCovariate.get(errorModel).add(new NestedHashMap());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
@ -137,7 +146,7 @@ public class RecalDataManager {
|
||||||
* @param fullDatum The RecalDatum which is the data for this mapping
|
* @param fullDatum The RecalDatum which is the data for this mapping
|
||||||
* @param PRESERVE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table
|
* @param PRESERVE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table
|
||||||
*/
|
*/
|
||||||
public final void addToAllTables(final Object[] key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN) {
|
public final void addToAllTables(final Object[] key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN, final BaseRecalibrationType errorModel ) {
|
||||||
|
|
||||||
// The full dataset isn't actually ever used for anything because of the sequential calculation so no need to keep the full data HashMap around
|
// The full dataset isn't actually ever used for anything because of the sequential calculation so no need to keep the full data HashMap around
|
||||||
//data.put(key, thisDatum); // add the mapping to the main table
|
//data.put(key, thisDatum); // add the mapping to the main table
|
||||||
|
|
@ -151,9 +160,9 @@ public class RecalDataManager {
|
||||||
// Create dataCollapsedReadGroup, the table where everything except read group has been collapsed
|
// Create dataCollapsedReadGroup, the table where everything except read group has been collapsed
|
||||||
if (qualityScore >= PRESERVE_QSCORES_LESS_THAN) {
|
if (qualityScore >= PRESERVE_QSCORES_LESS_THAN) {
|
||||||
readGroupCollapsedKey[0] = key[0]; // Make a new key with just the read group
|
readGroupCollapsedKey[0] = key[0]; // Make a new key with just the read group
|
||||||
collapsedDatum = (RecalDatum) dataCollapsedReadGroup.get(readGroupCollapsedKey);
|
collapsedDatum = (RecalDatum) dataCollapsedReadGroup.get(errorModel).get(readGroupCollapsedKey);
|
||||||
if (collapsedDatum == null) {
|
if (collapsedDatum == null) {
|
||||||
dataCollapsedReadGroup.put(new RecalDatum(fullDatum), readGroupCollapsedKey);
|
dataCollapsedReadGroup.get(errorModel).put(new RecalDatum(fullDatum), readGroupCollapsedKey);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
collapsedDatum.combine(fullDatum); // using combine instead of increment in order to calculate overall aggregateQReported
|
collapsedDatum.combine(fullDatum); // using combine instead of increment in order to calculate overall aggregateQReported
|
||||||
|
|
@ -163,24 +172,24 @@ public class RecalDataManager {
|
||||||
// Create dataCollapsedQuality, the table where everything except read group and quality score has been collapsed
|
// Create dataCollapsedQuality, the table where everything except read group and quality score has been collapsed
|
||||||
qualityScoreCollapsedKey[0] = key[0]; // Make a new key with the read group ...
|
qualityScoreCollapsedKey[0] = key[0]; // Make a new key with the read group ...
|
||||||
qualityScoreCollapsedKey[1] = key[1]; // and quality score
|
qualityScoreCollapsedKey[1] = key[1]; // and quality score
|
||||||
collapsedDatum = (RecalDatum) dataCollapsedQualityScore.get(qualityScoreCollapsedKey);
|
collapsedDatum = (RecalDatum) dataCollapsedQualityScore.get(errorModel).get(qualityScoreCollapsedKey);
|
||||||
if (collapsedDatum == null) {
|
if (collapsedDatum == null) {
|
||||||
dataCollapsedQualityScore.put(new RecalDatum(fullDatum), qualityScoreCollapsedKey);
|
dataCollapsedQualityScore.get(errorModel).put(new RecalDatum(fullDatum), qualityScoreCollapsedKey);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
collapsedDatum.increment(fullDatum);
|
collapsedDatum.increment(fullDatum);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create dataCollapsedByCovariate's, the tables where everything except read group, quality score, and given covariate has been collapsed
|
// Create dataCollapsedByCovariate's, the tables where everything except read group, quality score, and given covariate has been collapsed
|
||||||
for (int iii = 0; iii < dataCollapsedByCovariate.size(); iii++) {
|
for (int iii = 0; iii < dataCollapsedByCovariate.get(errorModel).size(); iii++) {
|
||||||
covariateCollapsedKey[0] = key[0]; // Make a new key with the read group ...
|
covariateCollapsedKey[0] = key[0]; // Make a new key with the read group ...
|
||||||
covariateCollapsedKey[1] = key[1]; // and quality score ...
|
covariateCollapsedKey[1] = key[1]; // and quality score ...
|
||||||
final Object theCovariateElement = key[iii + 2]; // and the given covariate
|
final Object theCovariateElement = key[iii + 2]; // and the given covariate
|
||||||
if (theCovariateElement != null) {
|
if (theCovariateElement != null) {
|
||||||
covariateCollapsedKey[2] = theCovariateElement;
|
covariateCollapsedKey[2] = theCovariateElement;
|
||||||
collapsedDatum = (RecalDatum) dataCollapsedByCovariate.get(iii).get(covariateCollapsedKey);
|
collapsedDatum = (RecalDatum) dataCollapsedByCovariate.get(errorModel).get(iii).get(covariateCollapsedKey);
|
||||||
if (collapsedDatum == null) {
|
if (collapsedDatum == null) {
|
||||||
dataCollapsedByCovariate.get(iii).put(new RecalDatum(fullDatum), covariateCollapsedKey);
|
dataCollapsedByCovariate.get(errorModel).get(iii).put(new RecalDatum(fullDatum), covariateCollapsedKey);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
collapsedDatum.increment(fullDatum);
|
collapsedDatum.increment(fullDatum);
|
||||||
|
|
@ -198,11 +207,13 @@ public class RecalDataManager {
|
||||||
*/
|
*/
|
||||||
public final void generateEmpiricalQualities(final int smoothing, final int maxQual) {
|
public final void generateEmpiricalQualities(final int smoothing, final int maxQual) {
|
||||||
|
|
||||||
recursivelyGenerateEmpiricalQualities(dataCollapsedReadGroup.data, smoothing, maxQual);
|
for( final BaseRecalibrationType errorModel : BaseRecalibrationType.values() ) {
|
||||||
recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.data, smoothing, maxQual);
|
recursivelyGenerateEmpiricalQualities(dataCollapsedReadGroup.get(errorModel).data, smoothing, maxQual);
|
||||||
for (NestedHashMap map : dataCollapsedByCovariate) {
|
recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.get(errorModel).data, smoothing, maxQual);
|
||||||
recursivelyGenerateEmpiricalQualities(map.data, smoothing, maxQual);
|
for (NestedHashMap map : dataCollapsedByCovariate.get(errorModel)) {
|
||||||
checkForSingletons(map.data);
|
recursivelyGenerateEmpiricalQualities(map.data, smoothing, maxQual);
|
||||||
|
checkForSingletons(map.data);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -241,15 +252,15 @@ public class RecalDataManager {
|
||||||
* @param covariate Which covariate indexes the desired collapsed HashMap
|
* @param covariate Which covariate indexes the desired collapsed HashMap
|
||||||
* @return The desired collapsed HashMap
|
* @return The desired collapsed HashMap
|
||||||
*/
|
*/
|
||||||
public final NestedHashMap getCollapsedTable(final int covariate) {
|
public final NestedHashMap getCollapsedTable(final int covariate, final BaseRecalibrationType errorModel) {
|
||||||
if (covariate == 0) {
|
if (covariate == 0) {
|
||||||
return dataCollapsedReadGroup; // Table where everything except read group has been collapsed
|
return dataCollapsedReadGroup.get(errorModel); // Table where everything except read group has been collapsed
|
||||||
}
|
}
|
||||||
else if (covariate == 1) {
|
else if (covariate == 1) {
|
||||||
return dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
|
return dataCollapsedQualityScore.get(errorModel); // Table where everything except read group and quality score has been collapsed
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return dataCollapsedByCovariate.get(covariate - 2); // Table where everything except read group, quality score, and given covariate has been collapsed
|
return dataCollapsedByCovariate.get(errorModel).get(covariate - 2); // Table where everything except read group, quality score, and given covariate has been collapsed
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -260,7 +271,7 @@ public class RecalDataManager {
|
||||||
* @param RAC The list of shared command line arguments
|
* @param RAC The list of shared command line arguments
|
||||||
*/
|
*/
|
||||||
public static void parseSAMRecord(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) {
|
public static void parseSAMRecord(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) {
|
||||||
GATKSAMReadGroupRecord readGroup = ((GATKSAMRecord) read).getReadGroup();
|
GATKSAMReadGroupRecord readGroup = read.getReadGroup();
|
||||||
|
|
||||||
if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) {
|
if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) {
|
||||||
readGroup.setPlatform(RAC.FORCE_PLATFORM);
|
readGroup.setPlatform(RAC.FORCE_PLATFORM);
|
||||||
|
|
|
||||||
|
|
@ -56,7 +56,7 @@ public class ContextCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
byte[] bases = read.getReadBases();
|
byte[] bases = read.getReadBases();
|
||||||
for (int i = 0; i < read.getReadLength(); i++)
|
for (int i = 0; i < read.getReadLength(); i++)
|
||||||
comparable[i] = (i < CONTEXT_SIZE) ? allN : new String(Arrays.copyOfRange(bases, i - CONTEXT_SIZE, i));
|
comparable[i] = (i < CONTEXT_SIZE) ? allN : new String(Arrays.copyOfRange(bases, i - CONTEXT_SIZE, i));
|
||||||
|
|
|
||||||
|
|
@ -378,7 +378,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
}
|
}
|
||||||
|
|
||||||
RecalDataManager.parseColorSpace(gatkRead);
|
RecalDataManager.parseColorSpace(gatkRead);
|
||||||
gatkRead.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(gatkRead, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION));
|
gatkRead.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(gatkRead, requestedCovariates));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip this position if base quality is zero
|
// Skip this position if base quality is zero
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@ public interface Covariate {
|
||||||
|
|
||||||
public Comparable getValue(String str); // Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
public Comparable getValue(String str); // Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||||
|
|
||||||
public void getValues(GATKSAMRecord read, Comparable[] comparable, BaseRecalibration.BaseRecalibrationType modelType);
|
public void getValues(GATKSAMRecord read, Comparable[] comparable);
|
||||||
//Takes an array of size (at least) read.getReadLength() and fills it with covariate
|
//Takes an array of size (at least) read.getReadLength() and fills it with covariate
|
||||||
//values for each position in the read. This method was created as an optimization over calling getValue( read, offset ) for each offset and allows
|
//values for each position in the read. This method was created as an optimization over calling getValue( read, offset ) for each offset and allows
|
||||||
//read-specific calculations to be done just once rather than for each offset.
|
//read-specific calculations to be done just once rather than for each offset.
|
||||||
|
|
|
||||||
|
|
@ -66,7 +66,7 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
|
|
||||||
// Used to pick out the covariate's value from attributes of the read
|
// Used to pick out the covariate's value from attributes of the read
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
|
|
||||||
//-----------------------------
|
//-----------------------------
|
||||||
// Illumina, Solid, PacBio, and Complete Genomics
|
// Illumina, Solid, PacBio, and Complete Genomics
|
||||||
|
|
|
||||||
|
|
@ -66,7 +66,7 @@ public class DinucCovariate implements StandardCovariate {
|
||||||
* Takes an array of size (at least) read.getReadLength() and fills it with the covariate values for each position in the read.
|
* Takes an array of size (at least) read.getReadLength() and fills it with the covariate values for each position in the read.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
final HashMap<Integer, Dinuc> dinucHashMapRef = this.dinucHashMap; //optimize access to dinucHashMap
|
final HashMap<Integer, Dinuc> dinucHashMapRef = this.dinucHashMap; //optimize access to dinucHashMap
|
||||||
final int readLength = read.getReadLength();
|
final int readLength = read.getReadLength();
|
||||||
final boolean negativeStrand = read.getReadNegativeStrandFlag();
|
final boolean negativeStrand = read.getReadNegativeStrandFlag();
|
||||||
|
|
|
||||||
|
|
@ -82,7 +82,7 @@ public class GCContentCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
||||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -95,7 +95,7 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
||||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,7 @@ public class MappingQualityCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
||||||
comparable[iii] = getValue(read); // BUGBUG: this can be optimized
|
comparable[iii] = getValue(read); // BUGBUG: this can be optimized
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -65,7 +65,7 @@ public class MinimumNQSCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
||||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,7 @@ public class PositionCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
||||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -62,7 +62,7 @@ public class PrimerRoundCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
||||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -46,16 +46,10 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
if (modelType == BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION) {
|
byte[] baseQualities = read.getBaseQualities();
|
||||||
byte[] baseQualities = read.getBaseQualities();
|
for (int i = 0; i < read.getReadLength(); i++) {
|
||||||
for (int i = 0; i < read.getReadLength(); i++) {
|
comparable[i] = (int) baseQualities[i];
|
||||||
comparable[i] = (int) baseQualities[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else { // model == BASE_INSERTION || model == BASE_DELETION
|
|
||||||
Arrays.fill(comparable, 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
|
|
||||||
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
final String readGroupId = read.getReadGroup().getReadGroupId();
|
final String readGroupId = read.getReadGroup().getReadGroupId();
|
||||||
for (int i = 0; i < read.getReadLength(); i++) {
|
for (int i = 0; i < read.getReadLength(); i++) {
|
||||||
comparable[i] = readGroupId;
|
comparable[i] = readGroupId;
|
||||||
|
|
|
||||||
|
|
@ -63,7 +63,6 @@ public class RecalDataManager {
|
||||||
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
|
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
|
||||||
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
|
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
|
||||||
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
|
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
|
||||||
private static boolean warnUserNullReadGroup = false;
|
|
||||||
private static boolean warnUserNullPlatform = false;
|
private static boolean warnUserNullPlatform = false;
|
||||||
|
|
||||||
public enum SOLID_RECAL_MODE {
|
public enum SOLID_RECAL_MODE {
|
||||||
|
|
@ -604,7 +603,7 @@ public class RecalDataManager {
|
||||||
* value for the ith position in the read and the jth covariate in
|
* value for the ith position in the read and the jth covariate in
|
||||||
* reqeustedCovariates list.
|
* reqeustedCovariates list.
|
||||||
*/
|
*/
|
||||||
public static Comparable[][] computeCovariates(final GATKSAMRecord gatkRead, final List<Covariate> requestedCovariates, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public static Comparable[][] computeCovariates(final GATKSAMRecord gatkRead, final List<Covariate> requestedCovariates) {
|
||||||
//compute all covariates for this read
|
//compute all covariates for this read
|
||||||
final int numRequestedCovariates = requestedCovariates.size();
|
final int numRequestedCovariates = requestedCovariates.size();
|
||||||
final int readLength = gatkRead.getReadLength();
|
final int readLength = gatkRead.getReadLength();
|
||||||
|
|
@ -613,7 +612,7 @@ public class RecalDataManager {
|
||||||
final Comparable[] tempCovariateValuesHolder = new Comparable[readLength];
|
final Comparable[] tempCovariateValuesHolder = new Comparable[readLength];
|
||||||
|
|
||||||
for (int i = 0; i < numRequestedCovariates; i++) { // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
|
for (int i = 0; i < numRequestedCovariates; i++) { // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
|
||||||
requestedCovariates.get(i).getValues(gatkRead, tempCovariateValuesHolder, modelType);
|
requestedCovariates.get(i).getValues(gatkRead, tempCovariateValuesHolder);
|
||||||
for (int j = 0; j < readLength; j++)
|
for (int j = 0; j < readLength; j++)
|
||||||
covariateValues_offset_x_covar[j][i] = tempCovariateValuesHolder[j]; // copy values into a 2D array that allows all covar types to be extracted at once for an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types.
|
covariateValues_offset_x_covar[j][i] = tempCovariateValuesHolder[j]; // copy values into a 2D array that allows all covar types to be extracted at once for an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types.
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -405,7 +405,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
}
|
}
|
||||||
|
|
||||||
//compute all covariate values for this read
|
//compute all covariate values for this read
|
||||||
final Comparable[][] covariateValues_offset_x_covar = RecalDataManager.computeCovariates(read, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION);
|
final Comparable[][] covariateValues_offset_x_covar = RecalDataManager.computeCovariates(read, requestedCovariates);
|
||||||
|
|
||||||
// For each base in the read
|
// For each base in the read
|
||||||
for (int offset = 0; offset < read.getReadLength(); offset++) {
|
for (int offset = 0; offset < read.getReadLength(); offset++) {
|
||||||
|
|
|
||||||
|
|
@ -25,14 +25,12 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.recalibration;
|
package org.broadinstitute.sting.utils.recalibration;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate;
|
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
|
||||||
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection;
|
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||||
|
|
@ -52,19 +50,13 @@ import java.util.regex.Pattern;
|
||||||
|
|
||||||
public class BaseRecalibration {
|
public class BaseRecalibration {
|
||||||
|
|
||||||
public enum BaseRecalibrationType {
|
|
||||||
BASE_SUBSTITUTION,
|
|
||||||
BASE_INSERTION,
|
|
||||||
BASE_DELETION
|
|
||||||
}
|
|
||||||
|
|
||||||
private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
|
private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
|
||||||
private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // List of covariates to be used in this calculation
|
private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // List of covariates to be used in this calculation
|
||||||
public static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
public static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
||||||
public static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
|
public static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
|
||||||
public static final String EOF_MARKER = "EOF";
|
public static final String EOF_MARKER = "EOF";
|
||||||
private static final int MAX_QUALITY_SCORE = 65; //BUGBUG: what value to use here?
|
private static final int MAX_QUALITY_SCORE = 65; //BUGBUG: what value to use here?
|
||||||
private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap(); // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap(); // Caches the result of performSequentialQualityCalculation(...) for all sets of covariate values.
|
||||||
|
|
||||||
public BaseRecalibration( final File RECAL_FILE ) {
|
public BaseRecalibration( final File RECAL_FILE ) {
|
||||||
// Get a list of all available covariates
|
// Get a list of all available covariates
|
||||||
|
|
@ -89,7 +81,7 @@ public class BaseRecalibration {
|
||||||
throw new UserException.MalformedFile( RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE );
|
throw new UserException.MalformedFile( RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE );
|
||||||
} else { // Found the covariate list in input file, loop through all of them and instantiate them
|
} else { // Found the covariate list in input file, loop through all of them and instantiate them
|
||||||
String[] vals = line.split(",");
|
String[] vals = line.split(",");
|
||||||
for( int iii = 0; iii < vals.length - 3; iii++ ) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical
|
for( int iii = 0; iii < vals.length - 4; iii++ ) { // There are n-4 covariates. The last four items are ErrorModel, nObservations, nMismatch, and Qempirical
|
||||||
boolean foundClass = false;
|
boolean foundClass = false;
|
||||||
for( Class<?> covClass : classes ) {
|
for( Class<?> covClass : classes ) {
|
||||||
if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) {
|
if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) {
|
||||||
|
|
@ -160,7 +152,7 @@ public class BaseRecalibration {
|
||||||
final String[] vals = line.split(",");
|
final String[] vals = line.split(",");
|
||||||
|
|
||||||
// Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
|
// Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
|
||||||
if( vals.length != requestedCovariates.size() + 3 ) { // +3 because of nObservations, nMismatch, and Qempirical
|
if( vals.length != requestedCovariates.size() + 4 ) { // +4 because of ErrorModel, nObservations, nMismatch, and Qempirical
|
||||||
throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with too many fields: " + line +
|
throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with too many fields: " + line +
|
||||||
" --Perhaps the read group string contains a comma and isn't being parsed correctly.");
|
" --Perhaps the read group string contains a comma and isn't being parsed correctly.");
|
||||||
}
|
}
|
||||||
|
|
@ -172,39 +164,63 @@ public class BaseRecalibration {
|
||||||
cov = requestedCovariates.get( iii );
|
cov = requestedCovariates.get( iii );
|
||||||
key[iii] = cov.getValue( vals[iii] );
|
key[iii] = cov.getValue( vals[iii] );
|
||||||
}
|
}
|
||||||
|
final String modelString = vals[iii++];
|
||||||
|
final RecalDataManager.BaseRecalibrationType errorModel = ( modelString.equals(CovariateKeySet.mismatchesCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION :
|
||||||
|
( modelString.equals(CovariateKeySet.insertionsCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_INSERTION :
|
||||||
|
( modelString.equals(CovariateKeySet.deletionsCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_DELETION : null ) ) );
|
||||||
|
|
||||||
// Create a new datum using the number of observations, number of mismatches, and reported quality score
|
// Create a new datum using the number of observations, number of mismatches, and reported quality score
|
||||||
final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
|
final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
|
||||||
// Add that datum to all the collapsed tables which will be used in the sequential calculation
|
// Add that datum to all the collapsed tables which will be used in the sequential calculation
|
||||||
dataManager.addToAllTables( key, datum, QualityUtils.MIN_USABLE_Q_SCORE ); //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
|
|
||||||
|
dataManager.addToAllTables( key, datum, QualityUtils.MIN_USABLE_Q_SCORE, errorModel ); //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte[] recalibrateRead( final GATKSAMRecord read, final byte[] originalQuals, final BaseRecalibrationType modelType ) {
|
public void recalibrateRead( final GATKSAMRecord read ) {
|
||||||
|
|
||||||
final byte[] recalQuals = originalQuals.clone();
|
|
||||||
|
|
||||||
//compute all covariate values for this read
|
//compute all covariate values for this read
|
||||||
final Comparable[][] covariateValues_offset_x_covar =
|
RecalDataManager.computeCovariates(read, requestedCovariates);
|
||||||
RecalDataManager.computeCovariates(read, requestedCovariates, modelType);
|
final CovariateKeySet covariateKeySet = RecalDataManager.getAllCovariateValuesFor( read );
|
||||||
|
|
||||||
// For each base in the read
|
for( final RecalDataManager.BaseRecalibrationType errorModel : RecalDataManager.BaseRecalibrationType.values() ) {
|
||||||
for( int offset = 0; offset < read.getReadLength(); offset++ ) {
|
final byte[] originalQuals = ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION ? read.getBaseQualities() :
|
||||||
|
( errorModel == RecalDataManager.BaseRecalibrationType.BASE_INSERTION ? read.getBaseDeletionQualities() :
|
||||||
final Object[] fullCovariateKey = covariateValues_offset_x_covar[offset];
|
( errorModel == RecalDataManager.BaseRecalibrationType.BASE_DELETION ? read.getBaseDeletionQualities() : null ) ) );
|
||||||
|
final byte[] recalQuals = originalQuals.clone();
|
||||||
Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
|
|
||||||
if(qualityScore == null)
|
// For each base in the read
|
||||||
{
|
for( int offset = 0; offset < read.getReadLength(); offset++ ) {
|
||||||
qualityScore = performSequentialQualityCalculation( fullCovariateKey );
|
|
||||||
qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey);
|
|
||||||
}
|
|
||||||
|
|
||||||
recalQuals[offset] = qualityScore;
|
|
||||||
}
|
|
||||||
|
|
||||||
preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low
|
|
||||||
|
|
||||||
return recalQuals;
|
final Object[] fullCovariateKey =
|
||||||
|
( errorModel == RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION ? covariateKeySet.getMismatchesKeySet(offset) :
|
||||||
|
( errorModel == RecalDataManager.BaseRecalibrationType.BASE_INSERTION ? covariateKeySet.getInsertionsKeySet(offset) :
|
||||||
|
( errorModel == RecalDataManager.BaseRecalibrationType.BASE_DELETION ? covariateKeySet.getDeletionsKeySet(offset) : null ) ) );
|
||||||
|
|
||||||
|
Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
|
||||||
|
if( qualityScore == null ) {
|
||||||
|
qualityScore = performSequentialQualityCalculation( errorModel, fullCovariateKey );
|
||||||
|
qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey);
|
||||||
|
}
|
||||||
|
|
||||||
|
recalQuals[offset] = qualityScore;
|
||||||
|
}
|
||||||
|
|
||||||
|
preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low
|
||||||
|
switch (errorModel) {
|
||||||
|
case BASE_SUBSTITUTION:
|
||||||
|
read.setBaseQualities( recalQuals );
|
||||||
|
break;
|
||||||
|
case BASE_INSERTION:
|
||||||
|
read.setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, recalQuals );
|
||||||
|
break;
|
||||||
|
case BASE_DELETION:
|
||||||
|
read.setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, recalQuals );
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -222,7 +238,7 @@ public class BaseRecalibration {
|
||||||
* @param key The list of Comparables that were calculated from the covariates
|
* @param key The list of Comparables that were calculated from the covariates
|
||||||
* @return A recalibrated quality score as a byte
|
* @return A recalibrated quality score as a byte
|
||||||
*/
|
*/
|
||||||
private byte performSequentialQualityCalculation( final Object... key ) {
|
private byte performSequentialQualityCalculation( final RecalDataManager.BaseRecalibrationType errorModel, final Object... key ) {
|
||||||
|
|
||||||
final byte qualFromRead = (byte)Integer.parseInt(key[1].toString());
|
final byte qualFromRead = (byte)Integer.parseInt(key[1].toString());
|
||||||
final Object[] readGroupCollapsedKey = new Object[1];
|
final Object[] readGroupCollapsedKey = new Object[1];
|
||||||
|
|
@ -231,7 +247,7 @@ public class BaseRecalibration {
|
||||||
|
|
||||||
// The global quality shift (over the read group only)
|
// The global quality shift (over the read group only)
|
||||||
readGroupCollapsedKey[0] = key[0];
|
readGroupCollapsedKey[0] = key[0];
|
||||||
final RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0).get( readGroupCollapsedKey ));
|
final RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0, errorModel).get( readGroupCollapsedKey ));
|
||||||
double globalDeltaQ = 0.0;
|
double globalDeltaQ = 0.0;
|
||||||
if( globalRecalDatum != null ) {
|
if( globalRecalDatum != null ) {
|
||||||
final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
|
final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
|
||||||
|
|
@ -242,7 +258,7 @@ public class BaseRecalibration {
|
||||||
// The shift in quality between reported and empirical
|
// The shift in quality between reported and empirical
|
||||||
qualityScoreCollapsedKey[0] = key[0];
|
qualityScoreCollapsedKey[0] = key[0];
|
||||||
qualityScoreCollapsedKey[1] = key[1];
|
qualityScoreCollapsedKey[1] = key[1];
|
||||||
final RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1).get( qualityScoreCollapsedKey ));
|
final RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1, errorModel).get( qualityScoreCollapsedKey ));
|
||||||
double deltaQReported = 0.0;
|
double deltaQReported = 0.0;
|
||||||
if( qReportedRecalDatum != null ) {
|
if( qReportedRecalDatum != null ) {
|
||||||
final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
|
final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
|
||||||
|
|
@ -256,7 +272,7 @@ public class BaseRecalibration {
|
||||||
covariateCollapsedKey[1] = key[1];
|
covariateCollapsedKey[1] = key[1];
|
||||||
for( int iii = 2; iii < key.length; iii++ ) {
|
for( int iii = 2; iii < key.length; iii++ ) {
|
||||||
covariateCollapsedKey[2] = key[iii]; // The given covariate
|
covariateCollapsedKey[2] = key[iii]; // The given covariate
|
||||||
final RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii).get( covariateCollapsedKey ));
|
final RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii, errorModel).get( covariateCollapsedKey ));
|
||||||
if( covariateRecalDatum != null ) {
|
if( covariateRecalDatum != null ) {
|
||||||
deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
|
deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
|
||||||
deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
|
deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
|
||||||
|
|
@ -265,18 +281,6 @@ public class BaseRecalibration {
|
||||||
|
|
||||||
final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
|
final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
|
||||||
return QualityUtils.boundQual( (int)Math.round(newQuality), (byte)MAX_QUALITY_SCORE );
|
return QualityUtils.boundQual( (int)Math.round(newQuality), (byte)MAX_QUALITY_SCORE );
|
||||||
|
|
||||||
// Verbose printouts used to validate with old recalibrator
|
|
||||||
//if(key.contains(null)) {
|
|
||||||
// System.out.println( key + String.format(" => %d + %.2f + %.2f + %.2f + %.2f = %d",
|
|
||||||
// qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte));
|
|
||||||
//}
|
|
||||||
//else {
|
|
||||||
// System.out.println( String.format("%s %s %s %s => %d + %.2f + %.2f + %.2f + %.2f = %d",
|
|
||||||
// key.get(0).toString(), key.get(3).toString(), key.get(2).toString(), key.get(1).toString(), qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte) );
|
|
||||||
//}
|
|
||||||
|
|
||||||
//return newQualityByte;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -291,5 +295,4 @@ public class BaseRecalibration {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -54,7 +54,6 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
// Base Quality Score Recalibrator specific attribute tags
|
// Base Quality Score Recalibrator specific attribute tags
|
||||||
public static final String BQSR_BASE_INSERTION_QUALITIES = "BI";
|
public static final String BQSR_BASE_INSERTION_QUALITIES = "BI";
|
||||||
public static final String BQSR_BASE_DELETION_QUALITIES = "BD";
|
public static final String BQSR_BASE_DELETION_QUALITIES = "BD";
|
||||||
public static final String BQSR_BASES_HAVE_BEEN_RECALIBRATED_TAG = "BR";
|
|
||||||
|
|
||||||
// the SAMRecord data we're caching
|
// the SAMRecord data we're caching
|
||||||
private String mReadString = null;
|
private String mReadString = null;
|
||||||
|
|
@ -163,27 +162,6 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
return super.equals(o);
|
return super.equals(o);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public byte[] getBaseQualities() {
|
|
||||||
return super.getBaseQualities();
|
|
||||||
/*
|
|
||||||
if( getAttribute( BQSR_BASES_HAVE_BEEN_RECALIBRATED_TAG ) != null ) {
|
|
||||||
return super.getBaseQualities();
|
|
||||||
} else {
|
|
||||||
// if the recal data was populated in the engine then recalibrate the quality scores on the fly
|
|
||||||
if( GenomeAnalysisEngine.hasBaseRecalibration() ) {
|
|
||||||
final byte[] quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, super.getBaseQualities() );
|
|
||||||
setBaseQualities(quals);
|
|
||||||
setAttribute( BQSR_BASES_HAVE_BEEN_RECALIBRATED_TAG, true );
|
|
||||||
return quals;
|
|
||||||
} else { // just use the qualities that are in the read since we don't have the sufficient information to recalibrate on the fly
|
|
||||||
return super.getBaseQualities();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Accessors for base insertion and base deletion quality scores
|
* Accessors for base insertion and base deletion quality scores
|
||||||
*/
|
*/
|
||||||
|
|
@ -191,13 +169,8 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
byte[] quals = getByteArrayAttribute( BQSR_BASE_INSERTION_QUALITIES );
|
byte[] quals = getByteArrayAttribute( BQSR_BASE_INSERTION_QUALITIES );
|
||||||
if( quals == null ) {
|
if( quals == null ) {
|
||||||
quals = new byte[getBaseQualities().length];
|
quals = new byte[getBaseQualities().length];
|
||||||
Arrays.fill(quals, (byte) 45); // allow for differing default values between BaseInsertions and BaseDeletions
|
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
|
||||||
// if the recal data was populated in the engine then recalibrate the quality scores on the fly
|
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
|
||||||
// else give default values which are flat Q45
|
|
||||||
if( GenomeAnalysisEngine.hasBaseRecalibration() ) {
|
|
||||||
quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, quals, BaseRecalibration.BaseRecalibrationType.BASE_INSERTION ); // the original quals here are the flat base insertion/deletion quals, NOT the original base qualities
|
|
||||||
}
|
|
||||||
// add the qual array to the read so that we don't have to do the recalibration work again
|
|
||||||
setAttribute( BQSR_BASE_INSERTION_QUALITIES, quals );
|
setAttribute( BQSR_BASE_INSERTION_QUALITIES, quals );
|
||||||
}
|
}
|
||||||
return quals;
|
return quals;
|
||||||
|
|
@ -207,13 +180,8 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
byte[] quals = getByteArrayAttribute( BQSR_BASE_DELETION_QUALITIES );
|
byte[] quals = getByteArrayAttribute( BQSR_BASE_DELETION_QUALITIES );
|
||||||
if( quals == null ) {
|
if( quals == null ) {
|
||||||
quals = new byte[getBaseQualities().length];
|
quals = new byte[getBaseQualities().length];
|
||||||
Arrays.fill(quals, (byte) 45);
|
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
|
||||||
// if the recal data was populated in the engine then recalibrate the quality scores on the fly
|
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
|
||||||
// else give default values which are flat Q45
|
|
||||||
if( GenomeAnalysisEngine.hasBaseRecalibration() ) {
|
|
||||||
quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, quals, BaseRecalibration.BaseRecalibrationType.BASE_DELETION ); // the original quals here are the flat base insertion/deletion quals, NOT the original base qualities
|
|
||||||
}
|
|
||||||
// add the qual array to the read so that we don't have to do the recalibration work again
|
|
||||||
setAttribute( BQSR_BASE_DELETION_QUALITIES, quals );
|
setAttribute( BQSR_BASE_DELETION_QUALITIES, quals );
|
||||||
}
|
}
|
||||||
return quals;
|
return quals;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue