Updating the half of the code that makes use of the recalibration information to work with the new refactoring of the bqsr. Reverting the covariate interface change in the original bqsr because the error model enum was moved to a different class and didn't make sense any more.
This commit is contained in:
parent
f52f1f659f
commit
9b8fd4c2ff
|
|
@ -135,7 +135,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Since we've sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them
|
// Since we've traversed sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them
|
||||||
while( workQueue.peek() != null && (workQueue.peek().getExtendedLoc().getStop() < minStart || !workQueue.peek().getExtendedLoc().getContig().equals(dataProvider.getLocus().getContig())) ) {
|
while( workQueue.peek() != null && (workQueue.peek().getExtendedLoc().getStop() < minStart || !workQueue.peek().getExtendedLoc().getContig().equals(dataProvider.getLocus().getContig())) ) {
|
||||||
final ActiveRegion activeRegion = workQueue.remove();
|
final ActiveRegion activeRegion = workQueue.remove();
|
||||||
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
|
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
|
||||||
|
|
@ -190,7 +190,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
|
reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
|
||||||
|
|
||||||
logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLoc());
|
logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLoc());
|
||||||
final M x = walker.map( activeRegion, null ); // BUGBUG: tracker needs to be filled in and passed to the walker
|
final M x = walker.map( activeRegion, null );
|
||||||
return walker.reduce( x, sum );
|
return walker.reduce( x, sum );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,14 +5,12 @@ import org.broad.tribble.Feature;
|
||||||
import org.broadinstitute.sting.commandline.Input;
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
import org.broadinstitute.sting.commandline.IntervalBinding;
|
import org.broadinstitute.sting.commandline.IntervalBinding;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
import org.broadinstitute.sting.commandline.RodBinding;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
|
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
|
||||||
import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter;
|
import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter;
|
||||||
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
|
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
|
||||||
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
@ -77,7 +75,7 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
|
||||||
public abstract double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
public abstract double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
||||||
|
|
||||||
// Map over the ActiveRegion
|
// Map over the ActiveRegion
|
||||||
public abstract MapType map(final ActiveRegion activeRegion, final ReadMetaDataTracker metaDataTracker);
|
public abstract MapType map(final ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker);
|
||||||
|
|
||||||
public final GenomeLocSortedSet extendIntervals( final GenomeLocSortedSet intervals, final GenomeLocParser genomeLocParser, IndexedFastaSequenceFile reference ) {
|
public final GenomeLocSortedSet extendIntervals( final GenomeLocSortedSet intervals, final GenomeLocParser genomeLocParser, IndexedFastaSequenceFile reference ) {
|
||||||
final int activeRegionExtension = this.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
final int activeRegionExtension = this.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
||||||
|
|
|
||||||
|
|
@ -22,8 +22,8 @@ import java.util.*;
|
||||||
* User: chartl
|
* User: chartl
|
||||||
* Date: 9/14/11
|
* Date: 9/14/11
|
||||||
* Time: 12:24 PM
|
* Time: 12:24 PM
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
|
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
|
||||||
|
|
||||||
private MendelianViolation mendelianViolation = null;
|
private MendelianViolation mendelianViolation = null;
|
||||||
|
|
|
||||||
|
|
@ -98,4 +98,9 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||||
|
@Override
|
||||||
|
public final Comparable getValue(final String str) {
|
||||||
|
return str;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,7 @@ public interface Covariate {
|
||||||
*/
|
*/
|
||||||
public CovariateValues getValues(GATKSAMRecord read);
|
public CovariateValues getValues(GATKSAMRecord read);
|
||||||
|
|
||||||
|
public Comparable getValue(String str); // Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RequiredCovariate extends Covariate {}
|
interface RequiredCovariate extends Covariate {}
|
||||||
|
|
|
||||||
|
|
@ -15,18 +15,18 @@ public class CovariateKeySet {
|
||||||
|
|
||||||
private int nextCovariateIndex;
|
private int nextCovariateIndex;
|
||||||
|
|
||||||
private final String mismatchesCovariateName = "M";
|
public final static String mismatchesCovariateName = "M";
|
||||||
private final String insertionsCovariateName = "I";
|
public final static String insertionsCovariateName = "I";
|
||||||
private final String deletionsCovariateName = "D";
|
public final static String deletionsCovariateName = "D";
|
||||||
|
|
||||||
public CovariateKeySet(int readLength, int numberOfCovariates) {
|
public CovariateKeySet(int readLength, int numberOfCovariates) {
|
||||||
numberOfCovariates++; // +1 because we are adding the mismatch covariate (to comply with the molten table format)
|
numberOfCovariates++; // +1 because we are adding the mismatch covariate (to comply with the molten table format)
|
||||||
this.mismatchesKeySet = new Object[readLength][numberOfCovariates];
|
this.mismatchesKeySet = new Object[readLength][numberOfCovariates];
|
||||||
this.insertionsKeySet = new Object[readLength][numberOfCovariates];
|
this.insertionsKeySet = new Object[readLength][numberOfCovariates];
|
||||||
this.deletionsKeySet = new Object[readLength][numberOfCovariates];
|
this.deletionsKeySet = new Object[readLength][numberOfCovariates];
|
||||||
initializeCovariateKeySet(this.mismatchesKeySet, this.mismatchesCovariateName);
|
initializeCovariateKeySet(this.mismatchesKeySet, mismatchesCovariateName);
|
||||||
initializeCovariateKeySet(this.insertionsKeySet, this.insertionsCovariateName);
|
initializeCovariateKeySet(this.insertionsKeySet, insertionsCovariateName);
|
||||||
initializeCovariateKeySet(this.deletionsKeySet, this.deletionsCovariateName);
|
initializeCovariateKeySet(this.deletionsKeySet, deletionsCovariateName);
|
||||||
this.nextCovariateIndex = 0;
|
this.nextCovariateIndex = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -196,4 +196,9 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
return new CovariateValues(cycles, cycles, cycles);
|
return new CovariateValues(cycles, cycles, cycles);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||||
|
@Override
|
||||||
|
public final Comparable getValue(final String str) {
|
||||||
|
return Integer.parseInt(str);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -39,39 +39,35 @@ import java.util.Arrays;
|
||||||
|
|
||||||
public class QualityScoreCovariate implements RequiredCovariate {
|
public class QualityScoreCovariate implements RequiredCovariate {
|
||||||
|
|
||||||
private byte defaultMismatchesQuality; // walker parameter. Must be > 0 to be used, otherwise we use the quality from the read.
|
|
||||||
private byte defaultInsertionsQuality; // walker parameter. Must be > 0 to be used, otherwise we use the quality from the read.
|
|
||||||
private byte defaultDeletionsQuality; // walker parameter. Must be > 0 to be used, otherwise we use the quality from the read.
|
|
||||||
|
|
||||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||||
@Override
|
@Override
|
||||||
public void initialize(final RecalibrationArgumentCollection RAC) {
|
public void initialize(final RecalibrationArgumentCollection RAC) {
|
||||||
defaultMismatchesQuality = RAC.MISMATCHES_DEFAULT_QUALITY;
|
|
||||||
defaultInsertionsQuality = RAC.INSERTIONS_DEFAULT_QUALITY;
|
|
||||||
defaultDeletionsQuality = RAC.DELETIONS_DEFAULT_QUALITY;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||||
int readLength = read.getReadLength();
|
int readLength = read.getReadLength();
|
||||||
|
|
||||||
Byte [] mismatches = new Byte[readLength];
|
Integer [] mismatches = new Integer[readLength];
|
||||||
Byte [] insertions = new Byte[readLength];
|
Integer [] insertions = new Integer[readLength];
|
||||||
Byte [] deletions = new Byte[readLength];
|
Integer [] deletions = new Integer[readLength];
|
||||||
|
|
||||||
byte [] baseQualities = read.getBaseQualities();
|
byte [] baseQualities = read.getBaseQualities();
|
||||||
|
byte [] baseInsertionQualities = read.getBaseInsertionQualities();
|
||||||
|
byte [] baseDeletionQualities = read.getBaseDeletionQualities();
|
||||||
|
|
||||||
if (defaultMismatchesQuality >= 0)
|
for (int i=0; i<baseQualities.length; i++) {
|
||||||
Arrays.fill(mismatches, defaultMismatchesQuality); // if the user decides to override the base qualities in the read, use the flat value
|
mismatches[i] = (int) baseQualities[i];
|
||||||
else {
|
insertions[i] = (int) baseInsertionQualities[i];
|
||||||
for (int i=0; i<baseQualities.length; i++)
|
deletions[i] = (int) baseDeletionQualities[i];
|
||||||
mismatches[i] = baseQualities[i];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Arrays.fill(insertions, defaultInsertionsQuality); // Some day in the future when base insertion and base deletion quals exist the samtools API will
|
|
||||||
Arrays.fill( deletions, defaultDeletionsQuality); // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat value (parameter)
|
|
||||||
|
|
||||||
return new CovariateValues(mismatches, insertions, deletions);
|
return new CovariateValues(mismatches, insertions, deletions);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||||
|
@Override
|
||||||
|
public final Comparable getValue(final String str) {
|
||||||
|
return Integer.parseInt(str);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,12 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
||||||
Arrays.fill(readGroups, readGroupId);
|
Arrays.fill(readGroups, readGroupId);
|
||||||
return new CovariateValues(readGroups, readGroups, readGroups);
|
return new CovariateValues(readGroups, readGroups, readGroups);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||||
|
@Override
|
||||||
|
public final Comparable getValue(final String str) {
|
||||||
|
return str;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
@ -53,19 +54,22 @@ import java.util.Map;
|
||||||
|
|
||||||
public class RecalDataManager {
|
public class RecalDataManager {
|
||||||
public final NestedHashMap nestedHashMap; // The full dataset
|
public final NestedHashMap nestedHashMap; // The full dataset
|
||||||
private final NestedHashMap dataCollapsedReadGroup; // Table where everything except read group has been collapsed
|
private final HashMap<BaseRecalibrationType, NestedHashMap> dataCollapsedReadGroup; // Table where everything except read group has been collapsed
|
||||||
private final NestedHashMap dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
|
private final HashMap<BaseRecalibrationType, NestedHashMap> dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
|
||||||
private final ArrayList<NestedHashMap> dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed
|
private final HashMap<BaseRecalibrationType, ArrayList<NestedHashMap>> dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed
|
||||||
|
|
||||||
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
|
|
||||||
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
|
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
|
||||||
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
|
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
|
||||||
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
|
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
|
||||||
private static boolean warnUserNullReadGroup = false;
|
|
||||||
private static boolean warnUserNullPlatform = false;
|
private static boolean warnUserNullPlatform = false;
|
||||||
|
|
||||||
private static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
|
private static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
|
||||||
|
|
||||||
|
public enum BaseRecalibrationType {
|
||||||
|
BASE_SUBSTITUTION,
|
||||||
|
BASE_INSERTION,
|
||||||
|
BASE_DELETION
|
||||||
|
}
|
||||||
|
|
||||||
public enum SOLID_RECAL_MODE {
|
public enum SOLID_RECAL_MODE {
|
||||||
/**
|
/**
|
||||||
|
|
@ -109,13 +113,18 @@ public class RecalDataManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
public RecalDataManager(final boolean createCollapsedTables, final int numCovariates) {
|
public RecalDataManager(final boolean createCollapsedTables, final int numCovariates) {
|
||||||
if (createCollapsedTables) { // Initialize all the collapsed tables, only used by TableRecalibrationWalker
|
if (createCollapsedTables) { // Initialize all the collapsed tables, only used by on-the-fly recalibration
|
||||||
nestedHashMap = null;
|
nestedHashMap = null;
|
||||||
dataCollapsedReadGroup = new NestedHashMap();
|
dataCollapsedReadGroup = new HashMap<BaseRecalibrationType, NestedHashMap>();
|
||||||
dataCollapsedQualityScore = new NestedHashMap();
|
dataCollapsedQualityScore = new HashMap<BaseRecalibrationType, NestedHashMap>();
|
||||||
dataCollapsedByCovariate = new ArrayList<NestedHashMap>();
|
dataCollapsedByCovariate = new HashMap<BaseRecalibrationType, ArrayList<NestedHashMap>>();
|
||||||
|
for ( final BaseRecalibrationType errorModel : BaseRecalibrationType.values() ) {
|
||||||
|
dataCollapsedReadGroup.put(errorModel, new NestedHashMap());
|
||||||
|
dataCollapsedQualityScore.put(errorModel, new NestedHashMap());
|
||||||
|
dataCollapsedByCovariate.put(errorModel, new ArrayList<NestedHashMap>());
|
||||||
for (int iii = 0; iii < numCovariates - 2; iii++) { // readGroup and QualityScore aren't counted here, their tables are separate
|
for (int iii = 0; iii < numCovariates - 2; iii++) { // readGroup and QualityScore aren't counted here, their tables are separate
|
||||||
dataCollapsedByCovariate.add(new NestedHashMap());
|
dataCollapsedByCovariate.get(errorModel).add(new NestedHashMap());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
@ -137,7 +146,7 @@ public class RecalDataManager {
|
||||||
* @param fullDatum The RecalDatum which is the data for this mapping
|
* @param fullDatum The RecalDatum which is the data for this mapping
|
||||||
* @param PRESERVE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table
|
* @param PRESERVE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table
|
||||||
*/
|
*/
|
||||||
public final void addToAllTables(final Object[] key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN) {
|
public final void addToAllTables(final Object[] key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN, final BaseRecalibrationType errorModel ) {
|
||||||
|
|
||||||
// The full dataset isn't actually ever used for anything because of the sequential calculation so no need to keep the full data HashMap around
|
// The full dataset isn't actually ever used for anything because of the sequential calculation so no need to keep the full data HashMap around
|
||||||
//data.put(key, thisDatum); // add the mapping to the main table
|
//data.put(key, thisDatum); // add the mapping to the main table
|
||||||
|
|
@ -151,9 +160,9 @@ public class RecalDataManager {
|
||||||
// Create dataCollapsedReadGroup, the table where everything except read group has been collapsed
|
// Create dataCollapsedReadGroup, the table where everything except read group has been collapsed
|
||||||
if (qualityScore >= PRESERVE_QSCORES_LESS_THAN) {
|
if (qualityScore >= PRESERVE_QSCORES_LESS_THAN) {
|
||||||
readGroupCollapsedKey[0] = key[0]; // Make a new key with just the read group
|
readGroupCollapsedKey[0] = key[0]; // Make a new key with just the read group
|
||||||
collapsedDatum = (RecalDatum) dataCollapsedReadGroup.get(readGroupCollapsedKey);
|
collapsedDatum = (RecalDatum) dataCollapsedReadGroup.get(errorModel).get(readGroupCollapsedKey);
|
||||||
if (collapsedDatum == null) {
|
if (collapsedDatum == null) {
|
||||||
dataCollapsedReadGroup.put(new RecalDatum(fullDatum), readGroupCollapsedKey);
|
dataCollapsedReadGroup.get(errorModel).put(new RecalDatum(fullDatum), readGroupCollapsedKey);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
collapsedDatum.combine(fullDatum); // using combine instead of increment in order to calculate overall aggregateQReported
|
collapsedDatum.combine(fullDatum); // using combine instead of increment in order to calculate overall aggregateQReported
|
||||||
|
|
@ -163,9 +172,9 @@ public class RecalDataManager {
|
||||||
// Create dataCollapsedQuality, the table where everything except read group and quality score has been collapsed
|
// Create dataCollapsedQuality, the table where everything except read group and quality score has been collapsed
|
||||||
qualityScoreCollapsedKey[0] = key[0]; // Make a new key with the read group ...
|
qualityScoreCollapsedKey[0] = key[0]; // Make a new key with the read group ...
|
||||||
qualityScoreCollapsedKey[1] = key[1]; // and quality score
|
qualityScoreCollapsedKey[1] = key[1]; // and quality score
|
||||||
collapsedDatum = (RecalDatum) dataCollapsedQualityScore.get(qualityScoreCollapsedKey);
|
collapsedDatum = (RecalDatum) dataCollapsedQualityScore.get(errorModel).get(qualityScoreCollapsedKey);
|
||||||
if (collapsedDatum == null) {
|
if (collapsedDatum == null) {
|
||||||
dataCollapsedQualityScore.put(new RecalDatum(fullDatum), qualityScoreCollapsedKey);
|
dataCollapsedQualityScore.get(errorModel).put(new RecalDatum(fullDatum), qualityScoreCollapsedKey);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
collapsedDatum.increment(fullDatum);
|
collapsedDatum.increment(fullDatum);
|
||||||
|
|
@ -178,9 +187,9 @@ public class RecalDataManager {
|
||||||
final Object theCovariateElement = key[iii + 2]; // and the given covariate
|
final Object theCovariateElement = key[iii + 2]; // and the given covariate
|
||||||
if (theCovariateElement != null) {
|
if (theCovariateElement != null) {
|
||||||
covariateCollapsedKey[2] = theCovariateElement;
|
covariateCollapsedKey[2] = theCovariateElement;
|
||||||
collapsedDatum = (RecalDatum) dataCollapsedByCovariate.get(iii).get(covariateCollapsedKey);
|
collapsedDatum = (RecalDatum) dataCollapsedByCovariate.get(errorModel).get(iii).get(covariateCollapsedKey);
|
||||||
if (collapsedDatum == null) {
|
if (collapsedDatum == null) {
|
||||||
dataCollapsedByCovariate.get(iii).put(new RecalDatum(fullDatum), covariateCollapsedKey);
|
dataCollapsedByCovariate.get(errorModel).get(iii).put(new RecalDatum(fullDatum), covariateCollapsedKey);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
collapsedDatum.increment(fullDatum);
|
collapsedDatum.increment(fullDatum);
|
||||||
|
|
@ -198,13 +207,15 @@ public class RecalDataManager {
|
||||||
*/
|
*/
|
||||||
public final void generateEmpiricalQualities(final int smoothing, final int maxQual) {
|
public final void generateEmpiricalQualities(final int smoothing, final int maxQual) {
|
||||||
|
|
||||||
recursivelyGenerateEmpiricalQualities(dataCollapsedReadGroup.data, smoothing, maxQual);
|
for( final BaseRecalibrationType errorModel : BaseRecalibrationType.values() ) {
|
||||||
recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.data, smoothing, maxQual);
|
recursivelyGenerateEmpiricalQualities(dataCollapsedReadGroup.get(errorModel).data, smoothing, maxQual);
|
||||||
for (NestedHashMap map : dataCollapsedByCovariate) {
|
recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.get(errorModel).data, smoothing, maxQual);
|
||||||
|
for (NestedHashMap map : dataCollapsedByCovariate.get(errorModel)) {
|
||||||
recursivelyGenerateEmpiricalQualities(map.data, smoothing, maxQual);
|
recursivelyGenerateEmpiricalQualities(map.data, smoothing, maxQual);
|
||||||
checkForSingletons(map.data);
|
checkForSingletons(map.data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void recursivelyGenerateEmpiricalQualities(final Map data, final int smoothing, final int maxQual) {
|
private void recursivelyGenerateEmpiricalQualities(final Map data, final int smoothing, final int maxQual) {
|
||||||
|
|
||||||
|
|
@ -241,15 +252,15 @@ public class RecalDataManager {
|
||||||
* @param covariate Which covariate indexes the desired collapsed HashMap
|
* @param covariate Which covariate indexes the desired collapsed HashMap
|
||||||
* @return The desired collapsed HashMap
|
* @return The desired collapsed HashMap
|
||||||
*/
|
*/
|
||||||
public final NestedHashMap getCollapsedTable(final int covariate) {
|
public final NestedHashMap getCollapsedTable(final int covariate, final BaseRecalibrationType errorModel) {
|
||||||
if (covariate == 0) {
|
if (covariate == 0) {
|
||||||
return dataCollapsedReadGroup; // Table where everything except read group has been collapsed
|
return dataCollapsedReadGroup.get(errorModel); // Table where everything except read group has been collapsed
|
||||||
}
|
}
|
||||||
else if (covariate == 1) {
|
else if (covariate == 1) {
|
||||||
return dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
|
return dataCollapsedQualityScore.get(errorModel); // Table where everything except read group and quality score has been collapsed
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return dataCollapsedByCovariate.get(covariate - 2); // Table where everything except read group, quality score, and given covariate has been collapsed
|
return dataCollapsedByCovariate.get(errorModel).get(covariate - 2); // Table where everything except read group, quality score, and given covariate has been collapsed
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -260,7 +271,7 @@ public class RecalDataManager {
|
||||||
* @param RAC The list of shared command line arguments
|
* @param RAC The list of shared command line arguments
|
||||||
*/
|
*/
|
||||||
public static void parseSAMRecord(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) {
|
public static void parseSAMRecord(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) {
|
||||||
GATKSAMReadGroupRecord readGroup = ((GATKSAMRecord) read).getReadGroup();
|
GATKSAMReadGroupRecord readGroup = read.getReadGroup();
|
||||||
|
|
||||||
if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) {
|
if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) {
|
||||||
readGroup.setPlatform(RAC.FORCE_PLATFORM);
|
readGroup.setPlatform(RAC.FORCE_PLATFORM);
|
||||||
|
|
|
||||||
|
|
@ -56,7 +56,7 @@ public class ContextCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
byte[] bases = read.getReadBases();
|
byte[] bases = read.getReadBases();
|
||||||
for (int i = 0; i < read.getReadLength(); i++)
|
for (int i = 0; i < read.getReadLength(); i++)
|
||||||
comparable[i] = (i < CONTEXT_SIZE) ? allN : new String(Arrays.copyOfRange(bases, i - CONTEXT_SIZE, i));
|
comparable[i] = (i < CONTEXT_SIZE) ? allN : new String(Arrays.copyOfRange(bases, i - CONTEXT_SIZE, i));
|
||||||
|
|
|
||||||
|
|
@ -378,7 +378,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
}
|
}
|
||||||
|
|
||||||
RecalDataManager.parseColorSpace(gatkRead);
|
RecalDataManager.parseColorSpace(gatkRead);
|
||||||
gatkRead.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(gatkRead, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION));
|
gatkRead.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(gatkRead, requestedCovariates));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip this position if base quality is zero
|
// Skip this position if base quality is zero
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@ public interface Covariate {
|
||||||
|
|
||||||
public Comparable getValue(String str); // Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
public Comparable getValue(String str); // Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||||
|
|
||||||
public void getValues(GATKSAMRecord read, Comparable[] comparable, BaseRecalibration.BaseRecalibrationType modelType);
|
public void getValues(GATKSAMRecord read, Comparable[] comparable);
|
||||||
//Takes an array of size (at least) read.getReadLength() and fills it with covariate
|
//Takes an array of size (at least) read.getReadLength() and fills it with covariate
|
||||||
//values for each position in the read. This method was created as an optimization over calling getValue( read, offset ) for each offset and allows
|
//values for each position in the read. This method was created as an optimization over calling getValue( read, offset ) for each offset and allows
|
||||||
//read-specific calculations to be done just once rather than for each offset.
|
//read-specific calculations to be done just once rather than for each offset.
|
||||||
|
|
|
||||||
|
|
@ -66,7 +66,7 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
|
|
||||||
// Used to pick out the covariate's value from attributes of the read
|
// Used to pick out the covariate's value from attributes of the read
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
|
|
||||||
//-----------------------------
|
//-----------------------------
|
||||||
// Illumina, Solid, PacBio, and Complete Genomics
|
// Illumina, Solid, PacBio, and Complete Genomics
|
||||||
|
|
|
||||||
|
|
@ -66,7 +66,7 @@ public class DinucCovariate implements StandardCovariate {
|
||||||
* Takes an array of size (at least) read.getReadLength() and fills it with the covariate values for each position in the read.
|
* Takes an array of size (at least) read.getReadLength() and fills it with the covariate values for each position in the read.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
final HashMap<Integer, Dinuc> dinucHashMapRef = this.dinucHashMap; //optimize access to dinucHashMap
|
final HashMap<Integer, Dinuc> dinucHashMapRef = this.dinucHashMap; //optimize access to dinucHashMap
|
||||||
final int readLength = read.getReadLength();
|
final int readLength = read.getReadLength();
|
||||||
final boolean negativeStrand = read.getReadNegativeStrandFlag();
|
final boolean negativeStrand = read.getReadNegativeStrandFlag();
|
||||||
|
|
|
||||||
|
|
@ -82,7 +82,7 @@ public class GCContentCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
||||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -95,7 +95,7 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
||||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,7 @@ public class MappingQualityCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
||||||
comparable[iii] = getValue(read); // BUGBUG: this can be optimized
|
comparable[iii] = getValue(read); // BUGBUG: this can be optimized
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -65,7 +65,7 @@ public class MinimumNQSCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
||||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,7 @@ public class PositionCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
||||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -62,7 +62,7 @@ public class PrimerRoundCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
||||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -46,18 +46,12 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
if (modelType == BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION) {
|
|
||||||
byte[] baseQualities = read.getBaseQualities();
|
byte[] baseQualities = read.getBaseQualities();
|
||||||
for (int i = 0; i < read.getReadLength(); i++) {
|
for (int i = 0; i < read.getReadLength(); i++) {
|
||||||
comparable[i] = (int) baseQualities[i];
|
comparable[i] = (int) baseQualities[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else { // model == BASE_INSERTION || model == BASE_DELETION
|
|
||||||
Arrays.fill(comparable, 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
|
|
||||||
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
|
||||||
final String readGroupId = read.getReadGroup().getReadGroupId();
|
final String readGroupId = read.getReadGroup().getReadGroupId();
|
||||||
for (int i = 0; i < read.getReadLength(); i++) {
|
for (int i = 0; i < read.getReadLength(); i++) {
|
||||||
comparable[i] = readGroupId;
|
comparable[i] = readGroupId;
|
||||||
|
|
|
||||||
|
|
@ -63,7 +63,6 @@ public class RecalDataManager {
|
||||||
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
|
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
|
||||||
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
|
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
|
||||||
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
|
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
|
||||||
private static boolean warnUserNullReadGroup = false;
|
|
||||||
private static boolean warnUserNullPlatform = false;
|
private static boolean warnUserNullPlatform = false;
|
||||||
|
|
||||||
public enum SOLID_RECAL_MODE {
|
public enum SOLID_RECAL_MODE {
|
||||||
|
|
@ -604,7 +603,7 @@ public class RecalDataManager {
|
||||||
* value for the ith position in the read and the jth covariate in
|
* value for the ith position in the read and the jth covariate in
|
||||||
* reqeustedCovariates list.
|
* reqeustedCovariates list.
|
||||||
*/
|
*/
|
||||||
public static Comparable[][] computeCovariates(final GATKSAMRecord gatkRead, final List<Covariate> requestedCovariates, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public static Comparable[][] computeCovariates(final GATKSAMRecord gatkRead, final List<Covariate> requestedCovariates) {
|
||||||
//compute all covariates for this read
|
//compute all covariates for this read
|
||||||
final int numRequestedCovariates = requestedCovariates.size();
|
final int numRequestedCovariates = requestedCovariates.size();
|
||||||
final int readLength = gatkRead.getReadLength();
|
final int readLength = gatkRead.getReadLength();
|
||||||
|
|
@ -613,7 +612,7 @@ public class RecalDataManager {
|
||||||
final Comparable[] tempCovariateValuesHolder = new Comparable[readLength];
|
final Comparable[] tempCovariateValuesHolder = new Comparable[readLength];
|
||||||
|
|
||||||
for (int i = 0; i < numRequestedCovariates; i++) { // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
|
for (int i = 0; i < numRequestedCovariates; i++) { // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
|
||||||
requestedCovariates.get(i).getValues(gatkRead, tempCovariateValuesHolder, modelType);
|
requestedCovariates.get(i).getValues(gatkRead, tempCovariateValuesHolder);
|
||||||
for (int j = 0; j < readLength; j++)
|
for (int j = 0; j < readLength; j++)
|
||||||
covariateValues_offset_x_covar[j][i] = tempCovariateValuesHolder[j]; // copy values into a 2D array that allows all covar types to be extracted at once for an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types.
|
covariateValues_offset_x_covar[j][i] = tempCovariateValuesHolder[j]; // copy values into a 2D array that allows all covar types to be extracted at once for an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types.
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -405,7 +405,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
}
|
}
|
||||||
|
|
||||||
//compute all covariate values for this read
|
//compute all covariate values for this read
|
||||||
final Comparable[][] covariateValues_offset_x_covar = RecalDataManager.computeCovariates(read, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION);
|
final Comparable[][] covariateValues_offset_x_covar = RecalDataManager.computeCovariates(read, requestedCovariates);
|
||||||
|
|
||||||
// For each base in the read
|
// For each base in the read
|
||||||
for (int offset = 0; offset < read.getReadLength(); offset++) {
|
for (int offset = 0; offset < read.getReadLength(); offset++) {
|
||||||
|
|
|
||||||
|
|
@ -25,14 +25,12 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.recalibration;
|
package org.broadinstitute.sting.utils.recalibration;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate;
|
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
|
||||||
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection;
|
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||||
|
|
@ -52,19 +50,13 @@ import java.util.regex.Pattern;
|
||||||
|
|
||||||
public class BaseRecalibration {
|
public class BaseRecalibration {
|
||||||
|
|
||||||
public enum BaseRecalibrationType {
|
|
||||||
BASE_SUBSTITUTION,
|
|
||||||
BASE_INSERTION,
|
|
||||||
BASE_DELETION
|
|
||||||
}
|
|
||||||
|
|
||||||
private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
|
private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
|
||||||
private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // List of covariates to be used in this calculation
|
private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // List of covariates to be used in this calculation
|
||||||
public static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
public static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
||||||
public static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
|
public static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
|
||||||
public static final String EOF_MARKER = "EOF";
|
public static final String EOF_MARKER = "EOF";
|
||||||
private static final int MAX_QUALITY_SCORE = 65; //BUGBUG: what value to use here?
|
private static final int MAX_QUALITY_SCORE = 65; //BUGBUG: what value to use here?
|
||||||
private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap(); // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap(); // Caches the result of performSequentialQualityCalculation(...) for all sets of covariate values.
|
||||||
|
|
||||||
public BaseRecalibration( final File RECAL_FILE ) {
|
public BaseRecalibration( final File RECAL_FILE ) {
|
||||||
// Get a list of all available covariates
|
// Get a list of all available covariates
|
||||||
|
|
@ -89,7 +81,7 @@ public class BaseRecalibration {
|
||||||
throw new UserException.MalformedFile( RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE );
|
throw new UserException.MalformedFile( RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE );
|
||||||
} else { // Found the covariate list in input file, loop through all of them and instantiate them
|
} else { // Found the covariate list in input file, loop through all of them and instantiate them
|
||||||
String[] vals = line.split(",");
|
String[] vals = line.split(",");
|
||||||
for( int iii = 0; iii < vals.length - 3; iii++ ) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical
|
for( int iii = 0; iii < vals.length - 4; iii++ ) { // There are n-4 covariates. The last four items are ErrorModel, nObservations, nMismatch, and Qempirical
|
||||||
boolean foundClass = false;
|
boolean foundClass = false;
|
||||||
for( Class<?> covClass : classes ) {
|
for( Class<?> covClass : classes ) {
|
||||||
if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) {
|
if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) {
|
||||||
|
|
@ -160,7 +152,7 @@ public class BaseRecalibration {
|
||||||
final String[] vals = line.split(",");
|
final String[] vals = line.split(",");
|
||||||
|
|
||||||
// Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
|
// Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
|
||||||
if( vals.length != requestedCovariates.size() + 3 ) { // +3 because of nObservations, nMismatch, and Qempirical
|
if( vals.length != requestedCovariates.size() + 4 ) { // +4 because of ErrorModel, nObservations, nMismatch, and Qempirical
|
||||||
throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with too many fields: " + line +
|
throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with too many fields: " + line +
|
||||||
" --Perhaps the read group string contains a comma and isn't being parsed correctly.");
|
" --Perhaps the read group string contains a comma and isn't being parsed correctly.");
|
||||||
}
|
}
|
||||||
|
|
@ -172,30 +164,41 @@ public class BaseRecalibration {
|
||||||
cov = requestedCovariates.get( iii );
|
cov = requestedCovariates.get( iii );
|
||||||
key[iii] = cov.getValue( vals[iii] );
|
key[iii] = cov.getValue( vals[iii] );
|
||||||
}
|
}
|
||||||
|
final String modelString = vals[iii++];
|
||||||
|
final RecalDataManager.BaseRecalibrationType errorModel = ( modelString.equals(CovariateKeySet.mismatchesCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION :
|
||||||
|
( modelString.equals(CovariateKeySet.insertionsCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_INSERTION :
|
||||||
|
( modelString.equals(CovariateKeySet.deletionsCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_DELETION : null ) ) );
|
||||||
|
|
||||||
// Create a new datum using the number of observations, number of mismatches, and reported quality score
|
// Create a new datum using the number of observations, number of mismatches, and reported quality score
|
||||||
final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
|
final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
|
||||||
// Add that datum to all the collapsed tables which will be used in the sequential calculation
|
// Add that datum to all the collapsed tables which will be used in the sequential calculation
|
||||||
dataManager.addToAllTables( key, datum, QualityUtils.MIN_USABLE_Q_SCORE ); //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
|
|
||||||
|
dataManager.addToAllTables( key, datum, QualityUtils.MIN_USABLE_Q_SCORE, errorModel ); //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte[] recalibrateRead( final GATKSAMRecord read, final byte[] originalQuals, final BaseRecalibrationType modelType ) {
|
public void recalibrateRead( final GATKSAMRecord read ) {
|
||||||
|
|
||||||
final byte[] recalQuals = originalQuals.clone();
|
|
||||||
|
|
||||||
//compute all covariate values for this read
|
//compute all covariate values for this read
|
||||||
final Comparable[][] covariateValues_offset_x_covar =
|
RecalDataManager.computeCovariates(read, requestedCovariates);
|
||||||
RecalDataManager.computeCovariates(read, requestedCovariates, modelType);
|
final CovariateKeySet covariateKeySet = RecalDataManager.getAllCovariateValuesFor( read );
|
||||||
|
|
||||||
|
for( final RecalDataManager.BaseRecalibrationType errorModel : RecalDataManager.BaseRecalibrationType.values() ) {
|
||||||
|
final byte[] originalQuals = ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION ? read.getBaseQualities() :
|
||||||
|
( errorModel == RecalDataManager.BaseRecalibrationType.BASE_INSERTION ? read.getBaseDeletionQualities() :
|
||||||
|
( errorModel == RecalDataManager.BaseRecalibrationType.BASE_DELETION ? read.getBaseDeletionQualities() : null ) ) );
|
||||||
|
final byte[] recalQuals = originalQuals.clone();
|
||||||
|
|
||||||
// For each base in the read
|
// For each base in the read
|
||||||
for( int offset = 0; offset < read.getReadLength(); offset++ ) {
|
for( int offset = 0; offset < read.getReadLength(); offset++ ) {
|
||||||
|
|
||||||
final Object[] fullCovariateKey = covariateValues_offset_x_covar[offset];
|
final Object[] fullCovariateKey =
|
||||||
|
( errorModel == RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION ? covariateKeySet.getMismatchesKeySet(offset) :
|
||||||
|
( errorModel == RecalDataManager.BaseRecalibrationType.BASE_INSERTION ? covariateKeySet.getInsertionsKeySet(offset) :
|
||||||
|
( errorModel == RecalDataManager.BaseRecalibrationType.BASE_DELETION ? covariateKeySet.getDeletionsKeySet(offset) : null ) ) );
|
||||||
|
|
||||||
Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
|
Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
|
||||||
if(qualityScore == null)
|
if( qualityScore == null ) {
|
||||||
{
|
qualityScore = performSequentialQualityCalculation( errorModel, fullCovariateKey );
|
||||||
qualityScore = performSequentialQualityCalculation( fullCovariateKey );
|
|
||||||
qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey);
|
qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -203,8 +206,21 @@ public class BaseRecalibration {
|
||||||
}
|
}
|
||||||
|
|
||||||
preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low
|
preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low
|
||||||
|
switch (errorModel) {
|
||||||
|
case BASE_SUBSTITUTION:
|
||||||
|
read.setBaseQualities( recalQuals );
|
||||||
|
break;
|
||||||
|
case BASE_INSERTION:
|
||||||
|
read.setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, recalQuals );
|
||||||
|
break;
|
||||||
|
case BASE_DELETION:
|
||||||
|
read.setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, recalQuals );
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return recalQuals;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -222,7 +238,7 @@ public class BaseRecalibration {
|
||||||
* @param key The list of Comparables that were calculated from the covariates
|
* @param key The list of Comparables that were calculated from the covariates
|
||||||
* @return A recalibrated quality score as a byte
|
* @return A recalibrated quality score as a byte
|
||||||
*/
|
*/
|
||||||
private byte performSequentialQualityCalculation( final Object... key ) {
|
private byte performSequentialQualityCalculation( final RecalDataManager.BaseRecalibrationType errorModel, final Object... key ) {
|
||||||
|
|
||||||
final byte qualFromRead = (byte)Integer.parseInt(key[1].toString());
|
final byte qualFromRead = (byte)Integer.parseInt(key[1].toString());
|
||||||
final Object[] readGroupCollapsedKey = new Object[1];
|
final Object[] readGroupCollapsedKey = new Object[1];
|
||||||
|
|
@ -231,7 +247,7 @@ public class BaseRecalibration {
|
||||||
|
|
||||||
// The global quality shift (over the read group only)
|
// The global quality shift (over the read group only)
|
||||||
readGroupCollapsedKey[0] = key[0];
|
readGroupCollapsedKey[0] = key[0];
|
||||||
final RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0).get( readGroupCollapsedKey ));
|
final RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0, errorModel).get( readGroupCollapsedKey ));
|
||||||
double globalDeltaQ = 0.0;
|
double globalDeltaQ = 0.0;
|
||||||
if( globalRecalDatum != null ) {
|
if( globalRecalDatum != null ) {
|
||||||
final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
|
final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
|
||||||
|
|
@ -242,7 +258,7 @@ public class BaseRecalibration {
|
||||||
// The shift in quality between reported and empirical
|
// The shift in quality between reported and empirical
|
||||||
qualityScoreCollapsedKey[0] = key[0];
|
qualityScoreCollapsedKey[0] = key[0];
|
||||||
qualityScoreCollapsedKey[1] = key[1];
|
qualityScoreCollapsedKey[1] = key[1];
|
||||||
final RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1).get( qualityScoreCollapsedKey ));
|
final RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1, errorModel).get( qualityScoreCollapsedKey ));
|
||||||
double deltaQReported = 0.0;
|
double deltaQReported = 0.0;
|
||||||
if( qReportedRecalDatum != null ) {
|
if( qReportedRecalDatum != null ) {
|
||||||
final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
|
final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
|
||||||
|
|
@ -256,7 +272,7 @@ public class BaseRecalibration {
|
||||||
covariateCollapsedKey[1] = key[1];
|
covariateCollapsedKey[1] = key[1];
|
||||||
for( int iii = 2; iii < key.length; iii++ ) {
|
for( int iii = 2; iii < key.length; iii++ ) {
|
||||||
covariateCollapsedKey[2] = key[iii]; // The given covariate
|
covariateCollapsedKey[2] = key[iii]; // The given covariate
|
||||||
final RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii).get( covariateCollapsedKey ));
|
final RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii, errorModel).get( covariateCollapsedKey ));
|
||||||
if( covariateRecalDatum != null ) {
|
if( covariateRecalDatum != null ) {
|
||||||
deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
|
deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
|
||||||
deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
|
deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
|
||||||
|
|
@ -265,18 +281,6 @@ public class BaseRecalibration {
|
||||||
|
|
||||||
final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
|
final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
|
||||||
return QualityUtils.boundQual( (int)Math.round(newQuality), (byte)MAX_QUALITY_SCORE );
|
return QualityUtils.boundQual( (int)Math.round(newQuality), (byte)MAX_QUALITY_SCORE );
|
||||||
|
|
||||||
// Verbose printouts used to validate with old recalibrator
|
|
||||||
//if(key.contains(null)) {
|
|
||||||
// System.out.println( key + String.format(" => %d + %.2f + %.2f + %.2f + %.2f = %d",
|
|
||||||
// qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte));
|
|
||||||
//}
|
|
||||||
//else {
|
|
||||||
// System.out.println( String.format("%s %s %s %s => %d + %.2f + %.2f + %.2f + %.2f = %d",
|
|
||||||
// key.get(0).toString(), key.get(3).toString(), key.get(2).toString(), key.get(1).toString(), qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte) );
|
|
||||||
//}
|
|
||||||
|
|
||||||
//return newQualityByte;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -291,5 +295,4 @@ public class BaseRecalibration {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -54,7 +54,6 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
// Base Quality Score Recalibrator specific attribute tags
|
// Base Quality Score Recalibrator specific attribute tags
|
||||||
public static final String BQSR_BASE_INSERTION_QUALITIES = "BI";
|
public static final String BQSR_BASE_INSERTION_QUALITIES = "BI";
|
||||||
public static final String BQSR_BASE_DELETION_QUALITIES = "BD";
|
public static final String BQSR_BASE_DELETION_QUALITIES = "BD";
|
||||||
public static final String BQSR_BASES_HAVE_BEEN_RECALIBRATED_TAG = "BR";
|
|
||||||
|
|
||||||
// the SAMRecord data we're caching
|
// the SAMRecord data we're caching
|
||||||
private String mReadString = null;
|
private String mReadString = null;
|
||||||
|
|
@ -163,27 +162,6 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
return super.equals(o);
|
return super.equals(o);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public byte[] getBaseQualities() {
|
|
||||||
return super.getBaseQualities();
|
|
||||||
/*
|
|
||||||
if( getAttribute( BQSR_BASES_HAVE_BEEN_RECALIBRATED_TAG ) != null ) {
|
|
||||||
return super.getBaseQualities();
|
|
||||||
} else {
|
|
||||||
// if the recal data was populated in the engine then recalibrate the quality scores on the fly
|
|
||||||
if( GenomeAnalysisEngine.hasBaseRecalibration() ) {
|
|
||||||
final byte[] quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, super.getBaseQualities() );
|
|
||||||
setBaseQualities(quals);
|
|
||||||
setAttribute( BQSR_BASES_HAVE_BEEN_RECALIBRATED_TAG, true );
|
|
||||||
return quals;
|
|
||||||
} else { // just use the qualities that are in the read since we don't have the sufficient information to recalibrate on the fly
|
|
||||||
return super.getBaseQualities();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Accessors for base insertion and base deletion quality scores
|
* Accessors for base insertion and base deletion quality scores
|
||||||
*/
|
*/
|
||||||
|
|
@ -191,13 +169,8 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
byte[] quals = getByteArrayAttribute( BQSR_BASE_INSERTION_QUALITIES );
|
byte[] quals = getByteArrayAttribute( BQSR_BASE_INSERTION_QUALITIES );
|
||||||
if( quals == null ) {
|
if( quals == null ) {
|
||||||
quals = new byte[getBaseQualities().length];
|
quals = new byte[getBaseQualities().length];
|
||||||
Arrays.fill(quals, (byte) 45); // allow for differing default values between BaseInsertions and BaseDeletions
|
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
|
||||||
// if the recal data was populated in the engine then recalibrate the quality scores on the fly
|
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
|
||||||
// else give default values which are flat Q45
|
|
||||||
if( GenomeAnalysisEngine.hasBaseRecalibration() ) {
|
|
||||||
quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, quals, BaseRecalibration.BaseRecalibrationType.BASE_INSERTION ); // the original quals here are the flat base insertion/deletion quals, NOT the original base qualities
|
|
||||||
}
|
|
||||||
// add the qual array to the read so that we don't have to do the recalibration work again
|
|
||||||
setAttribute( BQSR_BASE_INSERTION_QUALITIES, quals );
|
setAttribute( BQSR_BASE_INSERTION_QUALITIES, quals );
|
||||||
}
|
}
|
||||||
return quals;
|
return quals;
|
||||||
|
|
@ -207,13 +180,8 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
byte[] quals = getByteArrayAttribute( BQSR_BASE_DELETION_QUALITIES );
|
byte[] quals = getByteArrayAttribute( BQSR_BASE_DELETION_QUALITIES );
|
||||||
if( quals == null ) {
|
if( quals == null ) {
|
||||||
quals = new byte[getBaseQualities().length];
|
quals = new byte[getBaseQualities().length];
|
||||||
Arrays.fill(quals, (byte) 45);
|
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
|
||||||
// if the recal data was populated in the engine then recalibrate the quality scores on the fly
|
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
|
||||||
// else give default values which are flat Q45
|
|
||||||
if( GenomeAnalysisEngine.hasBaseRecalibration() ) {
|
|
||||||
quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, quals, BaseRecalibration.BaseRecalibrationType.BASE_DELETION ); // the original quals here are the flat base insertion/deletion quals, NOT the original base qualities
|
|
||||||
}
|
|
||||||
// add the qual array to the read so that we don't have to do the recalibration work again
|
|
||||||
setAttribute( BQSR_BASE_DELETION_QUALITIES, quals );
|
setAttribute( BQSR_BASE_DELETION_QUALITIES, quals );
|
||||||
}
|
}
|
||||||
return quals;
|
return quals;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue