diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 58c2df877..70fe43755 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -107,7 +107,7 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends TraversalEngine> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLoc()); - final M x = walker.map( activeRegion, null ); // BUGBUG: tracker needs to be filled in and passed to the walker + final M x = walker.map( activeRegion, null ); return walker.reduce( x, sum ); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index 244870c78..6403f15a2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -5,14 +5,12 @@ import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.IntervalBinding; import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter; import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter; import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter; import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -77,7 +75,7 @@ public abstract class ActiveRegionWalker extends Walker 0 to be used, otherwise we use the quality from the read. - private byte defaultInsertionsQuality; // walker parameter. Must be > 0 to be used, otherwise we use the quality from the read. - private byte defaultDeletionsQuality; // walker parameter. Must be > 0 to be used, otherwise we use the quality from the read. - // Initialize any member variables using the command-line arguments passed to the walkers @Override public void initialize(final RecalibrationArgumentCollection RAC) { - defaultMismatchesQuality = RAC.MISMATCHES_DEFAULT_QUALITY; - defaultInsertionsQuality = RAC.INSERTIONS_DEFAULT_QUALITY; - defaultDeletionsQuality = RAC.DELETIONS_DEFAULT_QUALITY; } @Override public CovariateValues getValues(final GATKSAMRecord read) { int readLength = read.getReadLength(); - - Byte [] mismatches = new Byte[readLength]; - Byte [] insertions = new Byte[readLength]; - Byte [] deletions = new Byte[readLength]; - + + Integer [] mismatches = new Integer[readLength]; + Integer [] insertions = new Integer[readLength]; + Integer [] deletions = new Integer[readLength]; + byte [] baseQualities = read.getBaseQualities(); + byte [] baseInsertionQualities = read.getBaseInsertionQualities(); + byte [] baseDeletionQualities = read.getBaseDeletionQualities(); - if (defaultMismatchesQuality >= 0) - Arrays.fill(mismatches, defaultMismatchesQuality); // if the user decides to override the base qualities in the read, use the flat value - else { - for (int i=0; i dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed + public final NestedHashMap nestedHashMap; // The full dataset + private final HashMap dataCollapsedReadGroup; // Table where everything except read group has been collapsed + private final HashMap dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed + private final HashMap> dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed - public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color - private static boolean warnUserNullReadGroup = false; private static boolean warnUserNullPlatform = false; private static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\ + public enum BaseRecalibrationType { + BASE_SUBSTITUTION, + BASE_INSERTION, + BASE_DELETION + } public enum SOLID_RECAL_MODE { /** @@ -109,13 +113,18 @@ public class RecalDataManager { } public RecalDataManager(final boolean createCollapsedTables, final int numCovariates) { - if (createCollapsedTables) { // Initialize all the collapsed tables, only used by TableRecalibrationWalker + if (createCollapsedTables) { // Initialize all the collapsed tables, only used by on-the-fly recalibration nestedHashMap = null; - dataCollapsedReadGroup = new NestedHashMap(); - dataCollapsedQualityScore = new NestedHashMap(); - dataCollapsedByCovariate = new ArrayList(); - for (int iii = 0; iii < numCovariates - 2; iii++) { // readGroup and QualityScore aren't counted here, their tables are separate - dataCollapsedByCovariate.add(new NestedHashMap()); + dataCollapsedReadGroup = new HashMap(); + dataCollapsedQualityScore = new HashMap(); + dataCollapsedByCovariate = new HashMap>(); + for ( final BaseRecalibrationType errorModel : BaseRecalibrationType.values() ) { + dataCollapsedReadGroup.put(errorModel, new NestedHashMap()); + dataCollapsedQualityScore.put(errorModel, new NestedHashMap()); + dataCollapsedByCovariate.put(errorModel, new ArrayList()); + for (int iii = 0; iii < numCovariates - 2; iii++) { // readGroup and QualityScore aren't counted here, their tables are separate + dataCollapsedByCovariate.get(errorModel).add(new NestedHashMap()); + } } } else { @@ -137,7 +146,7 @@ public class RecalDataManager { * @param fullDatum The RecalDatum which is the data for this mapping * @param PRESERVE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table */ - public final void addToAllTables(final Object[] key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN) { + public final void addToAllTables(final Object[] key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN, final BaseRecalibrationType errorModel ) { // The full dataset isn't actually ever used for anything because of the sequential calculation so no need to keep the full data HashMap around //data.put(key, thisDatum); // add the mapping to the main table @@ -151,9 +160,9 @@ public class RecalDataManager { // Create dataCollapsedReadGroup, the table where everything except read group has been collapsed if (qualityScore >= PRESERVE_QSCORES_LESS_THAN) { readGroupCollapsedKey[0] = key[0]; // Make a new key with just the read group - collapsedDatum = (RecalDatum) dataCollapsedReadGroup.get(readGroupCollapsedKey); + collapsedDatum = (RecalDatum) dataCollapsedReadGroup.get(errorModel).get(readGroupCollapsedKey); if (collapsedDatum == null) { - dataCollapsedReadGroup.put(new RecalDatum(fullDatum), readGroupCollapsedKey); + dataCollapsedReadGroup.get(errorModel).put(new RecalDatum(fullDatum), readGroupCollapsedKey); } else { collapsedDatum.combine(fullDatum); // using combine instead of increment in order to calculate overall aggregateQReported @@ -163,9 +172,9 @@ public class RecalDataManager { // Create dataCollapsedQuality, the table where everything except read group and quality score has been collapsed qualityScoreCollapsedKey[0] = key[0]; // Make a new key with the read group ... qualityScoreCollapsedKey[1] = key[1]; // and quality score - collapsedDatum = (RecalDatum) dataCollapsedQualityScore.get(qualityScoreCollapsedKey); + collapsedDatum = (RecalDatum) dataCollapsedQualityScore.get(errorModel).get(qualityScoreCollapsedKey); if (collapsedDatum == null) { - dataCollapsedQualityScore.put(new RecalDatum(fullDatum), qualityScoreCollapsedKey); + dataCollapsedQualityScore.get(errorModel).put(new RecalDatum(fullDatum), qualityScoreCollapsedKey); } else { collapsedDatum.increment(fullDatum); @@ -178,9 +187,9 @@ public class RecalDataManager { final Object theCovariateElement = key[iii + 2]; // and the given covariate if (theCovariateElement != null) { covariateCollapsedKey[2] = theCovariateElement; - collapsedDatum = (RecalDatum) dataCollapsedByCovariate.get(iii).get(covariateCollapsedKey); + collapsedDatum = (RecalDatum) dataCollapsedByCovariate.get(errorModel).get(iii).get(covariateCollapsedKey); if (collapsedDatum == null) { - dataCollapsedByCovariate.get(iii).put(new RecalDatum(fullDatum), covariateCollapsedKey); + dataCollapsedByCovariate.get(errorModel).get(iii).put(new RecalDatum(fullDatum), covariateCollapsedKey); } else { collapsedDatum.increment(fullDatum); @@ -198,11 +207,13 @@ public class RecalDataManager { */ public final void generateEmpiricalQualities(final int smoothing, final int maxQual) { - recursivelyGenerateEmpiricalQualities(dataCollapsedReadGroup.data, smoothing, maxQual); - recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.data, smoothing, maxQual); - for (NestedHashMap map : dataCollapsedByCovariate) { - recursivelyGenerateEmpiricalQualities(map.data, smoothing, maxQual); - checkForSingletons(map.data); + for( final BaseRecalibrationType errorModel : BaseRecalibrationType.values() ) { + recursivelyGenerateEmpiricalQualities(dataCollapsedReadGroup.get(errorModel).data, smoothing, maxQual); + recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.get(errorModel).data, smoothing, maxQual); + for (NestedHashMap map : dataCollapsedByCovariate.get(errorModel)) { + recursivelyGenerateEmpiricalQualities(map.data, smoothing, maxQual); + checkForSingletons(map.data); + } } } @@ -241,15 +252,15 @@ public class RecalDataManager { * @param covariate Which covariate indexes the desired collapsed HashMap * @return The desired collapsed HashMap */ - public final NestedHashMap getCollapsedTable(final int covariate) { + public final NestedHashMap getCollapsedTable(final int covariate, final BaseRecalibrationType errorModel) { if (covariate == 0) { - return dataCollapsedReadGroup; // Table where everything except read group has been collapsed + return dataCollapsedReadGroup.get(errorModel); // Table where everything except read group has been collapsed } else if (covariate == 1) { - return dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed + return dataCollapsedQualityScore.get(errorModel); // Table where everything except read group and quality score has been collapsed } else { - return dataCollapsedByCovariate.get(covariate - 2); // Table where everything except read group, quality score, and given covariate has been collapsed + return dataCollapsedByCovariate.get(errorModel).get(covariate - 2); // Table where everything except read group, quality score, and given covariate has been collapsed } } @@ -260,7 +271,7 @@ public class RecalDataManager { * @param RAC The list of shared command line arguments */ public static void parseSAMRecord(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) { - GATKSAMReadGroupRecord readGroup = ((GATKSAMRecord) read).getReadGroup(); + GATKSAMReadGroupRecord readGroup = read.getReadGroup(); if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) { readGroup.setPlatform(RAC.FORCE_PLATFORM); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java index 875782fdc..e1a7772db 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java @@ -56,7 +56,7 @@ public class ContextCovariate implements ExperimentalCovariate { } @Override - public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) { + public void getValues(final GATKSAMRecord read, final Comparable[] comparable) { byte[] bases = read.getReadBases(); for (int i = 0; i < read.getReadLength(); i++) comparable[i] = (i < CONTEXT_SIZE) ? allN : new String(Arrays.copyOfRange(bases, i - CONTEXT_SIZE, i)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index 626460be6..a99f35f45 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -378,7 +378,7 @@ public class CountCovariatesWalker extends LocusWalker dinucHashMapRef = this.dinucHashMap; //optimize access to dinucHashMap final int readLength = read.getReadLength(); final boolean negativeStrand = read.getReadNegativeStrandFlag(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java index 7b209ae5c..14ffd35a4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java @@ -82,7 +82,7 @@ public class GCContentCovariate implements ExperimentalCovariate { } @Override - public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) { + public void getValues(final GATKSAMRecord read, final Comparable[] comparable) { for (int iii = 0; iii < read.getReadLength(); iii++) { comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java index fd67edc3b..004fb0bdb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java @@ -95,7 +95,7 @@ public class HomopolymerCovariate implements ExperimentalCovariate { } @Override - public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) { + public void getValues(final GATKSAMRecord read, final Comparable[] comparable) { for (int iii = 0; iii < read.getReadLength(); iii++) { comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java index e22049890..54fa18106 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java @@ -55,7 +55,7 @@ public class MappingQualityCovariate implements ExperimentalCovariate { } @Override - public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) { + public void getValues(final GATKSAMRecord read, final Comparable[] comparable) { for (int iii = 0; iii < read.getReadLength(); iii++) { comparable[iii] = getValue(read); // BUGBUG: this can be optimized } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java index 1dfb915b9..ecaa55006 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java @@ -65,7 +65,7 @@ public class MinimumNQSCovariate implements ExperimentalCovariate { } @Override - public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) { + public void getValues(final GATKSAMRecord read, final Comparable[] comparable) { for (int iii = 0; iii < read.getReadLength(); iii++) { comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java index fbd1efc47..fd720697f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java @@ -55,7 +55,7 @@ public class PositionCovariate implements ExperimentalCovariate { } @Override - public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) { + public void getValues(final GATKSAMRecord read, final Comparable[] comparable) { for (int iii = 0; iii < read.getReadLength(); iii++) { comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java index 8dfa11884..d6bdea5bf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java @@ -62,7 +62,7 @@ public class PrimerRoundCovariate implements ExperimentalCovariate { } @Override - public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) { + public void getValues(final GATKSAMRecord read, final Comparable[] comparable) { for (int iii = 0; iii < read.getReadLength(); iii++) { comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java index 1ed4a6fe8..a29a0530c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java @@ -46,16 +46,10 @@ public class QualityScoreCovariate implements RequiredCovariate { } @Override - public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) { - if (modelType == BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION) { - byte[] baseQualities = read.getBaseQualities(); - for (int i = 0; i < read.getReadLength(); i++) { - comparable[i] = (int) baseQualities[i]; - } - } - else { // model == BASE_INSERTION || model == BASE_DELETION - Arrays.fill(comparable, 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will - // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45 + public void getValues(final GATKSAMRecord read, final Comparable[] comparable) { + byte[] baseQualities = read.getBaseQualities(); + for (int i = 0; i < read.getReadLength(); i++) { + comparable[i] = (int) baseQualities[i]; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java index 27e1d8263..33adf4417 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java @@ -44,7 +44,7 @@ public class ReadGroupCovariate implements RequiredCovariate { } @Override - public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) { + public void getValues(final GATKSAMRecord read, final Comparable[] comparable) { final String readGroupId = read.getReadGroup().getReadGroupId(); for (int i = 0; i < read.getReadLength(); i++) { comparable[i] = readGroupId; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index 311e33f8a..1a6b8cfcb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -63,7 +63,6 @@ public class RecalDataManager { public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color - private static boolean warnUserNullReadGroup = false; private static boolean warnUserNullPlatform = false; public enum SOLID_RECAL_MODE { @@ -604,7 +603,7 @@ public class RecalDataManager { * value for the ith position in the read and the jth covariate in * reqeustedCovariates list. */ - public static Comparable[][] computeCovariates(final GATKSAMRecord gatkRead, final List requestedCovariates, final BaseRecalibration.BaseRecalibrationType modelType) { + public static Comparable[][] computeCovariates(final GATKSAMRecord gatkRead, final List requestedCovariates) { //compute all covariates for this read final int numRequestedCovariates = requestedCovariates.size(); final int readLength = gatkRead.getReadLength(); @@ -613,7 +612,7 @@ public class RecalDataManager { final Comparable[] tempCovariateValuesHolder = new Comparable[readLength]; for (int i = 0; i < numRequestedCovariates; i++) { // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read - requestedCovariates.get(i).getValues(gatkRead, tempCovariateValuesHolder, modelType); + requestedCovariates.get(i).getValues(gatkRead, tempCovariateValuesHolder); for (int j = 0; j < readLength; j++) covariateValues_offset_x_covar[j][i] = tempCovariateValuesHolder[j]; // copy values into a 2D array that allows all covar types to be extracted at once for an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types. } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index cd848cd9e..08151321f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -405,7 +405,7 @@ public class TableRecalibrationWalker extends ReadWalker requestedCovariates = new ArrayList(); // List of covariates to be used in this calculation public static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*"); public static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*"); public static final String EOF_MARKER = "EOF"; private static final int MAX_QUALITY_SCORE = 65; //BUGBUG: what value to use here? - private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap(); // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values. + private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap(); // Caches the result of performSequentialQualityCalculation(...) for all sets of covariate values. public BaseRecalibration( final File RECAL_FILE ) { // Get a list of all available covariates @@ -89,7 +81,7 @@ public class BaseRecalibration { throw new UserException.MalformedFile( RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE ); } else { // Found the covariate list in input file, loop through all of them and instantiate them String[] vals = line.split(","); - for( int iii = 0; iii < vals.length - 3; iii++ ) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical + for( int iii = 0; iii < vals.length - 4; iii++ ) { // There are n-4 covariates. The last four items are ErrorModel, nObservations, nMismatch, and Qempirical boolean foundClass = false; for( Class covClass : classes ) { if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) { @@ -160,7 +152,7 @@ public class BaseRecalibration { final String[] vals = line.split(","); // Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly - if( vals.length != requestedCovariates.size() + 3 ) { // +3 because of nObservations, nMismatch, and Qempirical + if( vals.length != requestedCovariates.size() + 4 ) { // +4 because of ErrorModel, nObservations, nMismatch, and Qempirical throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with too many fields: " + line + " --Perhaps the read group string contains a comma and isn't being parsed correctly."); } @@ -172,39 +164,63 @@ public class BaseRecalibration { cov = requestedCovariates.get( iii ); key[iii] = cov.getValue( vals[iii] ); } - + final String modelString = vals[iii++]; + final RecalDataManager.BaseRecalibrationType errorModel = ( modelString.equals(CovariateKeySet.mismatchesCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION : + ( modelString.equals(CovariateKeySet.insertionsCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_INSERTION : + ( modelString.equals(CovariateKeySet.deletionsCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_DELETION : null ) ) ); + // Create a new datum using the number of observations, number of mismatches, and reported quality score final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 ); // Add that datum to all the collapsed tables which will be used in the sequential calculation - dataManager.addToAllTables( key, datum, QualityUtils.MIN_USABLE_Q_SCORE ); //BUGBUG: used to be Q5 now is Q6, probably doesn't matter + + dataManager.addToAllTables( key, datum, QualityUtils.MIN_USABLE_Q_SCORE, errorModel ); //BUGBUG: used to be Q5 now is Q6, probably doesn't matter } - public byte[] recalibrateRead( final GATKSAMRecord read, final byte[] originalQuals, final BaseRecalibrationType modelType ) { + public void recalibrateRead( final GATKSAMRecord read ) { - final byte[] recalQuals = originalQuals.clone(); - //compute all covariate values for this read - final Comparable[][] covariateValues_offset_x_covar = - RecalDataManager.computeCovariates(read, requestedCovariates, modelType); - - // For each base in the read - for( int offset = 0; offset < read.getReadLength(); offset++ ) { - - final Object[] fullCovariateKey = covariateValues_offset_x_covar[offset]; - - Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey); - if(qualityScore == null) - { - qualityScore = performSequentialQualityCalculation( fullCovariateKey ); - qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey); - } - - recalQuals[offset] = qualityScore; - } - - preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low + RecalDataManager.computeCovariates(read, requestedCovariates); + final CovariateKeySet covariateKeySet = RecalDataManager.getAllCovariateValuesFor( read ); + + for( final RecalDataManager.BaseRecalibrationType errorModel : RecalDataManager.BaseRecalibrationType.values() ) { + final byte[] originalQuals = ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION ? read.getBaseQualities() : + ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_INSERTION ? read.getBaseDeletionQualities() : + ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_DELETION ? read.getBaseDeletionQualities() : null ) ) ); + final byte[] recalQuals = originalQuals.clone(); + + // For each base in the read + for( int offset = 0; offset < read.getReadLength(); offset++ ) { - return recalQuals; + final Object[] fullCovariateKey = + ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION ? covariateKeySet.getMismatchesKeySet(offset) : + ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_INSERTION ? covariateKeySet.getInsertionsKeySet(offset) : + ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_DELETION ? covariateKeySet.getDeletionsKeySet(offset) : null ) ) ); + + Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey); + if( qualityScore == null ) { + qualityScore = performSequentialQualityCalculation( errorModel, fullCovariateKey ); + qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey); + } + + recalQuals[offset] = qualityScore; + } + + preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low + switch (errorModel) { + case BASE_SUBSTITUTION: + read.setBaseQualities( recalQuals ); + break; + case BASE_INSERTION: + read.setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, recalQuals ); + break; + case BASE_DELETION: + read.setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, recalQuals ); + break; + default: + throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel ); + } + } + } /** @@ -222,7 +238,7 @@ public class BaseRecalibration { * @param key The list of Comparables that were calculated from the covariates * @return A recalibrated quality score as a byte */ - private byte performSequentialQualityCalculation( final Object... key ) { + private byte performSequentialQualityCalculation( final RecalDataManager.BaseRecalibrationType errorModel, final Object... key ) { final byte qualFromRead = (byte)Integer.parseInt(key[1].toString()); final Object[] readGroupCollapsedKey = new Object[1]; @@ -231,7 +247,7 @@ public class BaseRecalibration { // The global quality shift (over the read group only) readGroupCollapsedKey[0] = key[0]; - final RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0).get( readGroupCollapsedKey )); + final RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0, errorModel).get( readGroupCollapsedKey )); double globalDeltaQ = 0.0; if( globalRecalDatum != null ) { final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality(); @@ -242,7 +258,7 @@ public class BaseRecalibration { // The shift in quality between reported and empirical qualityScoreCollapsedKey[0] = key[0]; qualityScoreCollapsedKey[1] = key[1]; - final RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1).get( qualityScoreCollapsedKey )); + final RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1, errorModel).get( qualityScoreCollapsedKey )); double deltaQReported = 0.0; if( qReportedRecalDatum != null ) { final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality(); @@ -256,7 +272,7 @@ public class BaseRecalibration { covariateCollapsedKey[1] = key[1]; for( int iii = 2; iii < key.length; iii++ ) { covariateCollapsedKey[2] = key[iii]; // The given covariate - final RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii).get( covariateCollapsedKey )); + final RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii, errorModel).get( covariateCollapsedKey )); if( covariateRecalDatum != null ) { deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality(); deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) ); @@ -265,18 +281,6 @@ public class BaseRecalibration { final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; return QualityUtils.boundQual( (int)Math.round(newQuality), (byte)MAX_QUALITY_SCORE ); - - // Verbose printouts used to validate with old recalibrator - //if(key.contains(null)) { - // System.out.println( key + String.format(" => %d + %.2f + %.2f + %.2f + %.2f = %d", - // qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte)); - //} - //else { - // System.out.println( String.format("%s %s %s %s => %d + %.2f + %.2f + %.2f + %.2f = %d", - // key.get(0).toString(), key.get(3).toString(), key.get(2).toString(), key.get(1).toString(), qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte) ); - //} - - //return newQualityByte; } /** @@ -291,5 +295,4 @@ public class BaseRecalibration { } } } - } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index bdcf2b210..f6b3d759c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -54,7 +54,6 @@ public class GATKSAMRecord extends BAMRecord { // Base Quality Score Recalibrator specific attribute tags public static final String BQSR_BASE_INSERTION_QUALITIES = "BI"; public static final String BQSR_BASE_DELETION_QUALITIES = "BD"; - public static final String BQSR_BASES_HAVE_BEEN_RECALIBRATED_TAG = "BR"; // the SAMRecord data we're caching private String mReadString = null; @@ -163,27 +162,6 @@ public class GATKSAMRecord extends BAMRecord { return super.equals(o); } - - @Override - public byte[] getBaseQualities() { - return super.getBaseQualities(); - /* - if( getAttribute( BQSR_BASES_HAVE_BEEN_RECALIBRATED_TAG ) != null ) { - return super.getBaseQualities(); - } else { - // if the recal data was populated in the engine then recalibrate the quality scores on the fly - if( GenomeAnalysisEngine.hasBaseRecalibration() ) { - final byte[] quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, super.getBaseQualities() ); - setBaseQualities(quals); - setAttribute( BQSR_BASES_HAVE_BEEN_RECALIBRATED_TAG, true ); - return quals; - } else { // just use the qualities that are in the read since we don't have the sufficient information to recalibrate on the fly - return super.getBaseQualities(); - } - } - */ - } - /** * Accessors for base insertion and base deletion quality scores */ @@ -191,13 +169,8 @@ public class GATKSAMRecord extends BAMRecord { byte[] quals = getByteArrayAttribute( BQSR_BASE_INSERTION_QUALITIES ); if( quals == null ) { quals = new byte[getBaseQualities().length]; - Arrays.fill(quals, (byte) 45); // allow for differing default values between BaseInsertions and BaseDeletions - // if the recal data was populated in the engine then recalibrate the quality scores on the fly - // else give default values which are flat Q45 - if( GenomeAnalysisEngine.hasBaseRecalibration() ) { - quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, quals, BaseRecalibration.BaseRecalibrationType.BASE_INSERTION ); // the original quals here are the flat base insertion/deletion quals, NOT the original base qualities - } - // add the qual array to the read so that we don't have to do the recalibration work again + Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will + // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45 setAttribute( BQSR_BASE_INSERTION_QUALITIES, quals ); } return quals; @@ -207,13 +180,8 @@ public class GATKSAMRecord extends BAMRecord { byte[] quals = getByteArrayAttribute( BQSR_BASE_DELETION_QUALITIES ); if( quals == null ) { quals = new byte[getBaseQualities().length]; - Arrays.fill(quals, (byte) 45); - // if the recal data was populated in the engine then recalibrate the quality scores on the fly - // else give default values which are flat Q45 - if( GenomeAnalysisEngine.hasBaseRecalibration() ) { - quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, quals, BaseRecalibration.BaseRecalibrationType.BASE_DELETION ); // the original quals here are the flat base insertion/deletion quals, NOT the original base qualities - } - // add the qual array to the read so that we don't have to do the recalibration work again + Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will + // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45 setAttribute( BQSR_BASE_DELETION_QUALITIES, quals ); } return quals;