diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java index e6be01b82..e5c952b76 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java @@ -80,7 +80,7 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp final NestedIntegerArray rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex); final RecalDatum rgThisDatum = createDatumObject(qual, isError); - if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it + if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it rgRecalTable.put(rgThisDatum, keys[0], eventIndex); else rgPreviousDatum.combine(rgThisDatum); @@ -126,7 +126,7 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp final NestedIntegerArray rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex); final RecalDatum rgThisDatum = createDatumObject(qual, isError); - if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it + if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it rgRecalTable.put(rgThisDatum, keys[0], eventIndex); else rgPreviousDatum.combine(rgThisDatum); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index dee470cb3..e95af71c2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -32,13 +32,11 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; -import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -299,6 +297,4 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat return table; } - - } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index ea9d0976a..30d2e24ef 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -106,26 +106,26 @@ import java.util.ArrayList; @DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) @BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN) @By(DataSource.READS) -@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file -@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality -@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta +@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file +@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality +@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta public class BaseRecalibrator extends LocusWalker implements TreeReducible { @ArgumentCollection - private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates + private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates - private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization + private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization private RecalibrationTables recalibrationTables; - private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental) + private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental) private RecalibrationEngine recalibrationEngine; private int minimumQToUse; - protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped. - protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed. - protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\ + protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped. + protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed. + protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\ private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation."; @@ -143,16 +143,16 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed if (RAC.FORCE_PLATFORM != null) RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; - if (RAC.knownSites.isEmpty() && !RAC.RUN_WITHOUT_DBSNP) // Warn the user if no dbSNP file or other variant mask was specified + if (RAC.knownSites.isEmpty() && !RAC.RUN_WITHOUT_DBSNP) // Warn the user if no dbSNP file or other variant mask was specified throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION); if (RAC.LIST_ONLY) { RecalUtils.listAvailableCovariates(logger); System.exit(0); } - RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table + RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table - Pair, ArrayList> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates + Pair, ArrayList> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates ArrayList requiredCovariates = covariates.getFirst(); ArrayList optionalCovariates = covariates.getSecond(); @@ -164,9 +164,9 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed requestedCovariates[covariateIndex++] = covariate; logger.info("The covariates being used here: "); - for (Covariate cov : requestedCovariates) { // list all the covariates being used + for (Covariate cov : requestedCovariates) { // list all the covariates being used logger.info("\t" + cov.getClass().getSimpleName()); - cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection + cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection } int numReadGroups = 0; @@ -216,12 +216,14 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed */ public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { long countedSites = 0L; - if (tracker.getValues(RAC.knownSites).size() == 0) { // Only analyze sites not present in the provided known sites + // Only analyze sites not present in the provided known sites + if (tracker.getValues(RAC.knownSites).size() == 0) { for (final PileupElement p : context.getBasePileup()) { final GATKSAMRecord read = p.getRead(); final int offset = p.getOffset(); - if (readHasBeenSkipped(read) || isLowQualityBase(read, offset)) // This read has been marked to be skipped or base is low quality (we don't recalibrate low quality bases) + // This read has been marked to be skipped or base is low quality (we don't recalibrate low quality bases) + if (readHasBeenSkipped(read) || isLowQualityBase(read, offset)) continue; if (readNotSeen(read)) { @@ -234,10 +236,12 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates)); } - if (!ReadUtils.isSOLiDRead(read) || // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it + // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it + if (!ReadUtils.isSOLiDRead(read) || RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING || RecalUtils.isColorSpaceConsistent(read, offset)) - recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); // This base finally passed all the checks for a good base, so add it to the big data hashmap + // This base finally passed all the checks for a good base, so add it to the big data hashmap + recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); } countedSites++; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java index 5459e9cfa..76a82a134 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java @@ -68,7 +68,7 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP final NestedIntegerArray rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex); final RecalDatum rgThisDatum = createDatumObject(qual, isError); - if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it + if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it rgRecalTable.put(rgThisDatum, keys[0], eventIndex); else rgPreviousDatum.combine(rgThisDatum); diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index c09eb0063..a563b18fc 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -48,9 +48,9 @@ public class BaseRecalibration { private final static int MAXIMUM_RECALIBRATED_READ_LENGTH = 5000; private final ReadCovariates readCovariates; - private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done) + private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done) private final RecalibrationTables recalibrationTables; - private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation + private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation private final boolean disableIndelQuals; private final int preserveQLessThan; @@ -76,9 +76,9 @@ public class BaseRecalibration { recalibrationTables = recalibrationReport.getRecalibrationTables(); requestedCovariates = recalibrationReport.getRequestedCovariates(); quantizationInfo = recalibrationReport.getQuantizationInfo(); - if (quantizationLevels == 0) // quantizationLevels == 0 means no quantization, preserve the quality scores + if (quantizationLevels == 0) // quantizationLevels == 0 means no quantization, preserve the quality scores quantizationInfo.noQuantization(); - else if (quantizationLevels > 0 && quantizationLevels != quantizationInfo.getQuantizationLevels()) // any other positive value means, we want a different quantization than the one pre-calculated in the recalibration report. Negative values mean the user did not provide a quantization argument, and just wnats to use what's in the report. + else if (quantizationLevels > 0 && quantizationLevels != quantizationInfo.getQuantizationLevels()) // any other positive value means, we want a different quantization than the one pre-calculated in the recalibration report. Negative values mean the user did not provide a quantization argument, and just wnats to use what's in the report. quantizationInfo.quantizeQualityScores(quantizationLevels); readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length); @@ -103,24 +103,26 @@ public class BaseRecalibration { } } - RecalUtils.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read - for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings + // Compute all covariates for the read + RecalUtils.computeCovariates(read, requestedCovariates, readCovariates); + + for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) { read.setBaseQualities(null, errorModel); continue; } final byte[] quals = read.getBaseQualities(errorModel); - final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model + final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model final int readLength = read.getReadLength(); - for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read + for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read final byte originalQualityScore = quals[offset]; - if (originalQualityScore >= preserveQLessThan) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality) - final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model - final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base + if (originalQualityScore >= preserveQLessThan) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality) + final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model + final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base quals[offset] = recalibratedQualityScore; } } @@ -152,10 +154,10 @@ public class BaseRecalibration { final double deltaQReported = calculateDeltaQReported(recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE), key, errorModel, globalDeltaQ, qualFromRead); final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables, key, errorModel, globalDeltaQ, deltaQReported, qualFromRead); - double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula - recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL + double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula + recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL - return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality + return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality } private double calculateGlobalDeltaQ(final NestedIntegerArray table, final int[] key, final EventType errorModel) { diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java index f1f702a38..d3c6c3d83 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java @@ -30,7 +30,7 @@ public class QuantizationInfo { } public QuantizationInfo(final RecalibrationTables recalibrationTables, final int quantizationLevels) { - final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution + final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution for (int i = 0; i < qualHistogram.length; i++) qualHistogram[i] = 0L; @@ -38,10 +38,10 @@ public class QuantizationInfo { for (final RecalDatum value : qualTable.getAllValues()) { final RecalDatum datum = value; - final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL ) - qualHistogram[empiricalQual] += (long) datum.getNumObservations(); // add the number of observations for every key + final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL ) + qualHistogram[empiricalQual] += (long) datum.getNumObservations(); // add the number of observations for every key } - empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities + empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities quantizeQualityScores(quantizationLevels); this.quantizationLevels = quantizationLevels; @@ -49,8 +49,8 @@ public class QuantizationInfo { public void quantizeQualityScores(int nLevels) { - QualQuantizer quantizer = new QualQuantizer(empiricalQualCounts, nLevels, QualityUtils.MIN_USABLE_Q_SCORE); // quantize the qualities to the desired number of levels - quantizedQuals = quantizer.getOriginalToQuantizedMap(); // map with the original to quantized qual map (using the standard number of levels in the RAC) + QualQuantizer quantizer = new QualQuantizer(empiricalQualCounts, nLevels, QualityUtils.MIN_USABLE_Q_SCORE); // quantize the qualities to the desired number of levels + quantizedQuals = quantizer.getOriginalToQuantizedMap(); // map with the original to quantized qual map (using the standard number of levels in the RAC) } public void noQuantization() { diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java index 8d2e799a0..20aabdb83 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java @@ -81,8 +81,8 @@ public class RecalUtils { public final static String NUMBER_OBSERVATIONS_COLUMN_NAME = "Observations"; public final static String NUMBER_ERRORS_COLUMN_NAME = "Errors"; - private final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams - private final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color + private final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams + private final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color private static boolean warnUserNullPlatform = false; private static final String SCRIPT_FILE = "BQSR.R"; @@ -111,12 +111,13 @@ public class RecalUtils { final List> requiredClasses = new PluginManager(RequiredCovariate.class).getPlugins(); final List> standardClasses = new PluginManager(StandardCovariate.class).getPlugins(); - final ArrayList requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates + final ArrayList requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates ArrayList optionalCovariates = new ArrayList(); if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES) - optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user + optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user - if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified + // parse the -cov arguments that were provided, skipping over the ones already specified + if (argumentCollection.COVARIATES != null) { for (String requestedCovariateString : argumentCollection.COVARIATES) { // help the transition from BQSR v1 to BQSR v2 if ( requestedCovariateString.equals("DinucCovariate") ) @@ -126,12 +127,12 @@ public class RecalUtils { boolean foundClass = false; for (Class covClass : covariateClasses) { - if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class + if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class foundClass = true; if (!requiredClasses.contains(covClass) && (argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) { try { - final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it + final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it optionalCovariates.add(covariate); } catch (Exception e) { throw new DynamicClassResolutionException(covClass, e); @@ -161,7 +162,7 @@ public class RecalUtils { if (classes.size() != 2) throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected"); - dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next. + dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next. dest.add(new QualityScoreCovariate()); return dest; } @@ -266,20 +267,20 @@ public class RecalUtils { for (int tableIndex = 0; tableIndex < recalibrationTables.numTables(); tableIndex++) { - final ArrayList> columnNames = new ArrayList>(); // initialize the array to hold the column names - columnNames.add(new Pair(covariateNameMap.get(requestedCovariates[0]), "%s")); // save the required covariate name so we can reference it in the future + final ArrayList> columnNames = new ArrayList>(); // initialize the array to hold the column names + columnNames.add(new Pair(covariateNameMap.get(requestedCovariates[0]), "%s")); // save the required covariate name so we can reference it in the future if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.index) { - columnNames.add(new Pair(covariateNameMap.get(requestedCovariates[1]), "%s")); // save the required covariate name so we can reference it in the future + columnNames.add(new Pair(covariateNameMap.get(requestedCovariates[1]), "%s")); // save the required covariate name so we can reference it in the future if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) { columnNames.add(covariateValue); columnNames.add(covariateName); } } - columnNames.add(eventType); // the order of these column names is important here + columnNames.add(eventType); // the order of these column names is important here columnNames.add(empiricalQuality); if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index) - columnNames.add(estimatedQReported); // only the read group table needs the estimated Q reported + columnNames.add(estimatedQReported); // only the read group table needs the estimated Q reported columnNames.add(nObservations); columnNames.add(nErrors); @@ -288,7 +289,7 @@ public class RecalUtils { reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size()); for (final Pair columnName : columnNames) reportTable.addColumn(columnName.getFirst(), columnName.getSecond()); - rowIndex = 0; // reset the row index since we're starting with a new table + rowIndex = 0; // reset the row index since we're starting with a new table } else { reportTable = result.get(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index); } @@ -316,7 +317,7 @@ public class RecalUtils { reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality()); if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index) - reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table + reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), Math.round(datum.getNumObservations())); reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), Math.round(datum.getNumMismatches())); @@ -349,7 +350,6 @@ public class RecalUtils { return Utils.join(",", names); } - public static void outputRecalibrationReport(final GATKReportTable argumentTable, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) { outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile); } @@ -410,13 +410,13 @@ public class RecalUtils { // add the quality score table to the delta table final NestedIntegerArray qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); - for (final NestedIntegerArray.Leaf leaf : qualTable.getAllLeaves()) { // go through every element in the covariates table to create the delta table + for (final NestedIntegerArray.Leaf leaf : qualTable.getAllLeaves()) { // go through every element in the covariates table to create the delta table final int[] newCovs = new int[4]; newCovs[0] = leaf.keys[0]; - newCovs[1] = requestedCovariates.length; // replace the covariate name with an arbitrary (unused) index for QualityScore + newCovs[1] = requestedCovariates.length; // replace the covariate name with an arbitrary (unused) index for QualityScore newCovs[2] = leaf.keys[1]; newCovs[3] = leaf.keys[2]; - addToDeltaTable(deltaTable, newCovs, (RecalDatum)leaf.value); // add this covariate to the delta table + addToDeltaTable(deltaTable, newCovs, (RecalDatum)leaf.value); // add this covariate to the delta table } // add the optional covariates to the delta table @@ -425,10 +425,10 @@ public class RecalUtils { for (final NestedIntegerArray.Leaf leaf : covTable.getAllLeaves()) { final int[] covs = new int[4]; covs[0] = leaf.keys[0]; - covs[1] = i; // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS) + covs[1] = i; // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS) covs[2] = leaf.keys[2]; covs[3] = leaf.keys[3]; - addToDeltaTable(deltaTable, covs, (RecalDatum) leaf.value); // add this covariate to the delta table + addToDeltaTable(deltaTable, covs, (RecalDatum) leaf.value); // add this covariate to the delta table } } @@ -486,11 +486,11 @@ public class RecalUtils { */ private static void addToDeltaTable(final NestedHashMap deltaTable, final int[] deltaKey, final RecalDatum recalDatum) { Object[] wrappedKey = wrapKeys(deltaKey); - final RecalDatum deltaDatum = (RecalDatum)deltaTable.get(wrappedKey); // check if we already have a RecalDatum for this key + final RecalDatum deltaDatum = (RecalDatum)deltaTable.get(wrappedKey); // check if we already have a RecalDatum for this key if (deltaDatum == null) - deltaTable.put(new RecalDatum(recalDatum), wrappedKey); // if we don't have a key yet, create a new one with the same values as the curent datum + deltaTable.put(new RecalDatum(recalDatum), wrappedKey); // if we don't have a key yet, create a new one with the same values as the curent datum else - deltaDatum.combine(recalDatum); // if we do have a datum, combine it with this one. + deltaDatum.combine(recalDatum); // if we do have a datum, combine it with this one. } private static Object[] wrapKeys(final int[] keys) { @@ -539,10 +539,11 @@ public class RecalUtils { * @return true if this read is consistent or false if this read should be skipped */ public static boolean isColorSpaceConsistent(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) { - if (!ReadUtils.isSOLiDRead(read)) // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base + if (!ReadUtils.isSOLiDRead(read)) // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base return true; - if (read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read + // Haven't calculated the inconsistency array yet for this read + if (read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG) == null) { final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG); if (attr != null) { byte[] colorSpace; @@ -562,13 +563,13 @@ public class RecalUtils { } } - byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read + byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read if (read.getReadNegativeStrandFlag()) readBases = BaseUtils.simpleReverseComplement(read.getReadBases()); final byte[] inconsistency = new byte[readBases.length]; int i; - byte prevBase = colorSpace[0]; // The sentinel + byte prevBase = colorSpace[0]; // The sentinel for (i = 0; i < readBases.length; i++) { final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[i + 1]); inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1); @@ -576,11 +577,11 @@ public class RecalUtils { } read.setAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency); } - else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it + else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias."); else - return false; // otherwise, just skip the read + return false; // otherwise, just skip the read } return true; @@ -774,6 +775,4 @@ public class RecalUtils { return base; } } - - } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java index e6ab9e38b..271c07649 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java @@ -19,13 +19,13 @@ import java.util.*; * @since 3/26/12 */ public class RecalibrationReport { - private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done) - private final RecalibrationTables recalibrationTables; // quick access reference to the tables - private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation + private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done) + private final RecalibrationTables recalibrationTables; // quick access reference to the tables + private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation private final HashMap optionalCovariateIndexes; - private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes - private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter + private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes + private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter private final int[] tempRGarray = new int[2]; private final int[] tempQUALarray = new int[3]; @@ -40,7 +40,7 @@ public class RecalibrationReport { GATKReportTable quantizedTable = report.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE); quantizationInfo = initializeQuantizationTable(quantizedTable); - Pair, ArrayList> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates + Pair, ArrayList> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates ArrayList requiredCovariates = covariates.getFirst(); ArrayList optionalCovariates = covariates.getSecond(); requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()]; @@ -50,13 +50,13 @@ public class RecalibrationReport { requestedCovariates[covariateIndex++] = covariate; for (final Covariate covariate : optionalCovariates) { requestedCovariates[covariateIndex] = covariate; - final String covariateName = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport + final String covariateName = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport optionalCovariateIndexes.put(covariateName, covariateIndex-2); covariateIndex++; } for (Covariate cov : requestedCovariates) - cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection + cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE))); @@ -198,9 +198,10 @@ public class RecalibrationReport { final long nErrors = (Long) reportTable.get(row, RecalUtils.NUMBER_ERRORS_COLUMN_NAME); final double empiricalQuality = (Double) reportTable.get(row, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME); - final double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table - (Double) reportTable.get(row, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table - Byte.parseByte((String) reportTable.get(row, RecalUtils.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table + // the estimatedQreported column only exists in the ReadGroup table + final double estimatedQReported = hasEstimatedQReportedColumn ? + (Double) reportTable.get(row, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table + Byte.parseByte((String) reportTable.get(row, RecalUtils.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table final RecalDatum datum = new RecalDatum(nObservations, nErrors, (byte)1); datum.setEstimatedQReported(estimatedQReported); @@ -242,7 +243,7 @@ public class RecalibrationReport { final String argument = table.get(i, "Argument").toString(); Object value = table.get(i, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME); if (value.equals("null")) - value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport + value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport if (argument.equals("covariate") && value != null) RAC.COVARIATES = value.toString().split(","); diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java index 570944245..5e470b35f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java @@ -87,7 +87,8 @@ public class ContextCovariate implements StandardCovariate { // store the original bases and then write Ns over low quality ones final byte[] originalBases = read.getReadBases().clone(); - final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context + // Write N's over the low quality tail of the reads to avoid adding them into the context + final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag(); byte[] bases = clippedRead.getReadBases(); @@ -115,7 +116,7 @@ public class ContextCovariate implements StandardCovariate { @Override public String formatKey(final int key) { - if (key == -1) // this can only happen in test routines because we do not propagate null keys to the csv file + if (key == -1) // this can only happen in test routines because we do not propagate null keys to the csv file return null; return contextFromKey(key); @@ -176,9 +177,9 @@ public class ContextCovariate implements StandardCovariate { for (int currentIndex = contextSize; currentIndex < readLength; currentIndex++) { final int baseIndex = BaseUtils.simpleBaseToBaseIndex(bases[currentIndex]); - if (baseIndex == -1) { // ignore non-ACGT bases + if (baseIndex == -1) { // ignore non-ACGT bases currentNPenalty = contextSize; - currentKey = 0; // reset the key + currentKey = 0; // reset the key } else { // push this base's contribution onto the key: shift everything 2 bits, mask out the non-context bits, and add the new base and the length in currentKey = (currentKey >> 2) & mask; @@ -215,7 +216,7 @@ public class ContextCovariate implements StandardCovariate { int bitOffset = LENGTH_BITS; for (int i = start; i < end; i++) { final int baseIndex = BaseUtils.simpleBaseToBaseIndex(dna[i]); - if (baseIndex == -1) // ignore non-ACGT bases + if (baseIndex == -1) // ignore non-ACGT bases return -1; key |= (baseIndex << bitOffset); bitOffset += 2; @@ -233,15 +234,15 @@ public class ContextCovariate implements StandardCovariate { if (key < 0) throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?"); - final int length = key & LENGTH_MASK; // the first bits represent the length (in bp) of the context - int mask = 48; // use the mask to pull out bases + final int length = key & LENGTH_MASK; // the first bits represent the length (in bp) of the context + int mask = 48; // use the mask to pull out bases int offset = LENGTH_BITS; StringBuilder dna = new StringBuilder(); for (int i = 0; i < length; i++) { final int baseIndex = (key & mask) >> offset; dna.append((char)BaseUtils.baseIndexToSimpleBase(baseIndex)); - mask = mask << 2; // move the mask over to the next 2 bits + mask = mask << 2; // move the mask over to the next 2 bits offset += 2; } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java index cdf12d284..5d0d94b69 100755 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java @@ -108,7 +108,7 @@ public class CycleCovariate implements StandardCovariate { // the current sequential model would consider the effects independently instead of jointly. final boolean multiplyByNegative1 = read.getReadPairedFlag() && read.getSecondOfPairFlag(); - int cycle = multiplyByNegative1 ? -1 : 1; // todo -- check if this is the right behavior for mate paired reads in flow cycle platforms. + int cycle = multiplyByNegative1 ? -1 : 1; // todo -- check if this is the right behavior for mate paired reads in flow cycle platforms. // BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change // For example, AAAAAAA was probably read in two flow cycles but here we count it as one @@ -201,9 +201,9 @@ public class CycleCovariate implements StandardCovariate { @Override public String formatKey(final int key) { - int cycle = key >> 1; // shift so we can remove the "sign" bit - if ( (key & 1) != 0 ) // is the last bit set? - cycle *= -1; // then the cycle is negative + int cycle = key >> 1; // shift so we can remove the "sign" bit + if ( (key & 1) != 0 ) // is the last bit set? + cycle *= -1; // then the cycle is negative return String.format("%d", cycle); } @@ -222,7 +222,7 @@ public class CycleCovariate implements StandardCovariate { int result = Math.abs(cycle); result = result << 1; // shift so we can add the "sign" bit if ( cycle < 0 ) - result++; // negative cycles get the lower-most bit set + result++; // negative cycles get the lower-most bit set return result; } } \ No newline at end of file