From e00ed8bc5e2e2c9bf7376c4926fe1359de1a738c Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 27 Jul 2012 12:25:06 -0400 Subject: [PATCH] Cleanup BQSR classes -- Moved most of BQSR classes (which are used throughout the codebase) to utils.recalibration. It's better in my opinion to keep commonly used code in utils, and only specialized code in walkers. As code becomes embedded throughout GATK its should be refactored to live in utils -- Removed unncessary imports of BQSR in VQSR v3 -- Now ready to refactor QualQuantizer and unit test into a subclass of RecalDatum, refactor unit tests into RecalDatum unit tests, and generalize into hierarchical recal datum that can be used in QualQuantizer and the analysis of adaptive context covariate -- Update PluginManager to sort the plugins and interfaces. This allows us to have a deterministic order in which the plugin classes come back, which caused BQSR integration tests to temporarily change because I moved my classes around a bit. --- .../bqsr/AdvancedRecalibrationEngine.java | 4 + .../reducereads/SyntheticRead.java | 2 +- .../sting/gatk/walkers/bqsr/BQSRGatherer.java | 6 +- .../gatk/walkers/bqsr/BaseRecalibrator.java | 26 +- .../bqsr/RecalibrationArgumentCollection.java | 41 ++-- .../walkers/bqsr/RecalibrationEngine.java | 1 + .../bqsr/StandardRecalibrationEngine.java | 4 + .../utils/classloader/PluginManager.java | 22 ++ .../sting/utils/clipping/ClippingOp.java | 2 +- .../sting/utils/clipping/ReadClipper.java | 2 +- .../sting/utils/fragments/FragmentUtils.java | 2 +- .../recalibration/BaseRecalibration.java | 4 +- .../recalibration}/EventType.java | 2 +- .../recalibration}/QuantizationInfo.java | 20 +- .../recalibration}/ReadCovariates.java | 2 +- .../recalibration}/RecalDatum.java | 60 +++-- .../recalibration/RecalUtils.java} | 226 +++++++++--------- .../recalibration}/RecalibrationReport.java | 63 ++--- .../recalibration/RecalibrationTables.java | 4 +- .../covariates}/BinaryTagCovariate.java | 4 +- .../covariates}/ContextCovariate.java | 4 +- .../recalibration/covariates}/Covariate.java | 9 +- .../covariates}/CycleCovariate.java | 4 +- .../covariates/ExperimentalCovariate.java | 30 +++ .../covariates}/QualityScoreCovariate.java | 4 +- .../covariates}/ReadGroupCovariate.java | 4 +- .../covariates/RequiredCovariate.java | 30 +++ .../covariates/StandardCovariate.java | 30 +++ .../sting/utils/sam/GATKSAMRecord.java | 2 +- .../walkers/bqsr/BQSRGathererUnitTest.java | 13 +- .../ContextCovariateUnitTest.java | 5 +- .../CycleCovariateUnitTest.java | 4 +- .../ReadCovariatesUnitTest.java | 6 +- .../ReadGroupCovariateUnitTest.java | 4 +- .../RecalibrationReportUnitTest.java | 7 +- 35 files changed, 413 insertions(+), 240 deletions(-) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/EventType.java (96%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/QuantizationInfo.java (78%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/ReadCovariates.java (97%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/RecalDatum.java (77%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr/RecalDataManager.java => utils/recalibration/RecalUtils.java} (96%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/RecalibrationReport.java (82%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration/covariates}/BinaryTagCovariate.java (89%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration/covariates}/ContextCovariate.java (98%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration/covariates}/Covariate.java (94%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration/covariates}/CycleCovariate.java (97%) create mode 100644 public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration/covariates}/QualityScoreCovariate.java (92%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration/covariates}/ReadGroupCovariate.java (94%) create mode 100644 public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java rename public/java/test/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/ContextCovariateUnitTest.java (89%) rename public/java/test/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/CycleCovariateUnitTest.java (90%) rename public/java/test/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/ReadCovariatesUnitTest.java (92%) rename public/java/test/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/ReadGroupCovariateUnitTest.java (88%) rename public/java/test/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/RecalibrationReportUnitTest.java (95%) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java index 9eca81852..d714ca185 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java @@ -25,10 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; * OTHER DEALINGS IN THE SOFTWARE. */ +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.recalibration.EventType; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.utils.recalibration.RecalDatum; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java index 9ee1a4634..3b9fc0390 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java @@ -5,7 +5,7 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMFileHeader; -import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; +import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java index 122958ac2..a6d82d5b3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java @@ -28,6 +28,8 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.commandline.Gatherer; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.recalibration.RecalUtils; +import org.broadinstitute.sting.utils.recalibration.RecalibrationReport; import java.io.File; import java.io.FileNotFoundException; @@ -71,11 +73,11 @@ public class BQSRGatherer extends Gatherer { if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) { final File recal_out = new File(output.getName() + ".original"); final RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport); - RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES); + RecalUtils.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES); } else if (!RAC.NO_PLOTS) { final File recal_out = new File(output.getName() + ".recal"); - RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES); + RecalUtils.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES); } generalReport.output(outputFile); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index b2400a49d..e95a0e6c1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter; import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.GATKLiteUtils; import org.broadinstitute.sting.utils.collections.Pair; @@ -41,6 +42,9 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.recalibration.QuantizationInfo; +import org.broadinstitute.sting.utils.recalibration.RecalUtils; +import org.broadinstitute.sting.utils.recalibration.RecalibrationReport; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -109,7 +113,7 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed @ArgumentCollection private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates - private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization + private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization private RecalibrationTables recalibrationTables; @@ -143,12 +147,12 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION); if (RAC.LIST_ONLY) { - RecalDataManager.listAvailableCovariates(logger); + RecalUtils.listAvailableCovariates(logger); System.exit(0); } RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table - Pair, ArrayList> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates + Pair, ArrayList> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates ArrayList requiredCovariates = covariates.getFirst(); ArrayList optionalCovariates = covariates.getSecond(); @@ -222,17 +226,17 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed if (readNotSeen(read)) { read.setTemporaryAttribute(SEEN_ATTRIBUTE, true); - RecalDataManager.parsePlatformForRead(read, RAC); - if (RecalDataManager.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) { + RecalUtils.parsePlatformForRead(read, RAC); + if (RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) { read.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true); continue; } - read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(read, requestedCovariates)); + read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates)); } if (!ReadUtils.isSOLiDRead(read) || // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it - RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING || - RecalDataManager.isColorSpaceConsistent(read, offset)) + RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING || + RecalUtils.isColorSpaceConsistent(read, offset)) recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); // This base finally passed all the checks for a good base, so add it to the big data hashmap } countedSites++; @@ -285,10 +289,10 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE; if (recalFile != null) { RecalibrationReport report = new RecalibrationReport(recalFile); - RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES); + RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES); } else - RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES); + RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES); } @@ -309,7 +313,7 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_FILE, "could not be created"); } - RecalDataManager.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output); + RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index 2a94426a7..f04e4a1b3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -29,6 +29,7 @@ import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.recalibration.RecalUtils; import java.io.File; import java.util.Collections; @@ -100,7 +101,7 @@ public class RecalibrationArgumentCollection { * reads which have had the reference inserted because of color space inconsistencies. */ @Argument(fullName = "solid_recal_mode", shortName = "sMode", required = false, doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS") - public RecalDataManager.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.SET_Q_ZERO; + public RecalUtils.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.SET_Q_ZERO; /** * CountCovariates and TableRecalibration accept a --solid_nocall_strategy flag which governs how the recalibrator handles @@ -108,7 +109,7 @@ public class RecalibrationArgumentCollection { * their color space tag can not be recalibrated. */ @Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false) - public RecalDataManager.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION; + public RecalUtils.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION; /** * The context covariate will use a context of this size to calculate it's covariate value for base mismatches @@ -177,41 +178,41 @@ public class RecalibrationArgumentCollection { public GATKReportTable generateReportTable() { GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2); argumentsTable.addColumn("Argument"); - argumentsTable.addColumn(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME); + argumentsTable.addColumn(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME); argumentsTable.addRowID("covariate", true); - argumentsTable.set("covariate", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES)); + argumentsTable.set("covariate", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES)); argumentsTable.addRowID("no_standard_covs", true); - argumentsTable.set("no_standard_covs", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES); + argumentsTable.set("no_standard_covs", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES); argumentsTable.addRowID("run_without_dbsnp", true); - argumentsTable.set("run_without_dbsnp", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP); + argumentsTable.set("run_without_dbsnp", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP); argumentsTable.addRowID("solid_recal_mode", true); - argumentsTable.set("solid_recal_mode", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE); + argumentsTable.set("solid_recal_mode", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE); argumentsTable.addRowID("solid_nocall_strategy", true); - argumentsTable.set("solid_nocall_strategy", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY); + argumentsTable.set("solid_nocall_strategy", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY); argumentsTable.addRowID("mismatches_context_size", true); - argumentsTable.set("mismatches_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE); + argumentsTable.set("mismatches_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE); argumentsTable.addRowID("indels_context_size", true); - argumentsTable.set("indels_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE); + argumentsTable.set("indels_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE); argumentsTable.addRowID("mismatches_default_quality", true); - argumentsTable.set("mismatches_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY); + argumentsTable.set("mismatches_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY); argumentsTable.addRowID("insertions_default_quality", true); - argumentsTable.set("insertions_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY); + argumentsTable.set("insertions_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY); argumentsTable.addRowID("low_quality_tail", true); - argumentsTable.set("low_quality_tail", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL); + argumentsTable.set("low_quality_tail", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL); argumentsTable.addRowID("default_platform", true); - argumentsTable.set("default_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM); + argumentsTable.set("default_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM); argumentsTable.addRowID("force_platform", true); - argumentsTable.set("force_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM); + argumentsTable.set("force_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM); argumentsTable.addRowID("quantizing_levels", true); - argumentsTable.set("quantizing_levels", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS); + argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS); argumentsTable.addRowID("keep_intermediate_files", true); - argumentsTable.set("keep_intermediate_files", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES); + argumentsTable.set("keep_intermediate_files", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES); argumentsTable.addRowID("no_plots", true); - argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS); + argumentsTable.set("no_plots", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS); argumentsTable.addRowID("recalibration_report", true); - argumentsTable.set("recalibration_report", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath()); + argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath()); argumentsTable.addRowID("binary_tag_name", true); - argumentsTable.set("binary_tag_name", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME); + argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME); return argumentsTable; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java index aa62a18bc..38e306939 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java index a24506d07..08c7da754 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java @@ -25,10 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; * OTHER DEALINGS IN THE SOFTWARE. */ +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.classloader.PublicPackageSource; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.recalibration.EventType; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.utils.recalibration.RecalDatum; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java index f24bbb636..9a2cb68db 100644 --- a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java @@ -168,6 +168,28 @@ public class PluginManager { String pluginName = getName(pluginClass); pluginsByName.put(pluginName, pluginClass); } + + // sort the plugins so the order of elements is deterministic + sortPlugins(plugins); + sortPlugins(interfaces); + } + + /** + * Sorts, in place, the list of plugins according to getName() on each element + * + * @param unsortedPlugins + */ + private final void sortPlugins(final List> unsortedPlugins) { + Collections.sort(unsortedPlugins, new ComparePluginsByName()); + } + + private final class ComparePluginsByName implements Comparator> { + @Override + public int compare(final Class aClass, final Class aClass1) { + String pluginName1 = getName(aClass); + String pluginName2 = getName(aClass1); + return pluginName1.compareTo(pluginName2); + } } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java index a4383c3ae..554188bc1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java @@ -4,7 +4,7 @@ import com.google.java.contract.Requires; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; -import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; +import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java index ba9267222..6392ce4ce 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.utils.clipping; import com.google.java.contract.Requires; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; -import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; +import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; diff --git a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java index c6eec24f1..851272673 100644 --- a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java @@ -4,7 +4,7 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; +import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index b5f7ad046..c09eb0063 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -27,7 +27,7 @@ package org.broadinstitute.sting.utils.recalibration; import net.sf.samtools.SAMTag; import net.sf.samtools.SAMUtils; -import org.broadinstitute.sting.gatk.walkers.bqsr.*; +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; @@ -103,7 +103,7 @@ public class BaseRecalibration { } } - RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read + RecalUtils.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) { read.setBaseQualities(null, errorModel); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/EventType.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/EventType.java similarity index 96% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/EventType.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/EventType.java index 2650f0f8d..1c84518eb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/EventType.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/EventType.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java similarity index 78% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java index fb3aef949..2b67d12a9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java @@ -1,11 +1,9 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; -import org.broadinstitute.sting.utils.recalibration.QualQuantizer; -import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import java.util.Arrays; import java.util.List; @@ -41,7 +39,7 @@ public class QuantizationInfo { for (final RecalDatum value : qualTable.getAllValues()) { final RecalDatum datum = value; final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL ) - qualHistogram[empiricalQual] += datum.numObservations; // add the number of observations for every key + qualHistogram[empiricalQual] += datum.getNumObservations(); // add the number of observations for every key } empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities quantizeQualityScores(quantizationLevels); @@ -70,15 +68,15 @@ public class QuantizationInfo { } public GATKReportTable generateReportTable() { - GATKReportTable quantizedTable = new GATKReportTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3); - quantizedTable.addColumn(RecalDataManager.QUALITY_SCORE_COLUMN_NAME); - quantizedTable.addColumn(RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME); - quantizedTable.addColumn(RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME); + GATKReportTable quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3); + quantizedTable.addColumn(RecalUtils.QUALITY_SCORE_COLUMN_NAME); + quantizedTable.addColumn(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME); + quantizedTable.addColumn(RecalUtils.QUANTIZED_VALUE_COLUMN_NAME); for (int qual = 0; qual <= QualityUtils.MAX_QUAL_SCORE; qual++) { - quantizedTable.set(qual, RecalDataManager.QUALITY_SCORE_COLUMN_NAME, qual); - quantizedTable.set(qual, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual)); - quantizedTable.set(qual, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual)); + quantizedTable.set(qual, RecalUtils.QUALITY_SCORE_COLUMN_NAME, qual); + quantizedTable.set(qual, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual)); + quantizedTable.set(qual, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual)); } return quantizedTable; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java similarity index 97% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java index 5e907237d..c86bd4deb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; /** * The object temporarily held by a read that describes all of it's covariates. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java similarity index 77% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java index ed4e769b1..43ff54378 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; /* * Copyright (c) 2009 The Broad Institute @@ -56,12 +56,12 @@ public class RecalDatum { /** * number of bases seen in total */ - long numObservations; + private long numObservations; /** * number of bases seen that didn't match the reference */ - long numMismatches; + private long numMismatches; /** * used when calculating empirical qualities to avoid division by zero @@ -82,16 +82,16 @@ public class RecalDatum { } public RecalDatum(final RecalDatum copy) { - this.numObservations = copy.numObservations; - this.numMismatches = copy.numMismatches; + this.numObservations = copy.getNumObservations(); + this.numMismatches = copy.getNumMismatches(); this.estimatedQReported = copy.estimatedQReported; this.empiricalQuality = copy.empiricalQuality; } public synchronized void combine(final RecalDatum other) { final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors(); - increment(other.numObservations, other.numMismatches); - estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations); + increment(other.getNumObservations(), other.getNumMismatches()); + estimatedQReported = -10 * Math.log10(sumErrors / getNumObservations()); empiricalQuality = UNINITIALIZED; } @@ -100,7 +100,8 @@ public class RecalDatum { private synchronized final void calcEmpiricalQuality() { // cache the value so we don't call log over and over again final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT); - final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT); + // smoothing is one error and one non-error observation, for example + final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations); empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE); } @@ -125,7 +126,7 @@ public class RecalDatum { @Override public String toString() { - return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality())); + return String.format("%d,%d,%d", getNumObservations(), getNumMismatches(), (byte) Math.floor(getEmpiricalQuality())); } public String stringForCSV() { @@ -133,7 +134,7 @@ public class RecalDatum { } private double calcExpectedErrors() { - return (double) this.numObservations * qualToErrorProb(estimatedQReported); + return (double) getNumObservations() * qualToErrorProb(estimatedQReported); } private double qualToErrorProb(final double qual) { @@ -170,15 +171,42 @@ public class RecalDatum { // //--------------------------------------------------------------------------------------------------------------- - synchronized void increment(final long incObservations, final long incMismatches) { - numObservations += incObservations; - numMismatches += incMismatches; + public long getNumObservations() { + return numObservations; + } + + public synchronized void setNumObservations(final long numObservations) { + this.numObservations = numObservations; empiricalQuality = UNINITIALIZED; } - synchronized void increment(final boolean isError) { - numObservations++; - numMismatches += isError ? 1:0; + public long getNumMismatches() { + return numMismatches; + } + + public synchronized void setNumMismatches(final long numMismatches) { + this.numMismatches = numMismatches; empiricalQuality = UNINITIALIZED; } + + public synchronized void incrementNumObservations(final long by) { + numObservations += by; + empiricalQuality = UNINITIALIZED; + } + + public synchronized void incrementNumMismatches(final long by) { + numMismatches += by; + empiricalQuality = UNINITIALIZED; + } + + public synchronized void increment(final long incObservations, final long incMismatches) { + incrementNumObservations(incObservations); + incrementNumMismatches(incMismatches); + } + + public synchronized void increment(final boolean isError) { + incrementNumObservations(1); + if ( isError ) + incrementNumMismatches(1); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java similarity index 96% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java index f40a62d53..fe6ef7018 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java @@ -23,11 +23,13 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.recalibration.covariates.*; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.R.RScriptExecutor; import org.broadinstitute.sting.utils.Utils; @@ -39,7 +41,6 @@ import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.io.Resource; -import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -59,7 +60,7 @@ import java.util.*; * This class holds the parsing methods that are shared between CountCovariates and TableRecalibration. */ -public class RecalDataManager { +public class RecalUtils { public final static String ARGUMENT_REPORT_TABLE_TITLE = "Arguments"; public final static String QUANTIZED_REPORT_TABLE_TITLE = "Quantized"; public final static String READGROUP_REPORT_TABLE_TITLE = "RecalTable0"; @@ -85,13 +86,108 @@ public class RecalDataManager { private static final String SCRIPT_FILE = "BQSR.R"; - private static final Pair covariateValue = new Pair(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME, "%s"); - private static final Pair covariateName = new Pair(RecalDataManager.COVARIATE_NAME_COLUMN_NAME, "%s"); - private static final Pair eventType = new Pair(RecalDataManager.EVENT_TYPE_COLUMN_NAME, "%s"); - private static final Pair empiricalQuality = new Pair(RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f"); - private static final Pair estimatedQReported = new Pair(RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f"); - private static final Pair nObservations = new Pair(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d"); - private static final Pair nErrors = new Pair(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME, "%d"); + private static final Pair covariateValue = new Pair(RecalUtils.COVARIATE_VALUE_COLUMN_NAME, "%s"); + private static final Pair covariateName = new Pair(RecalUtils.COVARIATE_NAME_COLUMN_NAME, "%s"); + private static final Pair eventType = new Pair(RecalUtils.EVENT_TYPE_COLUMN_NAME, "%s"); + private static final Pair empiricalQuality = new Pair(RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f"); + private static final Pair estimatedQReported = new Pair(RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f"); + private static final Pair nObservations = new Pair(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d"); + private static final Pair nErrors = new Pair(RecalUtils.NUMBER_ERRORS_COLUMN_NAME, "%d"); + + /** + * Generates two lists : required covariates and optional covariates based on the user's requests. + * + * Performs the following tasks in order: + * 1. Adds all requierd covariates in order + * 2. Check if the user asked to use the standard covariates and adds them all if that's the case + * 3. Adds all covariates requested by the user that were not already added by the two previous steps + * + * @param argumentCollection the argument collection object for the recalibration walker + * @return a pair of ordered lists : required covariates (first) and optional covariates (second) + */ + public static Pair, ArrayList> initializeCovariates(RecalibrationArgumentCollection argumentCollection) { + final List> covariateClasses = new PluginManager(Covariate.class).getPlugins(); + final List> requiredClasses = new PluginManager(RequiredCovariate.class).getPlugins(); + final List> standardClasses = new PluginManager(StandardCovariate.class).getPlugins(); + + final ArrayList requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates + ArrayList optionalCovariates = new ArrayList(); + if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES) + optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user + + if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified + for (String requestedCovariateString : argumentCollection.COVARIATES) { + boolean foundClass = false; + for (Class covClass : covariateClasses) { + if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class + foundClass = true; + if (!requiredClasses.contains(covClass) && + (argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) { + try { + final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it + optionalCovariates.add(covariate); + } catch (Exception e) { + throw new DynamicClassResolutionException(covClass, e); + } + } + } + } + + if (!foundClass) { + throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates."); + } + } + } + return new Pair, ArrayList>(requiredCovariates, optionalCovariates); + } + + /** + * Adds the required covariates to a covariate list + * + * Note: this method really only checks if the classes object has the expected number of required covariates, then add them by hand. + * + * @param classes list of classes to add to the covariate list + * @return the covariate list + */ + private static ArrayList addRequiredCovariatesToList(List> classes) { + ArrayList dest = new ArrayList(classes.size()); + if (classes.size() != 2) + throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected"); + + dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next. + dest.add(new QualityScoreCovariate()); + return dest; + } + + /** + * Adds the standard covariates to a covariate list + * + * @param classes list of classes to add to the covariate list + * @return the covariate list + */ + private static ArrayList addStandardCovariatesToList(List> classes) { + ArrayList dest = new ArrayList(classes.size()); + for (Class covClass : classes) { + try { + final Covariate covariate = (Covariate) covClass.newInstance(); + dest.add(covariate); + } catch (Exception e) { + throw new DynamicClassResolutionException(covClass, e); + } + } + return dest; + } + + public static void listAvailableCovariates(Logger logger) { + // Get a list of all available covariates + final List> covariateClasses = new PluginManager(Covariate.class).getPlugins(); + + // Print and exit if that's what was requested + logger.info("Available covariates:"); + for (Class covClass : covariateClasses) + logger.info(covClass.getSimpleName()); + logger.info(""); + } public enum SOLID_RECAL_MODE { @@ -152,64 +248,6 @@ public class RecalDataManager { } } - /** - * Generates two lists : required covariates and optional covariates based on the user's requests. - * - * Performs the following tasks in order: - * 1. Adds all requierd covariates in order - * 2. Check if the user asked to use the standard covariates and adds them all if that's the case - * 3. Adds all covariates requested by the user that were not already added by the two previous steps - * - * @param argumentCollection the argument collection object for the recalibration walker - * @return a pair of ordered lists : required covariates (first) and optional covariates (second) - */ - public static Pair, ArrayList> initializeCovariates(RecalibrationArgumentCollection argumentCollection) { - final List> covariateClasses = new PluginManager(Covariate.class).getPlugins(); - final List> requiredClasses = new PluginManager(RequiredCovariate.class).getPlugins(); - final List> standardClasses = new PluginManager(StandardCovariate.class).getPlugins(); - - final ArrayList requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates - ArrayList optionalCovariates = new ArrayList(); - if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES) - optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user - - if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified - for (String requestedCovariateString : argumentCollection.COVARIATES) { - boolean foundClass = false; - for (Class covClass : covariateClasses) { - if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class - foundClass = true; - if (!requiredClasses.contains(covClass) && - (argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) { - try { - final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it - optionalCovariates.add(covariate); - } catch (Exception e) { - throw new DynamicClassResolutionException(covClass, e); - } - } - } - } - - if (!foundClass) { - throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates."); - } - } - } - return new Pair, ArrayList>(requiredCovariates, optionalCovariates); - } - - public static void listAvailableCovariates(Logger logger) { - // Get a list of all available covariates - final List> covariateClasses = new PluginManager(Covariate.class).getPlugins(); - - // Print and exit if that's what was requested - logger.info("Available covariates:"); - for (Class covClass : covariateClasses) - logger.info(covClass.getSimpleName()); - logger.info(""); - } - private static List generateReportTables(final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) { List result = new LinkedList(); int reportTableIndex = 0; @@ -272,8 +310,8 @@ public class RecalDataManager { reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality()); if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index) reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table - reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.numObservations); - reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), datum.numMismatches); + reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getNumObservations()); + reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), datum.getNumMismatches()); rowIndex++; } @@ -320,7 +358,7 @@ public class RecalDataManager { files.getFirst().close(); final RScriptExecutor executor = new RScriptExecutor(); - executor.addScript(new Resource(SCRIPT_FILE, RecalDataManager.class)); + executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class)); executor.addArgs(csvFileName.getAbsolutePath()); executor.addArgs(plotFileName.getAbsolutePath()); executor.exec(); @@ -480,14 +518,14 @@ public class RecalDataManager { */ public static boolean isColorSpaceConsistent(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) { if (ReadUtils.isSOLiDRead(read)) { // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base - if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read - final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG); + if (read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read + final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG); if (attr != null) { byte[] colorSpace; if (attr instanceof String) colorSpace = ((String) attr).getBytes(); else - throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName())); + throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName())); byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read if (read.getReadNegativeStrandFlag()) @@ -501,7 +539,7 @@ public class RecalDataManager { inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1); prevBase = readBases[i]; } - read.setAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency); + read.setAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency); } else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias."); @@ -545,7 +583,7 @@ public class RecalDataManager { * @return Returns true if the base was inconsistent with the color space */ public static boolean isColorSpaceConsistent(final GATKSAMRecord read, final int offset) { - final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG); + final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG); if (attr != null) { final byte[] inconsistency = (byte[]) attr; // NOTE: The inconsistency array is in the direction of the read, not aligned to the reference! @@ -691,40 +729,4 @@ public class RecalDataManager { } - /** - * Adds the required covariates to a covariate list - * - * Note: this method really only checks if the classes object has the expected number of required covariates, then add them by hand. - * - * @param classes list of classes to add to the covariate list - * @return the covariate list - */ - private static ArrayList addRequiredCovariatesToList(List> classes) { - ArrayList dest = new ArrayList(classes.size()); - if (classes.size() != 2) - throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected"); - - dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next. - dest.add(new QualityScoreCovariate()); - return dest; - } - - /** - * Adds the standard covariates to a covariate list - * - * @param classes list of classes to add to the covariate list - * @return the covariate list - */ - private static ArrayList addStandardCovariatesToList(List> classes) { - ArrayList dest = new ArrayList(classes.size()); - for (Class covClass : classes) { - try { - final Covariate covariate = (Covariate) covClass.newInstance(); - dest.add(covariate); - } catch (Exception e) { - throw new DynamicClassResolutionException(covClass, e); - } - } - return dest; - } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java similarity index 82% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java index e69cf4d69..e6ab9e38b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java @@ -1,11 +1,12 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.walkers.bqsr.*; +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import java.io.File; import java.io.PrintStream; @@ -33,13 +34,13 @@ public class RecalibrationReport { public RecalibrationReport(final File RECAL_FILE) { final GATKReport report = new GATKReport(RECAL_FILE); - argumentTable = report.getTable(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE); + argumentTable = report.getTable(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE); RAC = initializeArgumentCollectionTable(argumentTable); - GATKReportTable quantizedTable = report.getTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE); + GATKReportTable quantizedTable = report.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE); quantizationInfo = initializeQuantizationTable(quantizedTable); - Pair, ArrayList> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates + Pair, ArrayList> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates ArrayList requiredCovariates = covariates.getFirst(); ArrayList optionalCovariates = covariates.getSecond(); requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()]; @@ -57,13 +58,13 @@ public class RecalibrationReport { for (Covariate cov : requestedCovariates) cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection - recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE))); + recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE))); - parseReadGroupTable(report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE)); + parseReadGroupTable(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE)); - parseQualityScoreTable(report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE)); + parseQualityScoreTable(report.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE)); - parseAllCovariatesTable(report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE), recalibrationTables); + parseAllCovariatesTable(report.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE), recalibrationTables); } @@ -85,7 +86,7 @@ public class RecalibrationReport { private int countReadGroups(final GATKReportTable reportTable) { Set readGroups = new HashSet(); for ( int i = 0; i < reportTable.getNumRows(); i++ ) - readGroups.add(reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME).toString()); + readGroups.add(reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME).toString()); return readGroups.size(); } @@ -139,17 +140,17 @@ public class RecalibrationReport { \ */ private void parseAllCovariatesTable(final GATKReportTable reportTable, final RecalibrationTables recalibrationTables) { for ( int i = 0; i < reportTable.getNumRows(); i++ ) { - final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME); + final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME); tempCOVarray[0] = requestedCovariates[0].keyFromValue(rg); - final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME); + final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME); tempCOVarray[1] = requestedCovariates[1].keyFromValue(qual); - final String covName = (String)reportTable.get(i, RecalDataManager.COVARIATE_NAME_COLUMN_NAME); + final String covName = (String)reportTable.get(i, RecalUtils.COVARIATE_NAME_COLUMN_NAME); final int covIndex = optionalCovariateIndexes.get(covName); - final Object covValue = reportTable.get(i, RecalDataManager.COVARIATE_VALUE_COLUMN_NAME); + final Object covValue = reportTable.get(i, RecalUtils.COVARIATE_VALUE_COLUMN_NAME); tempCOVarray[2] = requestedCovariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex].keyFromValue(covValue); - final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME)); + final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME)); tempCOVarray[3] = event.index; recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex).put(getRecalDatum(reportTable, i, false), tempCOVarray); @@ -164,11 +165,11 @@ public class RecalibrationReport { */ private void parseQualityScoreTable(final GATKReportTable reportTable, final NestedIntegerArray qualTable) { for ( int i = 0; i < reportTable.getNumRows(); i++ ) { - final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME); + final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME); tempQUALarray[0] = requestedCovariates[0].keyFromValue(rg); - final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME); + final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME); tempQUALarray[1] = requestedCovariates[1].keyFromValue(qual); - final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME)); + final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME)); tempQUALarray[2] = event.index; qualTable.put(getRecalDatum(reportTable, i, false), tempQUALarray); @@ -183,9 +184,9 @@ public class RecalibrationReport { */ private void parseReadGroupTable(final GATKReportTable reportTable, final NestedIntegerArray rgTable) { for ( int i = 0; i < reportTable.getNumRows(); i++ ) { - final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME); + final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME); tempRGarray[0] = requestedCovariates[0].keyFromValue(rg); - final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME)); + final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME)); tempRGarray[1] = event.index; rgTable.put(getRecalDatum(reportTable, i, true), tempRGarray); @@ -193,13 +194,13 @@ public class RecalibrationReport { } private RecalDatum getRecalDatum(final GATKReportTable reportTable, final int row, final boolean hasEstimatedQReportedColumn) { - final long nObservations = (Long) reportTable.get(row, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME); - final long nErrors = (Long) reportTable.get(row, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME); - final double empiricalQuality = (Double) reportTable.get(row, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME); + final long nObservations = (Long) reportTable.get(row, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME); + final long nErrors = (Long) reportTable.get(row, RecalUtils.NUMBER_ERRORS_COLUMN_NAME); + final double empiricalQuality = (Double) reportTable.get(row, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME); final double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table - (Double) reportTable.get(row, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table - Byte.parseByte((String) reportTable.get(row, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table + (Double) reportTable.get(row, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table + Byte.parseByte((String) reportTable.get(row, RecalUtils.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table final RecalDatum datum = new RecalDatum(nObservations, nErrors, (byte)1); datum.setEstimatedQReported(estimatedQReported); @@ -218,8 +219,8 @@ public class RecalibrationReport { final Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1]; for ( int i = 0; i < table.getNumRows(); i++ ) { final byte originalQual = (byte)i; - final Object quantizedObject = table.get(i, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME); - final Object countObject = table.get(i, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME); + final Object quantizedObject = table.get(i, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME); + final Object countObject = table.get(i, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME); final byte quantizedQual = Byte.parseByte(quantizedObject.toString()); final long quantizedCount = Long.parseLong(countObject.toString()); quals[originalQual] = quantizedQual; @@ -239,7 +240,7 @@ public class RecalibrationReport { for ( int i = 0; i < table.getNumRows(); i++ ) { final String argument = table.get(i, "Argument").toString(); - Object value = table.get(i, RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME); + Object value = table.get(i, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME); if (value.equals("null")) value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport @@ -250,10 +251,10 @@ public class RecalibrationReport { RAC.DO_NOT_USE_STANDARD_COVARIATES = Boolean.parseBoolean((String) value); else if (argument.equals("solid_recal_mode")) - RAC.SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.recalModeFromString((String) value); + RAC.SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.recalModeFromString((String) value); else if (argument.equals("solid_nocall_strategy")) - RAC.SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value); + RAC.SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value); else if (argument.equals("mismatches_context_size")) RAC.MISMATCHES_CONTEXT_SIZE = Integer.parseInt((String) value); @@ -307,7 +308,7 @@ public class RecalibrationReport { } public void output(PrintStream output) { - RecalDataManager.outputRecalibrationReport(argumentTable, quantizationInfo, recalibrationTables, requestedCovariates, output); + RecalUtils.outputRecalibrationReport(argumentTable, quantizationInfo, recalibrationTables, requestedCovariates, output); } public RecalibrationArgumentCollection getRAC() { diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java index 0416b5eb9..f37e69c9a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java @@ -25,9 +25,7 @@ package org.broadinstitute.sting.utils.recalibration; -import org.broadinstitute.sting.gatk.walkers.bqsr.Covariate; -import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; -import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDatum; +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/BinaryTagCovariate.java similarity index 89% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/BinaryTagCovariate.java index a89586c2c..cebdebf9d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/BinaryTagCovariate.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration.covariates; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java index 5fe8809fb..4c20284d9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java @@ -23,8 +23,10 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration.covariates; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.clipping.ClippingRepresentation; import org.broadinstitute.sting.utils.clipping.ReadClipper; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/Covariate.java similarity index 94% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/Covariate.java index 1ad5346fa..c613135bb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/Covariate.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration.covariates; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /* @@ -89,8 +91,3 @@ public interface Covariate { public int maximumKeyValue(); } -interface RequiredCovariate extends Covariate {} - -interface StandardCovariate extends Covariate {} - -interface ExperimentalCovariate extends Covariate {} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java similarity index 97% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java index f0ff8f2bd..4f15419c7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration.covariates; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java new file mode 100644 index 000000000..72df2a410 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java @@ -0,0 +1,30 @@ +package org.broadinstitute.sting.utils.recalibration.covariates; + +/** + * [Short one sentence description of this walker] + *

+ *

+ * [Functionality of this walker] + *

+ *

+ *

Input

+ *

+ * [Input description] + *

+ *

+ *

Output

+ *

+ * [Output description] + *

+ *

+ *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T $WalkerName
+ *  
+ * + * @author Your Name + * @since Date created + */ +public interface ExperimentalCovariate extends Covariate {} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/QualityScoreCovariate.java similarity index 92% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/QualityScoreCovariate.java index dd7060ff8..3ef8ee931 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/QualityScoreCovariate.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration.covariates; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java similarity index 94% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java index f04d27b7a..85568dac9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration.covariates; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java new file mode 100644 index 000000000..50755dbcf --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java @@ -0,0 +1,30 @@ +package org.broadinstitute.sting.utils.recalibration.covariates; + +/** + * [Short one sentence description of this walker] + *

+ *

+ * [Functionality of this walker] + *

+ *

+ *

Input

+ *

+ * [Input description] + *

+ *

+ *

Output

+ *

+ * [Output description] + *

+ *

+ *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T $WalkerName
+ *  
+ * + * @author Your Name + * @since Date created + */ +public interface RequiredCovariate extends Covariate {} diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java new file mode 100644 index 000000000..444954f25 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java @@ -0,0 +1,30 @@ +package org.broadinstitute.sting.utils.recalibration.covariates; + +/** + * [Short one sentence description of this walker] + *

+ *

+ * [Functionality of this walker] + *

+ *

+ *

Input

+ *

+ * [Input description] + *

+ *

+ *

Output

+ *

+ * [Output description] + *

+ *

+ *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T $WalkerName
+ *  
+ * + * @author Your Name + * @since Date created + */ +public interface StandardCovariate extends Covariate {} diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 659615cf4..c9b3a2df8 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.*; -import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; +import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java index 8e9f2533f..f1ffbe80f 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.utils.recalibration.RecalUtils; import org.testng.Assert; import org.testng.annotations.Test; @@ -33,15 +34,15 @@ public class BQSRGathererUnitTest { for (GATKReportTable originalTable : originalReport.getTables()) { GATKReportTable calculatedTable = calculatedReport.getTable(originalTable.getTableName()); List columnsToTest = new LinkedList(); - columnsToTest.add(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME); - columnsToTest.add(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME); - if (originalTable.getTableName().equals(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE)) { // these tables must be IDENTICAL - columnsToTest.add(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME); + columnsToTest.add(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME); + columnsToTest.add(RecalUtils.NUMBER_ERRORS_COLUMN_NAME); + if (originalTable.getTableName().equals(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE)) { // these tables must be IDENTICAL + columnsToTest.add(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME); testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 1); } - else if (originalTable.getTableName().equals(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE)) { - columnsToTest.add(RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME); + else if (originalTable.getTableName().equals(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE)) { + columnsToTest.add(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME); testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 2); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java similarity index 89% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java index 553b7e237..2556448ad 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java @@ -1,5 +1,8 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.recalibration.covariates.ContextCovariate; +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.clipping.ClippingRepresentation; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java similarity index 90% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java index 3fa1e916d..c3d93b2cb 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.recalibration.covariates.CycleCovariate; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariatesUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java similarity index 92% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariatesUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java index 37994cf12..dac26cb53 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariatesUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.recalibration.covariates.*; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -41,7 +43,7 @@ public class ReadCovariatesUnitTest { requestedCovariates[2] = coCov; requestedCovariates[3] = cyCov; - ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates); + ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates); // check that the length is correct Assert.assertEquals(rc.getMismatchesKeySet().length, length); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java similarity index 88% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java index a83508353..78a74d259 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.recalibration.covariates.ReadGroupCovariate; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java similarity index 95% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java index e4a77c016..387cc94d6 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java @@ -1,9 +1,10 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.recalibration.covariates.*; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -72,7 +73,7 @@ public class RecalibrationReportUnitTest { final int expectedKeys = expectedNumberOfKeys(4, length, RAC.INDELS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE); int nKeys = 0; // keep track of how many keys were produced - final ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates); + final ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates); final RecalibrationTables recalibrationTables = new RecalibrationTables(requestedCovariates); final NestedIntegerArray rgTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);