diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/DummyProtectedWalker.java b/protected/java/src/org/broadinstitute/sting/gatk/DummyProtectedClass.java similarity index 92% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/DummyProtectedWalker.java rename to protected/java/src/org/broadinstitute/sting/gatk/DummyProtectedClass.java index c3ae0b469..c1324aea4 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/DummyProtectedWalker.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/DummyProtectedClass.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils; +package org.broadinstitute.sting.gatk; /* * Copyright (c) 2009 The Broad Institute @@ -27,7 +27,7 @@ package org.broadinstitute.sting.utils; import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource; -public class DummyProtectedWalker implements ProtectedPackageSource { +public class DummyProtectedClass implements ProtectedPackageSource { // THIS CLASS IS USED JUST SO THAT WE CAN TEST WHETHER WE ARE USING THE LITE OR FULL VERSION OF THE GATK // **** DO NOT REMOVE! **** diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java index 04e805988..9eca81852 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java @@ -31,7 +31,7 @@ import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; -public class AdvancedRecalibrationEngine extends RecalibrationEngine implements ProtectedPackageSource { +public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource { // optimizations: don't reallocate an array each time private byte[] tempQualArray; diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 29bf5c245..5f4f66c89 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -51,7 +51,7 @@ import org.broadinstitute.sting.gatk.samples.SampleDBBuilder; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.collections.Pair; @@ -199,20 +199,15 @@ public class GenomeAnalysisEngine { public BaseRecalibration getBaseRecalibration() { return baseRecalibration; } public boolean hasBaseRecalibration() { return baseRecalibration != null; } public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan) { - baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, disableIndelQuals, preserveQLessThan, isGATKLite()); + baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, disableIndelQuals, preserveQLessThan); } /** * Utility method to determine whether this is the lite version of the GATK */ public boolean isGATKLite() { - if ( isLiteVersion == null ) { - isLiteVersion = !(new PluginManager(Object.class).exists(DummyProtectedWalkerName)); - } - return isLiteVersion; + return JVMUtils.isGATKLite(); } - private static final String DummyProtectedWalkerName = "DummyProtectedWalker"; - private static Boolean isLiteVersion = null; /** * Actually run the GATK with the specified walker. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseQualityScoreRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseQualityScoreRecalibrator.java index fe88320c9..299daebd9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseQualityScoreRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseQualityScoreRecalibrator.java @@ -33,7 +33,6 @@ import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter; import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource; @@ -120,6 +119,8 @@ public class BaseQualityScoreRecalibrator extends LocusWalker implem private RecalibrationEngine recalibrationEngine; + private int minimumQToUse; + protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped. protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed. protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\ @@ -133,6 +134,10 @@ public class BaseQualityScoreRecalibrator extends LocusWalker implem */ public void initialize() { + // check for unsupported access + if (getToolkit().isGATKLite() && !getToolkit().getArguments().disableIndelQuals) + throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument"); + if (RAC.FORCE_PLATFORM != null) RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; @@ -169,12 +174,14 @@ public class BaseQualityScoreRecalibrator extends LocusWalker implem recalibrationEngine = initializeRecalibrationEngine(); recalibrationEngine.initialize(requestedCovariates, recalibrationTables); + + minimumQToUse = getToolkit().getArguments().PRESERVE_QSCORES_LESS_THAN; } private RecalibrationEngine initializeRecalibrationEngine() { List> REclasses = new PluginManager(RecalibrationEngine.class).getPlugins(); if ( REclasses.isEmpty() ) - throw new ReviewedStingException("There are no classes found that extend RecalibrationEngine; repository must be corrupted"); + throw new ReviewedStingException("The RecalibrationEngine class is not found; repository must be corrupted"); Class c = null; for ( Class REclass : REclasses ) { @@ -201,7 +208,7 @@ public class BaseQualityScoreRecalibrator extends LocusWalker implem } private boolean isLowQualityBase(GATKSAMRecord read, int offset) { - return read.getBaseQualities()[offset] < QualityUtils.MIN_USABLE_Q_SCORE; + return read.getBaseQualities()[offset] < minimumQToUse; } private boolean readNotSeen(GATKSAMRecord read) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java index 16e59ec30..aa62a18bc 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java @@ -1,110 +1,36 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.classloader.PublicPackageSource; -import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -public class RecalibrationEngine implements PublicPackageSource { +/* +* Copyright (c) 2009 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +* OTHER DEALINGS IN THE SOFTWARE. +*/ +public interface RecalibrationEngine { - protected Covariate[] covariates; - protected RecalibrationTables recalibrationTables; + public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables); - public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) { - this.covariates = covariates; - this.recalibrationTables = recalibrationTables; - } + public void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase); - /** - * Loop through the list of requested covariates and pick out the value from the read, offset, and reference - * Using the list of covariate values as a key, pick out the RecalDatum and increment, - * adding one to the number of observations and potentially one to the number of mismatches for mismatches only. - * - * @param pileupElement The pileup element to update - * @param refBase The reference base at this locus - */ - public synchronized void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) { - final int offset = pileupElement.getOffset(); - final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead()); - - final byte qual = pileupElement.getQual(); - final boolean isError = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase); - - final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION); - final int eventIndex = EventType.BASE_SUBSTITUTION.index; - - final NestedIntegerArray rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); - final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex); - final RecalDatum rgThisDatum = createDatumObject(qual, isError); - if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it - rgRecalTable.put(rgThisDatum, keys[0], eventIndex); - else - rgPreviousDatum.combine(rgThisDatum); - - final NestedIntegerArray qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); - final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex); - if (qualPreviousDatum == null) - qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex); - else - qualPreviousDatum.increment(isError); - - for (int i = 2; i < covariates.length; i++) { - if (keys[i] < 0) - continue; - final NestedIntegerArray covRecalTable = recalibrationTables.getTable(i); - final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex); - if (covPreviousDatum == null) - covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex); - else - covPreviousDatum.increment(isError); - } - } - - /** - * creates a datum object with one observation and one or zero error - * - * @param reportedQual the quality score reported by the instrument for this base - * @param isError whether or not the observation is an error - * @return a new RecalDatum object with the observation and the error - */ - protected RecalDatum createDatumObject(final byte reportedQual, final boolean isError) { - return new RecalDatum(1, isError ? 1:0, reportedQual); - } - - /** - * Get the covariate key set from a read - * - * @param read the read - * @return the covariate keysets for this read - */ - protected ReadCovariates covariateKeySetFrom(GATKSAMRecord read) { - return (ReadCovariates) read.getTemporaryAttribute(BaseQualityScoreRecalibrator.COVARS_ATTRIBUTE); - } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java new file mode 100644 index 000000000..34d99c018 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java @@ -0,0 +1,110 @@ +package org.broadinstitute.sting.gatk.walkers.bqsr; + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.classloader.PublicPackageSource; +import org.broadinstitute.sting.utils.collections.NestedIntegerArray; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +public class StandardRecalibrationEngine implements RecalibrationEngine, PublicPackageSource { + + protected Covariate[] covariates; + protected RecalibrationTables recalibrationTables; + + public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) { + this.covariates = covariates; + this.recalibrationTables = recalibrationTables; + } + + /** + * Loop through the list of requested covariates and pick out the value from the read, offset, and reference + * Using the list of covariate values as a key, pick out the RecalDatum and increment, + * adding one to the number of observations and potentially one to the number of mismatches for mismatches only. + * + * @param pileupElement The pileup element to update + * @param refBase The reference base at this locus + */ + public synchronized void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) { + final int offset = pileupElement.getOffset(); + final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead()); + + final byte qual = pileupElement.getQual(); + final boolean isError = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase); + + final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION); + final int eventIndex = EventType.BASE_SUBSTITUTION.index; + + final NestedIntegerArray rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); + final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex); + final RecalDatum rgThisDatum = createDatumObject(qual, isError); + if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it + rgRecalTable.put(rgThisDatum, keys[0], eventIndex); + else + rgPreviousDatum.combine(rgThisDatum); + + final NestedIntegerArray qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); + final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex); + if (qualPreviousDatum == null) + qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex); + else + qualPreviousDatum.increment(isError); + + for (int i = 2; i < covariates.length; i++) { + if (keys[i] < 0) + continue; + final NestedIntegerArray covRecalTable = recalibrationTables.getTable(i); + final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex); + if (covPreviousDatum == null) + covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex); + else + covPreviousDatum.increment(isError); + } + } + + /** + * creates a datum object with one observation and one or zero error + * + * @param reportedQual the quality score reported by the instrument for this base + * @param isError whether or not the observation is an error + * @return a new RecalDatum object with the observation and the error + */ + protected RecalDatum createDatumObject(final byte reportedQual, final boolean isError) { + return new RecalDatum(1, isError ? 1:0, reportedQual); + } + + /** + * Get the covariate key set from a read + * + * @param read the read + * @return the covariate keysets for this read + */ + protected ReadCovariates covariateKeySetFrom(GATKSAMRecord read) { + return (ReadCovariates) read.getTemporaryAttribute(BaseQualityScoreRecalibrator.COVARS_ATTRIBUTE); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java b/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java index fa154fca3..745c9ad1e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java @@ -105,6 +105,23 @@ public class JVMUtils { return allFields; } + /** + * Utility method to determine whether this is the lite version of the GATK + */ + public static boolean isGATKLite() { + if ( isLiteVersion == null ) { + try { + Class.forName(DummyProtectedClassName); + isLiteVersion = false; + } catch ( ClassNotFoundException e) { + isLiteVersion = true; + } + } + return isLiteVersion; + } + private static final String DummyProtectedClassName = "org.broadinstitute.sting.gatk.DummyProtectedClass"; + private static Boolean isLiteVersion = null; + /** * Find the field with the given name in the class. Will inspect all fields, independent * of access level. diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index efb6ecef9..074186305 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -30,7 +30,6 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.collections.NestedHashMap; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.File; @@ -66,13 +65,8 @@ public class BaseRecalibration { * @param quantizationLevels number of bins to quantize the quality scores * @param disableIndelQuals if true, do not emit base indel qualities * @param preserveQLessThan preserve quality scores less than this value - * @param isGATKLite is this being called from the full or Lite version of the GATK */ - public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan, final boolean isGATKLite) { - // check for unsupported access - if (isGATKLite && !disableIndelQuals) - throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument"); - + public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan) { RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE); recalibrationTables = recalibrationReport.getRecalibrationTables();