From 0a89adbcdb212aff3634294b5b3a47fcd3188f52 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 16 Jul 2012 15:34:50 -0400 Subject: [PATCH] Add utility decorators so that classes can tell you which package source they come from if they want to (suggested by Khalid). Using those decorators, we can easily pull out the BQSR updateDataForPileupElement() method into a standard RecalibrationEngine and an AdvancedRecalibrationEngine and use the protected one (AdvancedRE) if available (otherwise, the public one). --- .../bqsr/AdvancedRecalibrationEngine.java | 99 ++++++++++++++ .../bqsr/BaseQualityScoreRecalibrator.java | 124 ++++++------------ .../walkers/bqsr/RecalibrationEngine.java | 110 ++++++++++++++++ .../classloader/ProtectedPackageSource.java | 3 + .../classloader/PublicPackageSource.java | 3 + 5 files changed, 252 insertions(+), 87 deletions(-) create mode 100644 protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java create mode 100755 public/java/src/org/broadinstitute/sting/utils/classloader/ProtectedPackageSource.java create mode 100755 public/java/src/org/broadinstitute/sting/utils/classloader/PublicPackageSource.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java new file mode 100644 index 000000000..04e805988 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java @@ -0,0 +1,99 @@ +package org.broadinstitute.sting.gatk.walkers.bqsr; + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource; +import org.broadinstitute.sting.utils.collections.NestedIntegerArray; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; + +public class AdvancedRecalibrationEngine extends RecalibrationEngine implements ProtectedPackageSource { + + // optimizations: don't reallocate an array each time + private byte[] tempQualArray; + private boolean[] tempErrorArray; + + public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) { + super.initialize(covariates, recalibrationTables); + tempQualArray = new byte[EventType.values().length]; + tempErrorArray = new boolean[EventType.values().length]; + } + + /** + * Loop through the list of requested covariates and pick out the value from the read, offset, and reference + * Using the list of covariate values as a key, pick out the RecalDatum and increment, + * adding one to the number of observations and potentially one to the number of mismatches for all three + * categories (mismatches, insertions and deletions). + * + * @param pileupElement The pileup element to update + * @param refBase The reference base at this locus + */ + public synchronized void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) { + final int offset = pileupElement.getOffset(); + final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead()); + + tempQualArray[EventType.BASE_SUBSTITUTION.index] = pileupElement.getQual(); + tempErrorArray[EventType.BASE_SUBSTITUTION.index] = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase); + tempQualArray[EventType.BASE_INSERTION.index] = pileupElement.getBaseInsertionQual(); + tempErrorArray[EventType.BASE_INSERTION.index] = (pileupElement.getRead().getReadNegativeStrandFlag()) ? pileupElement.isAfterInsertion() : pileupElement.isBeforeInsertion(); + tempQualArray[EventType.BASE_DELETION.index] = pileupElement.getBaseDeletionQual(); + tempErrorArray[EventType.BASE_DELETION.index] = (pileupElement.getRead().getReadNegativeStrandFlag()) ? pileupElement.isAfterDeletedBase() : pileupElement.isBeforeDeletedBase(); + + for (final EventType eventType : EventType.values()) { + final int[] keys = readCovariates.getKeySet(offset, eventType); + final int eventIndex = eventType.index; + final byte qual = tempQualArray[eventIndex]; + final boolean isError = tempErrorArray[eventIndex]; + + final NestedIntegerArray rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); + final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex); + final RecalDatum rgThisDatum = createDatumObject(qual, isError); + if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it + rgRecalTable.put(rgThisDatum, keys[0], eventIndex); + else + rgPreviousDatum.combine(rgThisDatum); + + final NestedIntegerArray qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); + final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex); + if (qualPreviousDatum == null) + qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex); + else + qualPreviousDatum.increment(isError); + + for (int i = 2; i < covariates.length; i++) { + if (keys[i] < 0) + continue; + final NestedIntegerArray covRecalTable = recalibrationTables.getTable(i); + final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex); + if (covPreviousDatum == null) + covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex); + else + covPreviousDatum.increment(isError); + } + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseQualityScoreRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseQualityScoreRecalibrator.java index afa9620fd..fe88320c9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseQualityScoreRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseQualityScoreRecalibrator.java @@ -33,11 +33,12 @@ import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter; import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.utils.collections.NestedIntegerArray; +import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; @@ -47,6 +48,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils; import java.io.File; import java.io.FileNotFoundException; import java.io.PrintStream; +import java.lang.reflect.Constructor; import java.util.*; /** @@ -116,17 +118,15 @@ public class BaseQualityScoreRecalibrator extends LocusWalker implem private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental) - private static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped. - private static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed. - private static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\ + private RecalibrationEngine recalibrationEngine; - private final byte[] tempQualArray = new byte[EventType.values().length]; // optimization: don't reallocate an array each time - private final boolean[] tempErrorArray = new boolean[EventType.values().length]; // optimization: don't reallocate an array each time + protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped. + protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed. + protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\ private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation."; - /** * Parse the -cov arguments and create a list of covariates to be used here * Based on the covariates' estimates for initial capacity allocate the data hashmap @@ -136,7 +136,6 @@ public class BaseQualityScoreRecalibrator extends LocusWalker implem if (RAC.FORCE_PLATFORM != null) RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; - if (RAC.knownSites.isEmpty() && !RAC.RUN_WITHOUT_DBSNP) // Warn the user if no dbSNP file or other variant mask was specified throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION); @@ -167,6 +166,34 @@ public class BaseQualityScoreRecalibrator extends LocusWalker implem for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() ) numReadGroups += header.getReadGroups().size(); recalibrationTables = new RecalibrationTables(requestedCovariates, numReadGroups); + + recalibrationEngine = initializeRecalibrationEngine(); + recalibrationEngine.initialize(requestedCovariates, recalibrationTables); + } + + private RecalibrationEngine initializeRecalibrationEngine() { + List> REclasses = new PluginManager(RecalibrationEngine.class).getPlugins(); + if ( REclasses.isEmpty() ) + throw new ReviewedStingException("There are no classes found that extend RecalibrationEngine; repository must be corrupted"); + + Class c = null; + for ( Class REclass : REclasses ) { + if ( REclass.isAssignableFrom(ProtectedPackageSource.class) ) { + c = REclass; + break; + } + } + if ( c == null ) + c = REclasses.get(0); + + try { + Constructor constructor = c.getDeclaredConstructor((Class[])null); + constructor.setAccessible(true); + return (RecalibrationEngine)constructor.newInstance(); + } + catch (Exception e) { + throw new ReviewedStingException("Unable to create RecalibrationEngine class instance " + c.getSimpleName()); + } } private boolean readHasBeenSkipped(GATKSAMRecord read) { @@ -210,12 +237,10 @@ public class BaseQualityScoreRecalibrator extends LocusWalker implem read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(read, requestedCovariates)); } - final byte refBase = ref.getBase(); - if (!ReadUtils.isSOLiDRead(read) || // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING || RecalDataManager.isColorSpaceConsistent(read, offset)) - updateDataForPileupElement(p, refBase); // This base finally passed all the checks for a good base, so add it to the big data hashmap + recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); // This base finally passed all the checks for a good base, so add it to the big data hashmap } countedSites++; } @@ -283,81 +308,6 @@ public class BaseQualityScoreRecalibrator extends LocusWalker implem quantizationInfo = new QuantizationInfo(recalibrationTables, RAC.QUANTIZING_LEVELS); } - /** - * Loop through the list of requested covariates and pick out the value from the read, offset, and reference - * Using the list of covariate values as a key, pick out the RecalDatum and increment, - * adding one to the number of observations and potentially one to the number of mismatches for all three - * categories (mismatches, insertions and deletions). - * - * @param pileupElement The pileup element to update - * @param refBase The reference base at this locus - */ - private synchronized void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) { - final int offset = pileupElement.getOffset(); - final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead()); - - tempQualArray[EventType.BASE_SUBSTITUTION.index] = pileupElement.getQual(); - tempErrorArray[EventType.BASE_SUBSTITUTION.index] = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase); - tempQualArray[EventType.BASE_INSERTION.index] = pileupElement.getBaseInsertionQual(); - tempErrorArray[EventType.BASE_INSERTION.index] = (pileupElement.getRead().getReadNegativeStrandFlag()) ? pileupElement.isAfterInsertion() : pileupElement.isBeforeInsertion(); - tempQualArray[EventType.BASE_DELETION.index] = pileupElement.getBaseDeletionQual(); - tempErrorArray[EventType.BASE_DELETION.index] = (pileupElement.getRead().getReadNegativeStrandFlag()) ? pileupElement.isAfterDeletedBase() : pileupElement.isBeforeDeletedBase(); - - for (final EventType eventType : EventType.values()) { - final int[] keys = readCovariates.getKeySet(offset, eventType); - final int eventIndex = eventType.index; - final byte qual = tempQualArray[eventIndex]; - final boolean isError = tempErrorArray[eventIndex]; - - final NestedIntegerArray rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); - final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex); - final RecalDatum rgThisDatum = createDatumObject(qual, isError); - if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it - rgRecalTable.put(rgThisDatum, keys[0], eventIndex); - else - rgPreviousDatum.combine(rgThisDatum); - - final NestedIntegerArray qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); - final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex); - if (qualPreviousDatum == null) - qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex); - else - qualPreviousDatum.increment(isError); - - for (int i = 2; i < requestedCovariates.length; i++) { - if (keys[i] < 0) - continue; - final NestedIntegerArray covRecalTable = recalibrationTables.getTable(i); - final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex); - if (covPreviousDatum == null) - covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex); - else - covPreviousDatum.increment(isError); - } - } - } - - /** - * creates a datum object with one observation and one or zero error - * - * @param reportedQual the quality score reported by the instrument for this base - * @param isError whether or not the observation is an error - * @return a new RecalDatum object with the observation and the error - */ - private RecalDatum createDatumObject(final byte reportedQual, final boolean isError) { - return new RecalDatum(1, isError ? 1:0, reportedQual); - } - - /** - * Get the covariate key set from a read - * - * @param read the read - * @return the covariate keysets for this read - */ - private ReadCovariates covariateKeySetFrom(GATKSAMRecord read) { - return (ReadCovariates) read.getTemporaryAttribute(COVARS_ATTRIBUTE); - } - private void generateReport() { PrintStream output; try { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java new file mode 100644 index 000000000..16e59ec30 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java @@ -0,0 +1,110 @@ +package org.broadinstitute.sting.gatk.walkers.bqsr; + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.classloader.PublicPackageSource; +import org.broadinstitute.sting.utils.collections.NestedIntegerArray; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +public class RecalibrationEngine implements PublicPackageSource { + + protected Covariate[] covariates; + protected RecalibrationTables recalibrationTables; + + public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) { + this.covariates = covariates; + this.recalibrationTables = recalibrationTables; + } + + /** + * Loop through the list of requested covariates and pick out the value from the read, offset, and reference + * Using the list of covariate values as a key, pick out the RecalDatum and increment, + * adding one to the number of observations and potentially one to the number of mismatches for mismatches only. + * + * @param pileupElement The pileup element to update + * @param refBase The reference base at this locus + */ + public synchronized void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) { + final int offset = pileupElement.getOffset(); + final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead()); + + final byte qual = pileupElement.getQual(); + final boolean isError = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase); + + final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION); + final int eventIndex = EventType.BASE_SUBSTITUTION.index; + + final NestedIntegerArray rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); + final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex); + final RecalDatum rgThisDatum = createDatumObject(qual, isError); + if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it + rgRecalTable.put(rgThisDatum, keys[0], eventIndex); + else + rgPreviousDatum.combine(rgThisDatum); + + final NestedIntegerArray qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); + final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex); + if (qualPreviousDatum == null) + qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex); + else + qualPreviousDatum.increment(isError); + + for (int i = 2; i < covariates.length; i++) { + if (keys[i] < 0) + continue; + final NestedIntegerArray covRecalTable = recalibrationTables.getTable(i); + final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex); + if (covPreviousDatum == null) + covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex); + else + covPreviousDatum.increment(isError); + } + } + + /** + * creates a datum object with one observation and one or zero error + * + * @param reportedQual the quality score reported by the instrument for this base + * @param isError whether or not the observation is an error + * @return a new RecalDatum object with the observation and the error + */ + protected RecalDatum createDatumObject(final byte reportedQual, final boolean isError) { + return new RecalDatum(1, isError ? 1:0, reportedQual); + } + + /** + * Get the covariate key set from a read + * + * @param read the read + * @return the covariate keysets for this read + */ + protected ReadCovariates covariateKeySetFrom(GATKSAMRecord read) { + return (ReadCovariates) read.getTemporaryAttribute(BaseQualityScoreRecalibrator.COVARS_ATTRIBUTE); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/ProtectedPackageSource.java b/public/java/src/org/broadinstitute/sting/utils/classloader/ProtectedPackageSource.java new file mode 100755 index 000000000..c1da5fb02 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/ProtectedPackageSource.java @@ -0,0 +1,3 @@ +package org.broadinstitute.sting.utils.classloader; + +public interface ProtectedPackageSource {} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/PublicPackageSource.java b/public/java/src/org/broadinstitute/sting/utils/classloader/PublicPackageSource.java new file mode 100755 index 000000000..9e2b33aae --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/PublicPackageSource.java @@ -0,0 +1,3 @@ +package org.broadinstitute.sting.utils.classloader; + +public interface PublicPackageSource {} \ No newline at end of file