diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java new file mode 100644 index 000000000..04e805988 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java @@ -0,0 +1,99 @@ +package org.broadinstitute.sting.gatk.walkers.bqsr; + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource; +import org.broadinstitute.sting.utils.collections.NestedIntegerArray; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; + +public class AdvancedRecalibrationEngine extends RecalibrationEngine implements ProtectedPackageSource { + + // optimizations: don't reallocate an array each time + private byte[] tempQualArray; + private boolean[] tempErrorArray; + + public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) { + super.initialize(covariates, recalibrationTables); + tempQualArray = new byte[EventType.values().length]; + tempErrorArray = new boolean[EventType.values().length]; + } + + /** + * Loop through the list of requested covariates and pick out the value from the read, offset, and reference + * Using the list of covariate values as a key, pick out the RecalDatum and increment, + * adding one to the number of observations and potentially one to the number of mismatches for all three + * categories (mismatches, insertions and deletions). + * + * @param pileupElement The pileup element to update + * @param refBase The reference base at this locus + */ + public synchronized void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) { + final int offset = pileupElement.getOffset(); + final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead()); + + tempQualArray[EventType.BASE_SUBSTITUTION.index] = pileupElement.getQual(); + tempErrorArray[EventType.BASE_SUBSTITUTION.index] = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase); + tempQualArray[EventType.BASE_INSERTION.index] = pileupElement.getBaseInsertionQual(); + tempErrorArray[EventType.BASE_INSERTION.index] = (pileupElement.getRead().getReadNegativeStrandFlag()) ? pileupElement.isAfterInsertion() : pileupElement.isBeforeInsertion(); + tempQualArray[EventType.BASE_DELETION.index] = pileupElement.getBaseDeletionQual(); + tempErrorArray[EventType.BASE_DELETION.index] = (pileupElement.getRead().getReadNegativeStrandFlag()) ? pileupElement.isAfterDeletedBase() : pileupElement.isBeforeDeletedBase(); + + for (final EventType eventType : EventType.values()) { + final int[] keys = readCovariates.getKeySet(offset, eventType); + final int eventIndex = eventType.index; + final byte qual = tempQualArray[eventIndex]; + final boolean isError = tempErrorArray[eventIndex]; + + final NestedIntegerArray rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); + final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex); + final RecalDatum rgThisDatum = createDatumObject(qual, isError); + if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it + rgRecalTable.put(rgThisDatum, keys[0], eventIndex); + else + rgPreviousDatum.combine(rgThisDatum); + + final NestedIntegerArray qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); + final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex); + if (qualPreviousDatum == null) + qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex); + else + qualPreviousDatum.increment(isError); + + for (int i = 2; i < covariates.length; i++) { + if (keys[i] < 0) + continue; + final NestedIntegerArray covRecalTable = recalibrationTables.getTable(i); + final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex); + if (covPreviousDatum == null) + covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex); + else + covPreviousDatum.increment(isError); + } + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseQualityScoreRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseQualityScoreRecalibrator.java index afa9620fd..fe88320c9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseQualityScoreRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseQualityScoreRecalibrator.java @@ -33,11 +33,12 @@ import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter; import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.utils.collections.NestedIntegerArray; +import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; @@ -47,6 +48,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils; import java.io.File; import java.io.FileNotFoundException; import java.io.PrintStream; +import java.lang.reflect.Constructor; import java.util.*; /** @@ -116,17 +118,15 @@ public class BaseQualityScoreRecalibrator extends LocusWalker implem private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental) - private static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped. - private static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed. - private static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\ + private RecalibrationEngine recalibrationEngine; - private final byte[] tempQualArray = new byte[EventType.values().length]; // optimization: don't reallocate an array each time - private final boolean[] tempErrorArray = new boolean[EventType.values().length]; // optimization: don't reallocate an array each time + protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped. + protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed. + protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\ private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation."; - /** * Parse the -cov arguments and create a list of covariates to be used here * Based on the covariates' estimates for initial capacity allocate the data hashmap @@ -136,7 +136,6 @@ public class BaseQualityScoreRecalibrator extends LocusWalker implem if (RAC.FORCE_PLATFORM != null) RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; - if (RAC.knownSites.isEmpty() && !RAC.RUN_WITHOUT_DBSNP) // Warn the user if no dbSNP file or other variant mask was specified throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION); @@ -167,6 +166,34 @@ public class BaseQualityScoreRecalibrator extends LocusWalker implem for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() ) numReadGroups += header.getReadGroups().size(); recalibrationTables = new RecalibrationTables(requestedCovariates, numReadGroups); + + recalibrationEngine = initializeRecalibrationEngine(); + recalibrationEngine.initialize(requestedCovariates, recalibrationTables); + } + + private RecalibrationEngine initializeRecalibrationEngine() { + List> REclasses = new PluginManager(RecalibrationEngine.class).getPlugins(); + if ( REclasses.isEmpty() ) + throw new ReviewedStingException("There are no classes found that extend RecalibrationEngine; repository must be corrupted"); + + Class c = null; + for ( Class REclass : REclasses ) { + if ( REclass.isAssignableFrom(ProtectedPackageSource.class) ) { + c = REclass; + break; + } + } + if ( c == null ) + c = REclasses.get(0); + + try { + Constructor constructor = c.getDeclaredConstructor((Class[])null); + constructor.setAccessible(true); + return (RecalibrationEngine)constructor.newInstance(); + } + catch (Exception e) { + throw new ReviewedStingException("Unable to create RecalibrationEngine class instance " + c.getSimpleName()); + } } private boolean readHasBeenSkipped(GATKSAMRecord read) { @@ -210,12 +237,10 @@ public class BaseQualityScoreRecalibrator extends LocusWalker implem read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(read, requestedCovariates)); } - final byte refBase = ref.getBase(); - if (!ReadUtils.isSOLiDRead(read) || // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING || RecalDataManager.isColorSpaceConsistent(read, offset)) - updateDataForPileupElement(p, refBase); // This base finally passed all the checks for a good base, so add it to the big data hashmap + recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); // This base finally passed all the checks for a good base, so add it to the big data hashmap } countedSites++; } @@ -283,81 +308,6 @@ public class BaseQualityScoreRecalibrator extends LocusWalker implem quantizationInfo = new QuantizationInfo(recalibrationTables, RAC.QUANTIZING_LEVELS); } - /** - * Loop through the list of requested covariates and pick out the value from the read, offset, and reference - * Using the list of covariate values as a key, pick out the RecalDatum and increment, - * adding one to the number of observations and potentially one to the number of mismatches for all three - * categories (mismatches, insertions and deletions). - * - * @param pileupElement The pileup element to update - * @param refBase The reference base at this locus - */ - private synchronized void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) { - final int offset = pileupElement.getOffset(); - final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead()); - - tempQualArray[EventType.BASE_SUBSTITUTION.index] = pileupElement.getQual(); - tempErrorArray[EventType.BASE_SUBSTITUTION.index] = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase); - tempQualArray[EventType.BASE_INSERTION.index] = pileupElement.getBaseInsertionQual(); - tempErrorArray[EventType.BASE_INSERTION.index] = (pileupElement.getRead().getReadNegativeStrandFlag()) ? pileupElement.isAfterInsertion() : pileupElement.isBeforeInsertion(); - tempQualArray[EventType.BASE_DELETION.index] = pileupElement.getBaseDeletionQual(); - tempErrorArray[EventType.BASE_DELETION.index] = (pileupElement.getRead().getReadNegativeStrandFlag()) ? pileupElement.isAfterDeletedBase() : pileupElement.isBeforeDeletedBase(); - - for (final EventType eventType : EventType.values()) { - final int[] keys = readCovariates.getKeySet(offset, eventType); - final int eventIndex = eventType.index; - final byte qual = tempQualArray[eventIndex]; - final boolean isError = tempErrorArray[eventIndex]; - - final NestedIntegerArray rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); - final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex); - final RecalDatum rgThisDatum = createDatumObject(qual, isError); - if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it - rgRecalTable.put(rgThisDatum, keys[0], eventIndex); - else - rgPreviousDatum.combine(rgThisDatum); - - final NestedIntegerArray qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); - final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex); - if (qualPreviousDatum == null) - qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex); - else - qualPreviousDatum.increment(isError); - - for (int i = 2; i < requestedCovariates.length; i++) { - if (keys[i] < 0) - continue; - final NestedIntegerArray covRecalTable = recalibrationTables.getTable(i); - final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex); - if (covPreviousDatum == null) - covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex); - else - covPreviousDatum.increment(isError); - } - } - } - - /** - * creates a datum object with one observation and one or zero error - * - * @param reportedQual the quality score reported by the instrument for this base - * @param isError whether or not the observation is an error - * @return a new RecalDatum object with the observation and the error - */ - private RecalDatum createDatumObject(final byte reportedQual, final boolean isError) { - return new RecalDatum(1, isError ? 1:0, reportedQual); - } - - /** - * Get the covariate key set from a read - * - * @param read the read - * @return the covariate keysets for this read - */ - private ReadCovariates covariateKeySetFrom(GATKSAMRecord read) { - return (ReadCovariates) read.getTemporaryAttribute(COVARS_ATTRIBUTE); - } - private void generateReport() { PrintStream output; try { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java new file mode 100644 index 000000000..16e59ec30 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java @@ -0,0 +1,110 @@ +package org.broadinstitute.sting.gatk.walkers.bqsr; + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.classloader.PublicPackageSource; +import org.broadinstitute.sting.utils.collections.NestedIntegerArray; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +public class RecalibrationEngine implements PublicPackageSource { + + protected Covariate[] covariates; + protected RecalibrationTables recalibrationTables; + + public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) { + this.covariates = covariates; + this.recalibrationTables = recalibrationTables; + } + + /** + * Loop through the list of requested covariates and pick out the value from the read, offset, and reference + * Using the list of covariate values as a key, pick out the RecalDatum and increment, + * adding one to the number of observations and potentially one to the number of mismatches for mismatches only. + * + * @param pileupElement The pileup element to update + * @param refBase The reference base at this locus + */ + public synchronized void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) { + final int offset = pileupElement.getOffset(); + final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead()); + + final byte qual = pileupElement.getQual(); + final boolean isError = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase); + + final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION); + final int eventIndex = EventType.BASE_SUBSTITUTION.index; + + final NestedIntegerArray rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); + final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex); + final RecalDatum rgThisDatum = createDatumObject(qual, isError); + if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it + rgRecalTable.put(rgThisDatum, keys[0], eventIndex); + else + rgPreviousDatum.combine(rgThisDatum); + + final NestedIntegerArray qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); + final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex); + if (qualPreviousDatum == null) + qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex); + else + qualPreviousDatum.increment(isError); + + for (int i = 2; i < covariates.length; i++) { + if (keys[i] < 0) + continue; + final NestedIntegerArray covRecalTable = recalibrationTables.getTable(i); + final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex); + if (covPreviousDatum == null) + covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex); + else + covPreviousDatum.increment(isError); + } + } + + /** + * creates a datum object with one observation and one or zero error + * + * @param reportedQual the quality score reported by the instrument for this base + * @param isError whether or not the observation is an error + * @return a new RecalDatum object with the observation and the error + */ + protected RecalDatum createDatumObject(final byte reportedQual, final boolean isError) { + return new RecalDatum(1, isError ? 1:0, reportedQual); + } + + /** + * Get the covariate key set from a read + * + * @param read the read + * @return the covariate keysets for this read + */ + protected ReadCovariates covariateKeySetFrom(GATKSAMRecord read) { + return (ReadCovariates) read.getTemporaryAttribute(BaseQualityScoreRecalibrator.COVARS_ATTRIBUTE); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/ProtectedPackageSource.java b/public/java/src/org/broadinstitute/sting/utils/classloader/ProtectedPackageSource.java new file mode 100755 index 000000000..c1da5fb02 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/ProtectedPackageSource.java @@ -0,0 +1,3 @@ +package org.broadinstitute.sting.utils.classloader; + +public interface ProtectedPackageSource {} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/PublicPackageSource.java b/public/java/src/org/broadinstitute/sting/utils/classloader/PublicPackageSource.java new file mode 100755 index 000000000..9e2b33aae --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/PublicPackageSource.java @@ -0,0 +1,3 @@ +package org.broadinstitute.sting.utils.classloader; + +public interface PublicPackageSource {} \ No newline at end of file