Total rewrite of the isGATKLite() functionality with help of Khalid/David. PluginManager was not working for us.

This commit is contained in:
Eric Banks 2012-07-17 15:11:03 -04:00
parent 110886e8b9
commit 305db8c0d1
8 changed files with 171 additions and 122 deletions

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.utils;
package org.broadinstitute.sting.gatk;
/*
* Copyright (c) 2009 The Broad Institute
@ -27,7 +27,7 @@ package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
public class DummyProtectedWalker implements ProtectedPackageSource {
public class DummyProtectedClass implements ProtectedPackageSource {
// THIS CLASS IS USED JUST SO THAT WE CAN TEST WHETHER WE ARE USING THE LITE OR FULL VERSION OF THE GATK
// **** DO NOT REMOVE! ****

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
public class AdvancedRecalibrationEngine extends RecalibrationEngine implements ProtectedPackageSource {
public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource {
// optimizations: don't reallocate an array each time
private byte[] tempQualArray;

View File

@ -51,7 +51,7 @@ import org.broadinstitute.sting.gatk.samples.SampleDBBuilder;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.collections.Pair;
@ -199,20 +199,15 @@ public class GenomeAnalysisEngine {
public BaseRecalibration getBaseRecalibration() { return baseRecalibration; }
public boolean hasBaseRecalibration() { return baseRecalibration != null; }
public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan) {
baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, disableIndelQuals, preserveQLessThan, isGATKLite());
baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, disableIndelQuals, preserveQLessThan);
}
/**
* Utility method to determine whether this is the lite version of the GATK
*/
public boolean isGATKLite() {
if ( isLiteVersion == null ) {
isLiteVersion = !(new PluginManager<Object>(Object.class).exists(DummyProtectedWalkerName));
}
return isLiteVersion;
return JVMUtils.isGATKLite();
}
private static final String DummyProtectedWalkerName = "DummyProtectedWalker";
private static Boolean isLiteVersion = null;
/**
* Actually run the GATK with the specified walker.

View File

@ -33,7 +33,6 @@ import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
@ -120,6 +119,8 @@ public class BaseQualityScoreRecalibrator extends LocusWalker<Long, Long> implem
private RecalibrationEngine recalibrationEngine;
private int minimumQToUse;
protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped.
protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed.
protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
@ -133,6 +134,10 @@ public class BaseQualityScoreRecalibrator extends LocusWalker<Long, Long> implem
*/
public void initialize() {
// check for unsupported access
if (getToolkit().isGATKLite() && !getToolkit().getArguments().disableIndelQuals)
throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument");
if (RAC.FORCE_PLATFORM != null)
RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM;
@ -169,12 +174,14 @@ public class BaseQualityScoreRecalibrator extends LocusWalker<Long, Long> implem
recalibrationEngine = initializeRecalibrationEngine();
recalibrationEngine.initialize(requestedCovariates, recalibrationTables);
minimumQToUse = getToolkit().getArguments().PRESERVE_QSCORES_LESS_THAN;
}
private RecalibrationEngine initializeRecalibrationEngine() {
List<Class<? extends RecalibrationEngine>> REclasses = new PluginManager<RecalibrationEngine>(RecalibrationEngine.class).getPlugins();
if ( REclasses.isEmpty() )
throw new ReviewedStingException("There are no classes found that extend RecalibrationEngine; repository must be corrupted");
throw new ReviewedStingException("The RecalibrationEngine class is not found; repository must be corrupted");
Class c = null;
for ( Class<? extends RecalibrationEngine> REclass : REclasses ) {
@ -201,7 +208,7 @@ public class BaseQualityScoreRecalibrator extends LocusWalker<Long, Long> implem
}
private boolean isLowQualityBase(GATKSAMRecord read, int offset) {
return read.getBaseQualities()[offset] < QualityUtils.MIN_USABLE_Q_SCORE;
return read.getBaseQualities()[offset] < minimumQToUse;
}
private boolean readNotSeen(GATKSAMRecord read) {

View File

@ -1,110 +1,36 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
public class RecalibrationEngine implements PublicPackageSource {
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
public interface RecalibrationEngine {
protected Covariate[] covariates;
protected RecalibrationTables recalibrationTables;
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables);
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) {
this.covariates = covariates;
this.recalibrationTables = recalibrationTables;
}
public void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase);
/**
* Loop through the list of requested covariates and pick out the value from the read, offset, and reference
* Using the list of covariate values as a key, pick out the RecalDatum and increment,
* adding one to the number of observations and potentially one to the number of mismatches for mismatches only.
*
* @param pileupElement The pileup element to update
* @param refBase The reference base at this locus
*/
public synchronized void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) {
final int offset = pileupElement.getOffset();
final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead());
final byte qual = pileupElement.getQual();
final boolean isError = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase);
final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION);
final int eventIndex = EventType.BASE_SUBSTITUTION.index;
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
else
rgPreviousDatum.combine(rgThisDatum);
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex);
if (qualPreviousDatum == null)
qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex);
else
qualPreviousDatum.increment(isError);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue;
final NestedIntegerArray<RecalDatum> covRecalTable = recalibrationTables.getTable(i);
final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex);
if (covPreviousDatum == null)
covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex);
else
covPreviousDatum.increment(isError);
}
}
/**
* creates a datum object with one observation and one or zero error
*
* @param reportedQual the quality score reported by the instrument for this base
* @param isError whether or not the observation is an error
* @return a new RecalDatum object with the observation and the error
*/
protected RecalDatum createDatumObject(final byte reportedQual, final boolean isError) {
return new RecalDatum(1, isError ? 1:0, reportedQual);
}
/**
* Get the covariate key set from a read
*
* @param read the read
* @return the covariate keysets for this read
*/
protected ReadCovariates covariateKeySetFrom(GATKSAMRecord read) {
return (ReadCovariates) read.getTemporaryAttribute(BaseQualityScoreRecalibrator.COVARS_ATTRIBUTE);
}
}

View File

@ -0,0 +1,110 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
public class StandardRecalibrationEngine implements RecalibrationEngine, PublicPackageSource {
protected Covariate[] covariates;
protected RecalibrationTables recalibrationTables;
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) {
this.covariates = covariates;
this.recalibrationTables = recalibrationTables;
}
/**
* Loop through the list of requested covariates and pick out the value from the read, offset, and reference
* Using the list of covariate values as a key, pick out the RecalDatum and increment,
* adding one to the number of observations and potentially one to the number of mismatches for mismatches only.
*
* @param pileupElement The pileup element to update
* @param refBase The reference base at this locus
*/
public synchronized void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) {
final int offset = pileupElement.getOffset();
final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead());
final byte qual = pileupElement.getQual();
final boolean isError = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase);
final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION);
final int eventIndex = EventType.BASE_SUBSTITUTION.index;
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
else
rgPreviousDatum.combine(rgThisDatum);
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex);
if (qualPreviousDatum == null)
qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex);
else
qualPreviousDatum.increment(isError);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue;
final NestedIntegerArray<RecalDatum> covRecalTable = recalibrationTables.getTable(i);
final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex);
if (covPreviousDatum == null)
covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex);
else
covPreviousDatum.increment(isError);
}
}
/**
* creates a datum object with one observation and one or zero error
*
* @param reportedQual the quality score reported by the instrument for this base
* @param isError whether or not the observation is an error
* @return a new RecalDatum object with the observation and the error
*/
protected RecalDatum createDatumObject(final byte reportedQual, final boolean isError) {
return new RecalDatum(1, isError ? 1:0, reportedQual);
}
/**
* Get the covariate key set from a read
*
* @param read the read
* @return the covariate keysets for this read
*/
protected ReadCovariates covariateKeySetFrom(GATKSAMRecord read) {
return (ReadCovariates) read.getTemporaryAttribute(BaseQualityScoreRecalibrator.COVARS_ATTRIBUTE);
}
}

View File

@ -105,6 +105,23 @@ public class JVMUtils {
return allFields;
}
/**
* Utility method to determine whether this is the lite version of the GATK
*/
public static boolean isGATKLite() {
if ( isLiteVersion == null ) {
try {
Class.forName(DummyProtectedClassName);
isLiteVersion = false;
} catch ( ClassNotFoundException e) {
isLiteVersion = true;
}
}
return isLiteVersion;
}
private static final String DummyProtectedClassName = "org.broadinstitute.sting.gatk.DummyProtectedClass";
private static Boolean isLiteVersion = null;
/**
* Find the field with the given name in the class. Will inspect all fields, independent
* of access level.

View File

@ -30,7 +30,6 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.File;
@ -66,13 +65,8 @@ public class BaseRecalibration {
* @param quantizationLevels number of bins to quantize the quality scores
* @param disableIndelQuals if true, do not emit base indel qualities
* @param preserveQLessThan preserve quality scores less than this value
* @param isGATKLite is this being called from the full or Lite version of the GATK
*/
public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan, final boolean isGATKLite) {
// check for unsupported access
if (isGATKLite && !disableIndelQuals)
throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument");
public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan) {
RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE);
recalibrationTables = recalibrationReport.getRecalibrationTables();