Total rewrite of the isGATKLite() functionality with help of Khalid/David. PluginManager was not working for us.

This commit is contained in:
Eric Banks 2012-07-17 15:11:03 -04:00
parent 110886e8b9
commit 305db8c0d1
8 changed files with 171 additions and 122 deletions

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.utils; package org.broadinstitute.sting.gatk;
/* /*
* Copyright (c) 2009 The Broad Institute * Copyright (c) 2009 The Broad Institute
@ -27,7 +27,7 @@ package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource; import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
public class DummyProtectedWalker implements ProtectedPackageSource { public class DummyProtectedClass implements ProtectedPackageSource {
// THIS CLASS IS USED JUST SO THAT WE CAN TEST WHETHER WE ARE USING THE LITE OR FULL VERSION OF THE GATK // THIS CLASS IS USED JUST SO THAT WE CAN TEST WHETHER WE ARE USING THE LITE OR FULL VERSION OF THE GATK
// **** DO NOT REMOVE! **** // **** DO NOT REMOVE! ****

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
public class AdvancedRecalibrationEngine extends RecalibrationEngine implements ProtectedPackageSource { public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource {
// optimizations: don't reallocate an array each time // optimizations: don't reallocate an array each time
private byte[] tempQualArray; private byte[] tempQualArray;

View File

@ -51,7 +51,7 @@ import org.broadinstitute.sting.gatk.samples.SampleDBBuilder;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
@ -199,20 +199,15 @@ public class GenomeAnalysisEngine {
public BaseRecalibration getBaseRecalibration() { return baseRecalibration; } public BaseRecalibration getBaseRecalibration() { return baseRecalibration; }
public boolean hasBaseRecalibration() { return baseRecalibration != null; } public boolean hasBaseRecalibration() { return baseRecalibration != null; }
public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan) { public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan) {
baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, disableIndelQuals, preserveQLessThan, isGATKLite()); baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, disableIndelQuals, preserveQLessThan);
} }
/** /**
* Utility method to determine whether this is the lite version of the GATK * Utility method to determine whether this is the lite version of the GATK
*/ */
public boolean isGATKLite() { public boolean isGATKLite() {
if ( isLiteVersion == null ) { return JVMUtils.isGATKLite();
isLiteVersion = !(new PluginManager<Object>(Object.class).exists(DummyProtectedWalkerName));
}
return isLiteVersion;
} }
private static final String DummyProtectedWalkerName = "DummyProtectedWalker";
private static Boolean isLiteVersion = null;
/** /**
* Actually run the GATK with the specified walker. * Actually run the GATK with the specified walker.

View File

@ -33,7 +33,6 @@ import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter; import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource; import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
@ -120,6 +119,8 @@ public class BaseQualityScoreRecalibrator extends LocusWalker<Long, Long> implem
private RecalibrationEngine recalibrationEngine; private RecalibrationEngine recalibrationEngine;
private int minimumQToUse;
protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped. protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped.
protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed. protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed.
protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\ protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
@ -133,6 +134,10 @@ public class BaseQualityScoreRecalibrator extends LocusWalker<Long, Long> implem
*/ */
public void initialize() { public void initialize() {
// check for unsupported access
if (getToolkit().isGATKLite() && !getToolkit().getArguments().disableIndelQuals)
throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument");
if (RAC.FORCE_PLATFORM != null) if (RAC.FORCE_PLATFORM != null)
RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM;
@ -169,12 +174,14 @@ public class BaseQualityScoreRecalibrator extends LocusWalker<Long, Long> implem
recalibrationEngine = initializeRecalibrationEngine(); recalibrationEngine = initializeRecalibrationEngine();
recalibrationEngine.initialize(requestedCovariates, recalibrationTables); recalibrationEngine.initialize(requestedCovariates, recalibrationTables);
minimumQToUse = getToolkit().getArguments().PRESERVE_QSCORES_LESS_THAN;
} }
private RecalibrationEngine initializeRecalibrationEngine() { private RecalibrationEngine initializeRecalibrationEngine() {
List<Class<? extends RecalibrationEngine>> REclasses = new PluginManager<RecalibrationEngine>(RecalibrationEngine.class).getPlugins(); List<Class<? extends RecalibrationEngine>> REclasses = new PluginManager<RecalibrationEngine>(RecalibrationEngine.class).getPlugins();
if ( REclasses.isEmpty() ) if ( REclasses.isEmpty() )
throw new ReviewedStingException("There are no classes found that extend RecalibrationEngine; repository must be corrupted"); throw new ReviewedStingException("The RecalibrationEngine class is not found; repository must be corrupted");
Class c = null; Class c = null;
for ( Class<? extends RecalibrationEngine> REclass : REclasses ) { for ( Class<? extends RecalibrationEngine> REclass : REclasses ) {
@ -201,7 +208,7 @@ public class BaseQualityScoreRecalibrator extends LocusWalker<Long, Long> implem
} }
private boolean isLowQualityBase(GATKSAMRecord read, int offset) { private boolean isLowQualityBase(GATKSAMRecord read, int offset) {
return read.getBaseQualities()[offset] < QualityUtils.MIN_USABLE_Q_SCORE; return read.getBaseQualities()[offset] < minimumQToUse;
} }
private boolean readNotSeen(GATKSAMRecord read) { private boolean readNotSeen(GATKSAMRecord read) {

View File

@ -1,110 +1,36 @@
package org.broadinstitute.sting.gatk.walkers.bqsr; package org.broadinstitute.sting.gatk.walkers.bqsr;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
public class RecalibrationEngine implements PublicPackageSource { /*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
public interface RecalibrationEngine {
protected Covariate[] covariates; public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables);
protected RecalibrationTables recalibrationTables;
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) { public void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase);
this.covariates = covariates;
this.recalibrationTables = recalibrationTables;
}
/**
* Loop through the list of requested covariates and pick out the value from the read, offset, and reference
* Using the list of covariate values as a key, pick out the RecalDatum and increment,
* adding one to the number of observations and potentially one to the number of mismatches for mismatches only.
*
* @param pileupElement The pileup element to update
* @param refBase The reference base at this locus
*/
public synchronized void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) {
final int offset = pileupElement.getOffset();
final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead());
final byte qual = pileupElement.getQual();
final boolean isError = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase);
final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION);
final int eventIndex = EventType.BASE_SUBSTITUTION.index;
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
else
rgPreviousDatum.combine(rgThisDatum);
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex);
if (qualPreviousDatum == null)
qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex);
else
qualPreviousDatum.increment(isError);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue;
final NestedIntegerArray<RecalDatum> covRecalTable = recalibrationTables.getTable(i);
final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex);
if (covPreviousDatum == null)
covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex);
else
covPreviousDatum.increment(isError);
}
}
/**
* creates a datum object with one observation and one or zero error
*
* @param reportedQual the quality score reported by the instrument for this base
* @param isError whether or not the observation is an error
* @return a new RecalDatum object with the observation and the error
*/
protected RecalDatum createDatumObject(final byte reportedQual, final boolean isError) {
return new RecalDatum(1, isError ? 1:0, reportedQual);
}
/**
* Get the covariate key set from a read
*
* @param read the read
* @return the covariate keysets for this read
*/
protected ReadCovariates covariateKeySetFrom(GATKSAMRecord read) {
return (ReadCovariates) read.getTemporaryAttribute(BaseQualityScoreRecalibrator.COVARS_ATTRIBUTE);
}
} }

View File

@ -0,0 +1,110 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
public class StandardRecalibrationEngine implements RecalibrationEngine, PublicPackageSource {
protected Covariate[] covariates;
protected RecalibrationTables recalibrationTables;
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) {
this.covariates = covariates;
this.recalibrationTables = recalibrationTables;
}
/**
* Loop through the list of requested covariates and pick out the value from the read, offset, and reference
* Using the list of covariate values as a key, pick out the RecalDatum and increment,
* adding one to the number of observations and potentially one to the number of mismatches for mismatches only.
*
* @param pileupElement The pileup element to update
* @param refBase The reference base at this locus
*/
public synchronized void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) {
final int offset = pileupElement.getOffset();
final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead());
final byte qual = pileupElement.getQual();
final boolean isError = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase);
final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION);
final int eventIndex = EventType.BASE_SUBSTITUTION.index;
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
else
rgPreviousDatum.combine(rgThisDatum);
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex);
if (qualPreviousDatum == null)
qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex);
else
qualPreviousDatum.increment(isError);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue;
final NestedIntegerArray<RecalDatum> covRecalTable = recalibrationTables.getTable(i);
final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex);
if (covPreviousDatum == null)
covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex);
else
covPreviousDatum.increment(isError);
}
}
/**
* creates a datum object with one observation and one or zero error
*
* @param reportedQual the quality score reported by the instrument for this base
* @param isError whether or not the observation is an error
* @return a new RecalDatum object with the observation and the error
*/
protected RecalDatum createDatumObject(final byte reportedQual, final boolean isError) {
return new RecalDatum(1, isError ? 1:0, reportedQual);
}
/**
* Get the covariate key set from a read
*
* @param read the read
* @return the covariate keysets for this read
*/
protected ReadCovariates covariateKeySetFrom(GATKSAMRecord read) {
return (ReadCovariates) read.getTemporaryAttribute(BaseQualityScoreRecalibrator.COVARS_ATTRIBUTE);
}
}

View File

@ -105,6 +105,23 @@ public class JVMUtils {
return allFields; return allFields;
} }
/**
* Utility method to determine whether this is the lite version of the GATK
*/
public static boolean isGATKLite() {
if ( isLiteVersion == null ) {
try {
Class.forName(DummyProtectedClassName);
isLiteVersion = false;
} catch ( ClassNotFoundException e) {
isLiteVersion = true;
}
}
return isLiteVersion;
}
private static final String DummyProtectedClassName = "org.broadinstitute.sting.gatk.DummyProtectedClass";
private static Boolean isLiteVersion = null;
/** /**
* Find the field with the given name in the class. Will inspect all fields, independent * Find the field with the given name in the class. Will inspect all fields, independent
* of access level. * of access level.

View File

@ -30,7 +30,6 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.File; import java.io.File;
@ -66,13 +65,8 @@ public class BaseRecalibration {
* @param quantizationLevels number of bins to quantize the quality scores * @param quantizationLevels number of bins to quantize the quality scores
* @param disableIndelQuals if true, do not emit base indel qualities * @param disableIndelQuals if true, do not emit base indel qualities
* @param preserveQLessThan preserve quality scores less than this value * @param preserveQLessThan preserve quality scores less than this value
* @param isGATKLite is this being called from the full or Lite version of the GATK
*/ */
public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan, final boolean isGATKLite) { public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan) {
// check for unsupported access
if (isGATKLite && !disableIndelQuals)
throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument");
RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE); RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE);
recalibrationTables = recalibrationReport.getRecalibrationTables(); recalibrationTables = recalibrationReport.getRecalibrationTables();