Changing the commenting style in the BQSR
This commit is contained in:
parent
19cc0b373e
commit
e12ae65d33
|
|
@ -80,7 +80,7 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
|
|||
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
||||
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
|
||||
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
|
||||
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
|
||||
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
|
||||
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
|
||||
else
|
||||
rgPreviousDatum.combine(rgThisDatum);
|
||||
|
|
@ -126,7 +126,7 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
|
|||
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
||||
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
|
||||
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
|
||||
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
|
||||
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
|
||||
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
|
||||
else
|
||||
rgPreviousDatum.combine(rgThisDatum);
|
||||
|
|
|
|||
|
|
@ -32,13 +32,11 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
|
@ -299,6 +297,4 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
|
||||
return table;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -106,26 +106,26 @@ import java.util.ArrayList;
|
|||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
||||
@By(DataSource.READS)
|
||||
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
|
||||
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
|
||||
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
|
||||
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
|
||||
public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long> {
|
||||
@ArgumentCollection
|
||||
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
|
||||
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
|
||||
|
||||
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
|
||||
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
|
||||
|
||||
private RecalibrationTables recalibrationTables;
|
||||
|
||||
private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental)
|
||||
private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental)
|
||||
|
||||
private RecalibrationEngine recalibrationEngine;
|
||||
|
||||
private int minimumQToUse;
|
||||
|
||||
protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped.
|
||||
protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed.
|
||||
protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
|
||||
protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped.
|
||||
protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed.
|
||||
protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
|
||||
|
||||
private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation.";
|
||||
|
||||
|
|
@ -143,16 +143,16 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
if (RAC.FORCE_PLATFORM != null)
|
||||
RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM;
|
||||
|
||||
if (RAC.knownSites.isEmpty() && !RAC.RUN_WITHOUT_DBSNP) // Warn the user if no dbSNP file or other variant mask was specified
|
||||
if (RAC.knownSites.isEmpty() && !RAC.RUN_WITHOUT_DBSNP) // Warn the user if no dbSNP file or other variant mask was specified
|
||||
throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION);
|
||||
|
||||
if (RAC.LIST_ONLY) {
|
||||
RecalUtils.listAvailableCovariates(logger);
|
||||
System.exit(0);
|
||||
}
|
||||
RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
|
||||
RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
|
||||
|
||||
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
|
||||
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
|
||||
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
|
||||
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
|
||||
|
||||
|
|
@ -164,9 +164,9 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
requestedCovariates[covariateIndex++] = covariate;
|
||||
|
||||
logger.info("The covariates being used here: ");
|
||||
for (Covariate cov : requestedCovariates) { // list all the covariates being used
|
||||
for (Covariate cov : requestedCovariates) { // list all the covariates being used
|
||||
logger.info("\t" + cov.getClass().getSimpleName());
|
||||
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
|
||||
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
|
||||
}
|
||||
|
||||
int numReadGroups = 0;
|
||||
|
|
@ -216,12 +216,14 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
*/
|
||||
public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
long countedSites = 0L;
|
||||
if (tracker.getValues(RAC.knownSites).size() == 0) { // Only analyze sites not present in the provided known sites
|
||||
// Only analyze sites not present in the provided known sites
|
||||
if (tracker.getValues(RAC.knownSites).size() == 0) {
|
||||
for (final PileupElement p : context.getBasePileup()) {
|
||||
final GATKSAMRecord read = p.getRead();
|
||||
final int offset = p.getOffset();
|
||||
|
||||
if (readHasBeenSkipped(read) || isLowQualityBase(read, offset)) // This read has been marked to be skipped or base is low quality (we don't recalibrate low quality bases)
|
||||
// This read has been marked to be skipped or base is low quality (we don't recalibrate low quality bases)
|
||||
if (readHasBeenSkipped(read) || isLowQualityBase(read, offset))
|
||||
continue;
|
||||
|
||||
if (readNotSeen(read)) {
|
||||
|
|
@ -234,10 +236,12 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates));
|
||||
}
|
||||
|
||||
if (!ReadUtils.isSOLiDRead(read) || // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
|
||||
// SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
|
||||
if (!ReadUtils.isSOLiDRead(read) ||
|
||||
RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING ||
|
||||
RecalUtils.isColorSpaceConsistent(read, offset))
|
||||
recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); // This base finally passed all the checks for a good base, so add it to the big data hashmap
|
||||
// This base finally passed all the checks for a good base, so add it to the big data hashmap
|
||||
recalibrationEngine.updateDataForPileupElement(p, ref.getBase());
|
||||
}
|
||||
countedSites++;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
|
|||
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
||||
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
|
||||
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
|
||||
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
|
||||
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
|
||||
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
|
||||
else
|
||||
rgPreviousDatum.combine(rgThisDatum);
|
||||
|
|
|
|||
|
|
@ -48,9 +48,9 @@ public class BaseRecalibration {
|
|||
private final static int MAXIMUM_RECALIBRATED_READ_LENGTH = 5000;
|
||||
private final ReadCovariates readCovariates;
|
||||
|
||||
private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
|
||||
private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
|
||||
private final RecalibrationTables recalibrationTables;
|
||||
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
||||
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
||||
|
||||
private final boolean disableIndelQuals;
|
||||
private final int preserveQLessThan;
|
||||
|
|
@ -76,9 +76,9 @@ public class BaseRecalibration {
|
|||
recalibrationTables = recalibrationReport.getRecalibrationTables();
|
||||
requestedCovariates = recalibrationReport.getRequestedCovariates();
|
||||
quantizationInfo = recalibrationReport.getQuantizationInfo();
|
||||
if (quantizationLevels == 0) // quantizationLevels == 0 means no quantization, preserve the quality scores
|
||||
if (quantizationLevels == 0) // quantizationLevels == 0 means no quantization, preserve the quality scores
|
||||
quantizationInfo.noQuantization();
|
||||
else if (quantizationLevels > 0 && quantizationLevels != quantizationInfo.getQuantizationLevels()) // any other positive value means, we want a different quantization than the one pre-calculated in the recalibration report. Negative values mean the user did not provide a quantization argument, and just wnats to use what's in the report.
|
||||
else if (quantizationLevels > 0 && quantizationLevels != quantizationInfo.getQuantizationLevels()) // any other positive value means, we want a different quantization than the one pre-calculated in the recalibration report. Negative values mean the user did not provide a quantization argument, and just wnats to use what's in the report.
|
||||
quantizationInfo.quantizeQualityScores(quantizationLevels);
|
||||
|
||||
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||
|
|
@ -103,24 +103,26 @@ public class BaseRecalibration {
|
|||
}
|
||||
}
|
||||
|
||||
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read
|
||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||
// Compute all covariates for the read
|
||||
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates);
|
||||
|
||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||
if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {
|
||||
read.setBaseQualities(null, errorModel);
|
||||
continue;
|
||||
}
|
||||
|
||||
final byte[] quals = read.getBaseQualities(errorModel);
|
||||
final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model
|
||||
final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model
|
||||
|
||||
final int readLength = read.getReadLength();
|
||||
for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read
|
||||
for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read
|
||||
|
||||
final byte originalQualityScore = quals[offset];
|
||||
|
||||
if (originalQualityScore >= preserveQLessThan) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
||||
final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model
|
||||
final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
|
||||
if (originalQualityScore >= preserveQLessThan) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
||||
final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model
|
||||
final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
|
||||
quals[offset] = recalibratedQualityScore;
|
||||
}
|
||||
}
|
||||
|
|
@ -152,10 +154,10 @@ public class BaseRecalibration {
|
|||
final double deltaQReported = calculateDeltaQReported(recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE), key, errorModel, globalDeltaQ, qualFromRead);
|
||||
final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables, key, errorModel, globalDeltaQ, deltaQReported, qualFromRead);
|
||||
|
||||
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
|
||||
recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
|
||||
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
|
||||
recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
|
||||
|
||||
return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
|
||||
return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
|
||||
}
|
||||
|
||||
private double calculateGlobalDeltaQ(final NestedIntegerArray<RecalDatum> table, final int[] key, final EventType errorModel) {
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ public class QuantizationInfo {
|
|||
}
|
||||
|
||||
public QuantizationInfo(final RecalibrationTables recalibrationTables, final int quantizationLevels) {
|
||||
final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution
|
||||
final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution
|
||||
for (int i = 0; i < qualHistogram.length; i++)
|
||||
qualHistogram[i] = 0L;
|
||||
|
||||
|
|
@ -38,10 +38,10 @@ public class QuantizationInfo {
|
|||
|
||||
for (final RecalDatum value : qualTable.getAllValues()) {
|
||||
final RecalDatum datum = value;
|
||||
final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL )
|
||||
qualHistogram[empiricalQual] += (long) datum.getNumObservations(); // add the number of observations for every key
|
||||
final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL )
|
||||
qualHistogram[empiricalQual] += (long) datum.getNumObservations(); // add the number of observations for every key
|
||||
}
|
||||
empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities
|
||||
empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities
|
||||
quantizeQualityScores(quantizationLevels);
|
||||
|
||||
this.quantizationLevels = quantizationLevels;
|
||||
|
|
@ -49,8 +49,8 @@ public class QuantizationInfo {
|
|||
|
||||
|
||||
public void quantizeQualityScores(int nLevels) {
|
||||
QualQuantizer quantizer = new QualQuantizer(empiricalQualCounts, nLevels, QualityUtils.MIN_USABLE_Q_SCORE); // quantize the qualities to the desired number of levels
|
||||
quantizedQuals = quantizer.getOriginalToQuantizedMap(); // map with the original to quantized qual map (using the standard number of levels in the RAC)
|
||||
QualQuantizer quantizer = new QualQuantizer(empiricalQualCounts, nLevels, QualityUtils.MIN_USABLE_Q_SCORE); // quantize the qualities to the desired number of levels
|
||||
quantizedQuals = quantizer.getOriginalToQuantizedMap(); // map with the original to quantized qual map (using the standard number of levels in the RAC)
|
||||
}
|
||||
|
||||
public void noQuantization() {
|
||||
|
|
|
|||
|
|
@ -81,8 +81,8 @@ public class RecalUtils {
|
|||
public final static String NUMBER_OBSERVATIONS_COLUMN_NAME = "Observations";
|
||||
public final static String NUMBER_ERRORS_COLUMN_NAME = "Errors";
|
||||
|
||||
private final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
|
||||
private final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
|
||||
private final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
|
||||
private final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
|
||||
private static boolean warnUserNullPlatform = false;
|
||||
|
||||
private static final String SCRIPT_FILE = "BQSR.R";
|
||||
|
|
@ -111,12 +111,13 @@ public class RecalUtils {
|
|||
final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins();
|
||||
final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins();
|
||||
|
||||
final ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates
|
||||
final ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates
|
||||
ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>();
|
||||
if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES)
|
||||
optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user
|
||||
optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user
|
||||
|
||||
if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified
|
||||
// parse the -cov arguments that were provided, skipping over the ones already specified
|
||||
if (argumentCollection.COVARIATES != null) {
|
||||
for (String requestedCovariateString : argumentCollection.COVARIATES) {
|
||||
// help the transition from BQSR v1 to BQSR v2
|
||||
if ( requestedCovariateString.equals("DinucCovariate") )
|
||||
|
|
@ -126,12 +127,12 @@ public class RecalUtils {
|
|||
|
||||
boolean foundClass = false;
|
||||
for (Class<? extends Covariate> covClass : covariateClasses) {
|
||||
if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class
|
||||
if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class
|
||||
foundClass = true;
|
||||
if (!requiredClasses.contains(covClass) &&
|
||||
(argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) {
|
||||
try {
|
||||
final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it
|
||||
final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it
|
||||
optionalCovariates.add(covariate);
|
||||
} catch (Exception e) {
|
||||
throw new DynamicClassResolutionException(covClass, e);
|
||||
|
|
@ -161,7 +162,7 @@ public class RecalUtils {
|
|||
if (classes.size() != 2)
|
||||
throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected");
|
||||
|
||||
dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next.
|
||||
dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next.
|
||||
dest.add(new QualityScoreCovariate());
|
||||
return dest;
|
||||
}
|
||||
|
|
@ -266,20 +267,20 @@ public class RecalUtils {
|
|||
|
||||
for (int tableIndex = 0; tableIndex < recalibrationTables.numTables(); tableIndex++) {
|
||||
|
||||
final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
|
||||
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[0]), "%s")); // save the required covariate name so we can reference it in the future
|
||||
final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
|
||||
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[0]), "%s")); // save the required covariate name so we can reference it in the future
|
||||
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.index) {
|
||||
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[1]), "%s")); // save the required covariate name so we can reference it in the future
|
||||
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[1]), "%s")); // save the required covariate name so we can reference it in the future
|
||||
if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
|
||||
columnNames.add(covariateValue);
|
||||
columnNames.add(covariateName);
|
||||
}
|
||||
}
|
||||
|
||||
columnNames.add(eventType); // the order of these column names is important here
|
||||
columnNames.add(eventType); // the order of these column names is important here
|
||||
columnNames.add(empiricalQuality);
|
||||
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
|
||||
columnNames.add(estimatedQReported); // only the read group table needs the estimated Q reported
|
||||
columnNames.add(estimatedQReported); // only the read group table needs the estimated Q reported
|
||||
columnNames.add(nObservations);
|
||||
columnNames.add(nErrors);
|
||||
|
||||
|
|
@ -288,7 +289,7 @@ public class RecalUtils {
|
|||
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size());
|
||||
for (final Pair<String, String> columnName : columnNames)
|
||||
reportTable.addColumn(columnName.getFirst(), columnName.getSecond());
|
||||
rowIndex = 0; // reset the row index since we're starting with a new table
|
||||
rowIndex = 0; // reset the row index since we're starting with a new table
|
||||
} else {
|
||||
reportTable = result.get(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index);
|
||||
}
|
||||
|
|
@ -316,7 +317,7 @@ public class RecalUtils {
|
|||
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality());
|
||||
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), Math.round(datum.getNumObservations()));
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), Math.round(datum.getNumMismatches()));
|
||||
|
||||
|
|
@ -349,7 +350,6 @@ public class RecalUtils {
|
|||
return Utils.join(",", names);
|
||||
}
|
||||
|
||||
|
||||
public static void outputRecalibrationReport(final GATKReportTable argumentTable, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) {
|
||||
outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile);
|
||||
}
|
||||
|
|
@ -410,13 +410,13 @@ public class RecalUtils {
|
|||
|
||||
// add the quality score table to the delta table
|
||||
final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
|
||||
for (final NestedIntegerArray.Leaf leaf : qualTable.getAllLeaves()) { // go through every element in the covariates table to create the delta table
|
||||
for (final NestedIntegerArray.Leaf leaf : qualTable.getAllLeaves()) { // go through every element in the covariates table to create the delta table
|
||||
final int[] newCovs = new int[4];
|
||||
newCovs[0] = leaf.keys[0];
|
||||
newCovs[1] = requestedCovariates.length; // replace the covariate name with an arbitrary (unused) index for QualityScore
|
||||
newCovs[1] = requestedCovariates.length; // replace the covariate name with an arbitrary (unused) index for QualityScore
|
||||
newCovs[2] = leaf.keys[1];
|
||||
newCovs[3] = leaf.keys[2];
|
||||
addToDeltaTable(deltaTable, newCovs, (RecalDatum)leaf.value); // add this covariate to the delta table
|
||||
addToDeltaTable(deltaTable, newCovs, (RecalDatum)leaf.value); // add this covariate to the delta table
|
||||
}
|
||||
|
||||
// add the optional covariates to the delta table
|
||||
|
|
@ -425,10 +425,10 @@ public class RecalUtils {
|
|||
for (final NestedIntegerArray.Leaf leaf : covTable.getAllLeaves()) {
|
||||
final int[] covs = new int[4];
|
||||
covs[0] = leaf.keys[0];
|
||||
covs[1] = i; // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS)
|
||||
covs[1] = i; // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS)
|
||||
covs[2] = leaf.keys[2];
|
||||
covs[3] = leaf.keys[3];
|
||||
addToDeltaTable(deltaTable, covs, (RecalDatum) leaf.value); // add this covariate to the delta table
|
||||
addToDeltaTable(deltaTable, covs, (RecalDatum) leaf.value); // add this covariate to the delta table
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -486,11 +486,11 @@ public class RecalUtils {
|
|||
*/
|
||||
private static void addToDeltaTable(final NestedHashMap deltaTable, final int[] deltaKey, final RecalDatum recalDatum) {
|
||||
Object[] wrappedKey = wrapKeys(deltaKey);
|
||||
final RecalDatum deltaDatum = (RecalDatum)deltaTable.get(wrappedKey); // check if we already have a RecalDatum for this key
|
||||
final RecalDatum deltaDatum = (RecalDatum)deltaTable.get(wrappedKey); // check if we already have a RecalDatum for this key
|
||||
if (deltaDatum == null)
|
||||
deltaTable.put(new RecalDatum(recalDatum), wrappedKey); // if we don't have a key yet, create a new one with the same values as the curent datum
|
||||
deltaTable.put(new RecalDatum(recalDatum), wrappedKey); // if we don't have a key yet, create a new one with the same values as the curent datum
|
||||
else
|
||||
deltaDatum.combine(recalDatum); // if we do have a datum, combine it with this one.
|
||||
deltaDatum.combine(recalDatum); // if we do have a datum, combine it with this one.
|
||||
}
|
||||
|
||||
private static Object[] wrapKeys(final int[] keys) {
|
||||
|
|
@ -539,10 +539,11 @@ public class RecalUtils {
|
|||
* @return true if this read is consistent or false if this read should be skipped
|
||||
*/
|
||||
public static boolean isColorSpaceConsistent(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
|
||||
if (!ReadUtils.isSOLiDRead(read)) // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
|
||||
if (!ReadUtils.isSOLiDRead(read)) // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
|
||||
return true;
|
||||
|
||||
if (read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
|
||||
// Haven't calculated the inconsistency array yet for this read
|
||||
if (read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG) == null) {
|
||||
final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG);
|
||||
if (attr != null) {
|
||||
byte[] colorSpace;
|
||||
|
|
@ -562,13 +563,13 @@ public class RecalUtils {
|
|||
}
|
||||
}
|
||||
|
||||
byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
|
||||
byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
|
||||
if (read.getReadNegativeStrandFlag())
|
||||
readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
|
||||
|
||||
final byte[] inconsistency = new byte[readBases.length];
|
||||
int i;
|
||||
byte prevBase = colorSpace[0]; // The sentinel
|
||||
byte prevBase = colorSpace[0]; // The sentinel
|
||||
for (i = 0; i < readBases.length; i++) {
|
||||
final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[i + 1]);
|
||||
inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1);
|
||||
|
|
@ -576,11 +577,11 @@ public class RecalUtils {
|
|||
}
|
||||
read.setAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
|
||||
}
|
||||
else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it
|
||||
else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it
|
||||
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
|
||||
|
||||
else
|
||||
return false; // otherwise, just skip the read
|
||||
return false; // otherwise, just skip the read
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
@ -774,6 +775,4 @@ public class RecalUtils {
|
|||
return base;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,13 +19,13 @@ import java.util.*;
|
|||
* @since 3/26/12
|
||||
*/
|
||||
public class RecalibrationReport {
|
||||
private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done)
|
||||
private final RecalibrationTables recalibrationTables; // quick access reference to the tables
|
||||
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
||||
private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done)
|
||||
private final RecalibrationTables recalibrationTables; // quick access reference to the tables
|
||||
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
||||
private final HashMap<String, Integer> optionalCovariateIndexes;
|
||||
|
||||
private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
|
||||
private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter
|
||||
private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
|
||||
private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter
|
||||
|
||||
private final int[] tempRGarray = new int[2];
|
||||
private final int[] tempQUALarray = new int[3];
|
||||
|
|
@ -40,7 +40,7 @@ public class RecalibrationReport {
|
|||
GATKReportTable quantizedTable = report.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE);
|
||||
quantizationInfo = initializeQuantizationTable(quantizedTable);
|
||||
|
||||
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
|
||||
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
|
||||
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
|
||||
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
|
||||
requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()];
|
||||
|
|
@ -50,13 +50,13 @@ public class RecalibrationReport {
|
|||
requestedCovariates[covariateIndex++] = covariate;
|
||||
for (final Covariate covariate : optionalCovariates) {
|
||||
requestedCovariates[covariateIndex] = covariate;
|
||||
final String covariateName = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
|
||||
final String covariateName = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
|
||||
optionalCovariateIndexes.put(covariateName, covariateIndex-2);
|
||||
covariateIndex++;
|
||||
}
|
||||
|
||||
for (Covariate cov : requestedCovariates)
|
||||
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
|
||||
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
|
||||
|
||||
recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE)));
|
||||
|
||||
|
|
@ -198,9 +198,10 @@ public class RecalibrationReport {
|
|||
final long nErrors = (Long) reportTable.get(row, RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
|
||||
final double empiricalQuality = (Double) reportTable.get(row, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME);
|
||||
|
||||
final double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table
|
||||
(Double) reportTable.get(row, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
|
||||
Byte.parseByte((String) reportTable.get(row, RecalUtils.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table
|
||||
// the estimatedQreported column only exists in the ReadGroup table
|
||||
final double estimatedQReported = hasEstimatedQReportedColumn ?
|
||||
(Double) reportTable.get(row, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
|
||||
Byte.parseByte((String) reportTable.get(row, RecalUtils.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table
|
||||
|
||||
final RecalDatum datum = new RecalDatum(nObservations, nErrors, (byte)1);
|
||||
datum.setEstimatedQReported(estimatedQReported);
|
||||
|
|
@ -242,7 +243,7 @@ public class RecalibrationReport {
|
|||
final String argument = table.get(i, "Argument").toString();
|
||||
Object value = table.get(i, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
|
||||
if (value.equals("null"))
|
||||
value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport
|
||||
value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport
|
||||
|
||||
if (argument.equals("covariate") && value != null)
|
||||
RAC.COVARIATES = value.toString().split(",");
|
||||
|
|
|
|||
|
|
@ -87,7 +87,8 @@ public class ContextCovariate implements StandardCovariate {
|
|||
|
||||
// store the original bases and then write Ns over low quality ones
|
||||
final byte[] originalBases = read.getReadBases().clone();
|
||||
final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context
|
||||
// Write N's over the low quality tail of the reads to avoid adding them into the context
|
||||
final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS);
|
||||
|
||||
final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag();
|
||||
byte[] bases = clippedRead.getReadBases();
|
||||
|
|
@ -115,7 +116,7 @@ public class ContextCovariate implements StandardCovariate {
|
|||
|
||||
@Override
|
||||
public String formatKey(final int key) {
|
||||
if (key == -1) // this can only happen in test routines because we do not propagate null keys to the csv file
|
||||
if (key == -1) // this can only happen in test routines because we do not propagate null keys to the csv file
|
||||
return null;
|
||||
|
||||
return contextFromKey(key);
|
||||
|
|
@ -176,9 +177,9 @@ public class ContextCovariate implements StandardCovariate {
|
|||
|
||||
for (int currentIndex = contextSize; currentIndex < readLength; currentIndex++) {
|
||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(bases[currentIndex]);
|
||||
if (baseIndex == -1) { // ignore non-ACGT bases
|
||||
if (baseIndex == -1) { // ignore non-ACGT bases
|
||||
currentNPenalty = contextSize;
|
||||
currentKey = 0; // reset the key
|
||||
currentKey = 0; // reset the key
|
||||
} else {
|
||||
// push this base's contribution onto the key: shift everything 2 bits, mask out the non-context bits, and add the new base and the length in
|
||||
currentKey = (currentKey >> 2) & mask;
|
||||
|
|
@ -215,7 +216,7 @@ public class ContextCovariate implements StandardCovariate {
|
|||
int bitOffset = LENGTH_BITS;
|
||||
for (int i = start; i < end; i++) {
|
||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(dna[i]);
|
||||
if (baseIndex == -1) // ignore non-ACGT bases
|
||||
if (baseIndex == -1) // ignore non-ACGT bases
|
||||
return -1;
|
||||
key |= (baseIndex << bitOffset);
|
||||
bitOffset += 2;
|
||||
|
|
@ -233,15 +234,15 @@ public class ContextCovariate implements StandardCovariate {
|
|||
if (key < 0)
|
||||
throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");
|
||||
|
||||
final int length = key & LENGTH_MASK; // the first bits represent the length (in bp) of the context
|
||||
int mask = 48; // use the mask to pull out bases
|
||||
final int length = key & LENGTH_MASK; // the first bits represent the length (in bp) of the context
|
||||
int mask = 48; // use the mask to pull out bases
|
||||
int offset = LENGTH_BITS;
|
||||
|
||||
StringBuilder dna = new StringBuilder();
|
||||
for (int i = 0; i < length; i++) {
|
||||
final int baseIndex = (key & mask) >> offset;
|
||||
dna.append((char)BaseUtils.baseIndexToSimpleBase(baseIndex));
|
||||
mask = mask << 2; // move the mask over to the next 2 bits
|
||||
mask = mask << 2; // move the mask over to the next 2 bits
|
||||
offset += 2;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -108,7 +108,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
// the current sequential model would consider the effects independently instead of jointly.
|
||||
final boolean multiplyByNegative1 = read.getReadPairedFlag() && read.getSecondOfPairFlag();
|
||||
|
||||
int cycle = multiplyByNegative1 ? -1 : 1; // todo -- check if this is the right behavior for mate paired reads in flow cycle platforms.
|
||||
int cycle = multiplyByNegative1 ? -1 : 1; // todo -- check if this is the right behavior for mate paired reads in flow cycle platforms.
|
||||
|
||||
// BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
|
||||
// For example, AAAAAAA was probably read in two flow cycles but here we count it as one
|
||||
|
|
@ -201,9 +201,9 @@ public class CycleCovariate implements StandardCovariate {
|
|||
|
||||
@Override
|
||||
public String formatKey(final int key) {
|
||||
int cycle = key >> 1; // shift so we can remove the "sign" bit
|
||||
if ( (key & 1) != 0 ) // is the last bit set?
|
||||
cycle *= -1; // then the cycle is negative
|
||||
int cycle = key >> 1; // shift so we can remove the "sign" bit
|
||||
if ( (key & 1) != 0 ) // is the last bit set?
|
||||
cycle *= -1; // then the cycle is negative
|
||||
return String.format("%d", cycle);
|
||||
}
|
||||
|
||||
|
|
@ -222,7 +222,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
int result = Math.abs(cycle);
|
||||
result = result << 1; // shift so we can add the "sign" bit
|
||||
if ( cycle < 0 )
|
||||
result++; // negative cycles get the lower-most bit set
|
||||
result++; // negative cycles get the lower-most bit set
|
||||
return result;
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue