Changing the commenting style in the BQSR

This commit is contained in:
Ryan Poplin 2012-08-29 11:27:45 -04:00
parent 19cc0b373e
commit e12ae65d33
10 changed files with 105 additions and 102 deletions

View File

@ -80,7 +80,7 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
else
rgPreviousDatum.combine(rgThisDatum);
@ -126,7 +126,7 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
else
rgPreviousDatum.combine(rgThisDatum);

View File

@ -32,13 +32,11 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -299,6 +297,4 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
return table;
}
}

View File

@ -106,26 +106,26 @@ import java.util.ArrayList;
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
@By(DataSource.READS)
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long> {
@ArgumentCollection
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
private RecalibrationTables recalibrationTables;
private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental)
private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental)
private RecalibrationEngine recalibrationEngine;
private int minimumQToUse;
protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped.
protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed.
protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped.
protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed.
protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation.";
@ -143,16 +143,16 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
if (RAC.FORCE_PLATFORM != null)
RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM;
if (RAC.knownSites.isEmpty() && !RAC.RUN_WITHOUT_DBSNP) // Warn the user if no dbSNP file or other variant mask was specified
if (RAC.knownSites.isEmpty() && !RAC.RUN_WITHOUT_DBSNP) // Warn the user if no dbSNP file or other variant mask was specified
throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION);
if (RAC.LIST_ONLY) {
RecalUtils.listAvailableCovariates(logger);
System.exit(0);
}
RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
@ -164,9 +164,9 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
requestedCovariates[covariateIndex++] = covariate;
logger.info("The covariates being used here: ");
for (Covariate cov : requestedCovariates) { // list all the covariates being used
for (Covariate cov : requestedCovariates) { // list all the covariates being used
logger.info("\t" + cov.getClass().getSimpleName());
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
}
int numReadGroups = 0;
@ -216,12 +216,14 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
*/
public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
long countedSites = 0L;
if (tracker.getValues(RAC.knownSites).size() == 0) { // Only analyze sites not present in the provided known sites
// Only analyze sites not present in the provided known sites
if (tracker.getValues(RAC.knownSites).size() == 0) {
for (final PileupElement p : context.getBasePileup()) {
final GATKSAMRecord read = p.getRead();
final int offset = p.getOffset();
if (readHasBeenSkipped(read) || isLowQualityBase(read, offset)) // This read has been marked to be skipped or base is low quality (we don't recalibrate low quality bases)
// This read has been marked to be skipped or base is low quality (we don't recalibrate low quality bases)
if (readHasBeenSkipped(read) || isLowQualityBase(read, offset))
continue;
if (readNotSeen(read)) {
@ -234,10 +236,12 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates));
}
if (!ReadUtils.isSOLiDRead(read) || // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
// SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
if (!ReadUtils.isSOLiDRead(read) ||
RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING ||
RecalUtils.isColorSpaceConsistent(read, offset))
recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); // This base finally passed all the checks for a good base, so add it to the big data hashmap
// This base finally passed all the checks for a good base, so add it to the big data hashmap
recalibrationEngine.updateDataForPileupElement(p, ref.getBase());
}
countedSites++;
}

View File

@ -68,7 +68,7 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
else
rgPreviousDatum.combine(rgThisDatum);

View File

@ -48,9 +48,9 @@ public class BaseRecalibration {
private final static int MAXIMUM_RECALIBRATED_READ_LENGTH = 5000;
private final ReadCovariates readCovariates;
private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
private final RecalibrationTables recalibrationTables;
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
private final boolean disableIndelQuals;
private final int preserveQLessThan;
@ -76,9 +76,9 @@ public class BaseRecalibration {
recalibrationTables = recalibrationReport.getRecalibrationTables();
requestedCovariates = recalibrationReport.getRequestedCovariates();
quantizationInfo = recalibrationReport.getQuantizationInfo();
if (quantizationLevels == 0) // quantizationLevels == 0 means no quantization, preserve the quality scores
if (quantizationLevels == 0) // quantizationLevels == 0 means no quantization, preserve the quality scores
quantizationInfo.noQuantization();
else if (quantizationLevels > 0 && quantizationLevels != quantizationInfo.getQuantizationLevels()) // any other positive value means, we want a different quantization than the one pre-calculated in the recalibration report. Negative values mean the user did not provide a quantization argument, and just wnats to use what's in the report.
else if (quantizationLevels > 0 && quantizationLevels != quantizationInfo.getQuantizationLevels()) // any other positive value means, we want a different quantization than the one pre-calculated in the recalibration report. Negative values mean the user did not provide a quantization argument, and just wnats to use what's in the report.
quantizationInfo.quantizeQualityScores(quantizationLevels);
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
@ -103,24 +103,26 @@ public class BaseRecalibration {
}
}
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
// Compute all covariates for the read
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates);
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {
read.setBaseQualities(null, errorModel);
continue;
}
final byte[] quals = read.getBaseQualities(errorModel);
final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model
final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model
final int readLength = read.getReadLength();
for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read
for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read
final byte originalQualityScore = quals[offset];
if (originalQualityScore >= preserveQLessThan) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model
final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
if (originalQualityScore >= preserveQLessThan) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model
final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
quals[offset] = recalibratedQualityScore;
}
}
@ -152,10 +154,10 @@ public class BaseRecalibration {
final double deltaQReported = calculateDeltaQReported(recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE), key, errorModel, globalDeltaQ, qualFromRead);
final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables, key, errorModel, globalDeltaQ, deltaQReported, qualFromRead);
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
}
private double calculateGlobalDeltaQ(final NestedIntegerArray<RecalDatum> table, final int[] key, final EventType errorModel) {

View File

@ -30,7 +30,7 @@ public class QuantizationInfo {
}
public QuantizationInfo(final RecalibrationTables recalibrationTables, final int quantizationLevels) {
final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution
final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution
for (int i = 0; i < qualHistogram.length; i++)
qualHistogram[i] = 0L;
@ -38,10 +38,10 @@ public class QuantizationInfo {
for (final RecalDatum value : qualTable.getAllValues()) {
final RecalDatum datum = value;
final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL )
qualHistogram[empiricalQual] += (long) datum.getNumObservations(); // add the number of observations for every key
final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL )
qualHistogram[empiricalQual] += (long) datum.getNumObservations(); // add the number of observations for every key
}
empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities
empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities
quantizeQualityScores(quantizationLevels);
this.quantizationLevels = quantizationLevels;
@ -49,8 +49,8 @@ public class QuantizationInfo {
public void quantizeQualityScores(int nLevels) {
QualQuantizer quantizer = new QualQuantizer(empiricalQualCounts, nLevels, QualityUtils.MIN_USABLE_Q_SCORE); // quantize the qualities to the desired number of levels
quantizedQuals = quantizer.getOriginalToQuantizedMap(); // map with the original to quantized qual map (using the standard number of levels in the RAC)
QualQuantizer quantizer = new QualQuantizer(empiricalQualCounts, nLevels, QualityUtils.MIN_USABLE_Q_SCORE); // quantize the qualities to the desired number of levels
quantizedQuals = quantizer.getOriginalToQuantizedMap(); // map with the original to quantized qual map (using the standard number of levels in the RAC)
}
public void noQuantization() {

View File

@ -81,8 +81,8 @@ public class RecalUtils {
public final static String NUMBER_OBSERVATIONS_COLUMN_NAME = "Observations";
public final static String NUMBER_ERRORS_COLUMN_NAME = "Errors";
private final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
private final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
private final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
private final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
private static boolean warnUserNullPlatform = false;
private static final String SCRIPT_FILE = "BQSR.R";
@ -111,12 +111,13 @@ public class RecalUtils {
final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins();
final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins();
final ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates
final ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates
ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>();
if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES)
optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user
optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user
if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified
// parse the -cov arguments that were provided, skipping over the ones already specified
if (argumentCollection.COVARIATES != null) {
for (String requestedCovariateString : argumentCollection.COVARIATES) {
// help the transition from BQSR v1 to BQSR v2
if ( requestedCovariateString.equals("DinucCovariate") )
@ -126,12 +127,12 @@ public class RecalUtils {
boolean foundClass = false;
for (Class<? extends Covariate> covClass : covariateClasses) {
if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class
if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class
foundClass = true;
if (!requiredClasses.contains(covClass) &&
(argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) {
try {
final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it
final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it
optionalCovariates.add(covariate);
} catch (Exception e) {
throw new DynamicClassResolutionException(covClass, e);
@ -161,7 +162,7 @@ public class RecalUtils {
if (classes.size() != 2)
throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected");
dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next.
dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next.
dest.add(new QualityScoreCovariate());
return dest;
}
@ -266,20 +267,20 @@ public class RecalUtils {
for (int tableIndex = 0; tableIndex < recalibrationTables.numTables(); tableIndex++) {
final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[0]), "%s")); // save the required covariate name so we can reference it in the future
final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[0]), "%s")); // save the required covariate name so we can reference it in the future
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.index) {
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[1]), "%s")); // save the required covariate name so we can reference it in the future
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[1]), "%s")); // save the required covariate name so we can reference it in the future
if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
columnNames.add(covariateValue);
columnNames.add(covariateName);
}
}
columnNames.add(eventType); // the order of these column names is important here
columnNames.add(eventType); // the order of these column names is important here
columnNames.add(empiricalQuality);
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
columnNames.add(estimatedQReported); // only the read group table needs the estimated Q reported
columnNames.add(estimatedQReported); // only the read group table needs the estimated Q reported
columnNames.add(nObservations);
columnNames.add(nErrors);
@ -288,7 +289,7 @@ public class RecalUtils {
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size());
for (final Pair<String, String> columnName : columnNames)
reportTable.addColumn(columnName.getFirst(), columnName.getSecond());
rowIndex = 0; // reset the row index since we're starting with a new table
rowIndex = 0; // reset the row index since we're starting with a new table
} else {
reportTable = result.get(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index);
}
@ -316,7 +317,7 @@ public class RecalUtils {
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality());
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), Math.round(datum.getNumObservations()));
reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), Math.round(datum.getNumMismatches()));
@ -349,7 +350,6 @@ public class RecalUtils {
return Utils.join(",", names);
}
public static void outputRecalibrationReport(final GATKReportTable argumentTable, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) {
outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile);
}
@ -410,13 +410,13 @@ public class RecalUtils {
// add the quality score table to the delta table
final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
for (final NestedIntegerArray.Leaf leaf : qualTable.getAllLeaves()) { // go through every element in the covariates table to create the delta table
for (final NestedIntegerArray.Leaf leaf : qualTable.getAllLeaves()) { // go through every element in the covariates table to create the delta table
final int[] newCovs = new int[4];
newCovs[0] = leaf.keys[0];
newCovs[1] = requestedCovariates.length; // replace the covariate name with an arbitrary (unused) index for QualityScore
newCovs[1] = requestedCovariates.length; // replace the covariate name with an arbitrary (unused) index for QualityScore
newCovs[2] = leaf.keys[1];
newCovs[3] = leaf.keys[2];
addToDeltaTable(deltaTable, newCovs, (RecalDatum)leaf.value); // add this covariate to the delta table
addToDeltaTable(deltaTable, newCovs, (RecalDatum)leaf.value); // add this covariate to the delta table
}
// add the optional covariates to the delta table
@ -425,10 +425,10 @@ public class RecalUtils {
for (final NestedIntegerArray.Leaf leaf : covTable.getAllLeaves()) {
final int[] covs = new int[4];
covs[0] = leaf.keys[0];
covs[1] = i; // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS)
covs[1] = i; // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS)
covs[2] = leaf.keys[2];
covs[3] = leaf.keys[3];
addToDeltaTable(deltaTable, covs, (RecalDatum) leaf.value); // add this covariate to the delta table
addToDeltaTable(deltaTable, covs, (RecalDatum) leaf.value); // add this covariate to the delta table
}
}
@ -486,11 +486,11 @@ public class RecalUtils {
*/
private static void addToDeltaTable(final NestedHashMap deltaTable, final int[] deltaKey, final RecalDatum recalDatum) {
Object[] wrappedKey = wrapKeys(deltaKey);
final RecalDatum deltaDatum = (RecalDatum)deltaTable.get(wrappedKey); // check if we already have a RecalDatum for this key
final RecalDatum deltaDatum = (RecalDatum)deltaTable.get(wrappedKey); // check if we already have a RecalDatum for this key
if (deltaDatum == null)
deltaTable.put(new RecalDatum(recalDatum), wrappedKey); // if we don't have a key yet, create a new one with the same values as the curent datum
deltaTable.put(new RecalDatum(recalDatum), wrappedKey); // if we don't have a key yet, create a new one with the same values as the curent datum
else
deltaDatum.combine(recalDatum); // if we do have a datum, combine it with this one.
deltaDatum.combine(recalDatum); // if we do have a datum, combine it with this one.
}
private static Object[] wrapKeys(final int[] keys) {
@ -539,10 +539,11 @@ public class RecalUtils {
* @return true if this read is consistent or false if this read should be skipped
*/
public static boolean isColorSpaceConsistent(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
if (!ReadUtils.isSOLiDRead(read)) // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
if (!ReadUtils.isSOLiDRead(read)) // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
return true;
if (read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
// Haven't calculated the inconsistency array yet for this read
if (read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG) == null) {
final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG);
if (attr != null) {
byte[] colorSpace;
@ -562,13 +563,13 @@ public class RecalUtils {
}
}
byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
if (read.getReadNegativeStrandFlag())
readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
final byte[] inconsistency = new byte[readBases.length];
int i;
byte prevBase = colorSpace[0]; // The sentinel
byte prevBase = colorSpace[0]; // The sentinel
for (i = 0; i < readBases.length; i++) {
final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[i + 1]);
inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1);
@ -576,11 +577,11 @@ public class RecalUtils {
}
read.setAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
}
else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it
else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
else
return false; // otherwise, just skip the read
return false; // otherwise, just skip the read
}
return true;
@ -774,6 +775,4 @@ public class RecalUtils {
return base;
}
}
}

View File

@ -19,13 +19,13 @@ import java.util.*;
* @since 3/26/12
*/
public class RecalibrationReport {
private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done)
private final RecalibrationTables recalibrationTables; // quick access reference to the tables
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done)
private final RecalibrationTables recalibrationTables; // quick access reference to the tables
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
private final HashMap<String, Integer> optionalCovariateIndexes;
private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter
private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter
private final int[] tempRGarray = new int[2];
private final int[] tempQUALarray = new int[3];
@ -40,7 +40,7 @@ public class RecalibrationReport {
GATKReportTable quantizedTable = report.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE);
quantizationInfo = initializeQuantizationTable(quantizedTable);
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()];
@ -50,13 +50,13 @@ public class RecalibrationReport {
requestedCovariates[covariateIndex++] = covariate;
for (final Covariate covariate : optionalCovariates) {
requestedCovariates[covariateIndex] = covariate;
final String covariateName = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
final String covariateName = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
optionalCovariateIndexes.put(covariateName, covariateIndex-2);
covariateIndex++;
}
for (Covariate cov : requestedCovariates)
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE)));
@ -198,9 +198,10 @@ public class RecalibrationReport {
final long nErrors = (Long) reportTable.get(row, RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
final double empiricalQuality = (Double) reportTable.get(row, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME);
final double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table
(Double) reportTable.get(row, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
Byte.parseByte((String) reportTable.get(row, RecalUtils.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table
// the estimatedQreported column only exists in the ReadGroup table
final double estimatedQReported = hasEstimatedQReportedColumn ?
(Double) reportTable.get(row, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
Byte.parseByte((String) reportTable.get(row, RecalUtils.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table
final RecalDatum datum = new RecalDatum(nObservations, nErrors, (byte)1);
datum.setEstimatedQReported(estimatedQReported);
@ -242,7 +243,7 @@ public class RecalibrationReport {
final String argument = table.get(i, "Argument").toString();
Object value = table.get(i, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
if (value.equals("null"))
value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport
value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport
if (argument.equals("covariate") && value != null)
RAC.COVARIATES = value.toString().split(",");

View File

@ -87,7 +87,8 @@ public class ContextCovariate implements StandardCovariate {
// store the original bases and then write Ns over low quality ones
final byte[] originalBases = read.getReadBases().clone();
final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context
// Write N's over the low quality tail of the reads to avoid adding them into the context
final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS);
final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag();
byte[] bases = clippedRead.getReadBases();
@ -115,7 +116,7 @@ public class ContextCovariate implements StandardCovariate {
@Override
public String formatKey(final int key) {
if (key == -1) // this can only happen in test routines because we do not propagate null keys to the csv file
if (key == -1) // this can only happen in test routines because we do not propagate null keys to the csv file
return null;
return contextFromKey(key);
@ -176,9 +177,9 @@ public class ContextCovariate implements StandardCovariate {
for (int currentIndex = contextSize; currentIndex < readLength; currentIndex++) {
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(bases[currentIndex]);
if (baseIndex == -1) { // ignore non-ACGT bases
if (baseIndex == -1) { // ignore non-ACGT bases
currentNPenalty = contextSize;
currentKey = 0; // reset the key
currentKey = 0; // reset the key
} else {
// push this base's contribution onto the key: shift everything 2 bits, mask out the non-context bits, and add the new base and the length in
currentKey = (currentKey >> 2) & mask;
@ -215,7 +216,7 @@ public class ContextCovariate implements StandardCovariate {
int bitOffset = LENGTH_BITS;
for (int i = start; i < end; i++) {
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(dna[i]);
if (baseIndex == -1) // ignore non-ACGT bases
if (baseIndex == -1) // ignore non-ACGT bases
return -1;
key |= (baseIndex << bitOffset);
bitOffset += 2;
@ -233,15 +234,15 @@ public class ContextCovariate implements StandardCovariate {
if (key < 0)
throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");
final int length = key & LENGTH_MASK; // the first bits represent the length (in bp) of the context
int mask = 48; // use the mask to pull out bases
final int length = key & LENGTH_MASK; // the first bits represent the length (in bp) of the context
int mask = 48; // use the mask to pull out bases
int offset = LENGTH_BITS;
StringBuilder dna = new StringBuilder();
for (int i = 0; i < length; i++) {
final int baseIndex = (key & mask) >> offset;
dna.append((char)BaseUtils.baseIndexToSimpleBase(baseIndex));
mask = mask << 2; // move the mask over to the next 2 bits
mask = mask << 2; // move the mask over to the next 2 bits
offset += 2;
}

View File

@ -108,7 +108,7 @@ public class CycleCovariate implements StandardCovariate {
// the current sequential model would consider the effects independently instead of jointly.
final boolean multiplyByNegative1 = read.getReadPairedFlag() && read.getSecondOfPairFlag();
int cycle = multiplyByNegative1 ? -1 : 1; // todo -- check if this is the right behavior for mate paired reads in flow cycle platforms.
int cycle = multiplyByNegative1 ? -1 : 1; // todo -- check if this is the right behavior for mate paired reads in flow cycle platforms.
// BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
// For example, AAAAAAA was probably read in two flow cycles but here we count it as one
@ -201,9 +201,9 @@ public class CycleCovariate implements StandardCovariate {
@Override
public String formatKey(final int key) {
int cycle = key >> 1; // shift so we can remove the "sign" bit
if ( (key & 1) != 0 ) // is the last bit set?
cycle *= -1; // then the cycle is negative
int cycle = key >> 1; // shift so we can remove the "sign" bit
if ( (key & 1) != 0 ) // is the last bit set?
cycle *= -1; // then the cycle is negative
return String.format("%d", cycle);
}
@ -222,7 +222,7 @@ public class CycleCovariate implements StandardCovariate {
int result = Math.abs(cycle);
result = result << 1; // shift so we can add the "sign" bit
if ( cycle < 0 )
result++; // negative cycles get the lower-most bit set
result++; // negative cycles get the lower-most bit set
return result;
}
}