Changing the commenting style in the BQSR

This commit is contained in:
Ryan Poplin 2012-08-29 11:27:45 -04:00
parent 19cc0b373e
commit e12ae65d33
10 changed files with 105 additions and 102 deletions

View File

@ -80,7 +80,7 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex); final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
final RecalDatum rgThisDatum = createDatumObject(qual, isError); final RecalDatum rgThisDatum = createDatumObject(qual, isError);
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
rgRecalTable.put(rgThisDatum, keys[0], eventIndex); rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
else else
rgPreviousDatum.combine(rgThisDatum); rgPreviousDatum.combine(rgThisDatum);
@ -126,7 +126,7 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex); final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
final RecalDatum rgThisDatum = createDatumObject(qual, isError); final RecalDatum rgThisDatum = createDatumObject(qual, isError);
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
rgRecalTable.put(rgThisDatum, keys[0], eventIndex); rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
else else
rgPreviousDatum.combine(rgThisDatum); rgPreviousDatum.combine(rgThisDatum);

View File

@ -32,13 +32,11 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -299,6 +297,4 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
return table; return table;
} }
} }

View File

@ -106,26 +106,26 @@ import java.util.ArrayList;
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) @DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN) @BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
@By(DataSource.READS) @By(DataSource.READS)
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file @ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality @Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta @PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long> { public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long> {
@ArgumentCollection @ArgumentCollection
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
private RecalibrationTables recalibrationTables; private RecalibrationTables recalibrationTables;
private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental) private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental)
private RecalibrationEngine recalibrationEngine; private RecalibrationEngine recalibrationEngine;
private int minimumQToUse; private int minimumQToUse;
protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped. protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped.
protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed. protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed.
protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\ protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation."; private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation.";
@ -143,16 +143,16 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
if (RAC.FORCE_PLATFORM != null) if (RAC.FORCE_PLATFORM != null)
RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM;
if (RAC.knownSites.isEmpty() && !RAC.RUN_WITHOUT_DBSNP) // Warn the user if no dbSNP file or other variant mask was specified if (RAC.knownSites.isEmpty() && !RAC.RUN_WITHOUT_DBSNP) // Warn the user if no dbSNP file or other variant mask was specified
throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION); throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION);
if (RAC.LIST_ONLY) { if (RAC.LIST_ONLY) {
RecalUtils.listAvailableCovariates(logger); RecalUtils.listAvailableCovariates(logger);
System.exit(0); System.exit(0);
} }
RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
ArrayList<Covariate> requiredCovariates = covariates.getFirst(); ArrayList<Covariate> requiredCovariates = covariates.getFirst();
ArrayList<Covariate> optionalCovariates = covariates.getSecond(); ArrayList<Covariate> optionalCovariates = covariates.getSecond();
@ -164,9 +164,9 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
requestedCovariates[covariateIndex++] = covariate; requestedCovariates[covariateIndex++] = covariate;
logger.info("The covariates being used here: "); logger.info("The covariates being used here: ");
for (Covariate cov : requestedCovariates) { // list all the covariates being used for (Covariate cov : requestedCovariates) { // list all the covariates being used
logger.info("\t" + cov.getClass().getSimpleName()); logger.info("\t" + cov.getClass().getSimpleName());
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
} }
int numReadGroups = 0; int numReadGroups = 0;
@ -216,12 +216,14 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
*/ */
public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
long countedSites = 0L; long countedSites = 0L;
if (tracker.getValues(RAC.knownSites).size() == 0) { // Only analyze sites not present in the provided known sites // Only analyze sites not present in the provided known sites
if (tracker.getValues(RAC.knownSites).size() == 0) {
for (final PileupElement p : context.getBasePileup()) { for (final PileupElement p : context.getBasePileup()) {
final GATKSAMRecord read = p.getRead(); final GATKSAMRecord read = p.getRead();
final int offset = p.getOffset(); final int offset = p.getOffset();
if (readHasBeenSkipped(read) || isLowQualityBase(read, offset)) // This read has been marked to be skipped or base is low quality (we don't recalibrate low quality bases) // This read has been marked to be skipped or base is low quality (we don't recalibrate low quality bases)
if (readHasBeenSkipped(read) || isLowQualityBase(read, offset))
continue; continue;
if (readNotSeen(read)) { if (readNotSeen(read)) {
@ -234,10 +236,12 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates)); read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates));
} }
if (!ReadUtils.isSOLiDRead(read) || // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
if (!ReadUtils.isSOLiDRead(read) ||
RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING || RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING ||
RecalUtils.isColorSpaceConsistent(read, offset)) RecalUtils.isColorSpaceConsistent(read, offset))
recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); // This base finally passed all the checks for a good base, so add it to the big data hashmap // This base finally passed all the checks for a good base, so add it to the big data hashmap
recalibrationEngine.updateDataForPileupElement(p, ref.getBase());
} }
countedSites++; countedSites++;
} }

View File

@ -68,7 +68,7 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex); final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
final RecalDatum rgThisDatum = createDatumObject(qual, isError); final RecalDatum rgThisDatum = createDatumObject(qual, isError);
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
rgRecalTable.put(rgThisDatum, keys[0], eventIndex); rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
else else
rgPreviousDatum.combine(rgThisDatum); rgPreviousDatum.combine(rgThisDatum);

View File

@ -48,9 +48,9 @@ public class BaseRecalibration {
private final static int MAXIMUM_RECALIBRATED_READ_LENGTH = 5000; private final static int MAXIMUM_RECALIBRATED_READ_LENGTH = 5000;
private final ReadCovariates readCovariates; private final ReadCovariates readCovariates;
private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done) private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
private final RecalibrationTables recalibrationTables; private final RecalibrationTables recalibrationTables;
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
private final boolean disableIndelQuals; private final boolean disableIndelQuals;
private final int preserveQLessThan; private final int preserveQLessThan;
@ -76,9 +76,9 @@ public class BaseRecalibration {
recalibrationTables = recalibrationReport.getRecalibrationTables(); recalibrationTables = recalibrationReport.getRecalibrationTables();
requestedCovariates = recalibrationReport.getRequestedCovariates(); requestedCovariates = recalibrationReport.getRequestedCovariates();
quantizationInfo = recalibrationReport.getQuantizationInfo(); quantizationInfo = recalibrationReport.getQuantizationInfo();
if (quantizationLevels == 0) // quantizationLevels == 0 means no quantization, preserve the quality scores if (quantizationLevels == 0) // quantizationLevels == 0 means no quantization, preserve the quality scores
quantizationInfo.noQuantization(); quantizationInfo.noQuantization();
else if (quantizationLevels > 0 && quantizationLevels != quantizationInfo.getQuantizationLevels()) // any other positive value means, we want a different quantization than the one pre-calculated in the recalibration report. Negative values mean the user did not provide a quantization argument, and just wnats to use what's in the report. else if (quantizationLevels > 0 && quantizationLevels != quantizationInfo.getQuantizationLevels()) // any other positive value means, we want a different quantization than the one pre-calculated in the recalibration report. Negative values mean the user did not provide a quantization argument, and just wnats to use what's in the report.
quantizationInfo.quantizeQualityScores(quantizationLevels); quantizationInfo.quantizeQualityScores(quantizationLevels);
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length); readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
@ -103,24 +103,26 @@ public class BaseRecalibration {
} }
} }
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read // Compute all covariates for the read
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings RecalUtils.computeCovariates(read, requestedCovariates, readCovariates);
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) { if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {
read.setBaseQualities(null, errorModel); read.setBaseQualities(null, errorModel);
continue; continue;
} }
final byte[] quals = read.getBaseQualities(errorModel); final byte[] quals = read.getBaseQualities(errorModel);
final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model
final int readLength = read.getReadLength(); final int readLength = read.getReadLength();
for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read
final byte originalQualityScore = quals[offset]; final byte originalQualityScore = quals[offset];
if (originalQualityScore >= preserveQLessThan) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality) if (originalQualityScore >= preserveQLessThan) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model
final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
quals[offset] = recalibratedQualityScore; quals[offset] = recalibratedQualityScore;
} }
} }
@ -152,10 +154,10 @@ public class BaseRecalibration {
final double deltaQReported = calculateDeltaQReported(recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE), key, errorModel, globalDeltaQ, qualFromRead); final double deltaQReported = calculateDeltaQReported(recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE), key, errorModel, globalDeltaQ, qualFromRead);
final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables, key, errorModel, globalDeltaQ, deltaQReported, qualFromRead); final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables, key, errorModel, globalDeltaQ, deltaQReported, qualFromRead);
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
} }
private double calculateGlobalDeltaQ(final NestedIntegerArray<RecalDatum> table, final int[] key, final EventType errorModel) { private double calculateGlobalDeltaQ(final NestedIntegerArray<RecalDatum> table, final int[] key, final EventType errorModel) {

View File

@ -30,7 +30,7 @@ public class QuantizationInfo {
} }
public QuantizationInfo(final RecalibrationTables recalibrationTables, final int quantizationLevels) { public QuantizationInfo(final RecalibrationTables recalibrationTables, final int quantizationLevels) {
final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution
for (int i = 0; i < qualHistogram.length; i++) for (int i = 0; i < qualHistogram.length; i++)
qualHistogram[i] = 0L; qualHistogram[i] = 0L;
@ -38,10 +38,10 @@ public class QuantizationInfo {
for (final RecalDatum value : qualTable.getAllValues()) { for (final RecalDatum value : qualTable.getAllValues()) {
final RecalDatum datum = value; final RecalDatum datum = value;
final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL ) final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL )
qualHistogram[empiricalQual] += (long) datum.getNumObservations(); // add the number of observations for every key qualHistogram[empiricalQual] += (long) datum.getNumObservations(); // add the number of observations for every key
} }
empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities
quantizeQualityScores(quantizationLevels); quantizeQualityScores(quantizationLevels);
this.quantizationLevels = quantizationLevels; this.quantizationLevels = quantizationLevels;
@ -49,8 +49,8 @@ public class QuantizationInfo {
public void quantizeQualityScores(int nLevels) { public void quantizeQualityScores(int nLevels) {
QualQuantizer quantizer = new QualQuantizer(empiricalQualCounts, nLevels, QualityUtils.MIN_USABLE_Q_SCORE); // quantize the qualities to the desired number of levels QualQuantizer quantizer = new QualQuantizer(empiricalQualCounts, nLevels, QualityUtils.MIN_USABLE_Q_SCORE); // quantize the qualities to the desired number of levels
quantizedQuals = quantizer.getOriginalToQuantizedMap(); // map with the original to quantized qual map (using the standard number of levels in the RAC) quantizedQuals = quantizer.getOriginalToQuantizedMap(); // map with the original to quantized qual map (using the standard number of levels in the RAC)
} }
public void noQuantization() { public void noQuantization() {

View File

@ -81,8 +81,8 @@ public class RecalUtils {
public final static String NUMBER_OBSERVATIONS_COLUMN_NAME = "Observations"; public final static String NUMBER_OBSERVATIONS_COLUMN_NAME = "Observations";
public final static String NUMBER_ERRORS_COLUMN_NAME = "Errors"; public final static String NUMBER_ERRORS_COLUMN_NAME = "Errors";
private final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams private final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
private final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color private final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
private static boolean warnUserNullPlatform = false; private static boolean warnUserNullPlatform = false;
private static final String SCRIPT_FILE = "BQSR.R"; private static final String SCRIPT_FILE = "BQSR.R";
@ -111,12 +111,13 @@ public class RecalUtils {
final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins(); final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins();
final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins(); final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins();
final ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates final ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates
ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>(); ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>();
if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES) if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES)
optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user
if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified // parse the -cov arguments that were provided, skipping over the ones already specified
if (argumentCollection.COVARIATES != null) {
for (String requestedCovariateString : argumentCollection.COVARIATES) { for (String requestedCovariateString : argumentCollection.COVARIATES) {
// help the transition from BQSR v1 to BQSR v2 // help the transition from BQSR v1 to BQSR v2
if ( requestedCovariateString.equals("DinucCovariate") ) if ( requestedCovariateString.equals("DinucCovariate") )
@ -126,12 +127,12 @@ public class RecalUtils {
boolean foundClass = false; boolean foundClass = false;
for (Class<? extends Covariate> covClass : covariateClasses) { for (Class<? extends Covariate> covClass : covariateClasses) {
if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class
foundClass = true; foundClass = true;
if (!requiredClasses.contains(covClass) && if (!requiredClasses.contains(covClass) &&
(argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) { (argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) {
try { try {
final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it
optionalCovariates.add(covariate); optionalCovariates.add(covariate);
} catch (Exception e) { } catch (Exception e) {
throw new DynamicClassResolutionException(covClass, e); throw new DynamicClassResolutionException(covClass, e);
@ -161,7 +162,7 @@ public class RecalUtils {
if (classes.size() != 2) if (classes.size() != 2)
throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected"); throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected");
dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next. dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next.
dest.add(new QualityScoreCovariate()); dest.add(new QualityScoreCovariate());
return dest; return dest;
} }
@ -266,20 +267,20 @@ public class RecalUtils {
for (int tableIndex = 0; tableIndex < recalibrationTables.numTables(); tableIndex++) { for (int tableIndex = 0; tableIndex < recalibrationTables.numTables(); tableIndex++) {
final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[0]), "%s")); // save the required covariate name so we can reference it in the future columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[0]), "%s")); // save the required covariate name so we can reference it in the future
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.index) { if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.index) {
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[1]), "%s")); // save the required covariate name so we can reference it in the future columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[1]), "%s")); // save the required covariate name so we can reference it in the future
if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) { if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
columnNames.add(covariateValue); columnNames.add(covariateValue);
columnNames.add(covariateName); columnNames.add(covariateName);
} }
} }
columnNames.add(eventType); // the order of these column names is important here columnNames.add(eventType); // the order of these column names is important here
columnNames.add(empiricalQuality); columnNames.add(empiricalQuality);
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index) if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
columnNames.add(estimatedQReported); // only the read group table needs the estimated Q reported columnNames.add(estimatedQReported); // only the read group table needs the estimated Q reported
columnNames.add(nObservations); columnNames.add(nObservations);
columnNames.add(nErrors); columnNames.add(nErrors);
@ -288,7 +289,7 @@ public class RecalUtils {
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size()); reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size());
for (final Pair<String, String> columnName : columnNames) for (final Pair<String, String> columnName : columnNames)
reportTable.addColumn(columnName.getFirst(), columnName.getSecond()); reportTable.addColumn(columnName.getFirst(), columnName.getSecond());
rowIndex = 0; // reset the row index since we're starting with a new table rowIndex = 0; // reset the row index since we're starting with a new table
} else { } else {
reportTable = result.get(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index); reportTable = result.get(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index);
} }
@ -316,7 +317,7 @@ public class RecalUtils {
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality()); reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality());
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index) if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), Math.round(datum.getNumObservations())); reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), Math.round(datum.getNumObservations()));
reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), Math.round(datum.getNumMismatches())); reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), Math.round(datum.getNumMismatches()));
@ -349,7 +350,6 @@ public class RecalUtils {
return Utils.join(",", names); return Utils.join(",", names);
} }
public static void outputRecalibrationReport(final GATKReportTable argumentTable, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) { public static void outputRecalibrationReport(final GATKReportTable argumentTable, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) {
outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile); outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile);
} }
@ -410,13 +410,13 @@ public class RecalUtils {
// add the quality score table to the delta table // add the quality score table to the delta table
final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
for (final NestedIntegerArray.Leaf leaf : qualTable.getAllLeaves()) { // go through every element in the covariates table to create the delta table for (final NestedIntegerArray.Leaf leaf : qualTable.getAllLeaves()) { // go through every element in the covariates table to create the delta table
final int[] newCovs = new int[4]; final int[] newCovs = new int[4];
newCovs[0] = leaf.keys[0]; newCovs[0] = leaf.keys[0];
newCovs[1] = requestedCovariates.length; // replace the covariate name with an arbitrary (unused) index for QualityScore newCovs[1] = requestedCovariates.length; // replace the covariate name with an arbitrary (unused) index for QualityScore
newCovs[2] = leaf.keys[1]; newCovs[2] = leaf.keys[1];
newCovs[3] = leaf.keys[2]; newCovs[3] = leaf.keys[2];
addToDeltaTable(deltaTable, newCovs, (RecalDatum)leaf.value); // add this covariate to the delta table addToDeltaTable(deltaTable, newCovs, (RecalDatum)leaf.value); // add this covariate to the delta table
} }
// add the optional covariates to the delta table // add the optional covariates to the delta table
@ -425,10 +425,10 @@ public class RecalUtils {
for (final NestedIntegerArray.Leaf leaf : covTable.getAllLeaves()) { for (final NestedIntegerArray.Leaf leaf : covTable.getAllLeaves()) {
final int[] covs = new int[4]; final int[] covs = new int[4];
covs[0] = leaf.keys[0]; covs[0] = leaf.keys[0];
covs[1] = i; // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS) covs[1] = i; // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS)
covs[2] = leaf.keys[2]; covs[2] = leaf.keys[2];
covs[3] = leaf.keys[3]; covs[3] = leaf.keys[3];
addToDeltaTable(deltaTable, covs, (RecalDatum) leaf.value); // add this covariate to the delta table addToDeltaTable(deltaTable, covs, (RecalDatum) leaf.value); // add this covariate to the delta table
} }
} }
@ -486,11 +486,11 @@ public class RecalUtils {
*/ */
private static void addToDeltaTable(final NestedHashMap deltaTable, final int[] deltaKey, final RecalDatum recalDatum) { private static void addToDeltaTable(final NestedHashMap deltaTable, final int[] deltaKey, final RecalDatum recalDatum) {
Object[] wrappedKey = wrapKeys(deltaKey); Object[] wrappedKey = wrapKeys(deltaKey);
final RecalDatum deltaDatum = (RecalDatum)deltaTable.get(wrappedKey); // check if we already have a RecalDatum for this key final RecalDatum deltaDatum = (RecalDatum)deltaTable.get(wrappedKey); // check if we already have a RecalDatum for this key
if (deltaDatum == null) if (deltaDatum == null)
deltaTable.put(new RecalDatum(recalDatum), wrappedKey); // if we don't have a key yet, create a new one with the same values as the curent datum deltaTable.put(new RecalDatum(recalDatum), wrappedKey); // if we don't have a key yet, create a new one with the same values as the curent datum
else else
deltaDatum.combine(recalDatum); // if we do have a datum, combine it with this one. deltaDatum.combine(recalDatum); // if we do have a datum, combine it with this one.
} }
private static Object[] wrapKeys(final int[] keys) { private static Object[] wrapKeys(final int[] keys) {
@ -539,10 +539,11 @@ public class RecalUtils {
* @return true if this read is consistent or false if this read should be skipped * @return true if this read is consistent or false if this read should be skipped
*/ */
public static boolean isColorSpaceConsistent(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) { public static boolean isColorSpaceConsistent(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
if (!ReadUtils.isSOLiDRead(read)) // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base if (!ReadUtils.isSOLiDRead(read)) // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
return true; return true;
if (read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read // Haven't calculated the inconsistency array yet for this read
if (read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG) == null) {
final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG); final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG);
if (attr != null) { if (attr != null) {
byte[] colorSpace; byte[] colorSpace;
@ -562,13 +563,13 @@ public class RecalUtils {
} }
} }
byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
if (read.getReadNegativeStrandFlag()) if (read.getReadNegativeStrandFlag())
readBases = BaseUtils.simpleReverseComplement(read.getReadBases()); readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
final byte[] inconsistency = new byte[readBases.length]; final byte[] inconsistency = new byte[readBases.length];
int i; int i;
byte prevBase = colorSpace[0]; // The sentinel byte prevBase = colorSpace[0]; // The sentinel
for (i = 0; i < readBases.length; i++) { for (i = 0; i < readBases.length; i++) {
final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[i + 1]); final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[i + 1]);
inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1); inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1);
@ -576,11 +577,11 @@ public class RecalUtils {
} }
read.setAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency); read.setAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
} }
else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias."); throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
else else
return false; // otherwise, just skip the read return false; // otherwise, just skip the read
} }
return true; return true;
@ -774,6 +775,4 @@ public class RecalUtils {
return base; return base;
} }
} }
} }

View File

@ -19,13 +19,13 @@ import java.util.*;
* @since 3/26/12 * @since 3/26/12
*/ */
public class RecalibrationReport { public class RecalibrationReport {
private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done) private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done)
private final RecalibrationTables recalibrationTables; // quick access reference to the tables private final RecalibrationTables recalibrationTables; // quick access reference to the tables
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
private final HashMap<String, Integer> optionalCovariateIndexes; private final HashMap<String, Integer> optionalCovariateIndexes;
private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter
private final int[] tempRGarray = new int[2]; private final int[] tempRGarray = new int[2];
private final int[] tempQUALarray = new int[3]; private final int[] tempQUALarray = new int[3];
@ -40,7 +40,7 @@ public class RecalibrationReport {
GATKReportTable quantizedTable = report.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE); GATKReportTable quantizedTable = report.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE);
quantizationInfo = initializeQuantizationTable(quantizedTable); quantizationInfo = initializeQuantizationTable(quantizedTable);
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
ArrayList<Covariate> requiredCovariates = covariates.getFirst(); ArrayList<Covariate> requiredCovariates = covariates.getFirst();
ArrayList<Covariate> optionalCovariates = covariates.getSecond(); ArrayList<Covariate> optionalCovariates = covariates.getSecond();
requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()]; requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()];
@ -50,13 +50,13 @@ public class RecalibrationReport {
requestedCovariates[covariateIndex++] = covariate; requestedCovariates[covariateIndex++] = covariate;
for (final Covariate covariate : optionalCovariates) { for (final Covariate covariate : optionalCovariates) {
requestedCovariates[covariateIndex] = covariate; requestedCovariates[covariateIndex] = covariate;
final String covariateName = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport final String covariateName = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
optionalCovariateIndexes.put(covariateName, covariateIndex-2); optionalCovariateIndexes.put(covariateName, covariateIndex-2);
covariateIndex++; covariateIndex++;
} }
for (Covariate cov : requestedCovariates) for (Covariate cov : requestedCovariates)
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE))); recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE)));
@ -198,9 +198,10 @@ public class RecalibrationReport {
final long nErrors = (Long) reportTable.get(row, RecalUtils.NUMBER_ERRORS_COLUMN_NAME); final long nErrors = (Long) reportTable.get(row, RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
final double empiricalQuality = (Double) reportTable.get(row, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME); final double empiricalQuality = (Double) reportTable.get(row, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME);
final double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table // the estimatedQreported column only exists in the ReadGroup table
(Double) reportTable.get(row, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table final double estimatedQReported = hasEstimatedQReportedColumn ?
Byte.parseByte((String) reportTable.get(row, RecalUtils.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table (Double) reportTable.get(row, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
Byte.parseByte((String) reportTable.get(row, RecalUtils.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table
final RecalDatum datum = new RecalDatum(nObservations, nErrors, (byte)1); final RecalDatum datum = new RecalDatum(nObservations, nErrors, (byte)1);
datum.setEstimatedQReported(estimatedQReported); datum.setEstimatedQReported(estimatedQReported);
@ -242,7 +243,7 @@ public class RecalibrationReport {
final String argument = table.get(i, "Argument").toString(); final String argument = table.get(i, "Argument").toString();
Object value = table.get(i, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME); Object value = table.get(i, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
if (value.equals("null")) if (value.equals("null"))
value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport
if (argument.equals("covariate") && value != null) if (argument.equals("covariate") && value != null)
RAC.COVARIATES = value.toString().split(","); RAC.COVARIATES = value.toString().split(",");

View File

@ -87,7 +87,8 @@ public class ContextCovariate implements StandardCovariate {
// store the original bases and then write Ns over low quality ones // store the original bases and then write Ns over low quality ones
final byte[] originalBases = read.getReadBases().clone(); final byte[] originalBases = read.getReadBases().clone();
final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context // Write N's over the low quality tail of the reads to avoid adding them into the context
final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS);
final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag(); final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag();
byte[] bases = clippedRead.getReadBases(); byte[] bases = clippedRead.getReadBases();
@ -115,7 +116,7 @@ public class ContextCovariate implements StandardCovariate {
@Override @Override
public String formatKey(final int key) { public String formatKey(final int key) {
if (key == -1) // this can only happen in test routines because we do not propagate null keys to the csv file if (key == -1) // this can only happen in test routines because we do not propagate null keys to the csv file
return null; return null;
return contextFromKey(key); return contextFromKey(key);
@ -176,9 +177,9 @@ public class ContextCovariate implements StandardCovariate {
for (int currentIndex = contextSize; currentIndex < readLength; currentIndex++) { for (int currentIndex = contextSize; currentIndex < readLength; currentIndex++) {
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(bases[currentIndex]); final int baseIndex = BaseUtils.simpleBaseToBaseIndex(bases[currentIndex]);
if (baseIndex == -1) { // ignore non-ACGT bases if (baseIndex == -1) { // ignore non-ACGT bases
currentNPenalty = contextSize; currentNPenalty = contextSize;
currentKey = 0; // reset the key currentKey = 0; // reset the key
} else { } else {
// push this base's contribution onto the key: shift everything 2 bits, mask out the non-context bits, and add the new base and the length in // push this base's contribution onto the key: shift everything 2 bits, mask out the non-context bits, and add the new base and the length in
currentKey = (currentKey >> 2) & mask; currentKey = (currentKey >> 2) & mask;
@ -215,7 +216,7 @@ public class ContextCovariate implements StandardCovariate {
int bitOffset = LENGTH_BITS; int bitOffset = LENGTH_BITS;
for (int i = start; i < end; i++) { for (int i = start; i < end; i++) {
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(dna[i]); final int baseIndex = BaseUtils.simpleBaseToBaseIndex(dna[i]);
if (baseIndex == -1) // ignore non-ACGT bases if (baseIndex == -1) // ignore non-ACGT bases
return -1; return -1;
key |= (baseIndex << bitOffset); key |= (baseIndex << bitOffset);
bitOffset += 2; bitOffset += 2;
@ -233,15 +234,15 @@ public class ContextCovariate implements StandardCovariate {
if (key < 0) if (key < 0)
throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?"); throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");
final int length = key & LENGTH_MASK; // the first bits represent the length (in bp) of the context final int length = key & LENGTH_MASK; // the first bits represent the length (in bp) of the context
int mask = 48; // use the mask to pull out bases int mask = 48; // use the mask to pull out bases
int offset = LENGTH_BITS; int offset = LENGTH_BITS;
StringBuilder dna = new StringBuilder(); StringBuilder dna = new StringBuilder();
for (int i = 0; i < length; i++) { for (int i = 0; i < length; i++) {
final int baseIndex = (key & mask) >> offset; final int baseIndex = (key & mask) >> offset;
dna.append((char)BaseUtils.baseIndexToSimpleBase(baseIndex)); dna.append((char)BaseUtils.baseIndexToSimpleBase(baseIndex));
mask = mask << 2; // move the mask over to the next 2 bits mask = mask << 2; // move the mask over to the next 2 bits
offset += 2; offset += 2;
} }

View File

@ -108,7 +108,7 @@ public class CycleCovariate implements StandardCovariate {
// the current sequential model would consider the effects independently instead of jointly. // the current sequential model would consider the effects independently instead of jointly.
final boolean multiplyByNegative1 = read.getReadPairedFlag() && read.getSecondOfPairFlag(); final boolean multiplyByNegative1 = read.getReadPairedFlag() && read.getSecondOfPairFlag();
int cycle = multiplyByNegative1 ? -1 : 1; // todo -- check if this is the right behavior for mate paired reads in flow cycle platforms. int cycle = multiplyByNegative1 ? -1 : 1; // todo -- check if this is the right behavior for mate paired reads in flow cycle platforms.
// BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change // BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
// For example, AAAAAAA was probably read in two flow cycles but here we count it as one // For example, AAAAAAA was probably read in two flow cycles but here we count it as one
@ -201,9 +201,9 @@ public class CycleCovariate implements StandardCovariate {
@Override @Override
public String formatKey(final int key) { public String formatKey(final int key) {
int cycle = key >> 1; // shift so we can remove the "sign" bit int cycle = key >> 1; // shift so we can remove the "sign" bit
if ( (key & 1) != 0 ) // is the last bit set? if ( (key & 1) != 0 ) // is the last bit set?
cycle *= -1; // then the cycle is negative cycle *= -1; // then the cycle is negative
return String.format("%d", cycle); return String.format("%d", cycle);
} }
@ -222,7 +222,7 @@ public class CycleCovariate implements StandardCovariate {
int result = Math.abs(cycle); int result = Math.abs(cycle);
result = result << 1; // shift so we can add the "sign" bit result = result << 1; // shift so we can add the "sign" bit
if ( cycle < 0 ) if ( cycle < 0 )
result++; // negative cycles get the lower-most bit set result++; // negative cycles get the lower-most bit set
return result; return result;
} }
} }