Changing the commenting style in the BQSR
This commit is contained in:
parent
19cc0b373e
commit
e12ae65d33
|
|
@ -80,7 +80,7 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
|
||||||
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
||||||
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
|
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
|
||||||
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
|
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
|
||||||
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
|
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
|
||||||
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
|
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
|
||||||
else
|
else
|
||||||
rgPreviousDatum.combine(rgThisDatum);
|
rgPreviousDatum.combine(rgThisDatum);
|
||||||
|
|
@ -126,7 +126,7 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
|
||||||
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
||||||
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
|
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
|
||||||
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
|
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
|
||||||
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
|
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
|
||||||
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
|
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
|
||||||
else
|
else
|
||||||
rgPreviousDatum.combine(rgThisDatum);
|
rgPreviousDatum.combine(rgThisDatum);
|
||||||
|
|
|
||||||
|
|
@ -32,13 +32,11 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
@ -299,6 +297,4 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
||||||
|
|
||||||
return table;
|
return table;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -106,26 +106,26 @@ import java.util.ArrayList;
|
||||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||||
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
||||||
@By(DataSource.READS)
|
@By(DataSource.READS)
|
||||||
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file
|
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file
|
||||||
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
|
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
|
||||||
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
|
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
|
||||||
public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long> {
|
public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long> {
|
||||||
@ArgumentCollection
|
@ArgumentCollection
|
||||||
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
|
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
|
||||||
|
|
||||||
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
|
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
|
||||||
|
|
||||||
private RecalibrationTables recalibrationTables;
|
private RecalibrationTables recalibrationTables;
|
||||||
|
|
||||||
private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental)
|
private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental)
|
||||||
|
|
||||||
private RecalibrationEngine recalibrationEngine;
|
private RecalibrationEngine recalibrationEngine;
|
||||||
|
|
||||||
private int minimumQToUse;
|
private int minimumQToUse;
|
||||||
|
|
||||||
protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped.
|
protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped.
|
||||||
protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed.
|
protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed.
|
||||||
protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
|
protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
|
||||||
|
|
||||||
private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation.";
|
private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation.";
|
||||||
|
|
||||||
|
|
@ -143,16 +143,16 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
||||||
if (RAC.FORCE_PLATFORM != null)
|
if (RAC.FORCE_PLATFORM != null)
|
||||||
RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM;
|
RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM;
|
||||||
|
|
||||||
if (RAC.knownSites.isEmpty() && !RAC.RUN_WITHOUT_DBSNP) // Warn the user if no dbSNP file or other variant mask was specified
|
if (RAC.knownSites.isEmpty() && !RAC.RUN_WITHOUT_DBSNP) // Warn the user if no dbSNP file or other variant mask was specified
|
||||||
throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION);
|
throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION);
|
||||||
|
|
||||||
if (RAC.LIST_ONLY) {
|
if (RAC.LIST_ONLY) {
|
||||||
RecalUtils.listAvailableCovariates(logger);
|
RecalUtils.listAvailableCovariates(logger);
|
||||||
System.exit(0);
|
System.exit(0);
|
||||||
}
|
}
|
||||||
RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
|
RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
|
||||||
|
|
||||||
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
|
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
|
||||||
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
|
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
|
||||||
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
|
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
|
||||||
|
|
||||||
|
|
@ -164,9 +164,9 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
||||||
requestedCovariates[covariateIndex++] = covariate;
|
requestedCovariates[covariateIndex++] = covariate;
|
||||||
|
|
||||||
logger.info("The covariates being used here: ");
|
logger.info("The covariates being used here: ");
|
||||||
for (Covariate cov : requestedCovariates) { // list all the covariates being used
|
for (Covariate cov : requestedCovariates) { // list all the covariates being used
|
||||||
logger.info("\t" + cov.getClass().getSimpleName());
|
logger.info("\t" + cov.getClass().getSimpleName());
|
||||||
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
|
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
|
||||||
}
|
}
|
||||||
|
|
||||||
int numReadGroups = 0;
|
int numReadGroups = 0;
|
||||||
|
|
@ -216,12 +216,14 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
||||||
*/
|
*/
|
||||||
public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
long countedSites = 0L;
|
long countedSites = 0L;
|
||||||
if (tracker.getValues(RAC.knownSites).size() == 0) { // Only analyze sites not present in the provided known sites
|
// Only analyze sites not present in the provided known sites
|
||||||
|
if (tracker.getValues(RAC.knownSites).size() == 0) {
|
||||||
for (final PileupElement p : context.getBasePileup()) {
|
for (final PileupElement p : context.getBasePileup()) {
|
||||||
final GATKSAMRecord read = p.getRead();
|
final GATKSAMRecord read = p.getRead();
|
||||||
final int offset = p.getOffset();
|
final int offset = p.getOffset();
|
||||||
|
|
||||||
if (readHasBeenSkipped(read) || isLowQualityBase(read, offset)) // This read has been marked to be skipped or base is low quality (we don't recalibrate low quality bases)
|
// This read has been marked to be skipped or base is low quality (we don't recalibrate low quality bases)
|
||||||
|
if (readHasBeenSkipped(read) || isLowQualityBase(read, offset))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (readNotSeen(read)) {
|
if (readNotSeen(read)) {
|
||||||
|
|
@ -234,10 +236,12 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
||||||
read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates));
|
read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!ReadUtils.isSOLiDRead(read) || // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
|
// SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
|
||||||
|
if (!ReadUtils.isSOLiDRead(read) ||
|
||||||
RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING ||
|
RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING ||
|
||||||
RecalUtils.isColorSpaceConsistent(read, offset))
|
RecalUtils.isColorSpaceConsistent(read, offset))
|
||||||
recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); // This base finally passed all the checks for a good base, so add it to the big data hashmap
|
// This base finally passed all the checks for a good base, so add it to the big data hashmap
|
||||||
|
recalibrationEngine.updateDataForPileupElement(p, ref.getBase());
|
||||||
}
|
}
|
||||||
countedSites++;
|
countedSites++;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -68,7 +68,7 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
|
||||||
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
||||||
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
|
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
|
||||||
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
|
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
|
||||||
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
|
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
|
||||||
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
|
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
|
||||||
else
|
else
|
||||||
rgPreviousDatum.combine(rgThisDatum);
|
rgPreviousDatum.combine(rgThisDatum);
|
||||||
|
|
|
||||||
|
|
@ -48,9 +48,9 @@ public class BaseRecalibration {
|
||||||
private final static int MAXIMUM_RECALIBRATED_READ_LENGTH = 5000;
|
private final static int MAXIMUM_RECALIBRATED_READ_LENGTH = 5000;
|
||||||
private final ReadCovariates readCovariates;
|
private final ReadCovariates readCovariates;
|
||||||
|
|
||||||
private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
|
private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
|
||||||
private final RecalibrationTables recalibrationTables;
|
private final RecalibrationTables recalibrationTables;
|
||||||
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
||||||
|
|
||||||
private final boolean disableIndelQuals;
|
private final boolean disableIndelQuals;
|
||||||
private final int preserveQLessThan;
|
private final int preserveQLessThan;
|
||||||
|
|
@ -76,9 +76,9 @@ public class BaseRecalibration {
|
||||||
recalibrationTables = recalibrationReport.getRecalibrationTables();
|
recalibrationTables = recalibrationReport.getRecalibrationTables();
|
||||||
requestedCovariates = recalibrationReport.getRequestedCovariates();
|
requestedCovariates = recalibrationReport.getRequestedCovariates();
|
||||||
quantizationInfo = recalibrationReport.getQuantizationInfo();
|
quantizationInfo = recalibrationReport.getQuantizationInfo();
|
||||||
if (quantizationLevels == 0) // quantizationLevels == 0 means no quantization, preserve the quality scores
|
if (quantizationLevels == 0) // quantizationLevels == 0 means no quantization, preserve the quality scores
|
||||||
quantizationInfo.noQuantization();
|
quantizationInfo.noQuantization();
|
||||||
else if (quantizationLevels > 0 && quantizationLevels != quantizationInfo.getQuantizationLevels()) // any other positive value means, we want a different quantization than the one pre-calculated in the recalibration report. Negative values mean the user did not provide a quantization argument, and just wnats to use what's in the report.
|
else if (quantizationLevels > 0 && quantizationLevels != quantizationInfo.getQuantizationLevels()) // any other positive value means, we want a different quantization than the one pre-calculated in the recalibration report. Negative values mean the user did not provide a quantization argument, and just wnats to use what's in the report.
|
||||||
quantizationInfo.quantizeQualityScores(quantizationLevels);
|
quantizationInfo.quantizeQualityScores(quantizationLevels);
|
||||||
|
|
||||||
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||||
|
|
@ -103,24 +103,26 @@ public class BaseRecalibration {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read
|
// Compute all covariates for the read
|
||||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates);
|
||||||
|
|
||||||
|
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||||
if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {
|
if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {
|
||||||
read.setBaseQualities(null, errorModel);
|
read.setBaseQualities(null, errorModel);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
final byte[] quals = read.getBaseQualities(errorModel);
|
final byte[] quals = read.getBaseQualities(errorModel);
|
||||||
final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model
|
final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model
|
||||||
|
|
||||||
final int readLength = read.getReadLength();
|
final int readLength = read.getReadLength();
|
||||||
for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read
|
for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read
|
||||||
|
|
||||||
final byte originalQualityScore = quals[offset];
|
final byte originalQualityScore = quals[offset];
|
||||||
|
|
||||||
if (originalQualityScore >= preserveQLessThan) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
if (originalQualityScore >= preserveQLessThan) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
||||||
final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model
|
final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model
|
||||||
final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
|
final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
|
||||||
quals[offset] = recalibratedQualityScore;
|
quals[offset] = recalibratedQualityScore;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -152,10 +154,10 @@ public class BaseRecalibration {
|
||||||
final double deltaQReported = calculateDeltaQReported(recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE), key, errorModel, globalDeltaQ, qualFromRead);
|
final double deltaQReported = calculateDeltaQReported(recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE), key, errorModel, globalDeltaQ, qualFromRead);
|
||||||
final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables, key, errorModel, globalDeltaQ, deltaQReported, qualFromRead);
|
final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables, key, errorModel, globalDeltaQ, deltaQReported, qualFromRead);
|
||||||
|
|
||||||
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
|
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
|
||||||
recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
|
recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
|
||||||
|
|
||||||
return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
|
return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
|
||||||
}
|
}
|
||||||
|
|
||||||
private double calculateGlobalDeltaQ(final NestedIntegerArray<RecalDatum> table, final int[] key, final EventType errorModel) {
|
private double calculateGlobalDeltaQ(final NestedIntegerArray<RecalDatum> table, final int[] key, final EventType errorModel) {
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ public class QuantizationInfo {
|
||||||
}
|
}
|
||||||
|
|
||||||
public QuantizationInfo(final RecalibrationTables recalibrationTables, final int quantizationLevels) {
|
public QuantizationInfo(final RecalibrationTables recalibrationTables, final int quantizationLevels) {
|
||||||
final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution
|
final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution
|
||||||
for (int i = 0; i < qualHistogram.length; i++)
|
for (int i = 0; i < qualHistogram.length; i++)
|
||||||
qualHistogram[i] = 0L;
|
qualHistogram[i] = 0L;
|
||||||
|
|
||||||
|
|
@ -38,10 +38,10 @@ public class QuantizationInfo {
|
||||||
|
|
||||||
for (final RecalDatum value : qualTable.getAllValues()) {
|
for (final RecalDatum value : qualTable.getAllValues()) {
|
||||||
final RecalDatum datum = value;
|
final RecalDatum datum = value;
|
||||||
final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL )
|
final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL )
|
||||||
qualHistogram[empiricalQual] += (long) datum.getNumObservations(); // add the number of observations for every key
|
qualHistogram[empiricalQual] += (long) datum.getNumObservations(); // add the number of observations for every key
|
||||||
}
|
}
|
||||||
empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities
|
empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities
|
||||||
quantizeQualityScores(quantizationLevels);
|
quantizeQualityScores(quantizationLevels);
|
||||||
|
|
||||||
this.quantizationLevels = quantizationLevels;
|
this.quantizationLevels = quantizationLevels;
|
||||||
|
|
@ -49,8 +49,8 @@ public class QuantizationInfo {
|
||||||
|
|
||||||
|
|
||||||
public void quantizeQualityScores(int nLevels) {
|
public void quantizeQualityScores(int nLevels) {
|
||||||
QualQuantizer quantizer = new QualQuantizer(empiricalQualCounts, nLevels, QualityUtils.MIN_USABLE_Q_SCORE); // quantize the qualities to the desired number of levels
|
QualQuantizer quantizer = new QualQuantizer(empiricalQualCounts, nLevels, QualityUtils.MIN_USABLE_Q_SCORE); // quantize the qualities to the desired number of levels
|
||||||
quantizedQuals = quantizer.getOriginalToQuantizedMap(); // map with the original to quantized qual map (using the standard number of levels in the RAC)
|
quantizedQuals = quantizer.getOriginalToQuantizedMap(); // map with the original to quantized qual map (using the standard number of levels in the RAC)
|
||||||
}
|
}
|
||||||
|
|
||||||
public void noQuantization() {
|
public void noQuantization() {
|
||||||
|
|
|
||||||
|
|
@ -81,8 +81,8 @@ public class RecalUtils {
|
||||||
public final static String NUMBER_OBSERVATIONS_COLUMN_NAME = "Observations";
|
public final static String NUMBER_OBSERVATIONS_COLUMN_NAME = "Observations";
|
||||||
public final static String NUMBER_ERRORS_COLUMN_NAME = "Errors";
|
public final static String NUMBER_ERRORS_COLUMN_NAME = "Errors";
|
||||||
|
|
||||||
private final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
|
private final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
|
||||||
private final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
|
private final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
|
||||||
private static boolean warnUserNullPlatform = false;
|
private static boolean warnUserNullPlatform = false;
|
||||||
|
|
||||||
private static final String SCRIPT_FILE = "BQSR.R";
|
private static final String SCRIPT_FILE = "BQSR.R";
|
||||||
|
|
@ -111,12 +111,13 @@ public class RecalUtils {
|
||||||
final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins();
|
final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins();
|
||||||
final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins();
|
final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins();
|
||||||
|
|
||||||
final ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates
|
final ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates
|
||||||
ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>();
|
ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>();
|
||||||
if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES)
|
if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES)
|
||||||
optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user
|
optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user
|
||||||
|
|
||||||
if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified
|
// parse the -cov arguments that were provided, skipping over the ones already specified
|
||||||
|
if (argumentCollection.COVARIATES != null) {
|
||||||
for (String requestedCovariateString : argumentCollection.COVARIATES) {
|
for (String requestedCovariateString : argumentCollection.COVARIATES) {
|
||||||
// help the transition from BQSR v1 to BQSR v2
|
// help the transition from BQSR v1 to BQSR v2
|
||||||
if ( requestedCovariateString.equals("DinucCovariate") )
|
if ( requestedCovariateString.equals("DinucCovariate") )
|
||||||
|
|
@ -126,12 +127,12 @@ public class RecalUtils {
|
||||||
|
|
||||||
boolean foundClass = false;
|
boolean foundClass = false;
|
||||||
for (Class<? extends Covariate> covClass : covariateClasses) {
|
for (Class<? extends Covariate> covClass : covariateClasses) {
|
||||||
if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class
|
if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class
|
||||||
foundClass = true;
|
foundClass = true;
|
||||||
if (!requiredClasses.contains(covClass) &&
|
if (!requiredClasses.contains(covClass) &&
|
||||||
(argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) {
|
(argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) {
|
||||||
try {
|
try {
|
||||||
final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it
|
final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it
|
||||||
optionalCovariates.add(covariate);
|
optionalCovariates.add(covariate);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new DynamicClassResolutionException(covClass, e);
|
throw new DynamicClassResolutionException(covClass, e);
|
||||||
|
|
@ -161,7 +162,7 @@ public class RecalUtils {
|
||||||
if (classes.size() != 2)
|
if (classes.size() != 2)
|
||||||
throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected");
|
throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected");
|
||||||
|
|
||||||
dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next.
|
dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next.
|
||||||
dest.add(new QualityScoreCovariate());
|
dest.add(new QualityScoreCovariate());
|
||||||
return dest;
|
return dest;
|
||||||
}
|
}
|
||||||
|
|
@ -266,20 +267,20 @@ public class RecalUtils {
|
||||||
|
|
||||||
for (int tableIndex = 0; tableIndex < recalibrationTables.numTables(); tableIndex++) {
|
for (int tableIndex = 0; tableIndex < recalibrationTables.numTables(); tableIndex++) {
|
||||||
|
|
||||||
final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
|
final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
|
||||||
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[0]), "%s")); // save the required covariate name so we can reference it in the future
|
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[0]), "%s")); // save the required covariate name so we can reference it in the future
|
||||||
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.index) {
|
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.index) {
|
||||||
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[1]), "%s")); // save the required covariate name so we can reference it in the future
|
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[1]), "%s")); // save the required covariate name so we can reference it in the future
|
||||||
if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
|
if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
|
||||||
columnNames.add(covariateValue);
|
columnNames.add(covariateValue);
|
||||||
columnNames.add(covariateName);
|
columnNames.add(covariateName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
columnNames.add(eventType); // the order of these column names is important here
|
columnNames.add(eventType); // the order of these column names is important here
|
||||||
columnNames.add(empiricalQuality);
|
columnNames.add(empiricalQuality);
|
||||||
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
|
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
|
||||||
columnNames.add(estimatedQReported); // only the read group table needs the estimated Q reported
|
columnNames.add(estimatedQReported); // only the read group table needs the estimated Q reported
|
||||||
columnNames.add(nObservations);
|
columnNames.add(nObservations);
|
||||||
columnNames.add(nErrors);
|
columnNames.add(nErrors);
|
||||||
|
|
||||||
|
|
@ -288,7 +289,7 @@ public class RecalUtils {
|
||||||
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size());
|
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size());
|
||||||
for (final Pair<String, String> columnName : columnNames)
|
for (final Pair<String, String> columnName : columnNames)
|
||||||
reportTable.addColumn(columnName.getFirst(), columnName.getSecond());
|
reportTable.addColumn(columnName.getFirst(), columnName.getSecond());
|
||||||
rowIndex = 0; // reset the row index since we're starting with a new table
|
rowIndex = 0; // reset the row index since we're starting with a new table
|
||||||
} else {
|
} else {
|
||||||
reportTable = result.get(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index);
|
reportTable = result.get(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index);
|
||||||
}
|
}
|
||||||
|
|
@ -316,7 +317,7 @@ public class RecalUtils {
|
||||||
|
|
||||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality());
|
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality());
|
||||||
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
|
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
|
||||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
|
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
|
||||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), Math.round(datum.getNumObservations()));
|
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), Math.round(datum.getNumObservations()));
|
||||||
reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), Math.round(datum.getNumMismatches()));
|
reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), Math.round(datum.getNumMismatches()));
|
||||||
|
|
||||||
|
|
@ -349,7 +350,6 @@ public class RecalUtils {
|
||||||
return Utils.join(",", names);
|
return Utils.join(",", names);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static void outputRecalibrationReport(final GATKReportTable argumentTable, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) {
|
public static void outputRecalibrationReport(final GATKReportTable argumentTable, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) {
|
||||||
outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile);
|
outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile);
|
||||||
}
|
}
|
||||||
|
|
@ -410,13 +410,13 @@ public class RecalUtils {
|
||||||
|
|
||||||
// add the quality score table to the delta table
|
// add the quality score table to the delta table
|
||||||
final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
|
final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
|
||||||
for (final NestedIntegerArray.Leaf leaf : qualTable.getAllLeaves()) { // go through every element in the covariates table to create the delta table
|
for (final NestedIntegerArray.Leaf leaf : qualTable.getAllLeaves()) { // go through every element in the covariates table to create the delta table
|
||||||
final int[] newCovs = new int[4];
|
final int[] newCovs = new int[4];
|
||||||
newCovs[0] = leaf.keys[0];
|
newCovs[0] = leaf.keys[0];
|
||||||
newCovs[1] = requestedCovariates.length; // replace the covariate name with an arbitrary (unused) index for QualityScore
|
newCovs[1] = requestedCovariates.length; // replace the covariate name with an arbitrary (unused) index for QualityScore
|
||||||
newCovs[2] = leaf.keys[1];
|
newCovs[2] = leaf.keys[1];
|
||||||
newCovs[3] = leaf.keys[2];
|
newCovs[3] = leaf.keys[2];
|
||||||
addToDeltaTable(deltaTable, newCovs, (RecalDatum)leaf.value); // add this covariate to the delta table
|
addToDeltaTable(deltaTable, newCovs, (RecalDatum)leaf.value); // add this covariate to the delta table
|
||||||
}
|
}
|
||||||
|
|
||||||
// add the optional covariates to the delta table
|
// add the optional covariates to the delta table
|
||||||
|
|
@ -425,10 +425,10 @@ public class RecalUtils {
|
||||||
for (final NestedIntegerArray.Leaf leaf : covTable.getAllLeaves()) {
|
for (final NestedIntegerArray.Leaf leaf : covTable.getAllLeaves()) {
|
||||||
final int[] covs = new int[4];
|
final int[] covs = new int[4];
|
||||||
covs[0] = leaf.keys[0];
|
covs[0] = leaf.keys[0];
|
||||||
covs[1] = i; // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS)
|
covs[1] = i; // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS)
|
||||||
covs[2] = leaf.keys[2];
|
covs[2] = leaf.keys[2];
|
||||||
covs[3] = leaf.keys[3];
|
covs[3] = leaf.keys[3];
|
||||||
addToDeltaTable(deltaTable, covs, (RecalDatum) leaf.value); // add this covariate to the delta table
|
addToDeltaTable(deltaTable, covs, (RecalDatum) leaf.value); // add this covariate to the delta table
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -486,11 +486,11 @@ public class RecalUtils {
|
||||||
*/
|
*/
|
||||||
private static void addToDeltaTable(final NestedHashMap deltaTable, final int[] deltaKey, final RecalDatum recalDatum) {
|
private static void addToDeltaTable(final NestedHashMap deltaTable, final int[] deltaKey, final RecalDatum recalDatum) {
|
||||||
Object[] wrappedKey = wrapKeys(deltaKey);
|
Object[] wrappedKey = wrapKeys(deltaKey);
|
||||||
final RecalDatum deltaDatum = (RecalDatum)deltaTable.get(wrappedKey); // check if we already have a RecalDatum for this key
|
final RecalDatum deltaDatum = (RecalDatum)deltaTable.get(wrappedKey); // check if we already have a RecalDatum for this key
|
||||||
if (deltaDatum == null)
|
if (deltaDatum == null)
|
||||||
deltaTable.put(new RecalDatum(recalDatum), wrappedKey); // if we don't have a key yet, create a new one with the same values as the curent datum
|
deltaTable.put(new RecalDatum(recalDatum), wrappedKey); // if we don't have a key yet, create a new one with the same values as the curent datum
|
||||||
else
|
else
|
||||||
deltaDatum.combine(recalDatum); // if we do have a datum, combine it with this one.
|
deltaDatum.combine(recalDatum); // if we do have a datum, combine it with this one.
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Object[] wrapKeys(final int[] keys) {
|
private static Object[] wrapKeys(final int[] keys) {
|
||||||
|
|
@ -539,10 +539,11 @@ public class RecalUtils {
|
||||||
* @return true if this read is consistent or false if this read should be skipped
|
* @return true if this read is consistent or false if this read should be skipped
|
||||||
*/
|
*/
|
||||||
public static boolean isColorSpaceConsistent(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
|
public static boolean isColorSpaceConsistent(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
|
||||||
if (!ReadUtils.isSOLiDRead(read)) // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
|
if (!ReadUtils.isSOLiDRead(read)) // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
|
// Haven't calculated the inconsistency array yet for this read
|
||||||
|
if (read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG) == null) {
|
||||||
final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG);
|
final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG);
|
||||||
if (attr != null) {
|
if (attr != null) {
|
||||||
byte[] colorSpace;
|
byte[] colorSpace;
|
||||||
|
|
@ -562,13 +563,13 @@ public class RecalUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
|
byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
|
||||||
if (read.getReadNegativeStrandFlag())
|
if (read.getReadNegativeStrandFlag())
|
||||||
readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
|
readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
|
||||||
|
|
||||||
final byte[] inconsistency = new byte[readBases.length];
|
final byte[] inconsistency = new byte[readBases.length];
|
||||||
int i;
|
int i;
|
||||||
byte prevBase = colorSpace[0]; // The sentinel
|
byte prevBase = colorSpace[0]; // The sentinel
|
||||||
for (i = 0; i < readBases.length; i++) {
|
for (i = 0; i < readBases.length; i++) {
|
||||||
final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[i + 1]);
|
final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[i + 1]);
|
||||||
inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1);
|
inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1);
|
||||||
|
|
@ -576,11 +577,11 @@ public class RecalUtils {
|
||||||
}
|
}
|
||||||
read.setAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
|
read.setAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
|
||||||
}
|
}
|
||||||
else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it
|
else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it
|
||||||
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
|
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
|
||||||
|
|
||||||
else
|
else
|
||||||
return false; // otherwise, just skip the read
|
return false; // otherwise, just skip the read
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -774,6 +775,4 @@ public class RecalUtils {
|
||||||
return base;
|
return base;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -19,13 +19,13 @@ import java.util.*;
|
||||||
* @since 3/26/12
|
* @since 3/26/12
|
||||||
*/
|
*/
|
||||||
public class RecalibrationReport {
|
public class RecalibrationReport {
|
||||||
private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done)
|
private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done)
|
||||||
private final RecalibrationTables recalibrationTables; // quick access reference to the tables
|
private final RecalibrationTables recalibrationTables; // quick access reference to the tables
|
||||||
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
||||||
private final HashMap<String, Integer> optionalCovariateIndexes;
|
private final HashMap<String, Integer> optionalCovariateIndexes;
|
||||||
|
|
||||||
private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
|
private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
|
||||||
private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter
|
private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter
|
||||||
|
|
||||||
private final int[] tempRGarray = new int[2];
|
private final int[] tempRGarray = new int[2];
|
||||||
private final int[] tempQUALarray = new int[3];
|
private final int[] tempQUALarray = new int[3];
|
||||||
|
|
@ -40,7 +40,7 @@ public class RecalibrationReport {
|
||||||
GATKReportTable quantizedTable = report.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE);
|
GATKReportTable quantizedTable = report.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE);
|
||||||
quantizationInfo = initializeQuantizationTable(quantizedTable);
|
quantizationInfo = initializeQuantizationTable(quantizedTable);
|
||||||
|
|
||||||
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
|
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
|
||||||
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
|
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
|
||||||
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
|
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
|
||||||
requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()];
|
requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()];
|
||||||
|
|
@ -50,13 +50,13 @@ public class RecalibrationReport {
|
||||||
requestedCovariates[covariateIndex++] = covariate;
|
requestedCovariates[covariateIndex++] = covariate;
|
||||||
for (final Covariate covariate : optionalCovariates) {
|
for (final Covariate covariate : optionalCovariates) {
|
||||||
requestedCovariates[covariateIndex] = covariate;
|
requestedCovariates[covariateIndex] = covariate;
|
||||||
final String covariateName = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
|
final String covariateName = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
|
||||||
optionalCovariateIndexes.put(covariateName, covariateIndex-2);
|
optionalCovariateIndexes.put(covariateName, covariateIndex-2);
|
||||||
covariateIndex++;
|
covariateIndex++;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (Covariate cov : requestedCovariates)
|
for (Covariate cov : requestedCovariates)
|
||||||
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
|
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
|
||||||
|
|
||||||
recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE)));
|
recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE)));
|
||||||
|
|
||||||
|
|
@ -198,9 +198,10 @@ public class RecalibrationReport {
|
||||||
final long nErrors = (Long) reportTable.get(row, RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
|
final long nErrors = (Long) reportTable.get(row, RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
|
||||||
final double empiricalQuality = (Double) reportTable.get(row, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME);
|
final double empiricalQuality = (Double) reportTable.get(row, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME);
|
||||||
|
|
||||||
final double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table
|
// the estimatedQreported column only exists in the ReadGroup table
|
||||||
(Double) reportTable.get(row, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
|
final double estimatedQReported = hasEstimatedQReportedColumn ?
|
||||||
Byte.parseByte((String) reportTable.get(row, RecalUtils.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table
|
(Double) reportTable.get(row, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
|
||||||
|
Byte.parseByte((String) reportTable.get(row, RecalUtils.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table
|
||||||
|
|
||||||
final RecalDatum datum = new RecalDatum(nObservations, nErrors, (byte)1);
|
final RecalDatum datum = new RecalDatum(nObservations, nErrors, (byte)1);
|
||||||
datum.setEstimatedQReported(estimatedQReported);
|
datum.setEstimatedQReported(estimatedQReported);
|
||||||
|
|
@ -242,7 +243,7 @@ public class RecalibrationReport {
|
||||||
final String argument = table.get(i, "Argument").toString();
|
final String argument = table.get(i, "Argument").toString();
|
||||||
Object value = table.get(i, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
|
Object value = table.get(i, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
|
||||||
if (value.equals("null"))
|
if (value.equals("null"))
|
||||||
value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport
|
value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport
|
||||||
|
|
||||||
if (argument.equals("covariate") && value != null)
|
if (argument.equals("covariate") && value != null)
|
||||||
RAC.COVARIATES = value.toString().split(",");
|
RAC.COVARIATES = value.toString().split(",");
|
||||||
|
|
|
||||||
|
|
@ -87,7 +87,8 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
|
|
||||||
// store the original bases and then write Ns over low quality ones
|
// store the original bases and then write Ns over low quality ones
|
||||||
final byte[] originalBases = read.getReadBases().clone();
|
final byte[] originalBases = read.getReadBases().clone();
|
||||||
final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context
|
// Write N's over the low quality tail of the reads to avoid adding them into the context
|
||||||
|
final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS);
|
||||||
|
|
||||||
final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag();
|
final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag();
|
||||||
byte[] bases = clippedRead.getReadBases();
|
byte[] bases = clippedRead.getReadBases();
|
||||||
|
|
@ -115,7 +116,7 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String formatKey(final int key) {
|
public String formatKey(final int key) {
|
||||||
if (key == -1) // this can only happen in test routines because we do not propagate null keys to the csv file
|
if (key == -1) // this can only happen in test routines because we do not propagate null keys to the csv file
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
return contextFromKey(key);
|
return contextFromKey(key);
|
||||||
|
|
@ -176,9 +177,9 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
|
|
||||||
for (int currentIndex = contextSize; currentIndex < readLength; currentIndex++) {
|
for (int currentIndex = contextSize; currentIndex < readLength; currentIndex++) {
|
||||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(bases[currentIndex]);
|
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(bases[currentIndex]);
|
||||||
if (baseIndex == -1) { // ignore non-ACGT bases
|
if (baseIndex == -1) { // ignore non-ACGT bases
|
||||||
currentNPenalty = contextSize;
|
currentNPenalty = contextSize;
|
||||||
currentKey = 0; // reset the key
|
currentKey = 0; // reset the key
|
||||||
} else {
|
} else {
|
||||||
// push this base's contribution onto the key: shift everything 2 bits, mask out the non-context bits, and add the new base and the length in
|
// push this base's contribution onto the key: shift everything 2 bits, mask out the non-context bits, and add the new base and the length in
|
||||||
currentKey = (currentKey >> 2) & mask;
|
currentKey = (currentKey >> 2) & mask;
|
||||||
|
|
@ -215,7 +216,7 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
int bitOffset = LENGTH_BITS;
|
int bitOffset = LENGTH_BITS;
|
||||||
for (int i = start; i < end; i++) {
|
for (int i = start; i < end; i++) {
|
||||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(dna[i]);
|
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(dna[i]);
|
||||||
if (baseIndex == -1) // ignore non-ACGT bases
|
if (baseIndex == -1) // ignore non-ACGT bases
|
||||||
return -1;
|
return -1;
|
||||||
key |= (baseIndex << bitOffset);
|
key |= (baseIndex << bitOffset);
|
||||||
bitOffset += 2;
|
bitOffset += 2;
|
||||||
|
|
@ -233,15 +234,15 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
if (key < 0)
|
if (key < 0)
|
||||||
throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");
|
throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");
|
||||||
|
|
||||||
final int length = key & LENGTH_MASK; // the first bits represent the length (in bp) of the context
|
final int length = key & LENGTH_MASK; // the first bits represent the length (in bp) of the context
|
||||||
int mask = 48; // use the mask to pull out bases
|
int mask = 48; // use the mask to pull out bases
|
||||||
int offset = LENGTH_BITS;
|
int offset = LENGTH_BITS;
|
||||||
|
|
||||||
StringBuilder dna = new StringBuilder();
|
StringBuilder dna = new StringBuilder();
|
||||||
for (int i = 0; i < length; i++) {
|
for (int i = 0; i < length; i++) {
|
||||||
final int baseIndex = (key & mask) >> offset;
|
final int baseIndex = (key & mask) >> offset;
|
||||||
dna.append((char)BaseUtils.baseIndexToSimpleBase(baseIndex));
|
dna.append((char)BaseUtils.baseIndexToSimpleBase(baseIndex));
|
||||||
mask = mask << 2; // move the mask over to the next 2 bits
|
mask = mask << 2; // move the mask over to the next 2 bits
|
||||||
offset += 2;
|
offset += 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -108,7 +108,7 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
// the current sequential model would consider the effects independently instead of jointly.
|
// the current sequential model would consider the effects independently instead of jointly.
|
||||||
final boolean multiplyByNegative1 = read.getReadPairedFlag() && read.getSecondOfPairFlag();
|
final boolean multiplyByNegative1 = read.getReadPairedFlag() && read.getSecondOfPairFlag();
|
||||||
|
|
||||||
int cycle = multiplyByNegative1 ? -1 : 1; // todo -- check if this is the right behavior for mate paired reads in flow cycle platforms.
|
int cycle = multiplyByNegative1 ? -1 : 1; // todo -- check if this is the right behavior for mate paired reads in flow cycle platforms.
|
||||||
|
|
||||||
// BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
|
// BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
|
||||||
// For example, AAAAAAA was probably read in two flow cycles but here we count it as one
|
// For example, AAAAAAA was probably read in two flow cycles but here we count it as one
|
||||||
|
|
@ -201,9 +201,9 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String formatKey(final int key) {
|
public String formatKey(final int key) {
|
||||||
int cycle = key >> 1; // shift so we can remove the "sign" bit
|
int cycle = key >> 1; // shift so we can remove the "sign" bit
|
||||||
if ( (key & 1) != 0 ) // is the last bit set?
|
if ( (key & 1) != 0 ) // is the last bit set?
|
||||||
cycle *= -1; // then the cycle is negative
|
cycle *= -1; // then the cycle is negative
|
||||||
return String.format("%d", cycle);
|
return String.format("%d", cycle);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -222,7 +222,7 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
int result = Math.abs(cycle);
|
int result = Math.abs(cycle);
|
||||||
result = result << 1; // shift so we can add the "sign" bit
|
result = result << 1; // shift so we can add the "sign" bit
|
||||||
if ( cycle < 0 )
|
if ( cycle < 0 )
|
||||||
result++; // negative cycles get the lower-most bit set
|
result++; // negative cycles get the lower-most bit set
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue