Added a --no_indel_quals argument that, when used with -BQSR, inhibits the writing of base insertion and base deletion quality tags.
This commit is contained in:
parent
33306d2e20
commit
dd571d9aa0
|
|
@ -197,7 +197,7 @@ public class GenomeAnalysisEngine {
|
||||||
private BaseRecalibration baseRecalibration = null;
|
private BaseRecalibration baseRecalibration = null;
|
||||||
public BaseRecalibration getBaseRecalibration() { return baseRecalibration; }
|
public BaseRecalibration getBaseRecalibration() { return baseRecalibration; }
|
||||||
public boolean hasBaseRecalibration() { return baseRecalibration != null; }
|
public boolean hasBaseRecalibration() { return baseRecalibration != null; }
|
||||||
public void setBaseRecalibration(File recalFile, int quantizationLevels) { baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels); }
|
public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean noIndelQuals) { baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, noIndelQuals); }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Actually run the GATK with the specified walker.
|
* Actually run the GATK with the specified walker.
|
||||||
|
|
@ -227,7 +227,7 @@ public class GenomeAnalysisEngine {
|
||||||
|
|
||||||
// if the user specified an input BQSR recalibration table then enable on-the-fly recalibration
|
// if the user specified an input BQSR recalibration table then enable on-the-fly recalibration
|
||||||
if (this.getArguments().BQSR_RECAL_FILE != null)
|
if (this.getArguments().BQSR_RECAL_FILE != null)
|
||||||
setBaseRecalibration(this.getArguments().BQSR_RECAL_FILE, this.getArguments().quantizationLevels);
|
setBaseRecalibration(this.getArguments().BQSR_RECAL_FILE, this.getArguments().quantizationLevels, this.getArguments().noIndelQuals);
|
||||||
|
|
||||||
// Determine how the threads should be divided between CPU vs. IO.
|
// Determine how the threads should be divided between CPU vs. IO.
|
||||||
determineThreadAllocation();
|
determineThreadAllocation();
|
||||||
|
|
|
||||||
|
|
@ -209,6 +209,12 @@ public class GATKArgumentCollection {
|
||||||
@Argument(fullName="quantize_quals", shortName = "qq", doc = "Quantize quality scores to a given number of levels.", required=false)
|
@Argument(fullName="quantize_quals", shortName = "qq", doc = "Quantize quality scores to a given number of levels.", required=false)
|
||||||
public int quantizationLevels = -1;
|
public int quantizationLevels = -1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Turns off printing of the base insertion and base deletion tags when using the -BQSR argument. Only the base substitution qualities will be produced.
|
||||||
|
*/
|
||||||
|
@Argument(fullName="no_indel_quals", shortName = "NIQ", doc = "If true, inhibits printing of base insertion and base deletion tags.", required=false)
|
||||||
|
public boolean noIndelQuals = false;
|
||||||
|
|
||||||
@Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false)
|
@Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false)
|
||||||
public byte defaultBaseQualities = -1;
|
public byte defaultBaseQualities = -1;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -664,8 +664,12 @@ public class SAMDataSource {
|
||||||
IndexedFastaSequenceFile refReader,
|
IndexedFastaSequenceFile refReader,
|
||||||
BaseRecalibration bqsrApplier,
|
BaseRecalibration bqsrApplier,
|
||||||
byte defaultBaseQualities) {
|
byte defaultBaseQualities) {
|
||||||
// **** NOTE: ALL FILTERING SHOULD BE DONE BEFORE ANY ITERATORS THAT MODIFY THE READS! ****
|
|
||||||
// (otherwise we will process something that we may end up throwing away)
|
// *********************************************************************************** //
|
||||||
|
// * NOTE: ALL FILTERING SHOULD BE DONE BEFORE ANY ITERATORS THAT MODIFY THE READS! * //
|
||||||
|
// * (otherwise we will process something that we may end up throwing away) * //
|
||||||
|
// *********************************************************************************** //
|
||||||
|
|
||||||
if (downsamplingFraction != null)
|
if (downsamplingFraction != null)
|
||||||
wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction);
|
wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -49,6 +49,8 @@ public class BaseRecalibration {
|
||||||
private final RecalibrationTables recalibrationTables;
|
private final RecalibrationTables recalibrationTables;
|
||||||
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
||||||
|
|
||||||
|
private final boolean noIndelQuals;
|
||||||
|
|
||||||
private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
||||||
static {
|
static {
|
||||||
for (int i = 0; i < EventType.values().length; i++)
|
for (int i = 0; i < EventType.values().length; i++)
|
||||||
|
|
@ -58,10 +60,11 @@ public class BaseRecalibration {
|
||||||
/**
|
/**
|
||||||
* Constructor using a GATK Report file
|
* Constructor using a GATK Report file
|
||||||
*
|
*
|
||||||
* @param RECAL_FILE a GATK Report file containing the recalibration information
|
* @param RECAL_FILE a GATK Report file containing the recalibration information
|
||||||
* @param quantizationLevels number of bins to quantize the quality scores
|
* @param quantizationLevels number of bins to quantize the quality scores
|
||||||
|
* @param noIndelQuals if true, do not emit base indel qualities
|
||||||
*/
|
*/
|
||||||
public BaseRecalibration(final File RECAL_FILE, int quantizationLevels) {
|
public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean noIndelQuals) {
|
||||||
RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE);
|
RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE);
|
||||||
|
|
||||||
recalibrationTables = recalibrationReport.getRecalibrationTables();
|
recalibrationTables = recalibrationReport.getRecalibrationTables();
|
||||||
|
|
@ -73,6 +76,7 @@ public class BaseRecalibration {
|
||||||
quantizationInfo.quantizeQualityScores(quantizationLevels);
|
quantizationInfo.quantizeQualityScores(quantizationLevels);
|
||||||
|
|
||||||
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||||
|
this.noIndelQuals = noIndelQuals;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -87,6 +91,7 @@ public class BaseRecalibration {
|
||||||
this.recalibrationTables = recalibrationTables;
|
this.recalibrationTables = recalibrationTables;
|
||||||
this.requestedCovariates = requestedCovariates;
|
this.requestedCovariates = requestedCovariates;
|
||||||
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||||
|
noIndelQuals = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -99,6 +104,11 @@ public class BaseRecalibration {
|
||||||
public void recalibrateRead(final GATKSAMRecord read) {
|
public void recalibrateRead(final GATKSAMRecord read) {
|
||||||
RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read
|
RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read
|
||||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||||
|
if (noIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {
|
||||||
|
read.setBaseQualities(null, errorModel);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
final byte[] quals = read.getBaseQualities(errorModel);
|
final byte[] quals = read.getBaseQualities(errorModel);
|
||||||
final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model
|
final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -173,10 +173,10 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
setBaseQualities(quals);
|
setBaseQualities(quals);
|
||||||
break;
|
break;
|
||||||
case BASE_INSERTION:
|
case BASE_INSERTION:
|
||||||
setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, SAMUtils.phredToFastq(quals) );
|
setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, quals == null ? null : SAMUtils.phredToFastq(quals) );
|
||||||
break;
|
break;
|
||||||
case BASE_DELETION:
|
case BASE_DELETION:
|
||||||
setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, SAMUtils.phredToFastq(quals) );
|
setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, quals == null ? null : SAMUtils.phredToFastq(quals) );
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );
|
throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue