Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
c2e1dc63ff
|
|
@ -197,7 +197,7 @@ public class GenomeAnalysisEngine {
|
||||||
private BaseRecalibration baseRecalibration = null;
|
private BaseRecalibration baseRecalibration = null;
|
||||||
public BaseRecalibration getBaseRecalibration() { return baseRecalibration; }
|
public BaseRecalibration getBaseRecalibration() { return baseRecalibration; }
|
||||||
public boolean hasBaseRecalibration() { return baseRecalibration != null; }
|
public boolean hasBaseRecalibration() { return baseRecalibration != null; }
|
||||||
public void setBaseRecalibration(File recalFile, int quantizationLevels) { baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels); }
|
public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean noIndelQuals) { baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, noIndelQuals); }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Actually run the GATK with the specified walker.
|
* Actually run the GATK with the specified walker.
|
||||||
|
|
@ -227,7 +227,7 @@ public class GenomeAnalysisEngine {
|
||||||
|
|
||||||
// if the use specified an input BQSR recalibration table then enable on the fly recalibration
|
// if the use specified an input BQSR recalibration table then enable on the fly recalibration
|
||||||
if (this.getArguments().BQSR_RECAL_FILE != null)
|
if (this.getArguments().BQSR_RECAL_FILE != null)
|
||||||
setBaseRecalibration(this.getArguments().BQSR_RECAL_FILE, this.getArguments().quantizationLevels);
|
setBaseRecalibration(this.getArguments().BQSR_RECAL_FILE, this.getArguments().quantizationLevels, this.getArguments().noIndelQuals);
|
||||||
|
|
||||||
// Determine how the threads should be divided between CPU vs. IO.
|
// Determine how the threads should be divided between CPU vs. IO.
|
||||||
determineThreadAllocation();
|
determineThreadAllocation();
|
||||||
|
|
|
||||||
|
|
@ -209,6 +209,12 @@ public class GATKArgumentCollection {
|
||||||
@Argument(fullName="quantize_quals", shortName = "qq", doc = "Quantize quality scores to a given number of levels.", required=false)
|
@Argument(fullName="quantize_quals", shortName = "qq", doc = "Quantize quality scores to a given number of levels.", required=false)
|
||||||
public int quantizationLevels = -1;
|
public int quantizationLevels = -1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Turns off printing of the base insertion and base deletion tags when using the -BQSR argument. Only the base substitution qualities will be produced.
|
||||||
|
*/
|
||||||
|
@Argument(fullName="no_indel_quals", shortName = "NIQ", doc = "If true, inhibits printing of base insertion and base deletion tags.", required=false)
|
||||||
|
public boolean noIndelQuals = false;
|
||||||
|
|
||||||
@Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false)
|
@Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false)
|
||||||
public byte defaultBaseQualities = -1;
|
public byte defaultBaseQualities = -1;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -664,8 +664,12 @@ public class SAMDataSource {
|
||||||
IndexedFastaSequenceFile refReader,
|
IndexedFastaSequenceFile refReader,
|
||||||
BaseRecalibration bqsrApplier,
|
BaseRecalibration bqsrApplier,
|
||||||
byte defaultBaseQualities) {
|
byte defaultBaseQualities) {
|
||||||
// **** NOTE: ALL FILTERING SHOULD BE DONE BEFORE ANY ITERATORS THAT MODIFY THE READS! ****
|
|
||||||
// (otherwise we will process something that we may end up throwing away)
|
// *********************************************************************************** //
|
||||||
|
// * NOTE: ALL FILTERING SHOULD BE DONE BEFORE ANY ITERATORS THAT MODIFY THE READS! * //
|
||||||
|
// * (otherwise we will process something that we may end up throwing away) * //
|
||||||
|
// *********************************************************************************** //
|
||||||
|
|
||||||
if (downsamplingFraction != null)
|
if (downsamplingFraction != null)
|
||||||
wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction);
|
wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,7 @@ import java.util.EnumSet;
|
||||||
public class CycleCovariate implements StandardCovariate {
|
public class CycleCovariate implements StandardCovariate {
|
||||||
|
|
||||||
private static final int MAXIMUM_CYCLE_VALUE = 1000;
|
private static final int MAXIMUM_CYCLE_VALUE = 1000;
|
||||||
|
private static final int CUSHION_FOR_INDELS = 4;
|
||||||
|
|
||||||
private static final EnumSet<NGSPlatform> DISCRETE_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.ILLUMINA, NGSPlatform.SOLID, NGSPlatform.PACBIO, NGSPlatform.COMPLETE_GENOMICS);
|
private static final EnumSet<NGSPlatform> DISCRETE_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.ILLUMINA, NGSPlatform.SOLID, NGSPlatform.PACBIO, NGSPlatform.COMPLETE_GENOMICS);
|
||||||
private static final EnumSet<NGSPlatform> FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT);
|
private static final EnumSet<NGSPlatform> FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT);
|
||||||
|
|
@ -79,11 +80,11 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
increment = readOrderFactor;
|
increment = readOrderFactor;
|
||||||
}
|
}
|
||||||
|
|
||||||
final int CUSHION = 4;
|
final int MAX_CYCLE_FOR_INDELS = readLength - CUSHION_FOR_INDELS - 1;
|
||||||
final int MAX_CYCLE = readLength - CUSHION - 1;
|
|
||||||
for (int i = 0; i < readLength; i++) {
|
for (int i = 0; i < readLength; i++) {
|
||||||
final int key = (i<CUSHION || i>MAX_CYCLE) ? -1 : keyFromCycle(cycle);
|
final int substitutionKey = keyFromCycle(cycle);
|
||||||
values.addCovariate(key, key, key, i);
|
final int indelKey = (i < CUSHION_FOR_INDELS || i > MAX_CYCLE_FOR_INDELS) ? -1 : substitutionKey;
|
||||||
|
values.addCovariate(substitutionKey, indelKey, indelKey, i);
|
||||||
cycle += increment;
|
cycle += increment;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -170,7 +170,7 @@ public class RecalDataManager {
|
||||||
|
|
||||||
final ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates
|
final ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates
|
||||||
ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>();
|
ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>();
|
||||||
if (argumentCollection.USE_STANDARD_COVARIATES)
|
if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES)
|
||||||
optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user
|
optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user
|
||||||
|
|
||||||
if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified
|
if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified
|
||||||
|
|
@ -180,7 +180,7 @@ public class RecalDataManager {
|
||||||
if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class
|
if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class
|
||||||
foundClass = true;
|
foundClass = true;
|
||||||
if (!requiredClasses.contains(covClass) &&
|
if (!requiredClasses.contains(covClass) &&
|
||||||
(!argumentCollection.USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) {
|
(argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) {
|
||||||
try {
|
try {
|
||||||
final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it
|
final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it
|
||||||
optionalCovariates.add(covariate);
|
optionalCovariates.add(covariate);
|
||||||
|
|
|
||||||
|
|
@ -71,16 +71,19 @@ public class RecalibrationArgumentCollection {
|
||||||
public boolean LIST_ONLY = false;
|
public boolean LIST_ONLY = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you. See the list of covariates with -list.
|
* Note that the ReadGroup and QualityScore covariates are required and do not need to be specified.
|
||||||
|
* Also, unless --no_standard_covs is specified, the Cycle and Context covariates are standard and are included by default.
|
||||||
|
* Use the --list argument to see the available covariates.
|
||||||
*/
|
*/
|
||||||
@Argument(fullName = "covariate", shortName = "cov", doc = "Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required = false)
|
@Argument(fullName = "covariate", shortName = "cov", doc = "One or more covariates to be used in the recalibration. Can be specified multiple times", required = false)
|
||||||
public String[] COVARIATES = null;
|
public String[] COVARIATES = null;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Use the standard set of covariates in addition to the ones listed using the -cov argument
|
* The Cycle and Context covariates are standard and are included by default unless this argument is provided.
|
||||||
|
* Note that the ReadGroup and QualityScore covariates are required and cannot be excluded.
|
||||||
*/
|
*/
|
||||||
@Argument(fullName = "standard_covs", shortName = "standard", doc = "Use the standard set of covariates in addition to the ones listed using the -cov argument", required = false)
|
@Argument(fullName = "no_standard_covs", shortName = "noStandard", doc = "Do not use the standard set of covariates, but rather just the ones listed using the -cov argument", required = false)
|
||||||
public boolean USE_STANDARD_COVARIATES = true;
|
public boolean DO_NOT_USE_STANDARD_COVARIATES = false;
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Debugging-only Arguments
|
// Debugging-only Arguments
|
||||||
|
|
@ -172,8 +175,8 @@ public class RecalibrationArgumentCollection {
|
||||||
argumentsTable.addColumn(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
|
argumentsTable.addColumn(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
|
||||||
argumentsTable.addRowID("covariate", true);
|
argumentsTable.addRowID("covariate", true);
|
||||||
argumentsTable.set("covariate", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES));
|
argumentsTable.set("covariate", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES));
|
||||||
argumentsTable.addRowID("standard_covs", true);
|
argumentsTable.addRowID("no_standard_covs", true);
|
||||||
argumentsTable.set("standard_covs", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, USE_STANDARD_COVARIATES);
|
argumentsTable.set("no_standard_covs", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
|
||||||
argumentsTable.addRowID("run_without_dbsnp", true);
|
argumentsTable.addRowID("run_without_dbsnp", true);
|
||||||
argumentsTable.set("run_without_dbsnp", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
|
argumentsTable.set("run_without_dbsnp", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
|
||||||
argumentsTable.addRowID("solid_recal_mode", true);
|
argumentsTable.addRowID("solid_recal_mode", true);
|
||||||
|
|
|
||||||
|
|
@ -247,7 +247,7 @@ public class RecalibrationReport {
|
||||||
RAC.COVARIATES = value.toString().split(",");
|
RAC.COVARIATES = value.toString().split(",");
|
||||||
|
|
||||||
else if (argument.equals("standard_covs"))
|
else if (argument.equals("standard_covs"))
|
||||||
RAC.USE_STANDARD_COVARIATES = Boolean.parseBoolean((String) value);
|
RAC.DO_NOT_USE_STANDARD_COVARIATES = Boolean.parseBoolean((String) value);
|
||||||
|
|
||||||
else if (argument.equals("solid_recal_mode"))
|
else if (argument.equals("solid_recal_mode"))
|
||||||
RAC.SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.recalModeFromString((String) value);
|
RAC.SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.recalModeFromString((String) value);
|
||||||
|
|
|
||||||
|
|
@ -49,6 +49,8 @@ public class BaseRecalibration {
|
||||||
private final RecalibrationTables recalibrationTables;
|
private final RecalibrationTables recalibrationTables;
|
||||||
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
||||||
|
|
||||||
|
private final boolean noIndelQuals;
|
||||||
|
|
||||||
private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
||||||
static {
|
static {
|
||||||
for (int i = 0; i < EventType.values().length; i++)
|
for (int i = 0; i < EventType.values().length; i++)
|
||||||
|
|
@ -60,8 +62,9 @@ public class BaseRecalibration {
|
||||||
*
|
*
|
||||||
* @param RECAL_FILE a GATK Report file containing the recalibration information
|
* @param RECAL_FILE a GATK Report file containing the recalibration information
|
||||||
* @param quantizationLevels number of bins to quantize the quality scores
|
* @param quantizationLevels number of bins to quantize the quality scores
|
||||||
|
* @param noIndelQuals if true, do not emit base indel qualities
|
||||||
*/
|
*/
|
||||||
public BaseRecalibration(final File RECAL_FILE, int quantizationLevels) {
|
public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean noIndelQuals) {
|
||||||
RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE);
|
RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE);
|
||||||
|
|
||||||
recalibrationTables = recalibrationReport.getRecalibrationTables();
|
recalibrationTables = recalibrationReport.getRecalibrationTables();
|
||||||
|
|
@ -73,6 +76,7 @@ public class BaseRecalibration {
|
||||||
quantizationInfo.quantizeQualityScores(quantizationLevels);
|
quantizationInfo.quantizeQualityScores(quantizationLevels);
|
||||||
|
|
||||||
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||||
|
this.noIndelQuals = noIndelQuals;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -87,6 +91,7 @@ public class BaseRecalibration {
|
||||||
this.recalibrationTables = recalibrationTables;
|
this.recalibrationTables = recalibrationTables;
|
||||||
this.requestedCovariates = requestedCovariates;
|
this.requestedCovariates = requestedCovariates;
|
||||||
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||||
|
noIndelQuals = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -99,6 +104,11 @@ public class BaseRecalibration {
|
||||||
public void recalibrateRead(final GATKSAMRecord read) {
|
public void recalibrateRead(final GATKSAMRecord read) {
|
||||||
RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read
|
RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read
|
||||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||||
|
if (noIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {
|
||||||
|
read.setBaseQualities(null, errorModel);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
final byte[] quals = read.getBaseQualities(errorModel);
|
final byte[] quals = read.getBaseQualities(errorModel);
|
||||||
final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model
|
final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -173,10 +173,10 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
setBaseQualities(quals);
|
setBaseQualities(quals);
|
||||||
break;
|
break;
|
||||||
case BASE_INSERTION:
|
case BASE_INSERTION:
|
||||||
setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, SAMUtils.phredToFastq(quals) );
|
setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, quals == null ? null : SAMUtils.phredToFastq(quals) );
|
||||||
break;
|
break;
|
||||||
case BASE_DELETION:
|
case BASE_DELETION:
|
||||||
setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, SAMUtils.phredToFastq(quals) );
|
setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, quals == null ? null : SAMUtils.phredToFastq(quals) );
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );
|
throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );
|
||||||
|
|
|
||||||
|
|
@ -102,17 +102,14 @@ public class ReadClipperTestUtils {
|
||||||
cigarElementStack.push(cigarElement);
|
cigarElementStack.push(cigarElement);
|
||||||
}
|
}
|
||||||
|
|
||||||
readHasStarted = false; // search the inverted list of elements (stack) for the stopping operator
|
|
||||||
while (!cigarElementStack.empty()) {
|
while (!cigarElementStack.empty()) {
|
||||||
CigarElement cigarElement = cigarElementStack.pop();
|
CigarElement cigarElement = cigarElementStack.pop();
|
||||||
if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
|
if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
|
||||||
readHasStarted = true;
|
|
||||||
endingOp = cigarElement.getOperator();
|
endingOp = cigarElement.getOperator();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION && startingOp != CigarOperator.INSERTION && endingOp != CigarOperator.INSERTION)
|
|
||||||
if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION)
|
if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION)
|
||||||
return true; // we don't accept reads starting or ending in deletions (add any other constraint here)
|
return true; // we don't accept reads starting or ending in deletions (add any other constraint here)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -151,51 +151,40 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
final byte LOW_QUAL = 2;
|
final byte LOW_QUAL = 2;
|
||||||
final byte HIGH_QUAL = 30;
|
final byte HIGH_QUAL = 30;
|
||||||
|
|
||||||
// create a read for every cigar permutation
|
/** create a read for every cigar permutation */
|
||||||
for (Cigar cigar : cigarList) {
|
for (Cigar cigar : cigarList) {
|
||||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||||
int readLength = read.getReadLength();
|
int readLength = read.getReadLength();
|
||||||
byte[] quals = new byte[readLength];
|
byte[] quals = new byte[readLength];
|
||||||
|
|
||||||
for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) {
|
for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) {
|
||||||
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the left tail
|
|
||||||
|
/** create a read with nLowQualBases in the left tail */
|
||||||
|
Utils.fillArrayWithByte(quals, HIGH_QUAL);
|
||||||
for (int addLeft = 0; addLeft < nLowQualBases; addLeft++)
|
for (int addLeft = 0; addLeft < nLowQualBases; addLeft++)
|
||||||
quals[addLeft] = LOW_QUAL;
|
quals[addLeft] = LOW_QUAL;
|
||||||
read.setBaseQualities(quals);
|
read.setBaseQualities(quals);
|
||||||
GATKSAMRecord clipLeft = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
GATKSAMRecord clipLeft = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
||||||
|
checkClippedReadsForLowQualEnds(read, clipLeft, LOW_QUAL, nLowQualBases);
|
||||||
|
|
||||||
assertUnclippedLimits(read, clipLeft); // Make sure limits haven't changed
|
/** create a read with nLowQualBases in the right tail */
|
||||||
assertNoLowQualBases(clipLeft, LOW_QUAL); // Make sure the low qualities are gone
|
Utils.fillArrayWithByte(quals, HIGH_QUAL);
|
||||||
Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
|
|
||||||
String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
|
|
||||||
|
|
||||||
|
|
||||||
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the right tail
|
|
||||||
for (int addRight = 0; addRight < nLowQualBases; addRight++)
|
for (int addRight = 0; addRight < nLowQualBases; addRight++)
|
||||||
quals[readLength - addRight - 1] = LOW_QUAL;
|
quals[readLength - addRight - 1] = LOW_QUAL;
|
||||||
read.setBaseQualities(quals);
|
read.setBaseQualities(quals);
|
||||||
GATKSAMRecord clipRight = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
GATKSAMRecord clipRight = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
||||||
|
checkClippedReadsForLowQualEnds(read, clipRight, LOW_QUAL, nLowQualBases);
|
||||||
|
|
||||||
// System.out.println(String.format("Debug [%d]: %s -> %s / %s", nLowQualBases, cigar.toString(), clipLeft.getCigarString(), clipRight.getCigarString()));
|
/** create a read with nLowQualBases on both tails */
|
||||||
|
|
||||||
assertUnclippedLimits(read, clipRight); // Make sure limits haven't changed
|
|
||||||
assertNoLowQualBases(clipRight, LOW_QUAL); // Make sure the low qualities are gone
|
|
||||||
Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
|
|
||||||
String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString()));
|
|
||||||
|
|
||||||
if (nLowQualBases <= readLength / 2) {
|
if (nLowQualBases <= readLength / 2) {
|
||||||
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases on both tails
|
Utils.fillArrayWithByte(quals, HIGH_QUAL);
|
||||||
for (int addBoth = 0; addBoth < nLowQualBases; addBoth++) {
|
for (int addBoth = 0; addBoth < nLowQualBases; addBoth++) {
|
||||||
quals[addBoth] = LOW_QUAL;
|
quals[addBoth] = LOW_QUAL;
|
||||||
quals[readLength - addBoth - 1] = LOW_QUAL;
|
quals[readLength - addBoth - 1] = LOW_QUAL;
|
||||||
}
|
}
|
||||||
read.setBaseQualities(quals);
|
read.setBaseQualities(quals);
|
||||||
GATKSAMRecord clipBoth = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
GATKSAMRecord clipBoth = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
||||||
|
checkClippedReadsForLowQualEnds(read, clipBoth, LOW_QUAL, 2*nLowQualBases);
|
||||||
assertUnclippedLimits(read, clipBoth); // Make sure limits haven't changed
|
|
||||||
assertNoLowQualBases(clipBoth, LOW_QUAL); // Make sure the low qualities are gone
|
|
||||||
Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
|
|
||||||
String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - (2 * nLowQualBases), read.getCigarString(), clipBoth.getCigarString()));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -286,11 +275,17 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void checkClippedReadsForLowQualEnds(GATKSAMRecord read, GATKSAMRecord clippedRead, byte lowQual, int nLowQualBases) {
|
||||||
|
assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed
|
||||||
|
assertNoLowQualBases(clippedRead, lowQual); // Make sure the low qualities are gone
|
||||||
|
assertNumberOfBases(read, clippedRead, nLowQualBases); // Make sure only low quality bases were clipped
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Asserts that clipping doesn't change the getUnclippedStart / getUnclippedEnd
|
* Asserts that clipping doesn't change the getUnclippedStart / getUnclippedEnd
|
||||||
*
|
*
|
||||||
* @param original
|
* @param original original read
|
||||||
* @param clipped
|
* @param clipped clipped read
|
||||||
*/
|
*/
|
||||||
private void assertUnclippedLimits(GATKSAMRecord original, GATKSAMRecord clipped) {
|
private void assertUnclippedLimits(GATKSAMRecord original, GATKSAMRecord clipped) {
|
||||||
if (ReadClipperTestUtils.readHasNonClippedBases(clipped)) {
|
if (ReadClipperTestUtils.readHasNonClippedBases(clipped)) {
|
||||||
|
|
@ -299,6 +294,12 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void assertNumberOfBases(GATKSAMRecord read, GATKSAMRecord clipLeft, int nLowQualBases) {
|
||||||
|
if (read.getCigarString().contains("M"))
|
||||||
|
Assert.assertEquals(clipLeft.getReadLength(), read.getReadLength() - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), read.getReadLength() - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private boolean startsWithInsertion(Cigar cigar) {
|
private boolean startsWithInsertion(Cigar cigar) {
|
||||||
return leadingCigarElementLength(cigar, CigarOperator.INSERTION) > 0;
|
return leadingCigarElementLength(cigar, CigarOperator.INSERTION) > 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -11,11 +11,11 @@ insertions_default_quality 45
|
||||||
low_quality_tail 2
|
low_quality_tail 2
|
||||||
mismatches_context_size 2
|
mismatches_context_size 2
|
||||||
mismatches_default_quality -1
|
mismatches_default_quality -1
|
||||||
|
no_standard_covs false
|
||||||
quantizing_levels 16
|
quantizing_levels 16
|
||||||
run_without_dbsnp false
|
run_without_dbsnp false
|
||||||
solid_nocall_strategy THROW_EXCEPTION
|
solid_nocall_strategy THROW_EXCEPTION
|
||||||
solid_recal_mode SET_Q_ZERO
|
solid_recal_mode SET_Q_ZERO
|
||||||
standard_covs true
|
|
||||||
|
|
||||||
#:GATKTable:3:94:::;
|
#:GATKTable:3:94:::;
|
||||||
#:GATKTable:Quantized:Quality quantization map
|
#:GATKTable:Quantized:Quality quantization map
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue