BQSR Parameter cleanup
* Get rid of the 320C argument that nobody uses.
* Get rid of the DEFAULT_READ_GROUP parameter and functionality (later to become an engine argument).
This commit is contained in:
parent
717cd4b912
commit
0d3ea0401c
|
|
@ -129,13 +129,14 @@ import java.util.Map;
|
|||
* -cov DinucCovariate \
|
||||
* -recalFile my_reads.recal_data.csv
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
|
||||
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
||||
@By(DataSource.READS) // Only look at covered loci, not every loci of the reference file
|
||||
@ReadFilters( {MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class} ) // Filter out all reads with zero or unavailable mapping quality
|
||||
@Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta
|
||||
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class})
|
||||
// Filter out all reads with zero or unavailable mapping quality
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES})
|
||||
// This walker requires both -I input.bam and -R reference.fasta
|
||||
@PartitionBy(PartitionType.LOCUS)
|
||||
public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.CountedData, CountCovariatesWalker.CountedData> implements TreeReducible<CountCovariatesWalker.CountedData> {
|
||||
|
||||
|
|
@ -149,7 +150,8 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
/////////////////////////////
|
||||
// Shared Arguments
|
||||
/////////////////////////////
|
||||
@ArgumentCollection private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||
@ArgumentCollection
|
||||
private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||
|
||||
/////////////////////////////
|
||||
// Command Line Arguments
|
||||
|
|
@ -217,6 +219,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
|
||||
/**
|
||||
* Adds the values of other to this, returning this
|
||||
*
|
||||
* @param other
|
||||
* @return this object
|
||||
*/
|
||||
|
|
@ -247,8 +250,9 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
*/
|
||||
public void initialize() {
|
||||
|
||||
if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
|
||||
if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
|
||||
if (RAC.FORCE_PLATFORM != null) {
|
||||
RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM;
|
||||
}
|
||||
|
||||
// Get a list of all available covariates
|
||||
final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
|
||||
|
|
@ -276,7 +280,8 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
if (requiredClasses.size() == 2) { // readGroup and reported quality score
|
||||
requestedCovariates.add(new ReadGroupCovariate()); // Order is important here
|
||||
requestedCovariates.add(new QualityScoreCovariate());
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
throw new UserException.CommandLineException("There are more required covariates than expected. The instantiation list needs to be updated with the new required covariate and in the correct order.");
|
||||
}
|
||||
// Next add the standard covariates if -standard was specified by the user
|
||||
|
|
@ -333,7 +338,6 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// map
|
||||
|
|
@ -343,6 +347,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
/**
|
||||
* For each read at this locus get the various covariate values and increment that location in the map based on
|
||||
* whether or not the base matches the reference at this particular location
|
||||
*
|
||||
* @param tracker The reference metadata tracker
|
||||
* @param ref The reference context
|
||||
* @param context The alignment context
|
||||
|
|
@ -362,8 +367,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
continue;
|
||||
}
|
||||
|
||||
if( !gatkRead.containsTemporaryAttribute( SEEN_ATTRIBUTE ) )
|
||||
{
|
||||
if (!gatkRead.containsTemporaryAttribute(SEEN_ATTRIBUTE)) {
|
||||
gatkRead.setTemporaryAttribute(SEEN_ATTRIBUTE, true);
|
||||
RecalDataManager.parseSAMRecord(gatkRead, RAC);
|
||||
|
||||
|
|
@ -374,8 +378,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
}
|
||||
|
||||
RecalDataManager.parseColorSpace(gatkRead);
|
||||
gatkRead.setTemporaryAttribute( COVARS_ATTRIBUTE,
|
||||
RecalDataManager.computeCovariates( gatkRead, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION ));
|
||||
gatkRead.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(gatkRead, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION));
|
||||
}
|
||||
|
||||
// Skip this position if base quality is zero
|
||||
|
|
@ -394,10 +397,12 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
// This base finally passed all the checks for a good base, so add it to the big data hashmap
|
||||
updateDataFromRead(counter, gatkRead, offset, refBase);
|
||||
|
||||
} else { // calculate SOLID reference insertion rate
|
||||
}
|
||||
else { // calculate SOLID reference insertion rate
|
||||
if (refBase == bases[offset]) {
|
||||
counter.solidInsertedReferenceBases++;
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
counter.otherColorSpaceInconsistency++;
|
||||
}
|
||||
}
|
||||
|
|
@ -405,7 +410,8 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
}
|
||||
}
|
||||
counter.countedSites++;
|
||||
} else { // We skipped over the dbSNP site, and we are only processing every Nth locus
|
||||
}
|
||||
else { // We skipped over the dbSNP site, and we are only processing every Nth locus
|
||||
counter.skippedSites++;
|
||||
updateMismatchCounts(counter, context, ref.getBase()); // For sanity check to ensure novel mismatch rate vs dnsnp mismatch rate is reasonable
|
||||
}
|
||||
|
|
@ -442,6 +448,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
* adding one to the number of observations and potentially one to the number of mismatches
|
||||
* Lots of things are passed as parameters to this method as a strategy for optimizing the covariate.getValue calls
|
||||
* because pulling things out of the SAMRecord is an expensive operation.
|
||||
*
|
||||
* @param counter Data structure which holds the counted bases
|
||||
* @param gatkRead The SAMRecord holding all the data for this read
|
||||
* @param offset The offset in the read for this locus
|
||||
|
|
@ -470,7 +477,6 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
counter.novelCountsMM += datum.getNumMismatches() - curMismatches; // For sanity check to ensure novel mismatch rate vs dnsnp mismatch rate is reasonable
|
||||
}
|
||||
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// reduce
|
||||
|
|
@ -479,6 +485,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
|
||||
/**
|
||||
* Initialize the reduce step by creating a PrintStream from the filename specified as an argument to the walker.
|
||||
*
|
||||
* @return The initial CountedData value for the reduce step (the method's declared return type is CountedData, not a PrintStream)
|
||||
*/
|
||||
public CountedData reduceInit() {
|
||||
|
|
@ -487,6 +494,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
|
||||
/**
|
||||
* The Reduce method doesn't do anything for this walker.
|
||||
*
|
||||
* @param mapped Result of the map. This value is immediately ignored.
|
||||
* @param sum The summing CountedData used to output the CSV data
|
||||
* @return returns The sum used to output the CSV data
|
||||
|
|
@ -508,8 +516,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
final double fractionMM_dbsnp = (double) counter.dbSNPCountsMM / (double) counter.dbSNPCountsBases;
|
||||
|
||||
if (fractionMM_dbsnp < DBSNP_VS_NOVEL_MISMATCH_RATE * fractionMM_novel) {
|
||||
Utils.warnUser("The variation rate at the supplied list of known variant sites seems suspiciously low. Please double-check that the correct ROD is being used. " +
|
||||
String.format("[dbSNP variation rate = %.4f, novel variation rate = %.4f]", fractionMM_dbsnp, fractionMM_novel) );
|
||||
Utils.warnUser("The variation rate at the supplied list of known variant sites seems suspiciously low. Please double-check that the correct ROD is being used. " + String.format("[dbSNP variation rate = %.4f, novel variation rate = %.4f]", fractionMM_dbsnp, fractionMM_novel));
|
||||
DBSNP_VALIDATION_CHECK_FREQUENCY *= 2; // Don't annoyingly output the warning message every megabase of a large file
|
||||
}
|
||||
}
|
||||
|
|
@ -524,6 +531,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
|
||||
/**
|
||||
* Write out the full data hashmap to disk in CSV format
|
||||
*
|
||||
* @param sum The CountedData to write out to RECAL_FILE
|
||||
*/
|
||||
public void onTraversalDone(CountedData sum) {
|
||||
|
|
@ -537,6 +545,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
|
||||
/**
|
||||
* For each entry (key-value pair) in the data hashmap output the Covariate's values as well as the RecalDatum's data in CSV format
|
||||
*
|
||||
* @param recalTableStream The PrintStream to write out to
|
||||
*/
|
||||
private void outputToCSV(CountedData sum, final PrintStream recalTableStream) {
|
||||
|
|
@ -558,7 +567,8 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
|
||||
if (DONT_SORT_OUTPUT) {
|
||||
printMappings(recalTableStream, 0, new Object[requestedCovariates.size()], dataManager.data.data);
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
printMappingsSorted(recalTableStream, 0, new Object[requestedCovariates.size()], dataManager.data.data);
|
||||
}
|
||||
|
||||
|
|
@ -585,7 +595,8 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
}
|
||||
// Output the RecalDatum entry
|
||||
recalTableStream.println(((RecalDatumOptimized) val).outputToCSV());
|
||||
} else { // Another layer in the nested hash map
|
||||
}
|
||||
else { // Another layer in the nested hash map
|
||||
printMappingsSorted(recalTableStream, curPos + 1, key, (Map) val);
|
||||
}
|
||||
}
|
||||
|
|
@ -603,7 +614,8 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
}
|
||||
// Output the RecalDatum entry
|
||||
recalTableStream.println(((RecalDatumOptimized) val).outputToCSV());
|
||||
} else { // Another layer in the nested hash map
|
||||
}
|
||||
else { // Another layer in the nested hash map
|
||||
printMappings(recalTableStream, curPos + 1, key, (Map) val);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -256,32 +256,6 @@ public class RecalDataManager {
|
|||
public static void parseSAMRecord(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) {
|
||||
GATKSAMReadGroupRecord readGroup = ((GATKSAMRecord) read).getReadGroup();
|
||||
|
||||
// If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments
|
||||
if (readGroup == null) {
|
||||
if (RAC.DEFAULT_READ_GROUP != null && RAC.DEFAULT_PLATFORM != null) {
|
||||
if (!warnUserNullReadGroup && RAC.FORCE_READ_GROUP == null) {
|
||||
Utils.warnUser("The input .bam file contains reads with no read group. " +
|
||||
"Defaulting to read group ID = " + RAC.DEFAULT_READ_GROUP + " and platform = " + RAC.DEFAULT_PLATFORM + ". " +
|
||||
"First observed at read with name = " + read.getReadName());
|
||||
warnUserNullReadGroup = true;
|
||||
}
|
||||
// There is no readGroup so defaulting to these values
|
||||
readGroup = new GATKSAMReadGroupRecord(RAC.DEFAULT_READ_GROUP);
|
||||
readGroup.setPlatform(RAC.DEFAULT_PLATFORM);
|
||||
((GATKSAMRecord) read).setReadGroup(readGroup);
|
||||
}
|
||||
else {
|
||||
throw new UserException.MalformedBAM(read, "The input .bam file contains reads with no read group. First observed at read with name = " + read.getReadName());
|
||||
}
|
||||
}
|
||||
|
||||
if (RAC.FORCE_READ_GROUP != null && !readGroup.getReadGroupId().equals(RAC.FORCE_READ_GROUP)) { // Collapse all the read groups into a single common String provided by the user
|
||||
final String oldPlatform = readGroup.getPlatform();
|
||||
readGroup = new GATKSAMReadGroupRecord(RAC.FORCE_READ_GROUP);
|
||||
readGroup.setPlatform(oldPlatform);
|
||||
((GATKSAMRecord) read).setReadGroup(readGroup);
|
||||
}
|
||||
|
||||
if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) {
|
||||
readGroup.setPlatform(RAC.FORCE_PLATFORM);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,31 +43,15 @@ public class RecalibrationArgumentCollection {
|
|||
// Shared Command Line Arguments
|
||||
//////////////////////////////////
|
||||
@Hidden
|
||||
@Argument(fullName = "default_read_group", shortName = "dRG", required = false, doc = "If a read has no read group then default to the provided String.")
|
||||
public String DEFAULT_READ_GROUP = null;
|
||||
@Hidden
|
||||
@Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
|
||||
public String DEFAULT_PLATFORM = null;
|
||||
@Hidden
|
||||
@Argument(fullName = "force_read_group", shortName = "fRG", required = false, doc = "If provided, the read group ID of EVERY read will be forced to be the provided String. This is useful to collapse all data into a single read group.")
|
||||
public String FORCE_READ_GROUP = null;
|
||||
@Hidden
|
||||
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
|
||||
public String FORCE_PLATFORM = null;
|
||||
@Hidden
|
||||
@Argument(fullName = "window_size_nqs", shortName = "nqs", doc = "The window size used by MinimumNQSCovariate for its calculation", required = false)
|
||||
public int WINDOW_SIZE = 5;
|
||||
|
||||
/**
|
||||
* This window size tells the module in how big of a neighborhood around the current base it should look for the minimum base quality score.
|
||||
*/
|
||||
@Hidden
|
||||
@Argument(fullName = "homopolymer_nback", shortName = "nback", doc = "The number of previous bases to look at in HomopolymerCovariate", required = false)
|
||||
public int HOMOPOLYMER_NBACK = 7;
|
||||
@Hidden
|
||||
@Argument(fullName = "exception_if_no_tile", shortName = "throwTileException", doc = "If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1", required = false)
|
||||
public boolean EXCEPTION_IF_NO_TILE = false;
|
||||
|
||||
/**
|
||||
* CountCovariates and TableRecalibration accept a --solid_recal_mode <MODE> flag which governs how the recalibrator handles the
|
||||
* reads which have had the reference inserted because of color space inconsistencies.
|
||||
|
|
@ -89,4 +73,10 @@ public class RecalibrationArgumentCollection {
|
|||
@Argument(fullName = "context_size", shortName = "cs", doc = "size of the k-mer context to be used", required = false)
|
||||
public int CONTEXT_SIZE = 8;
|
||||
|
||||
/**
|
||||
* The number of previous bases to look at in HomopolymerCovariate.
|
||||
*/
|
||||
@Argument(fullName = "homopolymer_nback", shortName = "nback", doc = "The number of previous bases to look at in HomopolymerCovariate", required = false)
|
||||
public int HOMOPOLYMER_NBACK = 7;
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -86,12 +86,12 @@ import java.util.regex.Pattern;
|
|||
* -o my_reads.recal.bam \
|
||||
* -recalFile my_reads.recal_data.csv
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
|
||||
@WalkerName("TableRecalibration")
|
||||
@Requires({ DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES }) // This walker requires -I input.bam, it also requires -R reference.fasta
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES})
|
||||
// This walker requires -I input.bam, it also requires -R reference.fasta
|
||||
public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||
|
||||
public static final String PROGRAM_RECORD_NAME = "GATK TableRecalibration";
|
||||
|
|
@ -99,7 +99,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
/////////////////////////////
|
||||
// Shared Arguments
|
||||
/////////////////////////////
|
||||
@ArgumentCollection private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||
@ArgumentCollection
|
||||
private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||
|
||||
/////////////////////////////
|
||||
// Command Line Arguments
|
||||
|
|
@ -165,7 +166,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
@Argument(fullName = "skipUQUpdate", shortName = "skipUQUpdate", required = false, doc = "If true, we will skip the UQ updating step for each read, speeding up the calculations")
|
||||
private boolean skipUQUpdate = false;
|
||||
|
||||
|
||||
/////////////////////////////
|
||||
// Private Member Variables
|
||||
/////////////////////////////
|
||||
|
|
@ -195,8 +195,9 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
*/
|
||||
public void initialize() {
|
||||
|
||||
if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
|
||||
if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
|
||||
if (RAC.FORCE_PLATFORM != null) {
|
||||
RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM;
|
||||
}
|
||||
|
||||
// Get a list of all available covariates
|
||||
final List<Class<? extends Covariate>> classes = new PluginManager<Covariate>(Covariate.class).getPlugins();
|
||||
|
|
@ -213,14 +214,16 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
lineNumber++;
|
||||
if (EOF_MARKER.equals(line)) {
|
||||
sawEOF = true;
|
||||
} else if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches() ) {
|
||||
}
|
||||
else if (COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches()) {
|
||||
; // Skip over the comment lines, (which start with '#')
|
||||
}
|
||||
// Read in the covariates that were used from the input file
|
||||
else if (COVARIATE_PATTERN.matcher(line).matches()) { // The line string is either specifying a covariate or is giving csv data
|
||||
if (foundAllCovariates) {
|
||||
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE);
|
||||
} else { // Found the covariate list in input file, loop through all of them and instantiate them
|
||||
}
|
||||
else { // Found the covariate list in input file, loop through all of them and instantiate them
|
||||
String[] vals = line.split(",");
|
||||
for (int iii = 0; iii < vals.length - 3; iii++) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical
|
||||
boolean foundClass = false;
|
||||
|
|
@ -243,7 +246,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
}
|
||||
}
|
||||
|
||||
} else { // Found a line of data
|
||||
}
|
||||
else { // Found a line of data
|
||||
if (!foundAllCovariates) {
|
||||
foundAllCovariates = true;
|
||||
|
||||
|
|
@ -302,7 +306,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
try {
|
||||
final String version = headerInfo.getString("org.broadinstitute.sting.gatk.version");
|
||||
programRecord.setProgramVersion(version);
|
||||
} catch (MissingResourceException e) {}
|
||||
} catch (MissingResourceException e) {
|
||||
}
|
||||
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append(getToolkit().createApproximateCommandLineArgumentString(getToolkit(), this));
|
||||
|
|
@ -331,6 +336,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
/**
|
||||
* For each covariate read in a value and parse it. Associate those values with the data itself (num observation and num mismatches)
|
||||
*
|
||||
* @param line A line of CSV data read from the recalibration table data file
|
||||
*/
|
||||
private void addCSVData(final File file, final String line) {
|
||||
|
|
@ -388,7 +394,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
numReadsWithMalformedColorSpace++;
|
||||
if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED) {
|
||||
return read; // can't recalibrate a SOLiD read with no calls in the color space, and the user wants to skip over them
|
||||
} else if ( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ ) {
|
||||
}
|
||||
else if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ) {
|
||||
read.setReadFailsVendorQualityCheckFlag(true);
|
||||
return read;
|
||||
}
|
||||
|
|
@ -398,8 +405,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
}
|
||||
|
||||
//compute all covariate values for this read
|
||||
final Comparable[][] covariateValues_offset_x_covar =
|
||||
RecalDataManager.computeCovariates(read, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION);
|
||||
final Comparable[][] covariateValues_offset_x_covar = RecalDataManager.computeCovariates(read, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION);
|
||||
|
||||
// For each base in the read
|
||||
for (int offset = 0; offset < read.getReadLength(); offset++) {
|
||||
|
|
@ -407,8 +413,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
final Object[] fullCovariateKey = covariateValues_offset_x_covar[offset];
|
||||
|
||||
Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
|
||||
if(qualityScore == null)
|
||||
{
|
||||
if (qualityScore == null) {
|
||||
qualityScore = performSequentialQualityCalculation(fullCovariateKey);
|
||||
qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey);
|
||||
}
|
||||
|
|
@ -446,6 +451,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
* - The final shift equation is:
|
||||
*
|
||||
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
|
||||
*
|
||||
* @param key The list of Comparables that were calculated from the covariates
|
||||
* @return A recalibrated quality score as a byte
|
||||
*/
|
||||
|
|
@ -508,6 +514,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
/**
|
||||
* Loop over the list of qualities and overwrite the newly recalibrated score to be the original score if it was less than some threshold
|
||||
*
|
||||
* @param originalQuals The list of original base quality scores
|
||||
* @param recalQuals A list of the new recalibrated quality scores
|
||||
*/
|
||||
|
|
@ -527,6 +534,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
/**
|
||||
* Start the reduce with a handle to the output bam file
|
||||
*
|
||||
* @return A FileWriter pointing to a new bam file
|
||||
*/
|
||||
public SAMFileWriter reduceInit() {
|
||||
|
|
@ -535,6 +543,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
/**
|
||||
* Output each read to disk
|
||||
*
|
||||
* @param read The read to output
|
||||
* @param output The FileWriter to write the read to
|
||||
* @return The FileWriter
|
||||
|
|
@ -548,6 +557,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
/**
|
||||
* Do nothing
|
||||
*
|
||||
* @param output The SAMFileWriter that outputs the bam file
|
||||
*/
|
||||
public void onTraversalDone(SAMFileWriter output) {
|
||||
|
|
@ -557,7 +567,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
"because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " +
|
||||
"for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " +
|
||||
"These reads remain in the output bam file but haven't been corrected for reference bias. !!! USE AT YOUR OWN RISK !!!");
|
||||
} else if ( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ ) {
|
||||
}
|
||||
else if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ) {
|
||||
Utils.warnUser("Discovered " + numReadsWithMalformedColorSpace + " SOLiD reads with no calls in the color space. Unfortunately these reads cannot be recalibrated with this recalibration algorithm " +
|
||||
"because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " +
|
||||
"for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " +
|
||||
|
|
|
|||
Loading…
Reference in New Issue