BQSR Parameter cleanup
* get rid of 320C argument that nobody uses. * get rid of DEFAULT_READ_GROUP parameter and functionality (later to become an engine argument).
This commit is contained in:
parent
717cd4b912
commit
0d3ea0401c
|
|
@ -129,13 +129,14 @@ import java.util.Map;
|
||||||
* -cov DinucCovariate \
|
* -cov DinucCovariate \
|
||||||
* -recalFile my_reads.recal_data.csv
|
* -recalFile my_reads.recal_data.csv
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
||||||
@By( DataSource.READS ) // Only look at covered loci, not every locus of the reference file
|
@By(DataSource.READS) // Only look at covered loci, not every locus of the reference file
|
||||||
@ReadFilters( {MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class} ) // Filter out all reads with zero or unavailable mapping quality
|
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class})
|
||||||
@Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta
|
// Filter out all reads with zero or unavailable mapping quality
|
||||||
|
@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES})
|
||||||
|
// This walker requires both -I input.bam and -R reference.fasta
|
||||||
@PartitionBy(PartitionType.LOCUS)
|
@PartitionBy(PartitionType.LOCUS)
|
||||||
public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.CountedData, CountCovariatesWalker.CountedData> implements TreeReducible<CountCovariatesWalker.CountedData> {
|
public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.CountedData, CountCovariatesWalker.CountedData> implements TreeReducible<CountCovariatesWalker.CountedData> {
|
||||||
|
|
||||||
|
|
@ -149,7 +150,8 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Shared Arguments
|
// Shared Arguments
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
@ArgumentCollection private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
@ArgumentCollection
|
||||||
|
private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Command Line Arguments
|
// Command Line Arguments
|
||||||
|
|
@ -160,7 +162,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
* for use as this database. For users wishing to exclude an interval list of known variation simply use -XL my.interval.list to skip over processing those sites.
|
* for use as this database. For users wishing to exclude an interval list of known variation simply use -XL my.interval.list to skip over processing those sites.
|
||||||
* Please note however that the statistics reported by the tool will not accurately reflect those sites skipped by the -XL argument.
|
* Please note however that the statistics reported by the tool will not accurately reflect those sites skipped by the -XL argument.
|
||||||
*/
|
*/
|
||||||
@Input(fullName="knownSites", shortName = "knownSites", doc="A database of known polymorphic sites to skip over in the recalibration algorithm", required=false)
|
@Input(fullName = "knownSites", shortName = "knownSites", doc = "A database of known polymorphic sites to skip over in the recalibration algorithm", required = false)
|
||||||
public List<RodBinding<Feature>> knownSites = Collections.emptyList();
|
public List<RodBinding<Feature>> knownSites = Collections.emptyList();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -169,31 +171,31 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
* three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
|
* three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
|
||||||
* and the raw empirical quality score calculated by phred-scaling the mismatch rate.
|
* and the raw empirical quality score calculated by phred-scaling the mismatch rate.
|
||||||
*/
|
*/
|
||||||
@Output(fullName="recal_file", shortName="recalFile", required=true, doc="Filename for the output covariates table recalibration file")
|
@Output(fullName = "recal_file", shortName = "recalFile", required = true, doc = "Filename for the output covariates table recalibration file")
|
||||||
@Gather(CountCovariatesGatherer.class)
|
@Gather(CountCovariatesGatherer.class)
|
||||||
public PrintStream RECAL_FILE;
|
public PrintStream RECAL_FILE;
|
||||||
|
|
||||||
@Argument(fullName="list", shortName="ls", doc="List the available covariates and exit", required=false)
|
@Argument(fullName = "list", shortName = "ls", doc = "List the available covariates and exit", required = false)
|
||||||
private boolean LIST_ONLY = false;
|
private boolean LIST_ONLY = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* See the -list argument to view available covariates.
|
* See the -list argument to view available covariates.
|
||||||
*/
|
*/
|
||||||
@Argument(fullName="covariate", shortName="cov", doc="Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required=false)
|
@Argument(fullName = "covariate", shortName = "cov", doc = "Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required = false)
|
||||||
private String[] COVARIATES = null;
|
private String[] COVARIATES = null;
|
||||||
@Argument(fullName="standard_covs", shortName="standard", doc="Use the standard set of covariates in addition to the ones listed using the -cov argument", required=false)
|
@Argument(fullName = "standard_covs", shortName = "standard", doc = "Use the standard set of covariates in addition to the ones listed using the -cov argument", required = false)
|
||||||
private boolean USE_STANDARD_COVARIATES = false;
|
private boolean USE_STANDARD_COVARIATES = false;
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Debugging-only Arguments
|
// Debugging-only Arguments
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
@Argument(fullName="dont_sort_output", shortName="unsorted", required=false, doc="If specified, the output table recalibration csv file will be in an unsorted, arbitrary order to save some run time.")
|
@Argument(fullName = "dont_sort_output", shortName = "unsorted", required = false, doc = "If specified, the output table recalibration csv file will be in an unsorted, arbitrary order to save some run time.")
|
||||||
private boolean DONT_SORT_OUTPUT = false;
|
private boolean DONT_SORT_OUTPUT = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This calculation is critically dependent on being able to skip over known polymorphic sites. Please be sure that you know what you are doing if you use this option.
|
* This calculation is critically dependent on being able to skip over known polymorphic sites. Please be sure that you know what you are doing if you use this option.
|
||||||
*/
|
*/
|
||||||
@Argument(fullName="run_without_dbsnp_potentially_ruining_quality", shortName="run_without_dbsnp_potentially_ruining_quality", required=false, doc="If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
|
@Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
|
||||||
private boolean RUN_WITHOUT_DBSNP = false;
|
private boolean RUN_WITHOUT_DBSNP = false;
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
|
|
@ -217,6 +219,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds the values of other to this, returning this
|
* Adds the values of other to this, returning this
|
||||||
|
*
|
||||||
* @param other
|
* @param other
|
||||||
* @return this object
|
* @return this object
|
||||||
*/
|
*/
|
||||||
|
|
@ -247,53 +250,55 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
*/
|
*/
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
|
|
||||||
if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
|
if (RAC.FORCE_PLATFORM != null) {
|
||||||
if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
|
RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM;
|
||||||
|
}
|
||||||
|
|
||||||
// Get a list of all available covariates
|
// Get a list of all available covariates
|
||||||
final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>( Covariate.class ).getPlugins();
|
final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
|
||||||
final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>( RequiredCovariate.class ).getPlugins();
|
final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins();
|
||||||
final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>( StandardCovariate.class ).getPlugins();
|
final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins();
|
||||||
|
|
||||||
// Print and exit if that's what was requested
|
// Print and exit if that's what was requested
|
||||||
if ( LIST_ONLY ) {
|
if (LIST_ONLY) {
|
||||||
logger.info( "Available covariates:" );
|
logger.info("Available covariates:");
|
||||||
for( Class<?> covClass : covariateClasses ) {
|
for (Class<?> covClass : covariateClasses) {
|
||||||
logger.info( covClass.getSimpleName() );
|
logger.info(covClass.getSimpleName());
|
||||||
}
|
}
|
||||||
logger.info("");
|
logger.info("");
|
||||||
|
|
||||||
System.exit( 0 ); // Early exit here because user requested it
|
System.exit(0); // Early exit here because user requested it
|
||||||
}
|
}
|
||||||
|
|
||||||
// Warn the user if no dbSNP file or other variant mask was specified
|
// Warn the user if no dbSNP file or other variant mask was specified
|
||||||
if( knownSites.isEmpty() && !RUN_WITHOUT_DBSNP ) {
|
if (knownSites.isEmpty() && !RUN_WITHOUT_DBSNP) {
|
||||||
throw new UserException.CommandLineException("This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation.");
|
throw new UserException.CommandLineException("This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation.");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize the requested covariates by parsing the -cov argument
|
// Initialize the requested covariates by parsing the -cov argument
|
||||||
// First add the required covariates
|
// First add the required covariates
|
||||||
if( requiredClasses.size() == 2) { // readGroup and reported quality score
|
if (requiredClasses.size() == 2) { // readGroup and reported quality score
|
||||||
requestedCovariates.add( new ReadGroupCovariate() ); // Order is important here
|
requestedCovariates.add(new ReadGroupCovariate()); // Order is important here
|
||||||
requestedCovariates.add( new QualityScoreCovariate() );
|
requestedCovariates.add(new QualityScoreCovariate());
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
throw new UserException.CommandLineException("There are more required covariates than expected. The instantiation list needs to be updated with the new required covariate and in the correct order.");
|
throw new UserException.CommandLineException("There are more required covariates than expected. The instantiation list needs to be updated with the new required covariate and in the correct order.");
|
||||||
}
|
}
|
||||||
// Next add the standard covariates if -standard was specified by the user
|
// Next add the standard covariates if -standard was specified by the user
|
||||||
if( USE_STANDARD_COVARIATES ) {
|
if (USE_STANDARD_COVARIATES) {
|
||||||
// We want the standard covariates to appear in a consistent order but the packageUtils method gives a random order
|
// We want the standard covariates to appear in a consistent order but the packageUtils method gives a random order
|
||||||
// A list of Classes can't be sorted, but a list of Class names can be
|
// A list of Classes can't be sorted, but a list of Class names can be
|
||||||
final List<String> standardClassNames = new ArrayList<String>();
|
final List<String> standardClassNames = new ArrayList<String>();
|
||||||
for( Class<?> covClass : standardClasses ) {
|
for (Class<?> covClass : standardClasses) {
|
||||||
standardClassNames.add( covClass.getName() );
|
standardClassNames.add(covClass.getName());
|
||||||
}
|
}
|
||||||
Collections.sort(standardClassNames); // Sort the list of class names
|
Collections.sort(standardClassNames); // Sort the list of class names
|
||||||
for( String className : standardClassNames ) {
|
for (String className : standardClassNames) {
|
||||||
for( Class<?> covClass : standardClasses ) { // Find the class that matches this class name
|
for (Class<?> covClass : standardClasses) { // Find the class that matches this class name
|
||||||
if( covClass.getName().equals( className ) ) {
|
if (covClass.getName().equals(className)) {
|
||||||
try {
|
try {
|
||||||
final Covariate covariate = (Covariate)covClass.newInstance();
|
final Covariate covariate = (Covariate) covClass.newInstance();
|
||||||
requestedCovariates.add( covariate );
|
requestedCovariates.add(covariate);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new DynamicClassResolutionException(covClass, e);
|
throw new DynamicClassResolutionException(covClass, e);
|
||||||
}
|
}
|
||||||
|
|
@ -302,17 +307,17 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Finally parse the -cov arguments that were provided, skipping over the ones already specified
|
// Finally parse the -cov arguments that were provided, skipping over the ones already specified
|
||||||
if( COVARIATES != null ) {
|
if (COVARIATES != null) {
|
||||||
for( String requestedCovariateString : COVARIATES ) {
|
for (String requestedCovariateString : COVARIATES) {
|
||||||
boolean foundClass = false;
|
boolean foundClass = false;
|
||||||
for( Class<?> covClass : covariateClasses ) {
|
for (Class<?> covClass : covariateClasses) {
|
||||||
if( requestedCovariateString.equalsIgnoreCase( covClass.getSimpleName() ) ) { // -cov argument matches the class name for an implementing class
|
if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class
|
||||||
foundClass = true;
|
foundClass = true;
|
||||||
if( !requiredClasses.contains( covClass ) && (!USE_STANDARD_COVARIATES || !standardClasses.contains( covClass )) ) {
|
if (!requiredClasses.contains(covClass) && (!USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) {
|
||||||
try {
|
try {
|
||||||
// Now that we've found a matching class, try to instantiate it
|
// Now that we've found a matching class, try to instantiate it
|
||||||
final Covariate covariate = (Covariate)covClass.newInstance();
|
final Covariate covariate = (Covariate) covClass.newInstance();
|
||||||
requestedCovariates.add( covariate );
|
requestedCovariates.add(covariate);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new DynamicClassResolutionException(covClass, e);
|
throw new DynamicClassResolutionException(covClass, e);
|
||||||
}
|
}
|
||||||
|
|
@ -320,20 +325,19 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if( !foundClass ) {
|
if (!foundClass) {
|
||||||
throw new UserException.CommandLineException( "The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates." );
|
throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info( "The covariates being used here: " );
|
logger.info("The covariates being used here: ");
|
||||||
for( Covariate cov : requestedCovariates ) {
|
for (Covariate cov : requestedCovariates) {
|
||||||
logger.info( "\t" + cov.getClass().getSimpleName() );
|
logger.info("\t" + cov.getClass().getSimpleName());
|
||||||
cov.initialize( RAC ); // Initialize any covariate member variables using the shared argument collection
|
cov.initialize(RAC); // Initialize any covariate member variables using the shared argument collection
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// map
|
// map
|
||||||
|
|
@ -342,62 +346,63 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* For each read at this locus get the various covariate values and increment that location in the map based on
|
* For each read at this locus get the various covariate values and increment that location in the map based on
|
||||||
* whether or not the base matches the reference at this particular location
|
* whether or not the base matches the reference at this particular location
|
||||||
|
*
|
||||||
* @param tracker The reference metadata tracker
|
* @param tracker The reference metadata tracker
|
||||||
* @param ref The reference context
|
* @param ref The reference context
|
||||||
* @param context The alignment context
|
* @param context The alignment context
|
||||||
* @return The CountedData counter filled in for this locus, which the reduce step adds into its running sum
|
* @return The CountedData counter filled in for this locus, which the reduce step adds into its running sum
|
||||||
*/
|
*/
|
||||||
public CountedData map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
public CountedData map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
// Only use data from non-dbsnp sites
|
// Only use data from non-dbsnp sites
|
||||||
// Assume every mismatch at a non-dbsnp site is indicative of poor quality
|
// Assume every mismatch at a non-dbsnp site is indicative of poor quality
|
||||||
CountedData counter = new CountedData();
|
CountedData counter = new CountedData();
|
||||||
if( tracker.getValues(knownSites).size() == 0 ) { // If something here is in one of the knownSites tracks then skip over it, otherwise proceed
|
if (tracker.getValues(knownSites).size() == 0) { // If something here is in one of the knownSites tracks then skip over it, otherwise proceed
|
||||||
// For each read at this locus
|
// For each read at this locus
|
||||||
for( final PileupElement p : context.getBasePileup() ) {
|
for (final PileupElement p : context.getBasePileup()) {
|
||||||
final GATKSAMRecord gatkRead = p.getRead();
|
final GATKSAMRecord gatkRead = p.getRead();
|
||||||
int offset = p.getOffset();
|
int offset = p.getOffset();
|
||||||
|
|
||||||
if( gatkRead.containsTemporaryAttribute( SKIP_RECORD_ATTRIBUTE ) ) {
|
if (gatkRead.containsTemporaryAttribute(SKIP_RECORD_ATTRIBUTE)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( !gatkRead.containsTemporaryAttribute( SEEN_ATTRIBUTE ) )
|
if (!gatkRead.containsTemporaryAttribute(SEEN_ATTRIBUTE)) {
|
||||||
{
|
gatkRead.setTemporaryAttribute(SEEN_ATTRIBUTE, true);
|
||||||
gatkRead.setTemporaryAttribute( SEEN_ATTRIBUTE, true );
|
RecalDataManager.parseSAMRecord(gatkRead, RAC);
|
||||||
RecalDataManager.parseSAMRecord( gatkRead, RAC );
|
|
||||||
|
|
||||||
// Skip over reads with no calls in the color space if the user requested it
|
// Skip over reads with no calls in the color space if the user requested it
|
||||||
if( !(RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) && RecalDataManager.checkNoCallColorSpace( gatkRead ) ) {
|
if (!(RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) && RecalDataManager.checkNoCallColorSpace(gatkRead)) {
|
||||||
gatkRead.setTemporaryAttribute( SKIP_RECORD_ATTRIBUTE, true);
|
gatkRead.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
RecalDataManager.parseColorSpace( gatkRead );
|
RecalDataManager.parseColorSpace(gatkRead);
|
||||||
gatkRead.setTemporaryAttribute( COVARS_ATTRIBUTE,
|
gatkRead.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(gatkRead, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION));
|
||||||
RecalDataManager.computeCovariates( gatkRead, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION ));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip this position if base quality is zero
|
// Skip this position if base quality is zero
|
||||||
if( gatkRead.getBaseQualities()[offset] > 0 ) {
|
if (gatkRead.getBaseQualities()[offset] > 0) {
|
||||||
|
|
||||||
byte[] bases = gatkRead.getReadBases();
|
byte[] bases = gatkRead.getReadBases();
|
||||||
byte refBase = ref.getBase();
|
byte refBase = ref.getBase();
|
||||||
|
|
||||||
// Skip if this base is an 'N' or etc.
|
// Skip if this base is an 'N' or etc.
|
||||||
if( BaseUtils.isRegularBase( bases[offset] ) ) {
|
if (BaseUtils.isRegularBase(bases[offset])) {
|
||||||
|
|
||||||
// SOLID bams have inserted the reference base into the read if the color space is inconsistent with the read base so skip it
|
// SOLID bams have inserted the reference base into the read if the color space is inconsistent with the read base so skip it
|
||||||
if( !gatkRead.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING ||
|
if (!gatkRead.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING ||
|
||||||
!RecalDataManager.isInconsistentColorSpace( gatkRead, offset ) ) {
|
!RecalDataManager.isInconsistentColorSpace(gatkRead, offset)) {
|
||||||
|
|
||||||
// This base finally passed all the checks for a good base, so add it to the big data hashmap
|
// This base finally passed all the checks for a good base, so add it to the big data hashmap
|
||||||
updateDataFromRead( counter, gatkRead, offset, refBase );
|
updateDataFromRead(counter, gatkRead, offset, refBase);
|
||||||
|
|
||||||
} else { // calculate SOLID reference insertion rate
|
}
|
||||||
if( refBase == bases[offset] ) {
|
else { // calculate SOLID reference insertion rate
|
||||||
|
if (refBase == bases[offset]) {
|
||||||
counter.solidInsertedReferenceBases++;
|
counter.solidInsertedReferenceBases++;
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
counter.otherColorSpaceInconsistency++;
|
counter.otherColorSpaceInconsistency++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -405,7 +410,8 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
counter.countedSites++;
|
counter.countedSites++;
|
||||||
} else { // We skipped over the dbSNP site, and we are only processing every Nth locus
|
}
|
||||||
|
else { // We skipped over the dbSNP site, and we are only processing every Nth locus
|
||||||
counter.skippedSites++;
|
counter.skippedSites++;
|
||||||
updateMismatchCounts(counter, context, ref.getBase()); // For sanity check to ensure novel mismatch rate vs dbSNP mismatch rate is reasonable
|
updateMismatchCounts(counter, context, ref.getBase()); // For sanity check to ensure novel mismatch rate vs dbSNP mismatch rate is reasonable
|
||||||
}
|
}
|
||||||
|
|
@ -413,7 +419,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
return counter;
|
return counter;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Update the mismatch / total_base counts for a given class of loci.
|
* Update the mismatch / total_base counts for a given class of loci.
|
||||||
*
|
*
|
||||||
* @param counter The CountedData to be updated
|
* @param counter The CountedData to be updated
|
||||||
|
|
@ -421,13 +427,13 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
* @param refBase The reference base
|
* @param refBase The reference base
|
||||||
*/
|
*/
|
||||||
private static void updateMismatchCounts(CountedData counter, final AlignmentContext context, final byte refBase) {
|
private static void updateMismatchCounts(CountedData counter, final AlignmentContext context, final byte refBase) {
|
||||||
for( PileupElement p : context.getBasePileup() ) {
|
for (PileupElement p : context.getBasePileup()) {
|
||||||
final byte readBase = p.getBase();
|
final byte readBase = p.getBase();
|
||||||
final int readBaseIndex = BaseUtils.simpleBaseToBaseIndex(readBase);
|
final int readBaseIndex = BaseUtils.simpleBaseToBaseIndex(readBase);
|
||||||
final int refBaseIndex = BaseUtils.simpleBaseToBaseIndex(refBase);
|
final int refBaseIndex = BaseUtils.simpleBaseToBaseIndex(refBase);
|
||||||
|
|
||||||
if( readBaseIndex != -1 && refBaseIndex != -1 ) {
|
if (readBaseIndex != -1 && refBaseIndex != -1) {
|
||||||
if( readBaseIndex != refBaseIndex ) {
|
if (readBaseIndex != refBaseIndex) {
|
||||||
counter.novelCountsMM++;
|
counter.novelCountsMM++;
|
||||||
}
|
}
|
||||||
counter.novelCountsBases++;
|
counter.novelCountsBases++;
|
||||||
|
|
@ -439,13 +445,14 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
* Major workhorse routine for this walker.
|
* Major workhorse routine for this walker.
|
||||||
* Loop through the list of requested covariates and pick out the value from the read, offset, and reference
|
* Loop through the list of requested covariates and pick out the value from the read, offset, and reference
|
||||||
* Using the list of covariate values as a key, pick out the RecalDatum and increment,
|
* Using the list of covariate values as a key, pick out the RecalDatum and increment,
|
||||||
* adding one to the number of observations and potentially one to the number of mismatches
|
* adding one to the number of observations and potentially one to the number of mismatches
|
||||||
* Lots of things are passed as parameters to this method as a strategy for optimizing the covariate.getValue calls
|
* Lots of things are passed as parameters to this method as a strategy for optimizing the covariate.getValue calls
|
||||||
* because pulling things out of the SAMRecord is an expensive operation.
|
* because pulling things out of the SAMRecord is an expensive operation.
|
||||||
* @param counter Data structure which holds the counted bases
|
*
|
||||||
|
* @param counter Data structure which holds the counted bases
|
||||||
* @param gatkRead The SAMRecord holding all the data for this read
|
* @param gatkRead The SAMRecord holding all the data for this read
|
||||||
* @param offset The offset in the read for this locus
|
* @param offset The offset in the read for this locus
|
||||||
* @param refBase The reference base at this locus
|
* @param refBase The reference base at this locus
|
||||||
*/
|
*/
|
||||||
private void updateDataFromRead(CountedData counter, final GATKSAMRecord gatkRead, final int offset, final byte refBase) {
|
private void updateDataFromRead(CountedData counter, final GATKSAMRecord gatkRead, final int offset, final byte refBase) {
|
||||||
final Object[][] covars = (Comparable[][]) gatkRead.getTemporaryAttribute(COVARS_ATTRIBUTE);
|
final Object[][] covars = (Comparable[][]) gatkRead.getTemporaryAttribute(COVARS_ATTRIBUTE);
|
||||||
|
|
@ -453,10 +460,10 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
|
|
||||||
// Using the list of covariate values as a key, pick out the RecalDatum from the data HashMap
|
// Using the list of covariate values as a key, pick out the RecalDatum from the data HashMap
|
||||||
final NestedHashMap data = dataManager.data; //optimization - create local reference
|
final NestedHashMap data = dataManager.data; //optimization - create local reference
|
||||||
RecalDatumOptimized datum = (RecalDatumOptimized) data.get( key );
|
RecalDatumOptimized datum = (RecalDatumOptimized) data.get(key);
|
||||||
if( datum == null ) { // key doesn't exist yet in the map so make a new bucket and add it
|
if (datum == null) { // key doesn't exist yet in the map so make a new bucket and add it
|
||||||
// initialized with zeros, will be incremented at end of method
|
// initialized with zeros, will be incremented at end of method
|
||||||
datum = (RecalDatumOptimized)data.put( new RecalDatumOptimized(), true, (Object[])key );
|
datum = (RecalDatumOptimized) data.put(new RecalDatumOptimized(), true, (Object[]) key);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Need the bases to determine whether or not we have a mismatch
|
// Need the bases to determine whether or not we have a mismatch
|
||||||
|
|
@ -464,13 +471,12 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
final long curMismatches = datum.getNumMismatches();
|
final long curMismatches = datum.getNumMismatches();
|
||||||
|
|
||||||
// Add one to the number of observations and potentially one to the number of mismatches
|
// Add one to the number of observations and potentially one to the number of mismatches
|
||||||
datum.incrementBaseCounts( base, refBase );
|
datum.incrementBaseCounts(base, refBase);
|
||||||
counter.countedBases++;
|
counter.countedBases++;
|
||||||
counter.novelCountsBases++;
|
counter.novelCountsBases++;
|
||||||
counter.novelCountsMM += datum.getNumMismatches() - curMismatches; // For sanity check to ensure novel mismatch rate vs dbSNP mismatch rate is reasonable
|
counter.novelCountsMM += datum.getNumMismatches() - curMismatches; // For sanity check to ensure novel mismatch rate vs dbSNP mismatch rate is reasonable
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// reduce
|
// reduce
|
||||||
|
|
@ -479,6 +485,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialize the reduce step by creating a PrintStream from the filename specified as an argument to the walker.
|
* Initialize the reduce step by creating a PrintStream from the filename specified as an argument to the walker.
|
||||||
|
*
|
||||||
* @return returns A PrintStream created from the -recalFile filename argument specified to the walker
|
* @return returns A PrintStream created from the -recalFile filename argument specified to the walker
|
||||||
*/
|
*/
|
||||||
public CountedData reduceInit() {
|
public CountedData reduceInit() {
|
||||||
|
|
@ -487,11 +494,12 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Reduce method adds the mapped counts into the running sum and then runs the periodic dbSNP mismatch-rate sanity check.
|
* The Reduce method adds the mapped counts into the running sum and then runs the periodic dbSNP mismatch-rate sanity check.
|
||||||
|
*
|
||||||
* @param mapped Result of the map. Its counts are added into the sum.
|
* @param mapped Result of the map. Its counts are added into the sum.
|
||||||
* @param sum The summing CountedData used to output the CSV data
|
* @param sum The summing CountedData used to output the CSV data
|
||||||
* @return returns The sum used to output the CSV data
|
* @return returns The sum used to output the CSV data
|
||||||
*/
|
*/
|
||||||
public CountedData reduce( CountedData mapped, CountedData sum ) {
|
public CountedData reduce(CountedData mapped, CountedData sum) {
|
||||||
// Do a dbSNP sanity check every so often
|
// Do a dbSNP sanity check every so often
|
||||||
return validatingDbsnpMismatchRate(sum.add(mapped));
|
return validatingDbsnpMismatchRate(sum.add(mapped));
|
||||||
}
|
}
|
||||||
|
|
@ -500,16 +508,15 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
* Validate the dbSNP reference mismatch rates.
|
* Validate the dbSNP reference mismatch rates.
|
||||||
*/
|
*/
|
||||||
private CountedData validatingDbsnpMismatchRate(CountedData counter) {
|
private CountedData validatingDbsnpMismatchRate(CountedData counter) {
|
||||||
if( ++counter.lociSinceLastDbsnpCheck >= DBSNP_VALIDATION_CHECK_FREQUENCY ) {
|
if (++counter.lociSinceLastDbsnpCheck >= DBSNP_VALIDATION_CHECK_FREQUENCY) {
|
||||||
counter.lociSinceLastDbsnpCheck = 0;
|
counter.lociSinceLastDbsnpCheck = 0;
|
||||||
|
|
||||||
if( counter.novelCountsBases != 0L && counter.dbSNPCountsBases != 0L ) {
|
if (counter.novelCountsBases != 0L && counter.dbSNPCountsBases != 0L) {
|
||||||
final double fractionMM_novel = (double)counter.novelCountsMM / (double)counter.novelCountsBases;
|
final double fractionMM_novel = (double) counter.novelCountsMM / (double) counter.novelCountsBases;
|
||||||
final double fractionMM_dbsnp = (double)counter.dbSNPCountsMM / (double)counter.dbSNPCountsBases;
|
final double fractionMM_dbsnp = (double) counter.dbSNPCountsMM / (double) counter.dbSNPCountsBases;
|
||||||
|
|
||||||
if( fractionMM_dbsnp < DBSNP_VS_NOVEL_MISMATCH_RATE * fractionMM_novel ) {
|
if (fractionMM_dbsnp < DBSNP_VS_NOVEL_MISMATCH_RATE * fractionMM_novel) {
|
||||||
Utils.warnUser("The variation rate at the supplied list of known variant sites seems suspiciously low. Please double-check that the correct ROD is being used. " +
|
Utils.warnUser("The variation rate at the supplied list of known variant sites seems suspiciously low. Please double-check that the correct ROD is being used. " + String.format("[dbSNP variation rate = %.4f, novel variation rate = %.4f]", fractionMM_dbsnp, fractionMM_novel));
|
||||||
String.format("[dbSNP variation rate = %.4f, novel variation rate = %.4f]", fractionMM_dbsnp, fractionMM_novel) );
|
|
||||||
DBSNP_VALIDATION_CHECK_FREQUENCY *= 2; // Don't annoyingly output the warning message every megabase of a large file
|
DBSNP_VALIDATION_CHECK_FREQUENCY *= 2; // Don't annoyingly output the warning message every megabase of a large file
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -518,47 +525,50 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
return counter;
|
return counter;
|
||||||
}
|
}
|
||||||
|
|
||||||
public CountedData treeReduce( CountedData sum1, CountedData sum2 ) {
|
public CountedData treeReduce(CountedData sum1, CountedData sum2) {
|
||||||
return validatingDbsnpMismatchRate(sum1.add(sum2));
|
return validatingDbsnpMismatchRate(sum1.add(sum2));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Write out the full data hashmap to disk in CSV format
|
* Write out the full data hashmap to disk in CSV format
|
||||||
|
*
|
||||||
* @param sum The CountedData to write out to RECAL_FILE
|
* @param sum The CountedData to write out to RECAL_FILE
|
||||||
*/
|
*/
|
||||||
public void onTraversalDone( CountedData sum ) {
|
public void onTraversalDone(CountedData sum) {
|
||||||
logger.info( "Writing raw recalibration data..." );
|
logger.info("Writing raw recalibration data...");
|
||||||
if( sum.countedBases == 0L ) {
|
if (sum.countedBases == 0L) {
|
||||||
throw new UserException.BadInput("Could not find any usable data in the input BAM file(s).");
|
throw new UserException.BadInput("Could not find any usable data in the input BAM file(s).");
|
||||||
}
|
}
|
||||||
outputToCSV( sum, RECAL_FILE );
|
outputToCSV(sum, RECAL_FILE);
|
||||||
logger.info( "...done!" );
|
logger.info("...done!");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* For each entry (key-value pair) in the data hashmap output the Covariate's values as well as the RecalDatum's data in CSV format
|
* For each entry (key-value pair) in the data hashmap output the Covariate's values as well as the RecalDatum's data in CSV format
|
||||||
|
*
|
||||||
* @param recalTableStream The PrintStream to write out to
|
* @param recalTableStream The PrintStream to write out to
|
||||||
*/
|
*/
|
||||||
private void outputToCSV( CountedData sum, final PrintStream recalTableStream ) {
|
private void outputToCSV(CountedData sum, final PrintStream recalTableStream) {
|
||||||
recalTableStream.printf("# Counted Sites %d%n", sum.countedSites);
|
recalTableStream.printf("# Counted Sites %d%n", sum.countedSites);
|
||||||
recalTableStream.printf("# Counted Bases %d%n", sum.countedBases);
|
recalTableStream.printf("# Counted Bases %d%n", sum.countedBases);
|
||||||
recalTableStream.printf("# Skipped Sites %d%n", sum.skippedSites);
|
recalTableStream.printf("# Skipped Sites %d%n", sum.skippedSites);
|
||||||
recalTableStream.printf("# Fraction Skipped 1 / %.0f bp%n", (double)sum.countedSites / sum.skippedSites);
|
recalTableStream.printf("# Fraction Skipped 1 / %.0f bp%n", (double) sum.countedSites / sum.skippedSites);
|
||||||
|
|
||||||
if( sum.solidInsertedReferenceBases != 0 ) {
|
if (sum.solidInsertedReferenceBases != 0) {
|
||||||
recalTableStream.printf("# Fraction SOLiD inserted reference 1 / %.0f bases%n", (double) sum.countedBases / sum.solidInsertedReferenceBases);
|
recalTableStream.printf("# Fraction SOLiD inserted reference 1 / %.0f bases%n", (double) sum.countedBases / sum.solidInsertedReferenceBases);
|
||||||
recalTableStream.printf("# Fraction other color space inconsistencies 1 / %.0f bases%n", (double) sum.countedBases / sum.otherColorSpaceInconsistency);
|
recalTableStream.printf("# Fraction other color space inconsistencies 1 / %.0f bases%n", (double) sum.countedBases / sum.otherColorSpaceInconsistency);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Output header saying which covariates were used and in what order
|
// Output header saying which covariates were used and in what order
|
||||||
for( Covariate cov : requestedCovariates ) {
|
for (Covariate cov : requestedCovariates) {
|
||||||
recalTableStream.print( cov.getClass().getSimpleName().split("Covariate")[0] + "," );
|
recalTableStream.print(cov.getClass().getSimpleName().split("Covariate")[0] + ",");
|
||||||
}
|
}
|
||||||
recalTableStream.println("nObservations,nMismatches,Qempirical");
|
recalTableStream.println("nObservations,nMismatches,Qempirical");
|
||||||
|
|
||||||
if( DONT_SORT_OUTPUT ) {
|
if (DONT_SORT_OUTPUT) {
|
||||||
printMappings(recalTableStream, 0, new Object[requestedCovariates.size()], dataManager.data.data);
|
printMappings(recalTableStream, 0, new Object[requestedCovariates.size()], dataManager.data.data);
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
printMappingsSorted(recalTableStream, 0, new Object[requestedCovariates.size()], dataManager.data.data);
|
printMappingsSorted(recalTableStream, 0, new Object[requestedCovariates.size()], dataManager.data.data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -566,45 +576,47 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
recalTableStream.println(TableRecalibrationWalker.EOF_MARKER);
|
recalTableStream.println(TableRecalibrationWalker.EOF_MARKER);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void printMappingsSorted( final PrintStream recalTableStream, final int curPos, final Object[] key, final Map data) {
|
private void printMappingsSorted(final PrintStream recalTableStream, final int curPos, final Object[] key, final Map data) {
|
||||||
final ArrayList<Comparable> keyList = new ArrayList<Comparable>();
|
final ArrayList<Comparable> keyList = new ArrayList<Comparable>();
|
||||||
for( Object comp : data.keySet() ) {
|
for (Object comp : data.keySet()) {
|
||||||
keyList.add((Comparable) comp);
|
keyList.add((Comparable) comp);
|
||||||
}
|
}
|
||||||
|
|
||||||
Collections.sort(keyList);
|
Collections.sort(keyList);
|
||||||
|
|
||||||
for( Comparable comp : keyList ) {
|
for (Comparable comp : keyList) {
|
||||||
key[curPos] = comp;
|
key[curPos] = comp;
|
||||||
final Object val = data.get(comp);
|
final Object val = data.get(comp);
|
||||||
if( val instanceof RecalDatumOptimized ) { // We are at the end of the nested hash maps
|
if (val instanceof RecalDatumOptimized) { // We are at the end of the nested hash maps
|
||||||
// For each Covariate in the key
|
// For each Covariate in the key
|
||||||
for( Object compToPrint : key ) {
|
for (Object compToPrint : key) {
|
||||||
// Output the Covariate's value
|
// Output the Covariate's value
|
||||||
recalTableStream.print( compToPrint + "," );
|
recalTableStream.print(compToPrint + ",");
|
||||||
}
|
}
|
||||||
// Output the RecalDatum entry
|
// Output the RecalDatum entry
|
||||||
recalTableStream.println( ((RecalDatumOptimized)val).outputToCSV() );
|
recalTableStream.println(((RecalDatumOptimized) val).outputToCSV());
|
||||||
} else { // Another layer in the nested hash map
|
}
|
||||||
printMappingsSorted( recalTableStream, curPos + 1, key, (Map) val );
|
else { // Another layer in the nested hash map
|
||||||
|
printMappingsSorted(recalTableStream, curPos + 1, key, (Map) val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void printMappings( final PrintStream recalTableStream, final int curPos, final Object[] key, final Map data) {
|
private void printMappings(final PrintStream recalTableStream, final int curPos, final Object[] key, final Map data) {
|
||||||
for( Object comp : data.keySet() ) {
|
for (Object comp : data.keySet()) {
|
||||||
key[curPos] = comp;
|
key[curPos] = comp;
|
||||||
final Object val = data.get(comp);
|
final Object val = data.get(comp);
|
||||||
if( val instanceof RecalDatumOptimized ) { // We are at the end of the nested hash maps
|
if (val instanceof RecalDatumOptimized) { // We are at the end of the nested hash maps
|
||||||
// For each Covariate in the key
|
// For each Covariate in the key
|
||||||
for( Object compToPrint : key ) {
|
for (Object compToPrint : key) {
|
||||||
// Output the Covariate's value
|
// Output the Covariate's value
|
||||||
recalTableStream.print( compToPrint + "," );
|
recalTableStream.print(compToPrint + ",");
|
||||||
}
|
}
|
||||||
// Output the RecalDatum entry
|
// Output the RecalDatum entry
|
||||||
recalTableStream.println( ((RecalDatumOptimized)val).outputToCSV() );
|
recalTableStream.println(((RecalDatumOptimized) val).outputToCSV());
|
||||||
} else { // Another layer in the nested hash map
|
}
|
||||||
printMappings( recalTableStream, curPos + 1, key, (Map) val );
|
else { // Another layer in the nested hash map
|
||||||
|
printMappings(recalTableStream, curPos + 1, key, (Map) val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -256,32 +256,6 @@ public class RecalDataManager {
|
||||||
public static void parseSAMRecord(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) {
|
public static void parseSAMRecord(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) {
|
||||||
GATKSAMReadGroupRecord readGroup = ((GATKSAMRecord) read).getReadGroup();
|
GATKSAMReadGroupRecord readGroup = ((GATKSAMRecord) read).getReadGroup();
|
||||||
|
|
||||||
// If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments
|
|
||||||
if (readGroup == null) {
|
|
||||||
if (RAC.DEFAULT_READ_GROUP != null && RAC.DEFAULT_PLATFORM != null) {
|
|
||||||
if (!warnUserNullReadGroup && RAC.FORCE_READ_GROUP == null) {
|
|
||||||
Utils.warnUser("The input .bam file contains reads with no read group. " +
|
|
||||||
"Defaulting to read group ID = " + RAC.DEFAULT_READ_GROUP + " and platform = " + RAC.DEFAULT_PLATFORM + ". " +
|
|
||||||
"First observed at read with name = " + read.getReadName());
|
|
||||||
warnUserNullReadGroup = true;
|
|
||||||
}
|
|
||||||
// There is no readGroup so defaulting to these values
|
|
||||||
readGroup = new GATKSAMReadGroupRecord(RAC.DEFAULT_READ_GROUP);
|
|
||||||
readGroup.setPlatform(RAC.DEFAULT_PLATFORM);
|
|
||||||
((GATKSAMRecord) read).setReadGroup(readGroup);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
throw new UserException.MalformedBAM(read, "The input .bam file contains reads with no read group. First observed at read with name = " + read.getReadName());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (RAC.FORCE_READ_GROUP != null && !readGroup.getReadGroupId().equals(RAC.FORCE_READ_GROUP)) { // Collapse all the read groups into a single common String provided by the user
|
|
||||||
final String oldPlatform = readGroup.getPlatform();
|
|
||||||
readGroup = new GATKSAMReadGroupRecord(RAC.FORCE_READ_GROUP);
|
|
||||||
readGroup.setPlatform(oldPlatform);
|
|
||||||
((GATKSAMRecord) read).setReadGroup(readGroup);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) {
|
if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) {
|
||||||
readGroup.setPlatform(RAC.FORCE_PLATFORM);
|
readGroup.setPlatform(RAC.FORCE_PLATFORM);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -43,31 +43,15 @@ public class RecalibrationArgumentCollection {
|
||||||
// Shared Command Line Arguments
|
// Shared Command Line Arguments
|
||||||
//////////////////////////////////
|
//////////////////////////////////
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName = "default_read_group", shortName = "dRG", required = false, doc = "If a read has no read group then default to the provided String.")
|
|
||||||
public String DEFAULT_READ_GROUP = null;
|
|
||||||
@Hidden
|
|
||||||
@Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
|
@Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
|
||||||
public String DEFAULT_PLATFORM = null;
|
public String DEFAULT_PLATFORM = null;
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName = "force_read_group", shortName = "fRG", required = false, doc = "If provided, the read group ID of EVERY read will be forced to be the provided String. This is useful to collapse all data into a single read group.")
|
|
||||||
public String FORCE_READ_GROUP = null;
|
|
||||||
@Hidden
|
|
||||||
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
|
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
|
||||||
public String FORCE_PLATFORM = null;
|
public String FORCE_PLATFORM = null;
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName = "window_size_nqs", shortName = "nqs", doc = "The window size used by MinimumNQSCovariate for its calculation", required = false)
|
@Argument(fullName = "window_size_nqs", shortName = "nqs", doc = "The window size used by MinimumNQSCovariate for its calculation", required = false)
|
||||||
public int WINDOW_SIZE = 5;
|
public int WINDOW_SIZE = 5;
|
||||||
|
|
||||||
/**
|
|
||||||
* This window size tells the module in how big of a neighborhood around the current base it should look for the minimum base quality score.
|
|
||||||
*/
|
|
||||||
@Hidden
|
|
||||||
@Argument(fullName = "homopolymer_nback", shortName = "nback", doc = "The number of previous bases to look at in HomopolymerCovariate", required = false)
|
|
||||||
public int HOMOPOLYMER_NBACK = 7;
|
|
||||||
@Hidden
|
|
||||||
@Argument(fullName = "exception_if_no_tile", shortName = "throwTileException", doc = "If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1", required = false)
|
|
||||||
public boolean EXCEPTION_IF_NO_TILE = false;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* CountCovariates and TableRecalibration accept a --solid_recal_mode <MODE> flag which governs how the recalibrator handles the
|
* CountCovariates and TableRecalibration accept a --solid_recal_mode <MODE> flag which governs how the recalibrator handles the
|
||||||
* reads which have had the reference inserted because of color space inconsistencies.
|
* reads which have had the reference inserted because of color space inconsistencies.
|
||||||
|
|
@ -89,4 +73,10 @@ public class RecalibrationArgumentCollection {
|
||||||
@Argument(fullName = "context_size", shortName = "cs", doc = "size of the k-mer context to be used", required = false)
|
@Argument(fullName = "context_size", shortName = "cs", doc = "size of the k-mer context to be used", required = false)
|
||||||
public int CONTEXT_SIZE = 8;
|
public int CONTEXT_SIZE = 8;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This window size tells the module in how big of a neighborhood around the current base it should look for the minimum base quality score.
|
||||||
|
*/
|
||||||
|
@Argument(fullName = "homopolymer_nback", shortName = "nback", doc = "The number of previous bases to look at in HomopolymerCovariate", required = false)
|
||||||
|
public int HOMOPOLYMER_NBACK = 7;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -86,12 +86,12 @@ import java.util.regex.Pattern;
|
||||||
* -o my_reads.recal.bam \
|
* -o my_reads.recal.bam \
|
||||||
* -recalFile my_reads.recal_data.csv
|
* -recalFile my_reads.recal_data.csv
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
|
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
|
||||||
@WalkerName("TableRecalibration")
|
@WalkerName("TableRecalibration")
|
||||||
@Requires({ DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES }) // This walker requires -I input.bam, it also requires -R reference.fasta
|
@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES})
|
||||||
|
// This walker requires -I input.bam, it also requires -R reference.fasta
|
||||||
public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||||
|
|
||||||
public static final String PROGRAM_RECORD_NAME = "GATK TableRecalibration";
|
public static final String PROGRAM_RECORD_NAME = "GATK TableRecalibration";
|
||||||
|
|
@ -99,7 +99,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Shared Arguments
|
// Shared Arguments
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
@ArgumentCollection private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
@ArgumentCollection
|
||||||
|
private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Command Line Arguments
|
// Command Line Arguments
|
||||||
|
|
@ -110,12 +111,12 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
* three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
|
* three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
|
||||||
* and the raw empirical quality score calculated by phred-scaling the mismatch rate.
|
* and the raw empirical quality score calculated by phred-scaling the mismatch rate.
|
||||||
*/
|
*/
|
||||||
@Input(fullName="recal_file", shortName="recalFile", required=true, doc="Filename for the input covariates table recalibration .csv file")
|
@Input(fullName = "recal_file", shortName = "recalFile", required = true, doc = "Filename for the input covariates table recalibration .csv file")
|
||||||
public File RECAL_FILE = null;
|
public File RECAL_FILE = null;
|
||||||
/**
|
/**
|
||||||
* A new bam file in which the quality scores in each read have been recalibrated. The alignment of the reads is left untouched.
|
* A new bam file in which the quality scores in each read have been recalibrated. The alignment of the reads is left untouched.
|
||||||
*/
|
*/
|
||||||
@Output(doc="The output recalibrated BAM file", required=true)
|
@Output(doc = "The output recalibrated BAM file", required = true)
|
||||||
private StingSAMFileWriter OUTPUT_BAM = null;
|
private StingSAMFileWriter OUTPUT_BAM = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -126,7 +127,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
* your Q2 and Q3 bins can be elevated to Q8 or Q10, leading to issues downstream. With the default value of 5, all Q0-Q4 bases
|
* your Q2 and Q3 bins can be elevated to Q8 or Q10, leading to issues downstream. With the default value of 5, all Q0-Q4 bases
|
||||||
* are unmodified during recalibration, so they don't get inappropriately evaluated.
|
* are unmodified during recalibration, so they don't get inappropriately evaluated.
|
||||||
*/
|
*/
|
||||||
@Argument(fullName="preserve_qscores_less_than", shortName="pQ", doc="Bases with quality scores less than this threshold won't be recalibrated. In general it's unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required=false)
|
@Argument(fullName = "preserve_qscores_less_than", shortName = "pQ", doc = "Bases with quality scores less than this threshold won't be recalibrated. In general it's unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required = false)
|
||||||
private int PRESERVE_QSCORES_LESS_THAN = 5;
|
private int PRESERVE_QSCORES_LESS_THAN = 5;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -135,37 +136,36 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
* argument which sets how many unobserved counts to add to every bin. Use --smoothing 0 to turn off all smoothing or, for example,
|
* argument which sets how many unobserved counts to add to every bin. Use --smoothing 0 to turn off all smoothing or, for example,
|
||||||
* --smoothing 15 for a large amount of smoothing.
|
* --smoothing 15 for a large amount of smoothing.
|
||||||
*/
|
*/
|
||||||
@Argument(fullName="smoothing", shortName="sm", required = false, doc="Number of imaginary counts to add to each bin in order to smooth out bins with few data points")
|
@Argument(fullName = "smoothing", shortName = "sm", required = false, doc = "Number of imaginary counts to add to each bin in order to smooth out bins with few data points")
|
||||||
private int SMOOTHING = 1;
|
private int SMOOTHING = 1;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Combinations of covariates in which there are zero mismatches technically have infinite quality. We get around this situation
|
* Combinations of covariates in which there are zero mismatches technically have infinite quality. We get around this situation
|
||||||
* by capping at the specified value. We've found that Q40 is too low when using a more complete database of known variation like dbSNP build 132 or later.
|
* by capping at the specified value. We've found that Q40 is too low when using a more complete database of known variation like dbSNP build 132 or later.
|
||||||
*/
|
*/
|
||||||
@Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores")
|
@Argument(fullName = "max_quality_score", shortName = "maxQ", required = false, doc = "The integer value at which to cap the quality scores")
|
||||||
private int MAX_QUALITY_SCORE = 50;
|
private int MAX_QUALITY_SCORE = 50;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* By default TableRecalibration emits the OQ field -- so you can go back and look at the original quality scores, rerun
|
* By default TableRecalibration emits the OQ field -- so you can go back and look at the original quality scores, rerun
|
||||||
* the system using the OQ flags, etc, on the output BAM files; to turn off emission of the OQ field use this flag.
|
* the system using the OQ flags, etc, on the output BAM files; to turn off emission of the OQ field use this flag.
|
||||||
*/
|
*/
|
||||||
@Argument(fullName="doNotWriteOriginalQuals", shortName="noOQs", required=false, doc="If true, we will not write the original quality (OQ) tag for each read")
|
@Argument(fullName = "doNotWriteOriginalQuals", shortName = "noOQs", required = false, doc = "If true, we will not write the original quality (OQ) tag for each read")
|
||||||
private boolean DO_NOT_WRITE_OQ = false;
|
private boolean DO_NOT_WRITE_OQ = false;
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Debugging-only Arguments
|
// Debugging-only Arguments
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName="no_pg_tag", shortName="noPG", required=false, doc="Don't output the usual PG tag in the recalibrated bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
|
@Argument(fullName = "no_pg_tag", shortName = "noPG", required = false, doc = "Don't output the usual PG tag in the recalibrated bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
|
||||||
private boolean NO_PG_TAG = false;
|
private boolean NO_PG_TAG = false;
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName="fail_with_no_eof_marker", shortName="requireEOF", required=false, doc="If no EOF marker is present in the covariates file, exit the program with an exception.")
|
@Argument(fullName = "fail_with_no_eof_marker", shortName = "requireEOF", required = false, doc = "If no EOF marker is present in the covariates file, exit the program with an exception.")
|
||||||
private boolean REQUIRE_EOF = false;
|
private boolean REQUIRE_EOF = false;
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName="skipUQUpdate", shortName="skipUQUpdate", required=false, doc="If true, we will skip the UQ updating step for each read, speeding up the calculations")
|
@Argument(fullName = "skipUQUpdate", shortName = "skipUQUpdate", required = false, doc = "If true, we will skip the UQ updating step for each read, speeding up the calculations")
|
||||||
private boolean skipUQUpdate = false;
|
private boolean skipUQUpdate = false;
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Private Member Variables
|
// Private Member Variables
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
|
|
@ -195,8 +195,9 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
*/
|
*/
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
|
|
||||||
if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
|
if (RAC.FORCE_PLATFORM != null) {
|
||||||
if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
|
RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM;
|
||||||
|
}
|
||||||
|
|
||||||
// Get a list of all available covariates
|
// Get a list of all available covariates
|
||||||
final List<Class<? extends Covariate>> classes = new PluginManager<Covariate>(Covariate.class).getPlugins();
|
final List<Class<? extends Covariate>> classes = new PluginManager<Covariate>(Covariate.class).getPlugins();
|
||||||
|
|
@ -205,31 +206,33 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
boolean foundAllCovariates = false;
|
boolean foundAllCovariates = false;
|
||||||
|
|
||||||
// Read in the data from the csv file and populate the data map and covariates list
|
// Read in the data from the csv file and populate the data map and covariates list
|
||||||
logger.info( "Reading in the data from input csv file..." );
|
logger.info("Reading in the data from input csv file...");
|
||||||
|
|
||||||
boolean sawEOF = false;
|
boolean sawEOF = false;
|
||||||
try {
|
try {
|
||||||
for ( String line : new XReadLines(RECAL_FILE) ) {
|
for (String line : new XReadLines(RECAL_FILE)) {
|
||||||
lineNumber++;
|
lineNumber++;
|
||||||
if ( EOF_MARKER.equals(line) ) {
|
if (EOF_MARKER.equals(line)) {
|
||||||
sawEOF = true;
|
sawEOF = true;
|
||||||
} else if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches() ) {
|
}
|
||||||
|
else if (COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches()) {
|
||||||
; // Skip over the comment lines, (which start with '#')
|
; // Skip over the comment lines, (which start with '#')
|
||||||
}
|
}
|
||||||
// Read in the covariates that were used from the input file
|
// Read in the covariates that were used from the input file
|
||||||
else if( COVARIATE_PATTERN.matcher(line).matches() ) { // The line string is either specifying a covariate or is giving csv data
|
else if (COVARIATE_PATTERN.matcher(line).matches()) { // The line string is either specifying a covariate or is giving csv data
|
||||||
if( foundAllCovariates ) {
|
if (foundAllCovariates) {
|
||||||
throw new UserException.MalformedFile( RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE );
|
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE);
|
||||||
} else { // Found the covariate list in input file, loop through all of them and instantiate them
|
}
|
||||||
|
else { // Found the covariate list in input file, loop through all of them and instantiate them
|
||||||
String[] vals = line.split(",");
|
String[] vals = line.split(",");
|
||||||
for( int iii = 0; iii < vals.length - 3; iii++ ) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical
|
for (int iii = 0; iii < vals.length - 3; iii++) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical
|
||||||
boolean foundClass = false;
|
boolean foundClass = false;
|
||||||
for( Class<?> covClass : classes ) {
|
for (Class<?> covClass : classes) {
|
||||||
if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) {
|
if ((vals[iii] + "Covariate").equalsIgnoreCase(covClass.getSimpleName())) {
|
||||||
foundClass = true;
|
foundClass = true;
|
||||||
try {
|
try {
|
||||||
Covariate covariate = (Covariate)covClass.newInstance();
|
Covariate covariate = (Covariate) covClass.newInstance();
|
||||||
requestedCovariates.add( covariate );
|
requestedCovariates.add(covariate);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new DynamicClassResolutionException(covClass, e);
|
throw new DynamicClassResolutionException(covClass, e);
|
||||||
}
|
}
|
||||||
|
|
@ -237,107 +240,110 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if( !foundClass ) {
|
if (!foundClass) {
|
||||||
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option." );
|
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else { // Found a line of data
|
}
|
||||||
if( !foundAllCovariates ) {
|
else { // Found a line of data
|
||||||
|
if (!foundAllCovariates) {
|
||||||
foundAllCovariates = true;
|
foundAllCovariates = true;
|
||||||
|
|
||||||
// At this point all the covariates should have been found and initialized
|
// At this point all the covariates should have been found and initialized
|
||||||
if( requestedCovariates.size() < 2 ) {
|
if (requestedCovariates.size() < 2) {
|
||||||
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE );
|
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE);
|
||||||
}
|
}
|
||||||
|
|
||||||
final boolean createCollapsedTables = true;
|
final boolean createCollapsedTables = true;
|
||||||
|
|
||||||
// Initialize any covariate member variables using the shared argument collection
|
// Initialize any covariate member variables using the shared argument collection
|
||||||
for( Covariate cov : requestedCovariates ) {
|
for (Covariate cov : requestedCovariates) {
|
||||||
cov.initialize( RAC );
|
cov.initialize(RAC);
|
||||||
}
|
}
|
||||||
// Initialize the data hashMaps
|
// Initialize the data hashMaps
|
||||||
dataManager = new RecalDataManager( createCollapsedTables, requestedCovariates.size() );
|
dataManager = new RecalDataManager(createCollapsedTables, requestedCovariates.size());
|
||||||
|
|
||||||
}
|
}
|
||||||
addCSVData(RECAL_FILE, line); // Parse the line and add the data to the HashMap
|
addCSVData(RECAL_FILE, line); // Parse the line and add the data to the HashMap
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} catch ( FileNotFoundException e ) {
|
} catch (FileNotFoundException e) {
|
||||||
throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e);
|
throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e);
|
||||||
} catch ( NumberFormatException e ) {
|
} catch (NumberFormatException e) {
|
||||||
throw new UserException.MalformedFile(RECAL_FILE, "Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker.");
|
throw new UserException.MalformedFile(RECAL_FILE, "Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker.");
|
||||||
}
|
}
|
||||||
logger.info( "...done!" );
|
logger.info("...done!");
|
||||||
|
|
||||||
if ( !sawEOF ) {
|
if (!sawEOF) {
|
||||||
final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted or was generated with an old version of the CountCovariates tool.";
|
final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted or was generated with an old version of the CountCovariates tool.";
|
||||||
if ( REQUIRE_EOF )
|
if (REQUIRE_EOF)
|
||||||
throw new UserException.MalformedFile(RECAL_FILE, errorMessage);
|
throw new UserException.MalformedFile(RECAL_FILE, errorMessage);
|
||||||
logger.warn(errorMessage);
|
logger.warn(errorMessage);
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info( "The covariates being used here: " );
|
logger.info("The covariates being used here: ");
|
||||||
for( Covariate cov : requestedCovariates ) {
|
for (Covariate cov : requestedCovariates) {
|
||||||
logger.info( "\t" + cov.getClass().getSimpleName() );
|
logger.info("\t" + cov.getClass().getSimpleName());
|
||||||
}
|
}
|
||||||
|
|
||||||
if( dataManager == null ) {
|
if (dataManager == null) {
|
||||||
throw new UserException.MalformedFile(RECAL_FILE, "Can't initialize the data manager. Perhaps the recal csv file contains no data?");
|
throw new UserException.MalformedFile(RECAL_FILE, "Can't initialize the data manager. Perhaps the recal csv file contains no data?");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create the tables of empirical quality scores that will be used in the sequential calculation
|
// Create the tables of empirical quality scores that will be used in the sequential calculation
|
||||||
logger.info( "Generating tables of empirical qualities for use in sequential calculation..." );
|
logger.info("Generating tables of empirical qualities for use in sequential calculation...");
|
||||||
dataManager.generateEmpiricalQualities( SMOOTHING, MAX_QUALITY_SCORE );
|
dataManager.generateEmpiricalQualities(SMOOTHING, MAX_QUALITY_SCORE);
|
||||||
logger.info( "...done!" );
|
logger.info("...done!");
|
||||||
|
|
||||||
// Take the header of the input SAM file and tweak it by adding in a new programRecord with the version number and list of covariates that were used
|
// Take the header of the input SAM file and tweak it by adding in a new programRecord with the version number and list of covariates that were used
|
||||||
final SAMFileHeader header = getToolkit().getSAMFileHeader().clone();
|
final SAMFileHeader header = getToolkit().getSAMFileHeader().clone();
|
||||||
if( !NO_PG_TAG ) {
|
if (!NO_PG_TAG) {
|
||||||
final SAMProgramRecord programRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME);
|
final SAMProgramRecord programRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME);
|
||||||
final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText");
|
final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText");
|
||||||
try {
|
try {
|
||||||
final String version = headerInfo.getString("org.broadinstitute.sting.gatk.version");
|
final String version = headerInfo.getString("org.broadinstitute.sting.gatk.version");
|
||||||
programRecord.setProgramVersion(version);
|
programRecord.setProgramVersion(version);
|
||||||
} catch (MissingResourceException e) {}
|
} catch (MissingResourceException e) {
|
||||||
|
}
|
||||||
|
|
||||||
StringBuffer sb = new StringBuffer();
|
StringBuffer sb = new StringBuffer();
|
||||||
sb.append(getToolkit().createApproximateCommandLineArgumentString(getToolkit(), this));
|
sb.append(getToolkit().createApproximateCommandLineArgumentString(getToolkit(), this));
|
||||||
sb.append(" Covariates=[");
|
sb.append(" Covariates=[");
|
||||||
for( Covariate cov : requestedCovariates ) {
|
for (Covariate cov : requestedCovariates) {
|
||||||
sb.append(cov.getClass().getSimpleName());
|
sb.append(cov.getClass().getSimpleName());
|
||||||
sb.append(", ");
|
sb.append(", ");
|
||||||
}
|
}
|
||||||
sb.setCharAt(sb.length()-2, ']');
|
sb.setCharAt(sb.length() - 2, ']');
|
||||||
sb.setCharAt(sb.length()-1, ' ');
|
sb.setCharAt(sb.length() - 1, ' ');
|
||||||
programRecord.setCommandLine(sb.toString());
|
programRecord.setCommandLine(sb.toString());
|
||||||
|
|
||||||
List<SAMProgramRecord> oldRecords = header.getProgramRecords();
|
List<SAMProgramRecord> oldRecords = header.getProgramRecords();
|
||||||
List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
|
List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size() + 1);
|
||||||
for ( SAMProgramRecord record : oldRecords ) {
|
for (SAMProgramRecord record : oldRecords) {
|
||||||
if ( !record.getId().startsWith(PROGRAM_RECORD_NAME) )
|
if (!record.getId().startsWith(PROGRAM_RECORD_NAME))
|
||||||
newRecords.add(record);
|
newRecords.add(record);
|
||||||
}
|
}
|
||||||
newRecords.add(programRecord);
|
newRecords.add(programRecord);
|
||||||
header.setProgramRecords(newRecords);
|
header.setProgramRecords(newRecords);
|
||||||
|
|
||||||
// Write out the new header
|
// Write out the new header
|
||||||
OUTPUT_BAM.writeHeader( header );
|
OUTPUT_BAM.writeHeader(header);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* For each covariate read in a value and parse it. Associate those values with the data itself (num observation and num mismatches)
|
* For each covariate read in a value and parse it. Associate those values with the data itself (num observation and num mismatches)
|
||||||
|
*
|
||||||
* @param line A line of CSV data read from the recalibration table data file
|
* @param line A line of CSV data read from the recalibration table data file
|
||||||
*/
|
*/
|
||||||
private void addCSVData(final File file, final String line) {
|
private void addCSVData(final File file, final String line) {
|
||||||
final String[] vals = line.split(",");
|
final String[] vals = line.split(",");
|
||||||
|
|
||||||
// Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
|
// Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
|
||||||
if( vals.length != requestedCovariates.size() + 3 ) { // +3 because of nObservations, nMismatch, and Qempirical
|
if (vals.length != requestedCovariates.size() + 3) { // +3 because of nObservations, nMismatch, and Qempirical
|
||||||
throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with too many fields: " + line +
|
throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with too many fields: " + line +
|
||||||
" --Perhaps the read group string contains a comma and isn't being parsed correctly.");
|
" --Perhaps the read group string contains a comma and isn't being parsed correctly.");
|
||||||
}
|
}
|
||||||
|
|
@ -345,15 +351,15 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
final Object[] key = new Object[requestedCovariates.size()];
|
final Object[] key = new Object[requestedCovariates.size()];
|
||||||
Covariate cov;
|
Covariate cov;
|
||||||
int iii;
|
int iii;
|
||||||
for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
|
for (iii = 0; iii < requestedCovariates.size(); iii++) {
|
||||||
cov = requestedCovariates.get( iii );
|
cov = requestedCovariates.get(iii);
|
||||||
key[iii] = cov.getValue( vals[iii] );
|
key[iii] = cov.getValue(vals[iii]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a new datum using the number of observations, number of mismatches, and reported quality score
|
// Create a new datum using the number of observations, number of mismatches, and reported quality score
|
||||||
final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
|
final RecalDatum datum = new RecalDatum(Long.parseLong(vals[iii]), Long.parseLong(vals[iii + 1]), Double.parseDouble(vals[1]), 0.0);
|
||||||
// Add that datum to all the collapsed tables which will be used in the sequential calculation
|
// Add that datum to all the collapsed tables which will be used in the sequential calculation
|
||||||
dataManager.addToAllTables( key, datum, PRESERVE_QSCORES_LESS_THAN );
|
dataManager.addToAllTables(key, datum, PRESERVE_QSCORES_LESS_THAN);
|
||||||
}
|
}
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
|
|
@ -366,64 +372,63 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
* For each base in the read calculate a new recalibrated quality score and replace the quality scores in the read
|
* For each base in the read calculate a new recalibrated quality score and replace the quality scores in the read
|
||||||
*
|
*
|
||||||
* @param refBases References bases over the length of the read
|
* @param refBases References bases over the length of the read
|
||||||
* @param read The read to be recalibrated
|
* @param read The read to be recalibrated
|
||||||
* @return The read with quality scores replaced
|
* @return The read with quality scores replaced
|
||||||
*/
|
*/
|
||||||
public SAMRecord map( ReferenceContext refBases, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
public SAMRecord map(ReferenceContext refBases, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||||
|
|
||||||
if( read.getReadLength() == 0 ) { // Some reads have '*' as the SEQ field and samtools returns length zero. We don't touch these reads.
|
if (read.getReadLength() == 0) { // Some reads have '*' as the SEQ field and samtools returns length zero. We don't touch these reads.
|
||||||
return read;
|
return read;
|
||||||
}
|
}
|
||||||
|
|
||||||
RecalDataManager.parseSAMRecord( read, RAC );
|
RecalDataManager.parseSAMRecord(read, RAC);
|
||||||
|
|
||||||
byte[] originalQuals = read.getBaseQualities();
|
byte[] originalQuals = read.getBaseQualities();
|
||||||
final byte[] recalQuals = originalQuals.clone();
|
final byte[] recalQuals = originalQuals.clone();
|
||||||
|
|
||||||
final String platform = read.getReadGroup().getPlatform();
|
final String platform = read.getReadGroup().getPlatform();
|
||||||
if( platform.toUpperCase().contains("SOLID") && !(RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING) ) {
|
if (platform.toUpperCase().contains("SOLID") && !(RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING)) {
|
||||||
if( !(RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) ) {
|
if (!(RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION)) {
|
||||||
final boolean badColor = RecalDataManager.checkNoCallColorSpace( read );
|
final boolean badColor = RecalDataManager.checkNoCallColorSpace(read);
|
||||||
if( badColor ) {
|
if (badColor) {
|
||||||
numReadsWithMalformedColorSpace++;
|
numReadsWithMalformedColorSpace++;
|
||||||
if( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED ) {
|
if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED) {
|
||||||
return read; // can't recalibrate a SOLiD read with no calls in the color space, and the user wants to skip over them
|
return read; // can't recalibrate a SOLiD read with no calls in the color space, and the user wants to skip over them
|
||||||
} else if ( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ ) {
|
}
|
||||||
|
else if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ) {
|
||||||
read.setReadFailsVendorQualityCheckFlag(true);
|
read.setReadFailsVendorQualityCheckFlag(true);
|
||||||
return read;
|
return read;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
originalQuals = RecalDataManager.calcColorSpace( read, originalQuals, RAC.SOLID_RECAL_MODE, refBases == null ? null : refBases.getBases() );
|
originalQuals = RecalDataManager.calcColorSpace(read, originalQuals, RAC.SOLID_RECAL_MODE, refBases == null ? null : refBases.getBases());
|
||||||
}
|
}
|
||||||
|
|
||||||
//compute all covariate values for this read
|
//compute all covariate values for this read
|
||||||
final Comparable[][] covariateValues_offset_x_covar =
|
final Comparable[][] covariateValues_offset_x_covar = RecalDataManager.computeCovariates(read, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION);
|
||||||
RecalDataManager.computeCovariates(read, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION);
|
|
||||||
|
|
||||||
// For each base in the read
|
// For each base in the read
|
||||||
for( int offset = 0; offset < read.getReadLength(); offset++ ) {
|
for (int offset = 0; offset < read.getReadLength(); offset++) {
|
||||||
|
|
||||||
final Object[] fullCovariateKey = covariateValues_offset_x_covar[offset];
|
final Object[] fullCovariateKey = covariateValues_offset_x_covar[offset];
|
||||||
|
|
||||||
Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
|
Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
|
||||||
if(qualityScore == null)
|
if (qualityScore == null) {
|
||||||
{
|
qualityScore = performSequentialQualityCalculation(fullCovariateKey);
|
||||||
qualityScore = performSequentialQualityCalculation( fullCovariateKey );
|
|
||||||
qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey);
|
qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey);
|
||||||
}
|
}
|
||||||
|
|
||||||
recalQuals[offset] = qualityScore;
|
recalQuals[offset] = qualityScore;
|
||||||
}
|
}
|
||||||
|
|
||||||
preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low
|
preserveQScores(originalQuals, recalQuals); // Overwrite the work done if original quality score is too low
|
||||||
|
|
||||||
read.setBaseQualities( recalQuals ); // Overwrite old qualities with new recalibrated qualities
|
read.setBaseQualities(recalQuals); // Overwrite old qualities with new recalibrated qualities
|
||||||
if ( !DO_NOT_WRITE_OQ && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) == null ) { // Save the old qualities if the tag isn't already taken in the read
|
if (!DO_NOT_WRITE_OQ && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) == null) { // Save the old qualities if the tag isn't already taken in the read
|
||||||
read.setAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, SAMUtils.phredToFastq(originalQuals));
|
read.setAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, SAMUtils.phredToFastq(originalQuals));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (! skipUQUpdate && refBases != null && read.getAttribute(SAMTag.UQ.name()) != null) {
|
if (!skipUQUpdate && refBases != null && read.getAttribute(SAMTag.UQ.name()) != null) {
|
||||||
read.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(read, refBases.getBases(), read.getAlignmentStart() - 1, false));
|
read.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(read, refBases.getBases(), read.getAlignmentStart() - 1, false));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -440,27 +445,28 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
*
|
*
|
||||||
* Given the full recalibration table, we perform the following preprocessing steps:
|
* Given the full recalibration table, we perform the following preprocessing steps:
|
||||||
*
|
*
|
||||||
* - calculate the global quality score shift across all data [DeltaQ]
|
* - calculate the global quality score shift across all data [DeltaQ]
|
||||||
* - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
|
* - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
|
||||||
* -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
|
* -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
|
||||||
* - The final shift equation is:
|
* - The final shift equation is:
|
||||||
|
*
|
||||||
|
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
|
||||||
*
|
*
|
||||||
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
|
|
||||||
* @param key The list of Comparables that were calculated from the covariates
|
* @param key The list of Comparables that were calculated from the covariates
|
||||||
* @return A recalibrated quality score as a byte
|
* @return A recalibrated quality score as a byte
|
||||||
*/
|
*/
|
||||||
private byte performSequentialQualityCalculation( final Object... key ) {
|
private byte performSequentialQualityCalculation(final Object... key) {
|
||||||
|
|
||||||
final byte qualFromRead = (byte)Integer.parseInt(key[1].toString());
|
final byte qualFromRead = (byte) Integer.parseInt(key[1].toString());
|
||||||
final Object[] readGroupCollapsedKey = new Object[1];
|
final Object[] readGroupCollapsedKey = new Object[1];
|
||||||
final Object[] qualityScoreCollapsedKey = new Object[2];
|
final Object[] qualityScoreCollapsedKey = new Object[2];
|
||||||
final Object[] covariateCollapsedKey = new Object[3];
|
final Object[] covariateCollapsedKey = new Object[3];
|
||||||
|
|
||||||
// The global quality shift (over the read group only)
|
// The global quality shift (over the read group only)
|
||||||
readGroupCollapsedKey[0] = key[0];
|
readGroupCollapsedKey[0] = key[0];
|
||||||
final RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0).get( readGroupCollapsedKey ));
|
final RecalDatum globalRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(0).get(readGroupCollapsedKey));
|
||||||
double globalDeltaQ = 0.0;
|
double globalDeltaQ = 0.0;
|
||||||
if( globalRecalDatum != null ) {
|
if (globalRecalDatum != null) {
|
||||||
final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
|
final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
|
||||||
final double aggregrateQReported = globalRecalDatum.getEstimatedQReported();
|
final double aggregrateQReported = globalRecalDatum.getEstimatedQReported();
|
||||||
globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported;
|
globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported;
|
||||||
|
|
@ -469,9 +475,9 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
// The shift in quality between reported and empirical
|
// The shift in quality between reported and empirical
|
||||||
qualityScoreCollapsedKey[0] = key[0];
|
qualityScoreCollapsedKey[0] = key[0];
|
||||||
qualityScoreCollapsedKey[1] = key[1];
|
qualityScoreCollapsedKey[1] = key[1];
|
||||||
final RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1).get( qualityScoreCollapsedKey ));
|
final RecalDatum qReportedRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(1).get(qualityScoreCollapsedKey));
|
||||||
double deltaQReported = 0.0;
|
double deltaQReported = 0.0;
|
||||||
if( qReportedRecalDatum != null ) {
|
if (qReportedRecalDatum != null) {
|
||||||
final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
|
final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
|
||||||
deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
|
deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
|
||||||
}
|
}
|
||||||
|
|
@ -481,17 +487,17 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
double deltaQCovariateEmpirical;
|
double deltaQCovariateEmpirical;
|
||||||
covariateCollapsedKey[0] = key[0];
|
covariateCollapsedKey[0] = key[0];
|
||||||
covariateCollapsedKey[1] = key[1];
|
covariateCollapsedKey[1] = key[1];
|
||||||
for( int iii = 2; iii < key.length; iii++ ) {
|
for (int iii = 2; iii < key.length; iii++) {
|
||||||
covariateCollapsedKey[2] = key[iii]; // The given covariate
|
covariateCollapsedKey[2] = key[iii]; // The given covariate
|
||||||
final RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii).get( covariateCollapsedKey ));
|
final RecalDatum covariateRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(iii).get(covariateCollapsedKey));
|
||||||
if( covariateRecalDatum != null ) {
|
if (covariateRecalDatum != null) {
|
||||||
deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
|
deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
|
||||||
deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
|
deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
|
final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
|
||||||
return QualityUtils.boundQual( (int)Math.round(newQuality), (byte)MAX_QUALITY_SCORE );
|
return QualityUtils.boundQual((int) Math.round(newQuality), (byte) MAX_QUALITY_SCORE);
|
||||||
|
|
||||||
// Verbose printouts used to validate with old recalibrator
|
// Verbose printouts used to validate with old recalibrator
|
||||||
//if(key.contains(null)) {
|
//if(key.contains(null)) {
|
||||||
|
|
@ -508,12 +514,13 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Loop over the list of qualities and overwrite the newly recalibrated score to be the original score if it was less than some threshold
|
* Loop over the list of qualities and overwrite the newly recalibrated score to be the original score if it was less than some threshold
|
||||||
|
*
|
||||||
* @param originalQuals The list of original base quality scores
|
* @param originalQuals The list of original base quality scores
|
||||||
* @param recalQuals A list of the new recalibrated quality scores
|
* @param recalQuals A list of the new recalibrated quality scores
|
||||||
*/
|
*/
|
||||||
private void preserveQScores( final byte[] originalQuals, final byte[] recalQuals ) {
|
private void preserveQScores(final byte[] originalQuals, final byte[] recalQuals) {
|
||||||
for( int iii = 0; iii < recalQuals.length; iii++ ) {
|
for (int iii = 0; iii < recalQuals.length; iii++) {
|
||||||
if( originalQuals[iii] < PRESERVE_QSCORES_LESS_THAN ) {
|
if (originalQuals[iii] < PRESERVE_QSCORES_LESS_THAN) {
|
||||||
recalQuals[iii] = originalQuals[iii];
|
recalQuals[iii] = originalQuals[iii];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -527,6 +534,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Start the reduce with a handle to the output bam file
|
* Start the reduce with a handle to the output bam file
|
||||||
|
*
|
||||||
* @return A FileWriter pointing to a new bam file
|
* @return A FileWriter pointing to a new bam file
|
||||||
*/
|
*/
|
||||||
public SAMFileWriter reduceInit() {
|
public SAMFileWriter reduceInit() {
|
||||||
|
|
@ -535,12 +543,13 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Output each read to disk
|
* Output each read to disk
|
||||||
* @param read The read to output
|
*
|
||||||
|
* @param read The read to output
|
||||||
* @param output The FileWriter to write the read to
|
* @param output The FileWriter to write the read to
|
||||||
* @return The FileWriter
|
* @return The FileWriter
|
||||||
*/
|
*/
|
||||||
public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) {
|
public SAMFileWriter reduce(SAMRecord read, SAMFileWriter output) {
|
||||||
if( output != null ) {
|
if (output != null) {
|
||||||
output.addAlignment(read);
|
output.addAlignment(read);
|
||||||
}
|
}
|
||||||
return output;
|
return output;
|
||||||
|
|
@ -548,20 +557,22 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Do nothing
|
* Do nothing
|
||||||
|
*
|
||||||
* @param output The SAMFileWriter that outputs the bam file
|
* @param output The SAMFileWriter that outputs the bam file
|
||||||
*/
|
*/
|
||||||
public void onTraversalDone(SAMFileWriter output) {
|
public void onTraversalDone(SAMFileWriter output) {
|
||||||
if( numReadsWithMalformedColorSpace != 0 ) {
|
if (numReadsWithMalformedColorSpace != 0) {
|
||||||
if( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED ) {
|
if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED) {
|
||||||
Utils.warnUser("Discovered " + numReadsWithMalformedColorSpace + " SOLiD reads with no calls in the color space. Unfortunately these reads cannot be recalibrated with this recalibration algorithm " +
|
Utils.warnUser("Discovered " + numReadsWithMalformedColorSpace + " SOLiD reads with no calls in the color space. Unfortunately these reads cannot be recalibrated with this recalibration algorithm " +
|
||||||
"because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " +
|
"because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " +
|
||||||
"for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " +
|
"for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " +
|
||||||
"These reads remain in the output bam file but haven't been corrected for reference bias. !!! USE AT YOUR OWN RISK !!!");
|
"These reads remain in the output bam file but haven't been corrected for reference bias. !!! USE AT YOUR OWN RISK !!!");
|
||||||
} else if ( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ ) {
|
}
|
||||||
|
else if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ) {
|
||||||
Utils.warnUser("Discovered " + numReadsWithMalformedColorSpace + " SOLiD reads with no calls in the color space. Unfortunately these reads cannot be recalibrated with this recalibration algorithm " +
|
Utils.warnUser("Discovered " + numReadsWithMalformedColorSpace + " SOLiD reads with no calls in the color space. Unfortunately these reads cannot be recalibrated with this recalibration algorithm " +
|
||||||
"because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " +
|
"because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " +
|
||||||
"for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " +
|
"for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " +
|
||||||
"These reads were completely removed from the output bam file.");
|
"These reads were completely removed from the output bam file.");
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue