The refactored CountCovariates now hashes the read object into a HashMap which holds all the properties the covariates pull out of the read over and over again, such as the read group string, the bases string and its complement string, quality scores, etc. This results in a big speedup. CountCovariatesRefactored is now just slightly slower than CountCovariates (perhaps 1.07x according to my latest time trial). Thanks to Alec for suggesting IdentityHashMap. CycleCovariate now warns the user that it is defaulting to the Solexa definition of cycle when the platform string pulled out of the read is unrecognized, instead of halting with an Exception.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2108 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2009-11-21 20:38:17 +00:00
parent 27122f7f97
commit 98f921fe24
12 changed files with 181 additions and 125 deletions

View File

@ -38,7 +38,7 @@ import net.sf.samtools.SAMRecord;
*/
public interface Covariate {
public Comparable getValue(SAMRecord read, int offset, String readGroup, String platform, byte[] quals, byte[] bases); // used to pick out the value from attributes of the read
public Comparable getValue(ReadHashDatum readDatum, int offset); // used to pick out the value from attributes of the read
public Comparable getValue(String str); // used to get value from input file
public int estimatedNumberOfBins(); // used to estimate the amount space required for the HashMap
}

View File

@ -88,8 +88,9 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
private ArrayList<Covariate> requestedCovariates; // A list to hold the covariate objects that were requested
//private HashMap<SAMRecord, String> readGroupHashMap; // A hash map that hashes the read object itself into the read group name
// This is done for optimization purposes because pulling the read group out of the SAMRecord is expensive
private IdentityHashMap<SAMRecord, ReadHashDatum> readDatumHashMap; // A hash map that hashes the read object itself into properties commonly pulled out of the read. Done for optimization purposes.
private int sizeOfReadDatumHashMap = 0;
private long countedSites = 0; // Number of loci used in the calculations, used for reporting in the output file
private long countedBases = 0; // Number of bases used in the calculations, used for reporting in the output file
private long skippedSites = 0; // Number of loci skipped because it was a dbSNP site, used for reporting in the output file
@ -204,7 +205,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
if(estimatedCapacity > 300 * 40 * 200 * 16) { estimatedCapacity = 300 * 40 * 200 * 16; } // Don't want to crash with out of heap space exception
dataManager = new RecalDataManager( estimatedCapacity );
//readGroupHashMap = new HashMap<SAMRecord, String>( 50000000, 0.97f );
readDatumHashMap = new IdentityHashMap<SAMRecord, ReadHashDatum>();
}
@ -250,73 +251,93 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
byte prevBase;
String platform;
byte[] colorSpaceQuals;
ReadHashDatum readDatum;
boolean isNegStrand;
int mappingQuality;
int length;
final int numReads = reads.size();
// For each read at this locus
for( int iii = 0; iii < numReads; iii++ ) {
read = reads.get(iii);
offset = offsets.get(iii); // offset is zero based so quals[offset] and bases[offset] is correct
//readGroupId = readGroupHashMap.get( read );
//if( readGroupId == null ) { // read is not in the hashmap so add it
// readGroupId = read.getReadGroup().getReadGroupId();
// readGroupHashMap.put( read, readGroupId );
//}
if( read.getMappingQuality() > 0 ) { // BUGBUG: turn this into a read filter after passing the old integration tests
readDatum = readDatumHashMap.get( read );
if( readDatum == null ) {
offset = offsets.get(iii); // offset is zero based so quals[offset] and bases[offset] is correct
// If the HashMap of read objects has grown too large then throw out the (mostly stale) reads
if( sizeOfReadDatumHashMap > 100000 ) { //BUGBUG: Can I make this number larger?
readDatumHashMap.clear();
sizeOfReadDatumHashMap = 0;
}
// skip first and last base because there is no dinuc, this is mainly done for speed so we don't have to check cases
if( offset > 0 && offset < read.getReadLength() - 1 ) {
quals = read.getBaseQualities();
// Check if we need to use the original quality scores instead
if ( USE_ORIGINAL_QUALS && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
Object obj = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG);
if ( obj instanceof String )
quals = QualityUtils.fastqToPhred((String)obj);
else {
throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
}
// This read isn't in the hashMap yet so fill out the datum and add it to the map so that we never have to do the work again
quals = read.getBaseQualities();
// Check if we need to use the original quality scores instead
if ( USE_ORIGINAL_QUALS && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
Object obj = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG);
if ( obj instanceof String )
quals = QualityUtils.fastqToPhred((String)obj);
else {
throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
}
}
bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A'. Is this true?
isNegStrand = read.getReadNegativeStrandFlag();
final SAMReadGroupRecord readGroup = read.getReadGroup();
readGroupId = readGroup.getReadGroupId();
platform = readGroup.getPlatform();
mappingQuality = read.getMappingQuality();
length = bases.length;
if( USE_SLX_PLATFORM ) {
platform = "ILLUMINA";
}
// skip if base quality is zero
if( quals[offset] > 0 ) {
bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A'
refBase = (byte)ref.getBase();
prevBase = bases[offset-1];
readDatum = new ReadHashDatum( readGroupId, platform, quals, bases, isNegStrand, mappingQuality, length );
readDatumHashMap.put( read, readDatum );
sizeOfReadDatumHashMap++;
}
// Get the complement base strand if we are a negative strand read
if( read.getReadNegativeStrandFlag() ) {
bases = BaseUtils.simpleComplement( bases ); // this is an expensive call
refBase = (byte)BaseUtils.simpleComplement( ref.getBase() );
prevBase = bases[offset+1];
}
// skip if this base or the previous one was an 'N' or etc.
if( BaseUtils.isRegularBase( (char)prevBase ) && BaseUtils.isRegularBase( (char)bases[offset] ) ) {
if( readDatum.mappingQuality > 0 ) { // BUGBUG: turn this into a read filter after passing the old integration tests
final SAMReadGroupRecord readGroup = read.getReadGroup();
readGroupId = readGroup.getReadGroupId();
platform = readGroup.getPlatform();
if( USE_SLX_PLATFORM ) {
platform = "ILLUMINA";
// skip first and last base because there is no dinuc
// BUGBUG: Technically we only have to skip the first base on forward reads and the last base on negative strand reads. Change after passing old integration tests.
if( offset > 0 ) {
if( offset < readDatum.length - 1 ) {
// skip if base quality is zero
if( readDatum.quals[offset] > 0 ) {
refBase = (byte)ref.getBase();
prevBase = readDatum.bases[offset-1];
// Get the complement base strand if we are a negative strand read
if( readDatum.isNegStrand ) {
prevBase = readDatum.bases[offset+1];
}
// SOLID bams insert the reference base into the read if the color space quality is zero, so skip over them
colorSpaceQuals = null;
if( platform.equalsIgnoreCase("SOLID") ) {
colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG));
}
if( colorSpaceQuals == null || colorSpaceQuals[offset] > 0 ) //BUGBUG: This isn't exactly correct yet
{
updateDataFromRead( read, offset, readGroupId, platform, quals, bases, refBase );
}
} else {
if( VALIDATE_OLD_RECALIBRATOR ) {
countedBases++; // replicating a small bug in the old recalibrator
// skip if this base or the previous one was an 'N' or etc.
if( BaseUtils.isRegularBase( (char)prevBase ) && BaseUtils.isRegularBase( (char)(readDatum.bases[offset]) ) ) {
// SOLID bams insert the reference base into the read if the color space quality is zero, so skip over them
colorSpaceQuals = null;
if( readDatum.platform.equalsIgnoreCase("SOLID") ) {
colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG));
}
if( colorSpaceQuals == null || colorSpaceQuals[offset] > 0 ) //BUGBUG: This isn't exactly correct yet
{
// This base finally passed all the checks, so add it to the big hashmap
updateDataFromRead( readDatum, offset, refBase );
}
} else {
if( VALIDATE_OLD_RECALIBRATOR ) {
countedBases++; // replicating a small bug in the old recalibrator
}
}
}
} else { // at the last base in the read so we can remove it from our IdentityHashMap
readDatumHashMap.remove( read );
sizeOfReadDatumHashMap--;
}
}
}
@ -337,22 +358,17 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
* adding one to the number of observations and potentially one to the number of mismatches
* Lots of things are passed as parameters to this method as a strategy for optimizing the covariate.getValue calls
* because pulling things out of the SAMRecord is an expensive operation.
* @param read The read
* @param readDatum The ReadHashDatum holding all the important properties of this read
* @param offset The offset in the read for this locus
* @param readGroup The read group the read is in
* @param platform The String that has the platform this read came from: Illumina, 454, or solid
* @param quals List of base quality scores
* @param bases The bases which make up the read
* @param refBase The reference base at this locus
*/
private void updateDataFromRead(final SAMRecord read, final int offset, final String readGroup, final String platform,
final byte[] quals, final byte[] bases, final byte refBase) {
private void updateDataFromRead(final ReadHashDatum readDatum, final int offset, final byte refBase) {
List<Comparable> key = new ArrayList<Comparable>();
// Loop through the list of requested covariates and pick out the value from the read, offset, and reference
for( Covariate covariate : requestedCovariates ) {
key.add( covariate.getValue( read, offset, readGroup, platform, quals, bases ) );
key.add( covariate.getValue( readDatum, offset ) );
}
// Using the list of covariate values as a key, pick out the RecalDatum from the data HashMap
@ -367,7 +383,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
}
// Need the bases to determine whether or not we have a mismatch
byte base = bases[offset];
byte base = readDatum.bases[offset];
// Add one to the number of observations and potentially one to the number of mismatches
datum.increment( (char)base, (char)refBase ); // dangerous: if you don't cast to char than the bytes default to the (long, long) version of the increment method which is really bad
@ -408,9 +424,9 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
* @param recalTableStream The PrintStream to write out to
*/
public void onTraversalDone( PrintStream recalTableStream ) {
out.print( "Writing raw recalibration data..." );
logger.info( "Writing raw recalibration data..." );
outputToCSV( recalTableStream );
out.println( "...done!" );
logger.info( "...done!" );
recalTableStream.close();
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import net.sf.samtools.SAMRecord;
@ -44,37 +45,42 @@ import net.sf.samtools.SAMRecord;
public class CycleCovariate implements Covariate {
private static boolean warnedUserNoPlatform = false;
public CycleCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
}
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
final byte[] quals, final byte[] bases) {
if( platform.equalsIgnoreCase( "ILLUMINA" ) ) {
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
if( readDatum.platform.equalsIgnoreCase( "ILLUMINA" ) ) {
int cycle = offset;
if( read.getReadNegativeStrandFlag() ) {
cycle = bases.length - (offset + 1);
if( readDatum.isNegStrand ) {
cycle = readDatum.bases.length - (offset + 1);
}
return cycle;
} else if( platform.contains( "454" ) ) { // some bams have "LS454" and others have just "454"
} else if( readDatum.platform.contains( "454" ) ) { // some bams have "LS454" and others have just "454"
int cycle = 0;
byte prevBase = bases[0];
byte prevBase = readDatum.bases[0];
for( int iii = 1; iii <= offset; iii++ ) {
if(bases[iii] != prevBase) { // this base doesn't match the previous one so it is a new cycle
if(readDatum.bases[iii] != prevBase) { // this base doesn't match the previous one so it is a new cycle
cycle++;
prevBase = bases[iii];
prevBase = readDatum.bases[iii];
}
}
return cycle;
} else if( platform.equalsIgnoreCase( "SOLID" ) ) {
} else if( readDatum.platform.equalsIgnoreCase( "SOLID" ) ) {
// the ligation cycle according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
return offset / 5; // integer division
} else {
throw new StingException( "Platform in read (" + platform + ") is not supported in CycleCovariate. Read = " + read );
} else { // platform is unrecognized so revert to Illumina definition of cycle but warn the user
if( !warnedUserNoPlatform ) {
Utils.warnUser( "Platform (" + readDatum.platform + ") unrecognized. Reverting to Illumina definition of machine cycle." );
warnedUserNoPlatform = true;
}
return PositionCovariate.revertToPositionAsCycle( readDatum, offset );
}
}
public final Comparable getValue(final String str) {
public final Comparable getValue( final String str ) {
return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing
}

View File

@ -4,6 +4,8 @@ import net.sf.samtools.SAMRecord;
import java.util.HashMap;
import org.broadinstitute.sting.utils.BaseUtils;
/*
* Copyright (c) 2009 The Broad Institute
*
@ -54,16 +56,17 @@ public class DinucCovariate implements Covariate {
}
}
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
final byte[] quals, final byte[] bases) {
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
byte base = bases[offset];
byte base;
byte prevBase;
// If this is a negative strand read then we need to reverse the direction for our previous base
if( read.getReadNegativeStrandFlag() ) {
prevBase = bases[offset + 1];
if( readDatum.isNegStrand ) {
base = (byte)BaseUtils.simpleComplement( (char)readDatum.bases[offset] );
prevBase = (byte)BaseUtils.simpleComplement( (char)readDatum.bases[offset + 1] );
} else {
prevBase = bases[offset - 1];
base = readDatum.bases[offset];
prevBase = readDatum.bases[offset - 1];
}
//char[] charArray = {(char)prevBase, (char)base};
//return new String( charArray ); // This is an expensive call
@ -71,7 +74,7 @@ public class DinucCovariate implements Covariate {
//return String.format("%c%c", prevBase, base); // This return statement is too slow
}
public final Comparable getValue(final String str) {
public final Comparable getValue( final String str ) {
//return str;
return dinucHashMap.get( Dinuc.hashBytes( (byte)str.charAt(0), (byte)str.charAt(1) ) );
}

View File

@ -40,13 +40,12 @@ public class MappingQualityCovariate implements Covariate {
public MappingQualityCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
}
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
final byte[] quals, final byte[] bases) {
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
return read.getMappingQuality();
return readDatum.mappingQuality;
}
public final Comparable getValue(final String str) {
public final Comparable getValue( final String str ) {
return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing
}

View File

@ -48,22 +48,21 @@ public class MinimumNQSCovariate implements Covariate {
windowReach = windowSize / 2; // integer division
}
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
final byte[] quals, final byte[] bases) {
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
// Loop over the list of base quality scores in the window and find the minimum
int minQual = quals[offset];
int minQual = readDatum.quals[offset];
int minIndex = Math.max(offset - windowReach, 0);
int maxIndex = Math.min(offset + windowReach, quals.length - 1);
int maxIndex = Math.min(offset + windowReach, readDatum.quals.length - 1);
for ( int iii = minIndex; iii < maxIndex; iii++ ) {
if( quals[iii] < minQual ) {
minQual = quals[iii];
if( readDatum.quals[iii] < minQual ) {
minQual = readDatum.quals[iii];
}
}
return minQual;
}
public final Comparable getValue(final String str) {
public final Comparable getValue( final String str ) {
return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.Utils;
/*
* Copyright (c) 2009 The Broad Institute
@ -41,16 +42,23 @@ public class PositionCovariate implements Covariate {
public PositionCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
}
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
final byte[] quals, final byte[] bases) {
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
int cycle = offset;
if( read.getReadNegativeStrandFlag() ) {
cycle = bases.length - (offset + 1);
if( readDatum.isNegStrand ) {
cycle = readDatum.bases.length - (offset + 1);
}
return cycle;
}
public final Comparable getValue(final String str) {
public static Comparable revertToPositionAsCycle( final ReadHashDatum readDatum, final int offset ) { // called from CycleCovariate if platform was unrecognized
int cycle = offset;
if( readDatum.isNegStrand ) {
cycle = readDatum.bases.length - (offset + 1);
}
return cycle;
}
public final Comparable getValue( final String str ) {
return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing
}

View File

@ -44,9 +44,8 @@ public class PrimerRoundCovariate implements Covariate {
public PrimerRoundCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
}
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
final byte[] quals, final byte[] bases) {
if( platform.equalsIgnoreCase( "SOLID" ) ) {
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
if( readDatum.platform.equalsIgnoreCase( "SOLID" ) ) {
return offset % 5; // the primer round according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
} else {
return 1; // nothing to do here because it is always the same
@ -54,7 +53,7 @@ public class PrimerRoundCovariate implements Covariate {
}
public final Comparable getValue(final String str) {
public final Comparable getValue( final String str ) {
return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing
}

View File

@ -40,13 +40,12 @@ public class QualityScoreCovariate implements Covariate {
public QualityScoreCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
}
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
final byte[] quals, final byte[] bases) {
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
return (int)quals[offset];
return (int)(readDatum.quals[offset]);
}
public final Comparable getValue(final String str) {
public final Comparable getValue( final String str ) {
return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing
}

View File

@ -40,12 +40,11 @@ public class ReadGroupCovariate implements Covariate{
public ReadGroupCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
}
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
final byte[] quals, final byte[] bases) {
return readGroup;
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
return readDatum.readGroup;
}
public final Comparable getValue(final String str) {
public final Comparable getValue( final String str ) {
return str;
}

View File

@ -0,0 +1,26 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
/**
 * Plain value holder bundling the per-read properties that are looked up
 * repeatedly (read group, platform, qualities, bases, strand, mapping
 * quality and length) so they only have to be extracted from a read once.
 *
 * All fields are public and mutable. The byte arrays are stored by
 * reference; no defensive copy is made.
 *
 * User: rpoplin
 * Date: Nov 14, 2009
 */
public class ReadHashDatum {

    /** Read group value supplied at construction. */
    public String readGroup;
    /** Platform string supplied at construction. */
    public String platform;
    /** Quality scores, one byte per entry; shared with the caller's array. */
    public byte[] quals;
    /** Bases, one byte per entry; shared with the caller's array. */
    public byte[] bases;
    /** Negative-strand flag supplied at construction. */
    public boolean isNegStrand;
    /** Mapping quality supplied at construction. */
    public int mappingQuality;
    /** Length value supplied at construction; it is stored as given, not derived from the arrays. */
    public int length;

    /**
     * Stores every argument in the field of the same name, without validation or copying.
     *
     * @param readGroup      value for {@link #readGroup}
     * @param platform       value for {@link #platform}
     * @param quals          value for {@link #quals} (kept by reference)
     * @param bases          value for {@link #bases} (kept by reference)
     * @param isNegStrand    value for {@link #isNegStrand}
     * @param mappingQuality value for {@link #mappingQuality}
     * @param length         value for {@link #length}
     */
    public ReadHashDatum(final String readGroup, final String platform, final byte[] quals, final byte[] bases,
                         final boolean isNegStrand, final int mappingQuality, final int length) {
        this.readGroup = readGroup;
        this.platform = platform;
        this.quals = quals;
        this.bases = bases;
        this.isNegStrand = isNegStrand;
        this.mappingQuality = mappingQuality;
        this.length = length;
    }
}

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.gatk.walkers.Requires;
@ -126,7 +127,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
}
}
if( foundDBSNP ) {
Utils.warnUser("A dbSNP rod file was specified but this walker doesn't make use of it.");
Utils.warnUser("A dbSNP rod file was specified but TableRecalibrationWalker doesn't make use of it.");
}
// Read in the covariates that were used from the input file
@ -193,7 +194,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
} catch ( FileNotFoundException e ) {
Utils.scareUser("Can not find input file: " + RECAL_FILE);
} catch ( NumberFormatException e ) {
throw new StingException("Error parsing recalibration data at line " + lineNumber + ". Was your table generated by CountCovariatesRefactored?");
throw new StingException("Error parsing recalibration data at line " + lineNumber + ". Perhaps your table generated by an older version of CovariateCounterWalker.");
}
logger.info( "...done!" );
@ -265,30 +266,31 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
}
byte[] recalQuals = originalQuals.clone();
// These calls are expensive so only do them once for each read
String readGroup = read.getReadGroup().getReadGroupId();
String platform = read.getReadGroup().getPlatform();
// These calls are all expensive so only do them once for each read
final SAMReadGroupRecord readGroup = read.getReadGroup();
final String readGroupId = readGroup.getReadGroupId();
String platform = readGroup.getPlatform();
final boolean isNegStrand = read.getReadNegativeStrandFlag();
if( USE_SLX_PLATFORM ) {
platform = "ILLUMINA";
}
byte[] bases = read.getReadBases();
int startPos = 1;
int stopPos = read.getReadLength();
int stopPos = bases.length;
if( read.getReadNegativeStrandFlag() ) {
bases = BaseUtils.simpleComplement( bases );
if( isNegStrand ) { // DinucCovariate is responsible for getting the complement base if needed
startPos = 0;
stopPos = read.getReadLength() - 1;
stopPos = bases.length - 1;
}
ReadHashDatum readDatum = new ReadHashDatum( readGroupId, platform, originalQuals, bases, isNegStrand, read.getMappingQuality(), bases.length );
// For each base in the read
for( int iii = startPos; iii < stopPos; iii++ ) { // skip first or last base because there is no dinuc depending on the direction of the read
List<Comparable> key = new ArrayList<Comparable>();
// Get the covariate values which make up the key
for( Covariate covariate : requestedCovariates ) {
key.add( covariate.getValue( read, iii, readGroup, platform, originalQuals, bases ) ); // offset is zero based so passing iii is correct here
key.add( covariate.getValue( readDatum, iii ) ); // offset is zero based so passing iii is correct here
}
recalQuals[iii] = performSequentialQualityCalculation( key );