The refactored CountCovariates now hashes the read object into a HashMap which holds all the properties the covariates pull out of the read over and over again such as read group string, bases string and its complement string, quality scores, etc. This results in a big speed up. CountCovariatesRefactored is now just slightly slower than CountCovariates (perhaps 1.07x according to my latest time trial). Thanks to Alec for suggesting IdentityHashMap. CycleCovariate now warns the user that it is defaulting to the Solexa definition of cycle when the platform string pulled out of the read is unrecognized, instead of halting with an Exception.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2108 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
27122f7f97
commit
98f921fe24
|
|
@ -38,7 +38,7 @@ import net.sf.samtools.SAMRecord;
|
|||
*/
|
||||
|
||||
public interface Covariate {
|
||||
public Comparable getValue(SAMRecord read, int offset, String readGroup, String platform, byte[] quals, byte[] bases); // used to pick out the value from attributes of the read
|
||||
public Comparable getValue(ReadHashDatum readDatum, int offset); // used to pick out the value from attributes of the read
|
||||
public Comparable getValue(String str); // used to get value from input file
|
||||
public int estimatedNumberOfBins(); // used to estimate the amount space required for the HashMap
|
||||
}
|
||||
|
|
|
|||
|
|
@ -88,8 +88,9 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
|
||||
private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
|
||||
private ArrayList<Covariate> requestedCovariates; // A list to hold the covariate objects that were requested
|
||||
//private HashMap<SAMRecord, String> readGroupHashMap; // A hash map that hashes the read object itself into the read group name
|
||||
// This is done for optimization purposes because pulling the read group out of the SAMRecord is expensive
|
||||
private IdentityHashMap<SAMRecord, ReadHashDatum> readDatumHashMap; // A hash map that hashes the read object itself into properties commonly pulled out of the read. Done for optimization purposes.
|
||||
private int sizeOfReadDatumHashMap = 0;
|
||||
|
||||
private long countedSites = 0; // Number of loci used in the calculations, used for reporting in the output file
|
||||
private long countedBases = 0; // Number of bases used in the calculations, used for reporting in the output file
|
||||
private long skippedSites = 0; // Number of loci skipped because it was a dbSNP site, used for reporting in the output file
|
||||
|
|
@ -204,7 +205,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
|
||||
if(estimatedCapacity > 300 * 40 * 200 * 16) { estimatedCapacity = 300 * 40 * 200 * 16; } // Don't want to crash with out of heap space exception
|
||||
dataManager = new RecalDataManager( estimatedCapacity );
|
||||
//readGroupHashMap = new HashMap<SAMRecord, String>( 50000000, 0.97f );
|
||||
readDatumHashMap = new IdentityHashMap<SAMRecord, ReadHashDatum>();
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -250,73 +251,93 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
byte prevBase;
|
||||
String platform;
|
||||
byte[] colorSpaceQuals;
|
||||
|
||||
ReadHashDatum readDatum;
|
||||
boolean isNegStrand;
|
||||
int mappingQuality;
|
||||
int length;
|
||||
|
||||
final int numReads = reads.size();
|
||||
// For each read at this locus
|
||||
for( int iii = 0; iii < numReads; iii++ ) {
|
||||
read = reads.get(iii);
|
||||
offset = offsets.get(iii); // offset is zero based so quals[offset] and bases[offset] is correct
|
||||
|
||||
//readGroupId = readGroupHashMap.get( read );
|
||||
//if( readGroupId == null ) { // read is not in the hashmap so add it
|
||||
// readGroupId = read.getReadGroup().getReadGroupId();
|
||||
// readGroupHashMap.put( read, readGroupId );
|
||||
//}
|
||||
|
||||
if( read.getMappingQuality() > 0 ) { // BUGBUG: turn this into a read filter after passing the old integration tests
|
||||
readDatum = readDatumHashMap.get( read );
|
||||
if( readDatum == null ) {
|
||||
|
||||
offset = offsets.get(iii); // offset is zero based so quals[offset] and bases[offset] is correct
|
||||
// If the HashMap of read objects has grown too large then throw out the (mostly stale) reads
|
||||
if( sizeOfReadDatumHashMap > 100000 ) { //BUGBUG: Can I make this number larger?
|
||||
readDatumHashMap.clear();
|
||||
sizeOfReadDatumHashMap = 0;
|
||||
}
|
||||
|
||||
// skip first and last base because there is no dinuc, this is mainly done for speed so we don't have to check cases
|
||||
if( offset > 0 && offset < read.getReadLength() - 1 ) {
|
||||
|
||||
quals = read.getBaseQualities();
|
||||
// Check if we need to use the original quality scores instead
|
||||
if ( USE_ORIGINAL_QUALS && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
|
||||
Object obj = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG);
|
||||
if ( obj instanceof String )
|
||||
quals = QualityUtils.fastqToPhred((String)obj);
|
||||
else {
|
||||
throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
|
||||
}
|
||||
// This read isn't in the hashMap yet so fill out the datum and add it to the map so that we never have to do the work again
|
||||
quals = read.getBaseQualities();
|
||||
// Check if we need to use the original quality scores instead
|
||||
if ( USE_ORIGINAL_QUALS && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
|
||||
Object obj = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG);
|
||||
if ( obj instanceof String )
|
||||
quals = QualityUtils.fastqToPhred((String)obj);
|
||||
else {
|
||||
throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
|
||||
}
|
||||
}
|
||||
bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A'. Is this true?
|
||||
isNegStrand = read.getReadNegativeStrandFlag();
|
||||
final SAMReadGroupRecord readGroup = read.getReadGroup();
|
||||
readGroupId = readGroup.getReadGroupId();
|
||||
platform = readGroup.getPlatform();
|
||||
mappingQuality = read.getMappingQuality();
|
||||
length = bases.length;
|
||||
if( USE_SLX_PLATFORM ) {
|
||||
platform = "ILLUMINA";
|
||||
}
|
||||
|
||||
// skip if base quality is zero
|
||||
if( quals[offset] > 0 ) {
|
||||
bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A'
|
||||
refBase = (byte)ref.getBase();
|
||||
prevBase = bases[offset-1];
|
||||
readDatum = new ReadHashDatum( readGroupId, platform, quals, bases, isNegStrand, mappingQuality, length );
|
||||
readDatumHashMap.put( read, readDatum );
|
||||
sizeOfReadDatumHashMap++;
|
||||
}
|
||||
|
||||
// Get the complement base strand if we are a negative strand read
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
bases = BaseUtils.simpleComplement( bases ); // this is an expensive call
|
||||
refBase = (byte)BaseUtils.simpleComplement( ref.getBase() );
|
||||
prevBase = bases[offset+1];
|
||||
}
|
||||
|
||||
// skip if this base or the previous one was an 'N' or etc.
|
||||
if( BaseUtils.isRegularBase( (char)prevBase ) && BaseUtils.isRegularBase( (char)bases[offset] ) ) {
|
||||
if( readDatum.mappingQuality > 0 ) { // BUGBUG: turn this into a read filter after passing the old integration tests
|
||||
|
||||
final SAMReadGroupRecord readGroup = read.getReadGroup();
|
||||
readGroupId = readGroup.getReadGroupId();
|
||||
platform = readGroup.getPlatform();
|
||||
if( USE_SLX_PLATFORM ) {
|
||||
platform = "ILLUMINA";
|
||||
// skip first and last base because there is no dinuc
|
||||
// BUGBUG: Technically we only have to skip the first base on forward reads and the last base on negative strand reads. Change after passing old integration tests.
|
||||
if( offset > 0 ) {
|
||||
if( offset < readDatum.length - 1 ) {
|
||||
// skip if base quality is zero
|
||||
if( readDatum.quals[offset] > 0 ) {
|
||||
|
||||
refBase = (byte)ref.getBase();
|
||||
prevBase = readDatum.bases[offset-1];
|
||||
|
||||
// Get the complement base strand if we are a negative strand read
|
||||
if( readDatum.isNegStrand ) {
|
||||
prevBase = readDatum.bases[offset+1];
|
||||
}
|
||||
|
||||
// SOLID bams insert the reference base into the read if the color space quality is zero, so skip over them
|
||||
colorSpaceQuals = null;
|
||||
if( platform.equalsIgnoreCase("SOLID") ) {
|
||||
colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG));
|
||||
}
|
||||
if( colorSpaceQuals == null || colorSpaceQuals[offset] > 0 ) //BUGBUG: This isn't exactly correct yet
|
||||
{
|
||||
updateDataFromRead( read, offset, readGroupId, platform, quals, bases, refBase );
|
||||
}
|
||||
} else {
|
||||
if( VALIDATE_OLD_RECALIBRATOR ) {
|
||||
countedBases++; // replicating a small bug in the old recalibrator
|
||||
// skip if this base or the previous one was an 'N' or etc.
|
||||
if( BaseUtils.isRegularBase( (char)prevBase ) && BaseUtils.isRegularBase( (char)(readDatum.bases[offset]) ) ) {
|
||||
|
||||
// SOLID bams insert the reference base into the read if the color space quality is zero, so skip over them
|
||||
colorSpaceQuals = null;
|
||||
if( readDatum.platform.equalsIgnoreCase("SOLID") ) {
|
||||
colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG));
|
||||
}
|
||||
if( colorSpaceQuals == null || colorSpaceQuals[offset] > 0 ) //BUGBUG: This isn't exactly correct yet
|
||||
{
|
||||
// This base finally passed all the checks, so add it to the big hashmap
|
||||
updateDataFromRead( readDatum, offset, refBase );
|
||||
}
|
||||
} else {
|
||||
if( VALIDATE_OLD_RECALIBRATOR ) {
|
||||
countedBases++; // replicating a small bug in the old recalibrator
|
||||
}
|
||||
}
|
||||
}
|
||||
} else { // at the last base in the read so we can remove it from our IdentityHashMap
|
||||
readDatumHashMap.remove( read );
|
||||
sizeOfReadDatumHashMap--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -337,22 +358,17 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
* adding one to the number of observations and potentially one to the number of mismatches
|
||||
* Lots of things are passed as parameters to this method as a strategy for optimizing the covariate.getValue calls
|
||||
* because pulling things out of the SAMRecord is an expensive operation.
|
||||
* @param read The read
|
||||
* @param readDatum The ReadHashDatum holding all the important properties of this read
|
||||
* @param offset The offset in the read for this locus
|
||||
* @param readGroup The read group the read is in
|
||||
* @param platform The String that has the platform this read came from: Illumina, 454, or solid
|
||||
* @param quals List of base quality scores
|
||||
* @param bases The bases which make up the read
|
||||
* @param refBase The reference base at this locus
|
||||
*/
|
||||
private void updateDataFromRead(final SAMRecord read, final int offset, final String readGroup, final String platform,
|
||||
final byte[] quals, final byte[] bases, final byte refBase) {
|
||||
private void updateDataFromRead(final ReadHashDatum readDatum, final int offset, final byte refBase) {
|
||||
|
||||
List<Comparable> key = new ArrayList<Comparable>();
|
||||
|
||||
// Loop through the list of requested covariates and pick out the value from the read, offset, and reference
|
||||
for( Covariate covariate : requestedCovariates ) {
|
||||
key.add( covariate.getValue( read, offset, readGroup, platform, quals, bases ) );
|
||||
key.add( covariate.getValue( readDatum, offset ) );
|
||||
}
|
||||
|
||||
// Using the list of covariate values as a key, pick out the RecalDatum from the data HashMap
|
||||
|
|
@ -367,7 +383,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
}
|
||||
|
||||
// Need the bases to determine whether or not we have a mismatch
|
||||
byte base = bases[offset];
|
||||
byte base = readDatum.bases[offset];
|
||||
|
||||
// Add one to the number of observations and potentially one to the number of mismatches
|
||||
datum.increment( (char)base, (char)refBase ); // dangerous: if you don't cast to char than the bytes default to the (long, long) version of the increment method which is really bad
|
||||
|
|
@ -408,9 +424,9 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
* @param recalTableStream The PrintStream to write out to
|
||||
*/
|
||||
public void onTraversalDone( PrintStream recalTableStream ) {
|
||||
out.print( "Writing raw recalibration data..." );
|
||||
logger.info( "Writing raw recalibration data..." );
|
||||
outputToCSV( recalTableStream );
|
||||
out.println( "...done!" );
|
||||
logger.info( "...done!" );
|
||||
|
||||
recalTableStream.close();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
|
|
@ -44,37 +45,42 @@ import net.sf.samtools.SAMRecord;
|
|||
|
||||
public class CycleCovariate implements Covariate {
|
||||
|
||||
private static boolean warnedUserNoPlatform = false;
|
||||
|
||||
public CycleCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
}
|
||||
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
|
||||
final byte[] quals, final byte[] bases) {
|
||||
if( platform.equalsIgnoreCase( "ILLUMINA" ) ) {
|
||||
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
|
||||
if( readDatum.platform.equalsIgnoreCase( "ILLUMINA" ) ) {
|
||||
int cycle = offset;
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
cycle = bases.length - (offset + 1);
|
||||
if( readDatum.isNegStrand ) {
|
||||
cycle = readDatum.bases.length - (offset + 1);
|
||||
}
|
||||
return cycle;
|
||||
} else if( platform.contains( "454" ) ) { // some bams have "LS454" and others have just "454"
|
||||
} else if( readDatum.platform.contains( "454" ) ) { // some bams have "LS454" and others have just "454"
|
||||
int cycle = 0;
|
||||
byte prevBase = bases[0];
|
||||
byte prevBase = readDatum.bases[0];
|
||||
for( int iii = 1; iii <= offset; iii++ ) {
|
||||
if(bases[iii] != prevBase) { // this base doesn't match the previous one so it is a new cycle
|
||||
if(readDatum.bases[iii] != prevBase) { // this base doesn't match the previous one so it is a new cycle
|
||||
cycle++;
|
||||
prevBase = bases[iii];
|
||||
prevBase = readDatum.bases[iii];
|
||||
}
|
||||
}
|
||||
return cycle;
|
||||
} else if( platform.equalsIgnoreCase( "SOLID" ) ) {
|
||||
} else if( readDatum.platform.equalsIgnoreCase( "SOLID" ) ) {
|
||||
// the ligation cycle according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
|
||||
return offset / 5; // integer division
|
||||
} else {
|
||||
throw new StingException( "Platform in read (" + platform + ") is not supported in CycleCovariate. Read = " + read );
|
||||
} else { // platform is unrecognized so revert to Illumina definition of cycle but warn the user
|
||||
if( !warnedUserNoPlatform ) {
|
||||
Utils.warnUser( "Platform (" + readDatum.platform + ") unrecognized. Reverting to Illumina definition of machine cycle." );
|
||||
warnedUserNoPlatform = true;
|
||||
}
|
||||
return PositionCovariate.revertToPositionAsCycle( readDatum, offset );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public final Comparable getValue(final String str) {
|
||||
public final Comparable getValue( final String str ) {
|
||||
return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@ import net.sf.samtools.SAMRecord;
|
|||
|
||||
import java.util.HashMap;
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
|
|
@ -54,16 +56,17 @@ public class DinucCovariate implements Covariate {
|
|||
}
|
||||
}
|
||||
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
|
||||
final byte[] quals, final byte[] bases) {
|
||||
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
|
||||
|
||||
byte base = bases[offset];
|
||||
byte base;
|
||||
byte prevBase;
|
||||
// If this is a negative strand read then we need to reverse the direction for our previous base
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
prevBase = bases[offset + 1];
|
||||
if( readDatum.isNegStrand ) {
|
||||
base = (byte)BaseUtils.simpleComplement( (char)readDatum.bases[offset] );
|
||||
prevBase = (byte)BaseUtils.simpleComplement( (char)readDatum.bases[offset + 1] );
|
||||
} else {
|
||||
prevBase = bases[offset - 1];
|
||||
base = readDatum.bases[offset];
|
||||
prevBase = readDatum.bases[offset - 1];
|
||||
}
|
||||
//char[] charArray = {(char)prevBase, (char)base};
|
||||
//return new String( charArray ); // This is an expensive call
|
||||
|
|
@ -71,7 +74,7 @@ public class DinucCovariate implements Covariate {
|
|||
//return String.format("%c%c", prevBase, base); // This return statement is too slow
|
||||
}
|
||||
|
||||
public final Comparable getValue(final String str) {
|
||||
public final Comparable getValue( final String str ) {
|
||||
//return str;
|
||||
return dinucHashMap.get( Dinuc.hashBytes( (byte)str.charAt(0), (byte)str.charAt(1) ) );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,13 +40,12 @@ public class MappingQualityCovariate implements Covariate {
|
|||
public MappingQualityCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
}
|
||||
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
|
||||
final byte[] quals, final byte[] bases) {
|
||||
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
|
||||
|
||||
return read.getMappingQuality();
|
||||
return readDatum.mappingQuality;
|
||||
}
|
||||
|
||||
public final Comparable getValue(final String str) {
|
||||
public final Comparable getValue( final String str ) {
|
||||
return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -48,22 +48,21 @@ public class MinimumNQSCovariate implements Covariate {
|
|||
windowReach = windowSize / 2; // integer division
|
||||
}
|
||||
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
|
||||
final byte[] quals, final byte[] bases) {
|
||||
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
|
||||
|
||||
// Loop over the list of base quality scores in the window and find the minimum
|
||||
int minQual = quals[offset];
|
||||
int minQual = readDatum.quals[offset];
|
||||
int minIndex = Math.max(offset - windowReach, 0);
|
||||
int maxIndex = Math.min(offset + windowReach, quals.length - 1);
|
||||
int maxIndex = Math.min(offset + windowReach, readDatum.quals.length - 1);
|
||||
for ( int iii = minIndex; iii < maxIndex; iii++ ) {
|
||||
if( quals[iii] < minQual ) {
|
||||
minQual = quals[iii];
|
||||
if( readDatum.quals[iii] < minQual ) {
|
||||
minQual = readDatum.quals[iii];
|
||||
}
|
||||
}
|
||||
return minQual;
|
||||
}
|
||||
|
||||
public final Comparable getValue(final String str) {
|
||||
public final Comparable getValue( final String str ) {
|
||||
return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -41,16 +42,23 @@ public class PositionCovariate implements Covariate {
|
|||
public PositionCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
}
|
||||
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
|
||||
final byte[] quals, final byte[] bases) {
|
||||
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
|
||||
int cycle = offset;
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
cycle = bases.length - (offset + 1);
|
||||
if( readDatum.isNegStrand ) {
|
||||
cycle = readDatum.bases.length - (offset + 1);
|
||||
}
|
||||
return cycle;
|
||||
}
|
||||
|
||||
public final Comparable getValue(final String str) {
|
||||
public static Comparable revertToPositionAsCycle( final ReadHashDatum readDatum, final int offset ) { // called from CycleCovariate if platform was unrecognized
|
||||
int cycle = offset;
|
||||
if( readDatum.isNegStrand ) {
|
||||
cycle = readDatum.bases.length - (offset + 1);
|
||||
}
|
||||
return cycle;
|
||||
}
|
||||
|
||||
public final Comparable getValue( final String str ) {
|
||||
return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -44,9 +44,8 @@ public class PrimerRoundCovariate implements Covariate {
|
|||
public PrimerRoundCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
}
|
||||
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
|
||||
final byte[] quals, final byte[] bases) {
|
||||
if( platform.equalsIgnoreCase( "SOLID" ) ) {
|
||||
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
|
||||
if( readDatum.platform.equalsIgnoreCase( "SOLID" ) ) {
|
||||
return offset % 5; // the primer round according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
|
||||
} else {
|
||||
return 1; // nothing to do here because it is always the same
|
||||
|
|
@ -54,7 +53,7 @@ public class PrimerRoundCovariate implements Covariate {
|
|||
|
||||
}
|
||||
|
||||
public final Comparable getValue(final String str) {
|
||||
public final Comparable getValue( final String str ) {
|
||||
return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -40,13 +40,12 @@ public class QualityScoreCovariate implements Covariate {
|
|||
public QualityScoreCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
}
|
||||
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
|
||||
final byte[] quals, final byte[] bases) {
|
||||
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
|
||||
|
||||
return (int)quals[offset];
|
||||
return (int)(readDatum.quals[offset]);
|
||||
}
|
||||
|
||||
public final Comparable getValue(final String str) {
|
||||
public final Comparable getValue( final String str ) {
|
||||
return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -40,12 +40,11 @@ public class ReadGroupCovariate implements Covariate{
|
|||
public ReadGroupCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
}
|
||||
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
|
||||
final byte[] quals, final byte[] bases) {
|
||||
return readGroup;
|
||||
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
|
||||
return readDatum.readGroup;
|
||||
}
|
||||
|
||||
public final Comparable getValue(final String str) {
|
||||
public final Comparable getValue( final String str ) {
|
||||
return str;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,26 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Nov 14, 2009
|
||||
*/
|
||||
public class ReadHashDatum {
|
||||
public String readGroup;
|
||||
public String platform;
|
||||
public byte[] quals;
|
||||
public byte[] bases;
|
||||
public boolean isNegStrand;
|
||||
public int mappingQuality;
|
||||
public int length;
|
||||
|
||||
public ReadHashDatum(String _readGroup, String _platform, byte[] _quals, byte[] _bases, boolean _isNegStrand, int _mappingQuality, int _length) {
|
||||
readGroup = _readGroup;
|
||||
platform = _platform;
|
||||
quals = _quals;
|
||||
bases = _bases;
|
||||
isNegStrand = _isNegStrand;
|
||||
mappingQuality = _mappingQuality;
|
||||
length = _length;
|
||||
}
|
||||
}
|
||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
|||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
|
|
@ -126,7 +127,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
}
|
||||
}
|
||||
if( foundDBSNP ) {
|
||||
Utils.warnUser("A dbSNP rod file was specified but this walker doesn't make use of it.");
|
||||
Utils.warnUser("A dbSNP rod file was specified but TableRecalibrationWalker doesn't make use of it.");
|
||||
}
|
||||
|
||||
// Read in the covariates that were used from the input file
|
||||
|
|
@ -193,7 +194,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
} catch ( FileNotFoundException e ) {
|
||||
Utils.scareUser("Can not find input file: " + RECAL_FILE);
|
||||
} catch ( NumberFormatException e ) {
|
||||
throw new StingException("Error parsing recalibration data at line " + lineNumber + ". Was your table generated by CountCovariatesRefactored?");
|
||||
throw new StingException("Error parsing recalibration data at line " + lineNumber + ". Perhaps your table generated by an older version of CovariateCounterWalker.");
|
||||
}
|
||||
logger.info( "...done!" );
|
||||
|
||||
|
|
@ -265,30 +266,31 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
}
|
||||
byte[] recalQuals = originalQuals.clone();
|
||||
|
||||
// These calls are expensive so only do them once for each read
|
||||
String readGroup = read.getReadGroup().getReadGroupId();
|
||||
String platform = read.getReadGroup().getPlatform();
|
||||
// These calls are all expensive so only do them once for each read
|
||||
final SAMReadGroupRecord readGroup = read.getReadGroup();
|
||||
final String readGroupId = readGroup.getReadGroupId();
|
||||
String platform = readGroup.getPlatform();
|
||||
final boolean isNegStrand = read.getReadNegativeStrandFlag();
|
||||
if( USE_SLX_PLATFORM ) {
|
||||
platform = "ILLUMINA";
|
||||
}
|
||||
|
||||
byte[] bases = read.getReadBases();
|
||||
int startPos = 1;
|
||||
int stopPos = read.getReadLength();
|
||||
int stopPos = bases.length;
|
||||
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
bases = BaseUtils.simpleComplement( bases );
|
||||
if( isNegStrand ) { // DinucCovariate is responsible for getting the complement base if needed
|
||||
startPos = 0;
|
||||
stopPos = read.getReadLength() - 1;
|
||||
stopPos = bases.length - 1;
|
||||
}
|
||||
|
||||
ReadHashDatum readDatum = new ReadHashDatum( readGroupId, platform, originalQuals, bases, isNegStrand, read.getMappingQuality(), bases.length );
|
||||
|
||||
// For each base in the read
|
||||
for( int iii = startPos; iii < stopPos; iii++ ) { // skip first or last base because there is no dinuc depending on the direction of the read
|
||||
List<Comparable> key = new ArrayList<Comparable>();
|
||||
// Get the covariate values which make up the key
|
||||
for( Covariate covariate : requestedCovariates ) {
|
||||
key.add( covariate.getValue( read, iii, readGroup, platform, originalQuals, bases ) ); // offset is zero based so passing iii is correct here
|
||||
key.add( covariate.getValue( readDatum, iii ) ); // offset is zero based so passing iii is correct here
|
||||
}
|
||||
|
||||
recalQuals[iii] = performSequentialQualityCalculation( key );
|
||||
|
|
|
|||
Loading…
Reference in New Issue