From 98f921fe24f87e4987bf67195cc5917d44437417 Mon Sep 17 00:00:00 2001 From: rpoplin Date: Sat, 21 Nov 2009 20:38:17 +0000 Subject: [PATCH] The refactored CountCovariates now hashes the read object into a HashMap which holds all the properties the covariates pull out of the read over and over again such as read group string, bases string and its complement string, quality scores, etc. This results in a big speed up. CountCovariatesRefactored is now just slightly slower than CountCovariates (perhaps 1.07x according to my latest time trial). Thanks to Alec for suggesting IdentityHashMap. CycleCovariate now warns the user that it is defaulting to the Solexa definition of cycle when the platform string pulled out of the read is unrecognized instead of halting with an Exception. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2108 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/Recalibration/Covariate.java | 2 +- .../Recalibration/CovariateCounterWalker.java | 146 ++++++++++-------- .../walkers/Recalibration/CycleCovariate.java | 32 ++-- .../walkers/Recalibration/DinucCovariate.java | 17 +- .../MappingQualityCovariate.java | 7 +- .../Recalibration/MinimumNQSCovariate.java | 13 +- .../Recalibration/PositionCovariate.java | 18 ++- .../Recalibration/PrimerRoundCovariate.java | 7 +- .../Recalibration/QualityScoreCovariate.java | 7 +- .../Recalibration/ReadGroupCovariate.java | 7 +- .../walkers/Recalibration/ReadHashDatum.java | 26 ++++ .../TableRecalibrationWalker.java | 24 +-- 12 files changed, 181 insertions(+), 125 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/ReadHashDatum.java diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/Covariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/Covariate.java index af454e946..b296715d6 100644 --- 
a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/Covariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/Covariate.java @@ -38,7 +38,7 @@ import net.sf.samtools.SAMRecord; */ public interface Covariate { - public Comparable getValue(SAMRecord read, int offset, String readGroup, String platform, byte[] quals, byte[] bases); // used to pick out the value from attributes of the read + public Comparable getValue(ReadHashDatum readDatum, int offset); // used to pick out the value from attributes of the read public Comparable getValue(String str); // used to get value from input file public int estimatedNumberOfBins(); // used to estimate the amount space required for the HashMap } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CovariateCounterWalker.java index 2d0130e8b..fb979b05a 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CovariateCounterWalker.java @@ -88,8 +88,9 @@ public class CovariateCounterWalker extends LocusWalker { private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps private ArrayList requestedCovariates; // A list to hold the covariate objects that were requested - //private HashMap readGroupHashMap; // A hash map that hashes the read object itself into the read group name - // This is done for optimization purposes because pulling the read group out of the SAMRecord is expensive + private IdentityHashMap readDatumHashMap; // A hash map that hashes the read object itself into properties commonly pulled out of the read. Done for optimization purposes. 
+ private int sizeOfReadDatumHashMap = 0; + private long countedSites = 0; // Number of loci used in the calculations, used for reporting in the output file private long countedBases = 0; // Number of bases used in the calculations, used for reporting in the output file private long skippedSites = 0; // Number of loci skipped because it was a dbSNP site, used for reporting in the output file @@ -204,7 +205,7 @@ public class CovariateCounterWalker extends LocusWalker { if(estimatedCapacity > 300 * 40 * 200 * 16) { estimatedCapacity = 300 * 40 * 200 * 16; } // Don't want to crash with out of heap space exception dataManager = new RecalDataManager( estimatedCapacity ); - //readGroupHashMap = new HashMap( 50000000, 0.97f ); + readDatumHashMap = new IdentityHashMap(); } @@ -250,73 +251,93 @@ public class CovariateCounterWalker extends LocusWalker { byte prevBase; String platform; byte[] colorSpaceQuals; - + ReadHashDatum readDatum; + boolean isNegStrand; + int mappingQuality; + int length; + final int numReads = reads.size(); // For each read at this locus for( int iii = 0; iii < numReads; iii++ ) { read = reads.get(iii); + offset = offsets.get(iii); // offset is zero based so quals[offset] and bases[offset] is correct - //readGroupId = readGroupHashMap.get( read ); - //if( readGroupId == null ) { // read is not in the hashmap so add it - // readGroupId = read.getReadGroup().getReadGroupId(); - // readGroupHashMap.put( read, readGroupId ); - //} - - if( read.getMappingQuality() > 0 ) { // BUGBUG: turn this into a read filter after passing the old integration tests + readDatum = readDatumHashMap.get( read ); + if( readDatum == null ) { - offset = offsets.get(iii); // offset is zero based so quals[offset] and bases[offset] is correct + // If the HashMap of read objects has grown too large then throw out the (mostly stale) reads + if( sizeOfReadDatumHashMap > 100000 ) { //BUGBUG: Can I make this number larger? 
+ readDatumHashMap.clear(); + sizeOfReadDatumHashMap = 0; + } - // skip first and last base because there is no dinuc, this is mainly done for speed so we don't have to check cases - if( offset > 0 && offset < read.getReadLength() - 1 ) { - - quals = read.getBaseQualities(); - // Check if we need to use the original quality scores instead - if ( USE_ORIGINAL_QUALS && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) { - Object obj = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG); - if ( obj instanceof String ) - quals = QualityUtils.fastqToPhred((String)obj); - else { - throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName())); - } + // This read isn't in the hashMap yet so fill out the datum and add it to the map so that we never have to do the work again + quals = read.getBaseQualities(); + // Check if we need to use the original quality scores instead + if ( USE_ORIGINAL_QUALS && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) { + Object obj = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG); + if ( obj instanceof String ) + quals = QualityUtils.fastqToPhred((String)obj); + else { + throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName())); } + } + bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A'. Is this true? 
+ isNegStrand = read.getReadNegativeStrandFlag(); + final SAMReadGroupRecord readGroup = read.getReadGroup(); + readGroupId = readGroup.getReadGroupId(); + platform = readGroup.getPlatform(); + mappingQuality = read.getMappingQuality(); + length = bases.length; + if( USE_SLX_PLATFORM ) { + platform = "ILLUMINA"; + } - // skip if base quality is zero - if( quals[offset] > 0 ) { - bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A' - refBase = (byte)ref.getBase(); - prevBase = bases[offset-1]; + readDatum = new ReadHashDatum( readGroupId, platform, quals, bases, isNegStrand, mappingQuality, length ); + readDatumHashMap.put( read, readDatum ); + sizeOfReadDatumHashMap++; + } - // Get the complement base strand if we are a negative strand read - if( read.getReadNegativeStrandFlag() ) { - bases = BaseUtils.simpleComplement( bases ); // this is an expensive call - refBase = (byte)BaseUtils.simpleComplement( ref.getBase() ); - prevBase = bases[offset+1]; - } - // skip if this base or the previous one was an 'N' or etc. - if( BaseUtils.isRegularBase( (char)prevBase ) && BaseUtils.isRegularBase( (char)bases[offset] ) ) { + if( readDatum.mappingQuality > 0 ) { // BUGBUG: turn this into a read filter after passing the old integration tests - final SAMReadGroupRecord readGroup = read.getReadGroup(); - readGroupId = readGroup.getReadGroupId(); - platform = readGroup.getPlatform(); - if( USE_SLX_PLATFORM ) { - platform = "ILLUMINA"; + // skip first and last base because there is no dinuc + // BUGBUG: Technically we only have to skip the first base on forward reads and the last base on negative strand reads. Change after passing old integration tests. 
+ if( offset > 0 ) { + if( offset < readDatum.length - 1 ) { + // skip if base quality is zero + if( readDatum.quals[offset] > 0 ) { + + refBase = (byte)ref.getBase(); + prevBase = readDatum.bases[offset-1]; + + // Get the complement base strand if we are a negative strand read + if( readDatum.isNegStrand ) { + prevBase = readDatum.bases[offset+1]; } - // SOLID bams insert the reference base into the read if the color space quality is zero, so skip over them - colorSpaceQuals = null; - if( platform.equalsIgnoreCase("SOLID") ) { - colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG)); - } - if( colorSpaceQuals == null || colorSpaceQuals[offset] > 0 ) //BUGBUG: This isn't exactly correct yet - { - updateDataFromRead( read, offset, readGroupId, platform, quals, bases, refBase ); - } - } else { - if( VALIDATE_OLD_RECALIBRATOR ) { - countedBases++; // replicating a small bug in the old recalibrator + // skip if this base or the previous one was an 'N' or etc. 
+ if( BaseUtils.isRegularBase( (char)prevBase ) && BaseUtils.isRegularBase( (char)(readDatum.bases[offset]) ) ) { + + // SOLID bams insert the reference base into the read if the color space quality is zero, so skip over them + colorSpaceQuals = null; + if( readDatum.platform.equalsIgnoreCase("SOLID") ) { + colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG)); + } + if( colorSpaceQuals == null || colorSpaceQuals[offset] > 0 ) //BUGBUG: This isn't exactly correct yet + { + // This base finally passed all the checks, so add it to the big hashmap + updateDataFromRead( readDatum, offset, refBase ); + } + } else { + if( VALIDATE_OLD_RECALIBRATOR ) { + countedBases++; // replicating a small bug in the old recalibrator + } } } + } else { // at the last base in the read so we can remove it from our IdentityHashMap + readDatumHashMap.remove( read ); + sizeOfReadDatumHashMap--; } } } @@ -337,22 +358,17 @@ public class CovariateCounterWalker extends LocusWalker { * adding one to the number of observations and potentially one to the number of mismatches * Lots of things are passed as parameters to this method as a strategy for optimizing the covariate.getValue calls * because pulling things out of the SAMRecord is an expensive operation. 
- * @param read The read + * @param readDatum The ReadHashDatum holding all the important properties of this read * @param offset The offset in the read for this locus - * @param readGroup The read group the read is in - * @param platform The String that has the platform this read came from: Illumina, 454, or solid - * @param quals List of base quality scores - * @param bases The bases which make up the read * @param refBase The reference base at this locus */ - private void updateDataFromRead(final SAMRecord read, final int offset, final String readGroup, final String platform, - final byte[] quals, final byte[] bases, final byte refBase) { + private void updateDataFromRead(final ReadHashDatum readDatum, final int offset, final byte refBase) { List key = new ArrayList(); // Loop through the list of requested covariates and pick out the value from the read, offset, and reference for( Covariate covariate : requestedCovariates ) { - key.add( covariate.getValue( read, offset, readGroup, platform, quals, bases ) ); + key.add( covariate.getValue( readDatum, offset ) ); } // Using the list of covariate values as a key, pick out the RecalDatum from the data HashMap @@ -367,7 +383,7 @@ public class CovariateCounterWalker extends LocusWalker { } // Need the bases to determine whether or not we have a mismatch - byte base = bases[offset]; + byte base = readDatum.bases[offset]; // Add one to the number of observations and potentially one to the number of mismatches datum.increment( (char)base, (char)refBase ); // dangerous: if you don't cast to char than the bytes default to the (long, long) version of the increment method which is really bad @@ -408,9 +424,9 @@ public class CovariateCounterWalker extends LocusWalker { * @param recalTableStream The PrintStream to write out to */ public void onTraversalDone( PrintStream recalTableStream ) { - out.print( "Writing raw recalibration data..." ); + logger.info( "Writing raw recalibration data..." 
); outputToCSV( recalTableStream ); - out.println( "...done!" ); + logger.info( "...done!" ); recalTableStream.close(); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CycleCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CycleCovariate.java index 475563ab6..0438b8bd0 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CycleCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CycleCovariate.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.Utils; import net.sf.samtools.SAMRecord; @@ -44,37 +45,42 @@ import net.sf.samtools.SAMRecord; public class CycleCovariate implements Covariate { + private static boolean warnedUserNoPlatform = false; + public CycleCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker } - public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform, - final byte[] quals, final byte[] bases) { - if( platform.equalsIgnoreCase( "ILLUMINA" ) ) { + public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) { + if( readDatum.platform.equalsIgnoreCase( "ILLUMINA" ) ) { int cycle = offset; - if( read.getReadNegativeStrandFlag() ) { - cycle = bases.length - (offset + 1); + if( readDatum.isNegStrand ) { + cycle = readDatum.bases.length - (offset + 1); } return cycle; - } else if( platform.contains( "454" ) ) { // some bams have "LS454" and others have just "454" + } else if( readDatum.platform.contains( "454" ) ) { // some bams have "LS454" and others have just "454" int cycle = 0; - byte prevBase = bases[0]; + byte prevBase = readDatum.bases[0]; for( int iii = 1; iii <= offset; iii++ ) { - if(bases[iii] != prevBase) { 
// this base doesn't match the previous one so it is a new cycle + if(readDatum.bases[iii] != prevBase) { // this base doesn't match the previous one so it is a new cycle cycle++; - prevBase = bases[iii]; + prevBase = readDatum.bases[iii]; } } return cycle; - } else if( platform.equalsIgnoreCase( "SOLID" ) ) { + } else if( readDatum.platform.equalsIgnoreCase( "SOLID" ) ) { // the ligation cycle according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf return offset / 5; // integer division - } else { - throw new StingException( "Platform in read (" + platform + ") is not supported in CycleCovariate. Read = " + read ); + } else { // platform is unrecognized so revert to Illumina definition of cycle but warn the user + if( !warnedUserNoPlatform ) { + Utils.warnUser( "Platform (" + readDatum.platform + ") unrecognized. Reverting to Illumina definition of machine cycle." ); + warnedUserNoPlatform = true; + } + return PositionCovariate.revertToPositionAsCycle( readDatum, offset ); } } - public final Comparable getValue(final String str) { + public final Comparable getValue( final String str ) { return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/DinucCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/DinucCovariate.java index 6ccf2a553..acb6a4128 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/DinucCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/DinucCovariate.java @@ -4,6 +4,8 @@ import net.sf.samtools.SAMRecord; import java.util.HashMap; +import org.broadinstitute.sting.utils.BaseUtils; + /* * Copyright (c) 2009 The Broad Institute * @@ -54,16 +56,17 @@ public class DinucCovariate 
implements Covariate { } } - public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform, - final byte[] quals, final byte[] bases) { + public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) { - byte base = bases[offset]; + byte base; byte prevBase; // If this is a negative strand read then we need to reverse the direction for our previous base - if( read.getReadNegativeStrandFlag() ) { - prevBase = bases[offset + 1]; + if( readDatum.isNegStrand ) { + base = (byte)BaseUtils.simpleComplement( (char)readDatum.bases[offset] ); + prevBase = (byte)BaseUtils.simpleComplement( (char)readDatum.bases[offset + 1] ); } else { - prevBase = bases[offset - 1]; + base = readDatum.bases[offset]; + prevBase = readDatum.bases[offset - 1]; } //char[] charArray = {(char)prevBase, (char)base}; //return new String( charArray ); // This is an expensive call @@ -71,7 +74,7 @@ public class DinucCovariate implements Covariate { //return String.format("%c%c", prevBase, base); // This return statement is too slow } - public final Comparable getValue(final String str) { + public final Comparable getValue( final String str ) { //return str; return dinucHashMap.get( Dinuc.hashBytes( (byte)str.charAt(0), (byte)str.charAt(1) ) ); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MappingQualityCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MappingQualityCovariate.java index 2b9830972..1e0d0a9ff 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MappingQualityCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MappingQualityCovariate.java @@ -40,13 +40,12 @@ public class MappingQualityCovariate implements Covariate { public MappingQualityCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker 
} - public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform, - final byte[] quals, final byte[] bases) { + public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) { - return read.getMappingQuality(); + return readDatum.mappingQuality; } - public final Comparable getValue(final String str) { + public final Comparable getValue( final String str ) { return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MinimumNQSCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MinimumNQSCovariate.java index f8e54d6d4..ca44b2609 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MinimumNQSCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MinimumNQSCovariate.java @@ -48,22 +48,21 @@ public class MinimumNQSCovariate implements Covariate { windowReach = windowSize / 2; // integer division } - public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform, - final byte[] quals, final byte[] bases) { + public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) { // Loop over the list of base quality scores in the window and find the minimum - int minQual = quals[offset]; + int minQual = readDatum.quals[offset]; int minIndex = Math.max(offset - windowReach, 0); - int maxIndex = Math.min(offset + windowReach, quals.length - 1); + int maxIndex = Math.min(offset + windowReach, readDatum.quals.length - 1); for ( int iii = minIndex; iii < maxIndex; iii++ ) { - if( quals[iii] < minQual ) { - minQual = quals[iii]; + if( readDatum.quals[iii] < minQual ) { + minQual = readDatum.quals[iii]; } } return minQual; 
} - public final Comparable getValue(final String str) { + public final Comparable getValue( final String str ) { return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/PositionCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/PositionCovariate.java index e7f691739..72002947a 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/PositionCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/PositionCovariate.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.Utils; /* * Copyright (c) 2009 The Broad Institute @@ -41,16 +42,23 @@ public class PositionCovariate implements Covariate { public PositionCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker } - public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform, - final byte[] quals, final byte[] bases) { + public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) { int cycle = offset; - if( read.getReadNegativeStrandFlag() ) { - cycle = bases.length - (offset + 1); + if( readDatum.isNegStrand ) { + cycle = readDatum.bases.length - (offset + 1); } return cycle; } - public final Comparable getValue(final String str) { + public static Comparable revertToPositionAsCycle( final ReadHashDatum readDatum, final int offset ) { // called from CycleCovariate if platform was unrecognized + int cycle = offset; + if( readDatum.isNegStrand ) { + cycle = readDatum.bases.length - (offset + 1); + } + return cycle; + } + + public final Comparable 
getValue( final String str ) { return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/PrimerRoundCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/PrimerRoundCovariate.java index 5bb19a9b5..8822415c9 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/PrimerRoundCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/PrimerRoundCovariate.java @@ -44,9 +44,8 @@ public class PrimerRoundCovariate implements Covariate { public PrimerRoundCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker } - public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform, - final byte[] quals, final byte[] bases) { - if( platform.equalsIgnoreCase( "SOLID" ) ) { + public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) { + if( readDatum.platform.equalsIgnoreCase( "SOLID" ) ) { return offset % 5; // the primer round according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf } else { return 1; // nothing to do here because it is always the same @@ -54,7 +53,7 @@ public class PrimerRoundCovariate implements Covariate { } - public final Comparable getValue(final String str) { + public final Comparable getValue( final String str ) { return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/QualityScoreCovariate.java 
b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/QualityScoreCovariate.java index 437aa78da..699395b45 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/QualityScoreCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/QualityScoreCovariate.java @@ -40,13 +40,12 @@ public class QualityScoreCovariate implements Covariate { public QualityScoreCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker } - public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform, - final byte[] quals, final byte[] bases) { + public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) { - return (int)quals[offset]; + return (int)(readDatum.quals[offset]); } - public final Comparable getValue(final String str) { + public final Comparable getValue( final String str ) { return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/ReadGroupCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/ReadGroupCovariate.java index 92ab81a10..b22520e37 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/ReadGroupCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/ReadGroupCovariate.java @@ -40,12 +40,11 @@ public class ReadGroupCovariate implements Covariate{ public ReadGroupCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker } - public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform, - final byte[] quals, 
final byte[] bases) { - return readGroup; + public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) { + return readDatum.readGroup; } - public final Comparable getValue(final String str) { + public final Comparable getValue( final String str ) { return str; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/ReadHashDatum.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/ReadHashDatum.java new file mode 100755 index 000000000..d2a2fde6c --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/ReadHashDatum.java @@ -0,0 +1,26 @@ +package org.broadinstitute.sting.playground.gatk.walkers.Recalibration; + +/** + * Created by IntelliJ IDEA. + * User: rpoplin + * Date: Nov 14, 2009 + */ +public class ReadHashDatum { + public String readGroup; + public String platform; + public byte[] quals; + public byte[] bases; + public boolean isNegStrand; + public int mappingQuality; + public int length; + + public ReadHashDatum(String _readGroup, String _platform, byte[] _quals, byte[] _bases, boolean _isNegStrand, int _mappingQuality, int _length) { + readGroup = _readGroup; + platform = _platform; + quals = _quals; + bases = _bases; + isNegStrand = _isNegStrand; + mappingQuality = _mappingQuality; + length = _length; + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/TableRecalibrationWalker.java index 3e8ce3966..d49b0aefe 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/TableRecalibrationWalker.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMFileWriter; +import 
net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.WalkerName; import org.broadinstitute.sting.gatk.walkers.Requires; @@ -126,7 +127,7 @@ public class TableRecalibrationWalker extends ReadWalker key = new ArrayList(); // Get the covariate values which make up the key for( Covariate covariate : requestedCovariates ) { - key.add( covariate.getValue( read, iii, readGroup, platform, originalQuals, bases ) ); // offset is zero based so passing iii is correct here + key.add( covariate.getValue( readDatum, iii ) ); // offset is zero based so passing iii is correct here } recalQuals[iii] = performSequentialQualityCalculation( key );