diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java index 717bc7c64..cc298ee78 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; +import net.sf.samtools.SAMRecord; + /* * Copyright (c) 2009 The Broad Institute * @@ -37,7 +39,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; public interface Covariate { public void initialize( RecalibrationArgumentCollection RAC ); // Initialize any member variables using the command-line arguments passed to the walkers - public Comparable getValue( ReadHashDatum readDatum, int offset ); // Used to pick out the covariate's value from attributes of the read + public Comparable getValue( SAMRecord read, int offset ); // Used to pick out the covariate's value from attributes of the read public Comparable getValue( String str ); // Used to get the covariate's value from input csv file in TableRecalibrationWalker public int estimatedNumberOfBins(); // Used to estimate the amount space required for the full data HashMap } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index d8fba4d30..0770c358e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -96,13 +96,11 @@ public class CovariateCounterWalker extends LocusWalker { ///////////////////////////// private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps private ArrayList requestedCovariates; // A list to hold the covariate objects that were requested - private IdentityHashMap readDatumHashMap; // A hash map that hashes the read object itself into properties commonly pulled out of the read. Done for optimization purposes. - private int sizeOfReadDatumHashMap = 0; private long countedSites = 0; // Number of loci used in the calculations, used for reporting in the output file private long countedBases = 0; // Number of bases used in the calculations, used for reporting in the output file private long skippedSites = 0; // Number of loci skipped because it was a dbSNP site, used for reporting in the output file private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site - private static final String versionString = "v2.0.8"; // Major version, minor version, and build number + private static final String versionString = "v2.0.9"; // Major version, minor version, and build number private Pair dbSNP_counts = new Pair(0L, 0L); // mismatch/base counts for dbSNP loci private Pair novel_counts = new Pair(0L, 0L); // mismatch/base counts for non-dbSNP loci private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878) @@ -228,7 +226,6 @@ public class CovariateCounterWalker extends LocusWalker { estimatedCapacity = 300 * 40 * 200; } dataManager = new RecalDataManager( estimatedCapacity ); - readDatumHashMap = new IdentityHashMap(); } @@ -273,7 +270,6 @@ public class CovariateCounterWalker extends LocusWalker { byte refBase; byte prevBase; byte[] colorSpaceQuals; - ReadHashDatum readDatum; final int numReads = reads.size(); // For each read at this locus @@ -281,49 +277,35 @@ public class CovariateCounterWalker extends LocusWalker { read = reads.get(iii); offset = offsets.get(iii); // offset is zero based so quals[offset] and bases[offset] is correct - // Try to pull the read out of the read IdentityHashMap - readDatum = readDatumHashMap.get( read ); - if( readDatum == null ) { - - // If the HashMap of read objects has grown too large then throw out the (mostly stale) reads - if( sizeOfReadDatumHashMap > 100000 ) { //BUGBUG: Can I make this number larger? - readDatumHashMap.clear(); - sizeOfReadDatumHashMap = 0; - } - - // This read isn't in the hashMap yet so fill out the datum and add it to the map so that we never have to do the work again - readDatum = ReadHashDatum.parseSAMRecord( read, RAC ); - readDatumHashMap.put( read, readDatum ); - sizeOfReadDatumHashMap++; - } + RecalDataManager.parseSAMRecord(read, RAC); // Skip first and last base because there is no dinuc // BUGBUG: Technically we only have to skip the first base on forward reads and the last base on negative strand reads. Change after passing old integration tests. if( offset > 0 ) { - if( offset < readDatum.length - 1 ) { + if( offset < read.getReadLength() - 1 ) { // Skip if base quality is zero - if( readDatum.quals[offset] > 0 ) { + if( read.getBaseQualities()[offset] > 0 ) { refBase = (byte)ref.getBase(); - prevBase = readDatum.bases[offset - 1]; + prevBase = read.getReadBases()[offset - 1]; // DinucCovariate is responsible for getting the complement bases if needed - if( readDatum.isNegStrand ) { - prevBase = readDatum.bases[offset + 1]; + if( read.getReadNegativeStrandFlag() ) { + prevBase = read.getReadBases()[offset + 1]; } // Skip if this base or the previous one was an 'N' or etc. - if( BaseUtils.isRegularBase( (char)prevBase ) && BaseUtils.isRegularBase( (char)(readDatum.bases[offset]) ) ) { + if( BaseUtils.isRegularBase( (char)prevBase ) && BaseUtils.isRegularBase( (char)(read.getReadBases()[offset]) ) ) { // SOLID bams insert the reference base into the read if the color space quality is zero, so skip over them colorSpaceQuals = null; - if( readDatum.platform.equalsIgnoreCase("SOLID") ) { + if( read.getReadGroup().getPlatform().equalsIgnoreCase("SOLID") ) { colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG)); } if( colorSpaceQuals == null || colorSpaceQuals[offset] > 0 ) //BUGBUG: This isn't exactly correct yet { // This base finally passed all the checks for a good base, so add it to the big data hashmap - updateDataFromRead( readDatum, offset, refBase ); + updateDataFromRead( read, offset, refBase ); } } else { if( RAC.VALIDATE_OLD_RECALIBRATOR ) { @@ -331,9 +313,6 @@ public class CovariateCounterWalker extends LocusWalker { } } } - } else { // At the last base in the read so we can remove the read from our IdentityHashMap since we will never see it again - readDatumHashMap.remove( read ); - sizeOfReadDatumHashMap--; } } } @@ -405,17 +384,17 @@ public class CovariateCounterWalker extends LocusWalker { * adding one to the number of observations and potentially one to the number of mismatches * Lots of things are passed as parameters to this method as a strategy for optimizing the covariate.getValue calls * because pulling things out of the SAMRecord is an expensive operation. - * @param readDatum The ReadHashDatum holding all the important properties of this read + * @param read The SAMRecord holding all the data for this read * @param offset The offset in the read for this locus * @param refBase The reference base at this locus */ - private void updateDataFromRead(final ReadHashDatum readDatum, final int offset, final byte refBase) { + private void updateDataFromRead(final SAMRecord read, final int offset, final byte refBase) { List key = new ArrayList(); // Loop through the list of requested covariates and pick out the value from the read, offset, and reference for( Covariate covariate : requestedCovariates ) { - key.add( covariate.getValue( readDatum, offset ) ); + key.add( covariate.getValue( read, offset ) ); } // Using the list of covariate values as a key, pick out the RecalDatum from the data HashMap @@ -430,7 +409,7 @@ public class CovariateCounterWalker extends LocusWalker { } // Need the bases to determine whether or not we have a mismatch - byte base = readDatum.bases[offset]; + byte base = read.getReadBases()[offset]; // Add one to the number of observations and potentially one to the number of mismatches datum.increment( (char)base, (char)refBase ); // Dangerous: If you don't cast to char than the bytes default to the (long, long) version of the increment method which is really bad diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java index d359bf41d..cc78e91d2 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.StingException; +import net.sf.samtools.SAMRecord; /* * Copyright (c) 2009 The Broad Institute @@ -56,16 +57,16 @@ public class CycleCovariate implements Covariate { } // Used to pick out the covariate's value from attributes of the read - public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) { + public final Comparable getValue( final SAMRecord read, final int offset ) { //----------------------------- // ILLUMINA //----------------------------- - if( readDatum.platform.equalsIgnoreCase( "ILLUMINA" ) ) { + if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) ) { int cycle = offset; - if( readDatum.isNegStrand ) { - cycle = readDatum.bases.length - (offset + 1); + if( read.getReadNegativeStrandFlag() ) { + cycle = read.getReadLength() - (offset + 1); } return cycle; } @@ -74,14 +75,14 @@ public class CycleCovariate implements Covariate { // 454 //----------------------------- - else if( readDatum.platform.contains( "454" ) ) { // Some bams have "LS454" and others have just "454" + else if( read.getReadGroup().getPlatform().contains( "454" ) ) { // Some bams have "LS454" and others have just "454" int cycle = 0; //BUGBUG: should reverse directions on negative strand reads! - byte prevBase = readDatum.bases[0]; + byte prevBase = read.getReadBases()[0]; for( int iii = 1; iii <= offset; iii++ ) { - if(readDatum.bases[iii] != prevBase) { // This base doesn't match the previous one so it is a new cycle + if(read.getReadBases()[iii] != prevBase) { // This base doesn't match the previous one so it is a new cycle cycle++; - prevBase = readDatum.bases[iii]; + prevBase = read.getReadBases()[iii]; } } return cycle; @@ -91,7 +92,7 @@ public class CycleCovariate implements Covariate { // SOLID //----------------------------- - else if( readDatum.platform.equalsIgnoreCase( "SOLID" ) ) { + else if( read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) { // The ligation cycle according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf //BUGBUG: should reverse directions on negative strand reads! return offset / 5; // integer division @@ -104,18 +105,19 @@ public class CycleCovariate implements Covariate { else { // Platform is unrecognized so revert to the default platform but warn the user first if( !warnedUserBadPlatform ) { if( defaultPlatform != null) { // the user set a default platform - Utils.warnUser( "Platform string (" + readDatum.platform + ") unrecognized in CycleCovariate. " + + Utils.warnUser( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " + "Reverting to " + defaultPlatform + " definition of machine cycle." ); } else { // the user did not set a default platform - Utils.warnUser( "Platform string (" + readDatum.platform + ") unrecognized in CycleCovariate. " + + Utils.warnUser( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " + "Reverting to Illumina definition of machine cycle. Users may set the default platform using the --default_platform argument." ); defaultPlatform = "Illumina"; } warnedUserBadPlatform = true; } - ReadHashDatum correctedReadDatum = new ReadHashDatum( readDatum ); - correctedReadDatum.platform = defaultPlatform; - return getValue( correctedReadDatum, offset ); // a recursive call + //ReadHashDatum correctedReadDatum = new ReadHashDatum( readDatum ); + //correctedReadDatum.platform = defaultPlatform; + return 0; // BUGBUG: broken at the moment + //return getValue( correctedReadDatum, offset ); // a recursive call } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java index 636f5eb43..d6371b6c1 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import java.util.HashMap; import org.broadinstitute.sting.utils.BaseUtils; +import net.sf.samtools.SAMRecord; /* * Copyright (c) 2009 The Broad Institute @@ -55,17 +56,17 @@ public class DinucCovariate implements Covariate { } // Used to pick out the covariate's value from attributes of the read - public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) { + public final Comparable getValue( final SAMRecord read, final int offset ) { byte base; byte prevBase; // If this is a negative strand read then we need to reverse the direction for our previous base - if( readDatum.isNegStrand ) { - base = (byte)BaseUtils.simpleComplement( (char)readDatum.bases[offset] ); - prevBase = (byte)BaseUtils.simpleComplement( (char)readDatum.bases[offset + 1] ); + if( read.getReadNegativeStrandFlag() ) { + base = (byte)BaseUtils.simpleComplement( (char)(read.getReadBases()[offset]) ); + prevBase = (byte)BaseUtils.simpleComplement( (char)(read.getReadBases()[offset + 1]) ); } else { - base = readDatum.bases[offset]; - prevBase = readDatum.bases[offset - 1]; + base = read.getReadBases()[offset]; + prevBase = read.getReadBases()[offset - 1]; } //char[] charArray = {(char)prevBase, (char)base}; //return new String( charArray ); // This is an expensive call diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java index ae6fc552f..882785dde 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; +import net.sf.samtools.SAMRecord; + /* * Copyright (c) 2009 The Broad Institute * @@ -40,9 +42,9 @@ public class MappingQualityCovariate implements Covariate { } // Used to pick out the covariate's value from attributes of the read - public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) { + public final Comparable getValue( final SAMRecord read, final int offset ) { - return readDatum.mappingQuality; + return read.getMappingQuality(); } // Used to get the covariate's value from input csv file in TableRecalibrationWalker diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java index d0f9699f1..ef7855c39 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; +import net.sf.samtools.SAMRecord; + /* * Copyright (c) 2009 The Broad Institute * @@ -44,15 +46,15 @@ public class MinimumNQSCovariate implements Covariate { } // Used to pick out the covariate's value from attributes of the read - public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) { + public final Comparable getValue( final SAMRecord read, final int offset ) { // Loop over the list of base quality scores in the window and find the minimum - int minQual = readDatum.quals[offset]; + int minQual = read.getBaseQualities()[offset]; int minIndex = Math.max(offset - windowReach, 0); - int maxIndex = Math.min(offset + windowReach, readDatum.quals.length - 1); + int maxIndex = Math.min(offset + windowReach, read.getBaseQualities().length - 1); for ( int iii = minIndex; iii < maxIndex; iii++ ) { - if( readDatum.quals[iii] < minQual ) { - minQual = readDatum.quals[iii]; + if( read.getBaseQualities()[iii] < minQual ) { + minQual = read.getBaseQualities()[iii]; } } return minQual; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java index 6f4d6f787..17301460a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; +import net.sf.samtools.SAMRecord; + /* * Copyright (c) 2009 The Broad Institute * @@ -41,10 +43,10 @@ public class PositionCovariate implements Covariate { } // Used to pick out the covariate's value from attributes of the read - public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) { + public final Comparable getValue( final SAMRecord read, final int offset ) { int cycle = offset; - if( readDatum.isNegStrand ) { - cycle = readDatum.bases.length - (offset + 1); + if( read.getReadNegativeStrandFlag() ) { + cycle = read.getReadLength() - (offset + 1); } return cycle; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java index 5445deb18..329ba75e6 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; +import net.sf.samtools.SAMRecord; + /* * Copyright (c) 2009 The Broad Institute * @@ -42,8 +44,8 @@ public class PrimerRoundCovariate implements Covariate { } // Used to pick out the covariate's value from attributes of the read - public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) { - if( readDatum.platform.equalsIgnoreCase( "SOLID" ) ) { + public final Comparable getValue( final SAMRecord read, final int offset ) { + if( read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) { return offset % 5; // the primer round according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf } else { return 1; // nothing to do here because it is always the same diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java index ab083fefc..0b4a9e285 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; +import net.sf.samtools.SAMRecord; + /* * Copyright (c) 2009 The Broad Institute * @@ -40,9 +42,9 @@ public class QualityScoreCovariate implements Covariate { } // Used to pick out the covariate's value from attributes of the read - public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) { + public final Comparable getValue( final SAMRecord read, final int offset ) { - return (int)(readDatum.quals[offset]); + return (int)(read.getBaseQualities()[offset]); } // Used to get the covariate's value from input csv file in TableRecalibrationWalker diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java index 400571864..fa847bcd1 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; +import net.sf.samtools.SAMRecord; + /* * Copyright (c) 2009 The Broad Institute * @@ -42,8 +44,8 @@ public class ReadGroupCovariate implements Covariate{ } // Used to pick out the covariate's value from attributes of the read - public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) { - return readDatum.readGroup; + public final Comparable getValue( final SAMRecord read, final int offset ) { + return read.getReadGroup().getReadGroupId(); } // Used to get the covariate's value from input csv file in TableRecalibrationWalker diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadHashDatum.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadHashDatum.java deleted file mode 100755 index 3af1339c1..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadHashDatum.java +++ /dev/null @@ -1,133 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.recalibration; - -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMReadGroupRecord; -import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.Utils; -import edu.mit.broad.picard.illumina.parser.IlluminaUtil; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Nov 14, 2009 - */ - -public class ReadHashDatum { - public String readGroup; - public String platform; - public byte[] quals; - public byte[] bases; - public boolean isNegStrand; - public int mappingQuality; - public int length; - public Integer tile; - private static boolean warnUserNullReadGroup = false; // Has the walker warned the user about null read groups yet? - - public ReadHashDatum(String _readGroup, String _platform, byte[] _quals, byte[] _bases, boolean _isNegStrand, - int _mappingQuality, int _length, Integer _tile) { - readGroup = _readGroup; - platform = _platform; - quals = _quals; - bases = _bases; - isNegStrand = _isNegStrand; - mappingQuality = _mappingQuality; - length = _length; - tile = _tile; - } - - public ReadHashDatum(ReadHashDatum that) { - this.readGroup = that.readGroup; - this.platform = that.platform; - this.quals = that.quals; - this.bases = that.bases; - this.isNegStrand = that.isNegStrand; - this.mappingQuality = that.mappingQuality; - this.length = that.length; - this.tile = that.tile; - } - - /** - * Pulls the important bits out of a SAMRecord and stuffs them into a leaner Object to be used by the Covariates and cached by the Recalibration walkers - * @param read The SAMRecord whose data we want to get at - * @param RAC The collection of arguments that are shared between CovariateCounterWalker and TableRecalibrationWalker - * @return A new ReadHashDatum holding all the important parts of the read - */ - public static ReadHashDatum parseSAMRecord( final SAMRecord read, final RecalibrationArgumentCollection RAC ) { - - // Lots of lines just pulling things out of the SAMRecord and stuffing into local variables, many edge cases to worry about - byte[] quals = read.getBaseQualities(); - - // Check if we need to use the original quality scores instead - if ( RAC.USE_ORIGINAL_QUALS && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) { - Object obj = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG); - if ( obj instanceof String ) - quals = QualityUtils.fastqToPhred((String)obj); - else { - throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName())); - } - } - - final byte[] bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A'. - final boolean isNegStrand = read.getReadNegativeStrandFlag(); - final SAMReadGroupRecord readGroup = read.getReadGroup(); - String readGroupId; - String platform; - - // If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments - if( readGroup == null ) { - if( !warnUserNullReadGroup && RAC.FORCE_READ_GROUP == null ) { - Utils.warnUser("The input .bam file contains reads with no read group. " + - "Defaulting to read group ID = " + RAC.DEFAULT_READ_GROUP + " and platform = " + RAC.DEFAULT_PLATFORM + ". " + - "First observed at read with name = " + read.getReadName() ); - warnUserNullReadGroup = true; - } - // There is no readGroup so defaulting to these values - readGroupId = RAC.DEFAULT_READ_GROUP; - platform = RAC.DEFAULT_PLATFORM; - } else { - readGroupId = readGroup.getReadGroupId(); - platform = readGroup.getPlatform(); - } - - final int mappingQuality = read.getMappingQuality(); - final int length = bases.length; - - if( RAC.FORCE_READ_GROUP != null ) { // Collapse all the read groups into a single common String provided by the user - readGroupId = RAC.FORCE_READ_GROUP; - } - - if( RAC.FORCE_PLATFORM != null ) { - platform = RAC.FORCE_PLATFORM; - } - - Integer tile = IlluminaUtil.getTileFromReadName(read.getReadName()); - - return new ReadHashDatum( readGroupId, platform, quals, bases, isNegStrand, mappingQuality, length, tile ); - - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index f8ac343f6..a1a69dd61 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -1,9 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.*; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMReadGroupRecord; + /* * Copyright (c) 2009 The Broad Institute * @@ -51,6 +56,7 @@ public class RecalDataManager { public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams + private static boolean warnUserNullReadGroup = false; RecalDataManager() { data = new NHashMap(); @@ -212,4 +218,49 @@ public class RecalDataManager { return dataCollapsedByCovariateDouble.get( covariate - 2 ); // Table of empirical qualities where everything except read group, quality score, and given covariate has been collapsed } } + + /** + * Section of code shared between the two recalibration walkers which uses the command line arguments to adjust attributes of the read such as quals or platform string + * @param read The read to adjust + * @param RAC The list of shared command line arguments + */ + public static void parseSAMRecord( final SAMRecord read, final RecalibrationArgumentCollection RAC) { + + // Check if we need to use the original quality scores instead + if ( RAC.USE_ORIGINAL_QUALS && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) { + Object obj = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG); + if ( obj instanceof String ) + read.setBaseQualities( QualityUtils.fastqToPhred((String)obj) ); + else { + throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName())); + } + } + + SAMReadGroupRecord readGroup = read.getReadGroup(); + + // If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments + if( readGroup == null ) { + if( !warnUserNullReadGroup && RAC.FORCE_READ_GROUP == null ) { + Utils.warnUser("The input .bam file contains reads with no read group. " + + "Defaulting to read group ID = " + RAC.DEFAULT_READ_GROUP + " and platform = " + RAC.DEFAULT_PLATFORM + ". " + + "First observed at read with name = " + read.getReadName() ); + warnUserNullReadGroup = true; + } + // There is no readGroup so defaulting to these values + readGroup = new SAMReadGroupRecord( RAC.DEFAULT_READ_GROUP ); + readGroup.setPlatform( RAC.DEFAULT_PLATFORM ); + ((GATKSAMRecord)read).setReadGroup( readGroup ); + } + + if( RAC.FORCE_READ_GROUP != null && !read.getReadGroup().getReadGroupId().equals(RAC.FORCE_READ_GROUP) ) { // Collapse all the read groups into a single common String provided by the user + String oldPlatform = readGroup.getPlatform(); + readGroup = new SAMReadGroupRecord( RAC.FORCE_READ_GROUP ); + readGroup.setPlatform( oldPlatform ); + ((GATKSAMRecord)read).setReadGroup( readGroup ); + } + + if( RAC.FORCE_PLATFORM != null && !read.getReadGroup().getPlatform().equals(RAC.FORCE_PLATFORM)) { + read.getReadGroup().setPlatform( RAC.FORCE_PLATFORM ); + } + } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index 68f9815bf..399412ae5 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -93,7 +93,7 @@ public class TableRecalibrationWalker extends ReadWalker