diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index 15186f166..1ce05679c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -258,7 +258,6 @@ public class CovariateCounterWalker extends LocusWalker { SAMRecord read; int offset; byte refBase; - byte prevBase; byte[] bases; // For each read at this locus @@ -269,39 +268,29 @@ public class CovariateCounterWalker extends LocusWalker { RecalDataManager.parseSAMRecord( read, RAC ); RecalDataManager.parseColorSpace( read ); - // Skip first and last base because there is no dinuc - // BUGBUG: Technically we only have to skip the first base on forward reads and the last base on negative strand reads. Change after passing old integration tests. - if( offset > 0 && offset < read.getReadLength() - 1) { - // Skip if base quality is zero - if( read.getBaseQualities()[offset] > 0 ) { + // Skip if base quality is zero + if( read.getBaseQualities()[offset] > 0 ) { - bases = read.getReadBases(); - refBase = (byte)ref.getBase(); - prevBase = bases[offset - 1]; + bases = read.getReadBases(); + refBase = (byte)ref.getBase(); - // DinucCovariate is responsible for getting the complement bases if needed - if( read.getReadNegativeStrandFlag() ) { - prevBase = bases[offset + 1]; - } + // Skip if this base is an 'N' or etc. + // BUGBUG: For DinucCovariate we should use previous reference base, not the previous base in this read. + if( BaseUtils.isRegularBase( (char)(bases[offset]) ) ) { - // Skip if this base or the previous one was an 'N' or etc. - // BUGBUG: For DinucCovariate we should use previous reference base, not the previous base in this read. - if( BaseUtils.isRegularBase( (char)prevBase ) && BaseUtils.isRegularBase( (char)(bases[offset]) ) ) { + // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it + if( !read.getReadGroup().getPlatform().equalsIgnoreCase("SOLID") || RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || !RecalDataManager.isInconsistentColorSpace( read, offset ) ) { - // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it - if( !read.getReadGroup().getPlatform().equalsIgnoreCase("SOLID") || RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || !RecalDataManager.isInconsistentColorSpace( read, offset ) ) { + // This base finally passed all the checks for a good base, so add it to the big data hashmap + updateDataFromRead( read, offset, refBase ); - // This base finally passed all the checks for a good base, so add it to the big data hashmap - updateDataFromRead( read, offset, refBase ); - - } else { // calculate SOLID reference insertion rate - if( ref.getBase() == (char)bases[offset] ) { - solidInsertedReferenceBases++; - } else { - otherColorSpaceInconsistency++; - } + } else { // calculate SOLID reference insertion rate + if( ref.getBase() == (char)bases[offset] ) { + solidInsertedReferenceBases++; + } else { + otherColorSpaceInconsistency++; } - } + } } } } @@ -380,7 +369,7 @@ public class CovariateCounterWalker extends LocusWalker { // Loop through the list of requested covariates and pick out the value from the read, offset, and reference for( Covariate covariate : requestedCovariates ) { - key.add( covariate.getValue( read, offset ) ); + key.add(covariate.getValue( read, offset )); } // Using the list of covariate values as a key, pick out the RecalDatum from the data HashMap diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java index bc57f754f..fab76357d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java @@ -141,7 +141,6 @@ public class CycleCovariate implements StandardCovariate { return getValue( read, offset ); // a recursive call } - // TODO -- Ryan: sanity check me [EB] // differentiate between first and second of pair if ( read.getReadPairedFlag() && read.getSecondOfPairFlag() ) cycle *= -1; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java index 1278e7292..a2a7d79d4 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java @@ -42,6 +42,9 @@ import net.sf.samtools.SAMRecord; public class DinucCovariate implements StandardCovariate { + private static final byte NO_CALL = (byte)'N'; + private static final Dinuc NO_DINUC = new Dinuc(NO_CALL, NO_CALL); + HashMap dinucHashMap; // Initialize any member variables using the command-line arguments passed to the walkers @@ -53,6 +56,8 @@ public class DinucCovariate implements StandardCovariate { dinucHashMap.put( Dinuc.hashBytes(byte1, byte2), new Dinuc(byte1, byte2) ); // This might seem silly, but Strings are too slow } } + // add the "no dinuc" entry too + dinucHashMap.put( Dinuc.hashBytes(NO_CALL, NO_CALL), NO_DINUC); } // Used to pick out the covariate's value from attributes of the read @@ -63,12 +68,23 @@ public class DinucCovariate implements StandardCovariate { byte[] bases = read.getReadBases(); // If this is a negative strand read then we need to reverse the direction for our previous base if( read.getReadNegativeStrandFlag() ) { + // no dinuc at the beginning of the read + if( offset == bases.length-1 ) + return NO_DINUC; base = (byte)BaseUtils.simpleComplement( (char)(bases[offset]) ); prevBase = (byte)BaseUtils.simpleComplement( (char)(bases[offset + 1]) ); } else { + // no dinuc at the beginning of the read + if( offset == 0 ) + return NO_DINUC; base = bases[offset]; prevBase = bases[offset - 1]; } + + // make sure the previous base is good + if( !BaseUtils.isRegularBase(prevBase) ) + return NO_DINUC; + return dinucHashMap.get( Dinuc.hashBytes( prevBase, base ) ); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index cd2c74273..c54e34375 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -468,8 +468,9 @@ public class RecalDataManager { * @return Returns true if the base is inconsistent with the color space */ public static boolean isInconsistentColorSpace( final SAMRecord read, final int offset ) { - if( read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) != null ) { - byte[] colorSpace = ((byte[])read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG)); + Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG); + if( attr != null ) { + byte[] colorSpace = (byte[])attr; return colorSpace[offset] != 0; } else { return false; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index bb3b79161..b61e04a7a 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -310,17 +310,10 @@ public class TableRecalibrationWalker extends ReadWalker e = new HashMap(); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12892.SLX.SRP000031.2009_06.selected.bam", "9eddef42ae847f2edd966d3a5b463e50" ); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "22ee532c7e521d8cbfabedb07673429d"); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "8b91c48f186c1e44723ac4e047ac553c" ); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "f586fcd8642deae6504d39430b20b713" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12892.SLX.SRP000031.2009_06.selected.bam", "c1b54d4221fb4fa88e0231a74310708e" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "95e26e8247d0c5e43705048c5ee64873"); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "ffbfd38b1720cfb67ba1bb63d4308552" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "427306f07f5fd905439b28a770f3d3d6" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -49,10 +49,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibrator1() { HashMap e = new HashMap(); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12892.SLX.SRP000031.2009_06.selected.bam", "a3d4153765625a451bee768ad46775b3" ); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "5954b7246d2e0c5c65267026819d5933"); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "91fd3c557db56fd969523d8c06d71a0b" ); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "9655f57949b01d6ee8a7c9b92e61a47f" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12892.SLX.SRP000031.2009_06.selected.bam", "ca839a4eb0ef443e0486b96843304f92" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "fd27c3ab424ef01c77d2dbdedf721a8a"); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b6e16538bda18336f176417cdf686f6a" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "be9a46e0d9b7ad90ce303d08dfb7b4de" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -80,7 +80,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesVCF() { HashMap e = new HashMap(); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "79e6738031f5a6f993838b33d70716c3"); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "5e132283b906f6de3328986fd0101be7"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -108,7 +108,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesNoReadGroups() { HashMap e = new HashMap(); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "d6f3ea0cf2129ca5488977f087f1aa25" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "199979b483f6c03c0977141f4fea9961" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -138,7 +138,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorNoReadGroups() { HashMap e = new HashMap(); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "c0d5fdd6e07395ae2d7b08431995b30d" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "2889c63854e233fadd9178ccf5b2517b" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -166,7 +166,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesNoIndex() { HashMap e = new HashMap(); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "f5a0adce5458ab661029fa5056a55551" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "906e5a08401722cc9a5528d2fd20ea6a" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -193,7 +193,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorNoIndex() { HashMap e = new HashMap(); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "33cb41fa3e959ac15ac9af329167a736" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "aa38b04c6b58badabb6b09d590284a2a" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey();