diff --git a/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java index cc447b5f7..f44df4189 100755 --- a/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java +++ b/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java @@ -84,7 +84,6 @@ class AnalyzeCovariatesCLP extends CommandLineProgram { int lineNumber = 0; boolean foundAllCovariates = false; - int estimatedCapacity = 1; // Capacity is multiplicitive so this starts at one // Read in the covariates that were used from the input file requestedCovariates = new ArrayList(); @@ -108,7 +107,6 @@ class AnalyzeCovariatesCLP extends CommandLineProgram { try { Covariate covariate = (Covariate)covClass.newInstance(); requestedCovariates.add( covariate ); - estimatedCapacity *= covariate.estimatedNumberOfBins(); } catch ( InstantiationException e ) { throw new RuntimeException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) ); @@ -135,11 +133,6 @@ class AnalyzeCovariatesCLP extends CommandLineProgram { throw new RuntimeException( "Malformed input recalibration file. Covariate names can't be found in file: " + RECAL_FILE ); } - // Don't want to crash with out of heap space exception - if( estimatedCapacity > 300 * 40 * 200 || estimatedCapacity < 0 ) { // Could be negative if overflowed - estimatedCapacity = 300 * 40 * 200; - } - // Initialize any covariate member variables using the shared argument collection for( Covariate cov : requestedCovariates ) { cov.initialize( new RecalibrationArgumentCollection() ); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java index 0fb6c179f..70b8d5fcf 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java @@ -41,7 +41,6 @@ public interface Covariate { public void initialize( RecalibrationArgumentCollection RAC ); // Initialize any member variables using the command-line arguments passed to the walkers public Comparable getValue( SAMRecord read, int offset ); // Used to pick out the covariate's value from attributes of the read public Comparable getValue( String str ); // Used to get the covariate's value from input csv file in TableRecalibrationWalker - public int estimatedNumberOfBins(); // Used to estimate the amount space required for the full data HashMap } interface RequiredCovariate extends Covariate { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java index 7d2fdfc65..c9e9ec5f9 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java @@ -60,16 +60,16 @@ public class CycleCovariate implements StandardCovariate { // Used to pick out the covariate's value from attributes of the read public final Comparable getValue( final SAMRecord read, final int offset ) { - int cycle = 0; + int cycle = 1; //----------------------------- // ILLUMINA and SOLID //----------------------------- if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) { - cycle = offset; + cycle = offset + 1; if( read.getReadNegativeStrandFlag() ) { - cycle = read.getReadLength() - (offset + 1); + cycle = read.getReadLength() - offset; } } @@ -114,9 +114,9 @@ public class CycleCovariate implements StandardCovariate { //else if( read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) { // // The ligation cycle according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf - // int pos = offset; + // int pos = offset + 1; // if( read.getReadNegativeStrandFlag() ) { - // pos = read.getReadLength() - (offset + 1); + // pos = read.getReadLength() - offset; // } // cycle = pos / 5; // integer division //} @@ -153,9 +153,4 @@ public class CycleCovariate implements StandardCovariate { public final Comparable getValue( final String str ) { return Integer.parseInt( str ); } - - // Used to estimate the amount space required for the full data HashMap - public final int estimatedNumberOfBins() { - return 80; - } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java index 5133812dc..aa4d50b0a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java @@ -103,8 +103,4 @@ public class DinucCovariate implements StandardCovariate { } - // Used to estimate the amount space required for the full data HashMap - public final int estimatedNumberOfBins() { - return 16; - } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java index b5ea4e8fa..cfeebdfd1 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java @@ -95,8 +95,4 @@ public class HomopolymerCovariate implements ExperimentalCovariate { return Integer.parseInt( str ); } - // Used to estimate the amount space required for the full data HashMap - public final int estimatedNumberOfBins() { - return numBack + 1; - } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java index 589031c79..398884dbb 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java @@ -51,8 +51,4 @@ public class MappingQualityCovariate implements ExperimentalCovariate { return Integer.parseInt( str ); } - // Used to estimate the amount space required for the full data HashMap - public final int estimatedNumberOfBins() { - return 100; - } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java index af316c4a7..355e4bb5c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java @@ -66,8 +66,4 @@ public class MinimumNQSCovariate implements ExperimentalCovariate { return Integer.parseInt( str ); } - // Used to estimate the amount space required for the full data HashMap - public final int estimatedNumberOfBins() { - return 40; - } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PairedReadOrderCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PairedReadOrderCovariate.java index 9d04be16e..d070d1b3a 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PairedReadOrderCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PairedReadOrderCovariate.java @@ -21,8 +21,4 @@ public class PairedReadOrderCovariate implements ExperimentalCovariate{ return str; } - // Used to estimate the amount space required for the full data HashMap - public final int estimatedNumberOfBins() { - return 4; - } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java index 7c71db430..21a3bbef9 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java @@ -56,8 +56,4 @@ public class PositionCovariate implements ExperimentalCovariate { return Integer.parseInt( str ); } - // Used to estimate the amount space required for the full data HashMap - public final int estimatedNumberOfBins() { - return 100; - } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java index e8cc4313b..2613d6248 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java @@ -62,8 +62,4 @@ public class PrimerRoundCovariate implements ExperimentalCovariate { return Integer.parseInt( str ); } - // Used to estimate the amount space required for the full data HashMap - public final int estimatedNumberOfBins() { - return 5; - } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java index 021130609..337fcf4b5 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java @@ -51,8 +51,4 @@ public class QualityScoreCovariate implements RequiredCovariate { return Integer.parseInt( str ); } - // Used to estimate the amount space required for the full data HashMap - public final int estimatedNumberOfBins() { - return 40; - } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java index 255417409..4a87c1f8d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java @@ -53,10 +53,6 @@ public class ReadGroupCovariate implements RequiredCovariate{ return str; } - // Used to estimate the amount space required for the full data HashMap - public final int estimatedNumberOfBins() { - return 60; - } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index a25045ffe..53e8294c6 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -40,6 +40,7 @@ import net.sf.samtools.SAMReadGroupRecord; * * This helper class holds the data HashMap as well as submaps that represent the marginal distributions collapsed over all needed dimensions. * It also has static methods that are used to perform the various solid recalibration modes that attempt to correct the reference bias. + * This class holds the parsing methods that are shared between CountCovariates and TableRecalibration. */ public class RecalDataManager { @@ -347,10 +348,10 @@ public class RecalDataManager { originalQualScores[iii-1] = (byte)0; if( setBaseN ) { readBases[iii-1] = (byte)'N'; } } - if( !isMappedToRef || (char)readBases[iii+1] == refBases[iii+1] ) { - originalQualScores[iii+1] = (byte)0; - if( setBaseN ) { readBases[iii+1] = (byte)'N'; } - } + //if( !isMappedToRef || (char)readBases[iii+1] == refBases[iii+1] ) { + // originalQualScores[iii+1] = (byte)0; + // if( setBaseN ) { readBases[iii+1] = (byte)'N'; } + //} } } if( read.getReadNegativeStrandFlag() ) { @@ -382,9 +383,9 @@ public class RecalDataManager { throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG, read.getReadName())); } - for( int iii = 1; iii < inconsistency.length - 2; iii++ ) { + for( int iii = 1; iii < inconsistency.length - 1; iii++ ) { if( inconsistency[iii] == 1 ) { - for( int jjj = iii - 1; jjj <= iii + 1; jjj++ ) { // Correct this base and the one before it along the direction of the read + for( int jjj = iii - 1; jjj <= iii; jjj++ ) { // Correct this base and the one before it along the direction of the read if( !isMappedToRef || (char)readBases[jjj] == refBases[jjj] ) { if( colorSpaceQuals[jjj] == colorSpaceQuals[jjj+1] ) { // Equal evidence for the color implied base and the reference base, so flip a coin int rand = coinFlip.nextInt( 2 ); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index 8c911dbf4..f445ac669 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -92,7 +92,7 @@ public class TableRecalibrationWalker extends ReadWalker 300 * 40 * 200 || estimatedCapacity < 0 ) { // Could be negative if overflowed - estimatedCapacity = 300 * 40 * 200; - } final boolean createCollapsedTables = true; // Initialize any covariate member variables using the shared argument collection diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TileCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TileCovariate.java index cd1551791..246930908 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TileCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TileCovariate.java @@ -50,8 +50,4 @@ public class TileCovariate implements ExperimentalCovariate { return Integer.parseInt( str ); } - // Used to estimate the amount space required for the full data HashMap - public int estimatedNumberOfBins() { - return 120; - } } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index d85341150..7772aeb66 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -16,10 +16,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariates1() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "c1b54d4221fb4fa88e0231a74310708e" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "337ea30c4dcc2fe6a9adc442ffd0706b"); - e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "ffbfd38b1720cfb67ba1bb63d4308552" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "60e227ea8c3409fa85b92cae7ea6574f" ); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "604c0d898c9df9acbeeade9979707546" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f03f6186f54bc3f841639a206d424d97"); + e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "fe873886e3efa3566f08a6ae26d71b43" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "996450b7c2e9ef68b9e76d57f2d5288a" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -48,10 +48,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibrator1() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "f7749792ffffbb86aec66e92a3bddf7f" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f1780e3c3e12f07527e0468149312f10"); - e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "c54a67a1687a4139a8ae19762217987f" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "d9ddbacdafc621d830a1db637973d795" ); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "d1a9b38f782af3edf223908cb71c7205" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "27b3eaf3c02ffc5fb3d7815468d9958e"); + e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b7f4d3bfb761f29531a37336615046ff" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "1faadda11d5c0278575d2f0368a65f14" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -79,7 +79,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesVCF() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "3ee0e811682c0f29951128204765ece9"); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "d90342547ed228cf446caf594586f4b0"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -106,7 +106,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesNoReadGroups() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "f49bc79225bffbf8b64590b65a4b4305" ); + e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "7e3045dcb2da1f4b305db7fa72bd1b51" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -135,7 +135,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorNoReadGroups() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "62413fdbfe99cd6e24992de4234de5bc" ); + e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "ad345fcfb2faaf97eb0291ffa61b3228" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey();