From 0a6bd5a270ce99892469247b2a841624f2e4a5c3 Mon Sep 17 00:00:00 2001 From: rpoplin Date: Mon, 4 Jan 2010 20:52:15 +0000 Subject: [PATCH] CycleCovariate is now one-based so that 0 and -0 don't collide with each other. Solid recal modes now only change the inconsistent base and the previous base (along the direction of the read) instead of both the bases before and after. Removed estimatedNumberOfBins from the Covariate interface because it wasn't being used. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2498 348d0f76-0448-11de-a6fe-93d51630548a --- .../analyzecovariates/AnalyzeCovariates.java | 7 ------ .../gatk/walkers/recalibration/Covariate.java | 1 - .../walkers/recalibration/CycleCovariate.java | 15 +++++-------- .../walkers/recalibration/DinucCovariate.java | 4 ---- .../recalibration/HomopolymerCovariate.java | 4 ---- .../MappingQualityCovariate.java | 4 ---- .../recalibration/MinimumNQSCovariate.java | 4 ---- .../PairedReadOrderCovariate.java | 4 ---- .../recalibration/PositionCovariate.java | 4 ---- .../recalibration/PrimerRoundCovariate.java | 4 ---- .../recalibration/QualityScoreCovariate.java | 4 ---- .../recalibration/ReadGroupCovariate.java | 4 ---- .../recalibration/RecalDataManager.java | 13 ++++++----- .../TableRecalibrationWalker.java | 8 +------ .../walkers/recalibration/TileCovariate.java | 4 ---- .../RecalibrationWalkersIntegrationTest.java | 22 +++++++++---------- 16 files changed, 24 insertions(+), 82 deletions(-) diff --git a/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java index cc447b5f7..f44df4189 100755 --- a/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java +++ b/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java @@ -84,7 +84,6 @@ class AnalyzeCovariatesCLP extends CommandLineProgram { int lineNumber = 0; boolean foundAllCovariates = false; - int estimatedCapacity = 1; // Capacity is multiplicitive so this starts at one // Read in the covariates that were used from the input file requestedCovariates = new ArrayList(); @@ -108,7 +107,6 @@ class AnalyzeCovariatesCLP extends CommandLineProgram { try { Covariate covariate = (Covariate)covClass.newInstance(); requestedCovariates.add( covariate ); - estimatedCapacity *= covariate.estimatedNumberOfBins(); } catch ( InstantiationException e ) { throw new RuntimeException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) ); @@ -135,11 +133,6 @@ class AnalyzeCovariatesCLP extends CommandLineProgram { throw new RuntimeException( "Malformed input recalibration file. Covariate names can't be found in file: " + RECAL_FILE ); } - // Don't want to crash with out of heap space exception - if( estimatedCapacity > 300 * 40 * 200 || estimatedCapacity < 0 ) { // Could be negative if overflowed - estimatedCapacity = 300 * 40 * 200; - } - // Initialize any covariate member variables using the shared argument collection for( Covariate cov : requestedCovariates ) { cov.initialize( new RecalibrationArgumentCollection() ); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java index 0fb6c179f..70b8d5fcf 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java @@ -41,7 +41,6 @@ public interface Covariate { public void initialize( RecalibrationArgumentCollection RAC ); // Initialize any member variables using the command-line arguments passed to the walkers public Comparable getValue( SAMRecord read, int offset ); // Used to pick out the covariate's value from attributes of the read public Comparable getValue( String str ); // Used to get the covariate's value from input csv file in TableRecalibrationWalker - public int estimatedNumberOfBins(); // Used to estimate the amount space required for the full data HashMap } interface RequiredCovariate extends Covariate { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java index 7d2fdfc65..c9e9ec5f9 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java @@ -60,16 +60,16 @@ public class CycleCovariate implements StandardCovariate { // Used to pick out the covariate's value from attributes of the read public final Comparable getValue( final SAMRecord read, final int offset ) { - int cycle = 0; + int cycle = 1; //----------------------------- // ILLUMINA and SOLID //----------------------------- if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) { - cycle = offset; + cycle = offset + 1; if( read.getReadNegativeStrandFlag() ) { - cycle = read.getReadLength() - (offset + 1); + cycle = read.getReadLength() - offset; } } @@ -114,9 +114,9 @@ public class CycleCovariate implements StandardCovariate { //else if( read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) { // // The ligation cycle according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf - // int pos = offset; + // int pos = offset + 1; // if( read.getReadNegativeStrandFlag() ) { - // pos = read.getReadLength() - (offset + 1); + // pos = read.getReadLength() - offset; // } // cycle = pos / 5; // integer division //} @@ -153,9 +153,4 @@ public class CycleCovariate implements StandardCovariate { public final Comparable getValue( final String str ) { return Integer.parseInt( str ); } - - // Used to estimate the amount space required for the full data HashMap - public final int estimatedNumberOfBins() { - return 80; - } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java index 5133812dc..aa4d50b0a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java @@ -103,8 +103,4 @@ public class DinucCovariate implements StandardCovariate { } - // Used to estimate the amount space required for the full data HashMap - public final int estimatedNumberOfBins() { - return 16; - } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java index b5ea4e8fa..cfeebdfd1 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java @@ -95,8 +95,4 @@ public class HomopolymerCovariate implements ExperimentalCovariate { return Integer.parseInt( str ); } - // Used to estimate the amount space required for the full data HashMap - public final int estimatedNumberOfBins() { - return numBack + 1; - } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java index 589031c79..398884dbb 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java @@ -51,8 +51,4 @@ public class MappingQualityCovariate implements ExperimentalCovariate { return Integer.parseInt( str ); } - // Used to estimate the amount space required for the full data HashMap - public final int estimatedNumberOfBins() { - return 100; - } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java index af316c4a7..355e4bb5c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java @@ -66,8 +66,4 @@ public class MinimumNQSCovariate implements ExperimentalCovariate { return Integer.parseInt( str ); } - // Used to estimate the amount space required for the full data HashMap - public final int estimatedNumberOfBins() { - return 40; - } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PairedReadOrderCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PairedReadOrderCovariate.java index 9d04be16e..d070d1b3a 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PairedReadOrderCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PairedReadOrderCovariate.java @@ -21,8 +21,4 @@ public class PairedReadOrderCovariate implements ExperimentalCovariate{ return str; } - // Used to estimate the amount space required for the full data HashMap - public final int estimatedNumberOfBins() { - return 4; - } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java index 7c71db430..21a3bbef9 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java @@ -56,8 +56,4 @@ public class PositionCovariate implements ExperimentalCovariate { return Integer.parseInt( str ); } - // Used to estimate the amount space required for the full data HashMap - public final int estimatedNumberOfBins() { - return 100; - } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java index e8cc4313b..2613d6248 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java @@ -62,8 +62,4 @@ public class PrimerRoundCovariate implements ExperimentalCovariate { return Integer.parseInt( str ); } - // Used to estimate the amount space required for the full data HashMap - public final int estimatedNumberOfBins() { - return 5; - } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java index 021130609..337fcf4b5 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java @@ -51,8 +51,4 @@ public class QualityScoreCovariate implements RequiredCovariate { return Integer.parseInt( str ); } - // Used to estimate the amount space required for the full data HashMap - public final int estimatedNumberOfBins() { - return 40; - } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java index 255417409..4a87c1f8d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java @@ -53,10 +53,6 @@ public class ReadGroupCovariate implements RequiredCovariate{ return str; } - // Used to estimate the amount space required for the full data HashMap - public final int estimatedNumberOfBins() { - return 60; - } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index a25045ffe..53e8294c6 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -40,6 +40,7 @@ import net.sf.samtools.SAMReadGroupRecord; * * This helper class holds the data HashMap as well as submaps that represent the marginal distributions collapsed over all needed dimensions. * It also has static methods that are used to perform the various solid recalibration modes that attempt to correct the reference bias. + * This class holds the parsing methods that are shared between CountCovariates and TableRecalibration. */ public class RecalDataManager { @@ -347,10 +348,10 @@ public class RecalDataManager { originalQualScores[iii-1] = (byte)0; if( setBaseN ) { readBases[iii-1] = (byte)'N'; } } - if( !isMappedToRef || (char)readBases[iii+1] == refBases[iii+1] ) { - originalQualScores[iii+1] = (byte)0; - if( setBaseN ) { readBases[iii+1] = (byte)'N'; } - } + //if( !isMappedToRef || (char)readBases[iii+1] == refBases[iii+1] ) { + // originalQualScores[iii+1] = (byte)0; + // if( setBaseN ) { readBases[iii+1] = (byte)'N'; } + //} } } if( read.getReadNegativeStrandFlag() ) { @@ -382,9 +383,9 @@ public class RecalDataManager { throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG, read.getReadName())); } - for( int iii = 1; iii < inconsistency.length - 2; iii++ ) { + for( int iii = 1; iii < inconsistency.length - 1; iii++ ) { if( inconsistency[iii] == 1 ) { - for( int jjj = iii - 1; jjj <= iii + 1; jjj++ ) { // Correct this base and the one before it along the direction of the read + for( int jjj = iii - 1; jjj <= iii; jjj++ ) { // Correct this base and the one before it along the direction of the read if( !isMappedToRef || (char)readBases[jjj] == refBases[jjj] ) { if( colorSpaceQuals[jjj] == colorSpaceQuals[jjj+1] ) { // Equal evidence for the color implied base and the reference base, so flip a coin int rand = coinFlip.nextInt( 2 ); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index 8c911dbf4..f445ac669 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -92,7 +92,7 @@ public class TableRecalibrationWalker extends ReadWalker 300 * 40 * 200 || estimatedCapacity < 0 ) { // Could be negative if overflowed - estimatedCapacity = 300 * 40 * 200; - } final boolean createCollapsedTables = true; // Initialize any covariate member variables using the shared argument collection diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TileCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TileCovariate.java index cd1551791..246930908 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TileCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TileCovariate.java @@ -50,8 +50,4 @@ public class TileCovariate implements ExperimentalCovariate { return Integer.parseInt( str ); } - // Used to estimate the amount space required for the full data HashMap - public int estimatedNumberOfBins() { - return 120; - } } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index d85341150..7772aeb66 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -16,10 +16,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariates1() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "c1b54d4221fb4fa88e0231a74310708e" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "337ea30c4dcc2fe6a9adc442ffd0706b"); - e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "ffbfd38b1720cfb67ba1bb63d4308552" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "60e227ea8c3409fa85b92cae7ea6574f" ); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "604c0d898c9df9acbeeade9979707546" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f03f6186f54bc3f841639a206d424d97"); + e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "fe873886e3efa3566f08a6ae26d71b43" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "996450b7c2e9ef68b9e76d57f2d5288a" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -48,10 +48,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibrator1() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "f7749792ffffbb86aec66e92a3bddf7f" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f1780e3c3e12f07527e0468149312f10"); - e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "c54a67a1687a4139a8ae19762217987f" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "d9ddbacdafc621d830a1db637973d795" ); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "d1a9b38f782af3edf223908cb71c7205" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "27b3eaf3c02ffc5fb3d7815468d9958e"); + e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b7f4d3bfb761f29531a37336615046ff" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "1faadda11d5c0278575d2f0368a65f14" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -79,7 +79,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesVCF() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "3ee0e811682c0f29951128204765ece9"); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "d90342547ed228cf446caf594586f4b0"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -106,7 +106,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesNoReadGroups() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "f49bc79225bffbf8b64590b65a4b4305" ); + e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "7e3045dcb2da1f4b305db7fa72bd1b51" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -135,7 +135,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorNoReadGroups() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "62413fdbfe99cd6e24992de4234de5bc" ); + e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "ad345fcfb2faaf97eb0291ffa61b3228" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey();