From f587ff46af8800fa70ca0408ca89bed635000a21 Mon Sep 17 00:00:00 2001 From: rpoplin Date: Thu, 7 Jan 2010 22:51:41 +0000 Subject: [PATCH] Tile is now a standard covariate. By default the TileCovariate returns -1 if tile can't be derived from the read's name. Added a new command line option -throwTileException which will force TileCovariate to throw an exception if tile can't be derived for a read. Singleton covariates, such as any read group without tile info, must be skipped over in TableRecalibration so that the sequential formulation doesn't apply the same correction more than once. TileCovariate class has been added to the Early Access package. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2544 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/recalibration/RecalDataManager.java | 15 +++++++++++++++ .../RecalibrationArgumentCollection.java | 3 +++ .../gatk/walkers/recalibration/TileCovariate.java | 13 +++++++++++-- .../RecalibrationWalkersIntegrationTest.java | 15 ++++++++------- packages/GenomeAnalysisTK.xml | 3 +++ 5 files changed, 40 insertions(+), 9 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index 14565d141..7e1f60c2a 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -141,6 +141,7 @@ public class RecalDataManager { recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.data, smoothing, maxQual); for( NestedHashMap map : dataCollapsedByCovariate ) { recursivelyGenerateEmpiricalQualities(map.data, smoothing, maxQual); + checkForSingletons(map.data); } } @@ -156,6 +157,20 @@ public class RecalDataManager { } } + private void checkForSingletons( final Map data ) { + + for( Object comp : data.keySet() ) { + final Object val = data.get(comp); + if( val instanceof RecalDatum ) { // We are at the end of the nested hash maps + if( data.keySet().size() == 1) { + data.clear(); // don't TableRecalibrate a non-required covariate if it only has one element because that correction has already been done in a previous sequential step + } + } else { // Another layer in the nested hash map + checkForSingletons( (Map) val ); + } + } + } + /** * Get the appropriate collapsed table out of the set of all the tables held by this Object * @param covariate Which covariate indexes the desired collapsed HashMap diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java index ed355404d..6e307b705 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java @@ -59,6 +59,9 @@ public class RecalibrationArgumentCollection { public int WINDOW_SIZE = 5; @Argument(fullName = "homopolymer_nback", shortName="nback", doc="The number of previous bases to look at in HomopolymerCovariate", required=false) public int HOMOPOLYMER_NBACK = 7; + @Argument(fullName = "exception_if_no_tile", shortName="throwTileException", doc="If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1", required=false) + public boolean EXCEPTION_IF_NO_TILE = false; + public boolean checkSolidRecalMode() { return ( SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") || diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TileCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TileCovariate.java index 246930908..03434be12 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TileCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TileCovariate.java @@ -21,6 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ + package org.broadinstitute.sting.gatk.walkers.recalibration; import org.broadinstitute.sting.utils.StingException; @@ -30,17 +31,25 @@ import edu.mit.broad.picard.illumina.parser.IlluminaUtil; /** * @author alecw@broadinstitute.org */ -public class TileCovariate implements ExperimentalCovariate { + +public class TileCovariate implements StandardCovariate { + + private static boolean exceptionWhenNoTile = false; // Initialize any member variables using the command-line arguments passed to the walkers public void initialize( final RecalibrationArgumentCollection RAC ) { + exceptionWhenNoTile = RAC.EXCEPTION_IF_NO_TILE; } // Used to pick out the covariate's value from attributes of the read public Comparable getValue(final SAMRecord read, final int offset) { Integer tile = IlluminaUtil.getTileFromReadName(read.getReadName()); if (tile == null) { - throw new StingException("Tile number not defined for read"); + if( exceptionWhenNoTile ) { + throw new StingException( "Tile number not defined for read: " + read.getReadName() ); + } else { + return -1; + } } return tile; } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index ac61dddee..36201dc26 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -16,10 +16,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariates1() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "604c0d898c9df9acbeeade9979707546" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f03f6186f54bc3f841639a206d424d97"); - e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "fe873886e3efa3566f08a6ae26d71b43" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "996450b7c2e9ef68b9e76d57f2d5288a" ); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "e5b2d5a2f4283718dae678cbc84be847" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "ff1b3a18b67b09560cacc3b5dea0a034"); + e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7d6428a76e07ed4b99351aa4df89634d" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "a93b0263acdc856b885f95848852140d" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -36,6 +36,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -cov QualityScoreCovariate" + " -cov CycleCovariate" + " -cov DinucCovariate" + + " -cov TileCovariate" + " --solid_recal_mode SET_Q_ZERO" + " -recalFile %s", 1, // just one output file @@ -48,10 +49,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibrator1() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "d1a9b38f782af3edf223908cb71c7205" ); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "6c59d291c37d053e0f188b762f3060a5" ); e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "27b3eaf3c02ffc5fb3d7815468d9958e"); - e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b7f4d3bfb761f29531a37336615046ff" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "1faadda11d5c0278575d2f0368a65f14" ); + e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7ebdce416b72679e1cf88cc9886a5edc" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "a39afc94ed74f8137c9d43285997bd90" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); diff --git a/packages/GenomeAnalysisTK.xml b/packages/GenomeAnalysisTK.xml index 33f9f5e95..9a32eb94d 100644 --- a/packages/GenomeAnalysisTK.xml +++ b/packages/GenomeAnalysisTK.xml @@ -25,6 +25,7 @@ org.broadinstitute.sting.gatk.walkers.recalibration.CycleCovariate org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate org.broadinstitute.sting.gatk.walkers.recalibration.HomopolymerCovariate + org.broadinstitute.sting.gatk.walkers.recalibration.TileCovariate org.broadinstitute.sting.gatk.walkers.indels.CleanedReadInjector org.broadinstitute.sting.gatk.walkers.indels.IndelIntervalWalker @@ -62,6 +63,8 @@ org.broadinstitute.sting.gatk.walkers.recalibration.PrimerRoundCovariate org.broadinstitute.sting.gatk.walkers.recalibration.CycleCovariate org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate + org.broadinstitute.sting.gatk.walkers.recalibration.HomopolymerCovariate + org.broadinstitute.sting.gatk.walkers.recalibration.TileCovariate