From 872a9d1c7bad615b7ae8e339b154af273e142c09 Mon Sep 17 00:00:00 2001 From: ebanks Date: Wed, 23 Dec 2009 16:26:43 +0000 Subject: [PATCH] I'm making this change now (as opposed to waiting until Monday) to honor Tim's request. The cycle covariate is now first/second of pair aware. I'm taking it on faith from both Chris Hartl (waiting on slides from him) and Tim that this is the right thing to do. We'll have Ryan confirm it all next week. The only change is that if a read is the second of a pair, we multiple the cycle by -1 (a simple way of separating its index from that of its mate). Of course, this broke all integration tests. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2431 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/recalibration/CycleCovariate.java | 17 +++++++++----- .../RecalibrationWalkersIntegrationTest.java | 22 +++++++++---------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java index 74956feef..bc57f754f 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java @@ -60,16 +60,17 @@ public class CycleCovariate implements StandardCovariate { // Used to pick out the covariate's value from attributes of the read public final Comparable getValue( final SAMRecord read, final int offset ) { + int cycle = 0; + //----------------------------- // ILLUMINA //----------------------------- if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) ) { - int cycle = offset; + cycle = offset; if( read.getReadNegativeStrandFlag() ) { cycle = read.getReadLength() - (offset + 1); } - return cycle; } //----------------------------- @@ -77,7 +78,6 @@ public class CycleCovariate implements StandardCovariate { //----------------------------- else if( read.getReadGroup().getPlatform().contains( "454" ) ) { // Some bams have "LS454" and others have just "454" - int cycle = 0; byte[] bases = read.getReadBases(); // BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change @@ -106,8 +106,6 @@ public class CycleCovariate implements StandardCovariate { if( iii >= offset && !BaseUtils.isRegularBase(bases[iii]) ) { iii--; } } } - - return cycle; } //----------------------------- @@ -120,7 +118,7 @@ public class CycleCovariate implements StandardCovariate { if( read.getReadNegativeStrandFlag() ) { pos = read.getReadLength() - (offset + 1); } - return pos / 5; // integer division + cycle = pos / 5; // integer division } //----------------------------- @@ -142,6 +140,13 @@ public class CycleCovariate implements StandardCovariate { read.getReadGroup().setPlatform( defaultPlatform ); return getValue( read, offset ); // a recursive call } + + // TODO -- Ryan: sanity check me [EB] + // differentiate between first and second of pair + if ( read.getReadPairedFlag() && read.getSecondOfPairFlag() ) + cycle *= -1; + + return cycle; } // Used to get the covariate's value from input csv file in TableRecalibrationWalker diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index 3436a075e..1fa61fbf1 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -16,10 +16,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariates1() { HashMap e = new HashMap(); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12892.SLX.SRP000031.2009_06.selected.bam", "16f87c9644b27c3c3fe7a963eed45d2d" ); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "5a4873064e602e1f945f3881c6b4a3e5"); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "42f78368cc85eb3963329ad48c244e80" ); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "f97ec61e98db1a580900d9f4985080d0" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12892.SLX.SRP000031.2009_06.selected.bam", "9eddef42ae847f2edd966d3a5b463e50" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "22ee532c7e521d8cbfabedb07673429d"); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "8b91c48f186c1e44723ac4e047ac553c" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "f586fcd8642deae6504d39430b20b713" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -49,10 +49,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibrator1() { HashMap e = new HashMap(); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12892.SLX.SRP000031.2009_06.selected.bam", "d7f3e0db5ed9fefc917144a0f937d50d" ); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "1f1770363f381d06e8db9fead03ad7bb"); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "563cdb25b5a963121299ad319d5ec2ae" ); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "dd8b61060914b75199ca5babc60248fa" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12892.SLX.SRP000031.2009_06.selected.bam", "8d68e99f5ef1138f25893a6b2725d2e9" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "5954b7246d2e0c5c65267026819d5933"); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "91fd3c557db56fd969523d8c06d71a0b" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "7dd4c6a3f318a1dccb3f33d08bff953b" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -80,7 +80,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesVCF() { HashMap e = new HashMap(); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "bc0e62dc366f2f6ca70847a832b4aad4"); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "79e6738031f5a6f993838b33d70716c3"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -108,7 +108,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesNoReadGroups() { HashMap e = new HashMap(); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "9d83a1653e076e028fecf4c2575f56e9" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "d6f3ea0cf2129ca5488977f087f1aa25" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -138,7 +138,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorNoReadGroups() { HashMap e = new HashMap(); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "8853add9cfd0a8171ca917b1c6452e0a" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "c0d5fdd6e07395ae2d7b08431995b30d" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey();