diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java index c9e9ec5f9..6c601a61c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java @@ -39,7 +39,7 @@ import net.sf.samtools.SAMRecord; * For Solexa the cycle is simply the position in the read (counting backwards if it is a negative strand read) * For 454 the cycle is the TACG flow cycle, that is, each flow grabs all the TACG's in order in a single cycle * For example, for the read: AAACCCCGAAATTTTTACTG - * the cycle would be 00000000111222222233 + * the cycle would be 11111111222333333344 * For SOLiD the cycle is a more complicated mixture of ligation cycle and primer round */ @@ -82,7 +82,7 @@ public class CycleCovariate implements StandardCovariate { // BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change // For example, AAAAAAA was probably read in two flow cycles but here we count it as one - if( !read.getReadNegativeStrandFlag() ) { // forward direction + if( !read.getReadNegativeStrandFlag() ) { // Forward direction int iii = 0; while( iii <= offset ) { @@ -94,7 +94,7 @@ public class CycleCovariate implements StandardCovariate { if( iii <= offset && !BaseUtils.isRegularBase(bases[iii]) ) { iii++; } } - } else { // negative direction + } else { // Negative direction int iii = bases.length-1; while( iii >= offset ) { @@ -141,10 +141,11 @@ public class CycleCovariate implements StandardCovariate { return getValue( read, offset ); // A recursive call } + // This functionality was moved to PairedReadOrderCovariate // Differentiate between first and second of pair - if( read.getReadPairedFlag() && read.getSecondOfPairFlag() ) { - cycle *= -1; - } + //if( read.getReadPairedFlag() && read.getSecondOfPairFlag() ) { + // cycle *= -1; + //} return cycle; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PairedReadOrderCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PairedReadOrderCovariate.java index d070d1b3a..bc3ffda0d 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PairedReadOrderCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PairedReadOrderCovariate.java @@ -6,17 +6,19 @@ import net.sf.samtools.SAMRecord; * Created by IntelliJ IDEA. * User: chartl * Date: Dec 16, 2009 - * Time: 3:22:19 PM - * To change this template use File | Settings | File Templates. */ -public class PairedReadOrderCovariate implements ExperimentalCovariate{ - public void initialize (final RecalibrationArgumentCollection rac ) { /* do nothing */ } +public class PairedReadOrderCovariate implements StandardCovariate{ - public final Comparable getValue(final SAMRecord read, final int offset) { - return read.getReadPairedFlag() ? "Not_Paired" : read.getMateUnmappedFlag() ? "Mate_Unmapped" : read.getFirstOfPairFlag() ? "First_Read" : "Second_Read"; + // Initialize any member variables using the command-line arguments passed to the walkers + public void initialize ( final RecalibrationArgumentCollection rac ) { /* do nothing */ } + + // Used to pick out the covariate's value from attributes of the read + public final Comparable getValue( final SAMRecord read, final int offset ) { + return read.getReadPairedFlag() ? (read.getFirstOfPairFlag() ? "First_Read" : "Second_Read") : "Not_Paired"; } + // Used to get the covariate's value from input csv file in TableRecalibrationWalker public final Comparable getValue( final String str ) { return str; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index c8ea00bd1..586b30050 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -123,7 +123,7 @@ public class RecalDataManager { for( int iii = 0; iii < dataCollapsedByCovariate.size(); iii++ ) { covariateCollapsedKey[0] = key[0]; // Make a new key with the read group ... covariateCollapsedKey[1] = key[1]; // and quality score ... - Object theCovariateElement = key[iii + 2]; // and the given covariate + final Object theCovariateElement = key[iii + 2]; // and the given covariate if( theCovariateElement != null ) { covariateCollapsedKey[2] = theCovariateElement; collapsedDatum = (RecalDatum) dataCollapsedByCovariate.get(iii).get( covariateCollapsedKey ); @@ -202,7 +202,7 @@ public class RecalDataManager { // Check if we need to use the original quality scores instead if( RAC.USE_ORIGINAL_QUALS ) { - Object attr = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG); + final Object attr = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG); if( attr != null ) { if( attr instanceof String ) { read.setBaseQualities( QualityUtils.fastqToPhred((String)attr) ); @@ -229,7 +229,7 @@ public class RecalDataManager { } if( RAC.FORCE_READ_GROUP != null && !readGroup.getReadGroupId().equals(RAC.FORCE_READ_GROUP) ) { // Collapse all the read groups into a single common String provided by the user - String oldPlatform = readGroup.getPlatform(); + final String oldPlatform = readGroup.getPlatform(); readGroup = new SAMReadGroupRecord( RAC.FORCE_READ_GROUP ); readGroup.setPlatform( oldPlatform ); ((GATKSAMRecord)read).setReadGroup( readGroup ); @@ -252,7 +252,7 @@ public class RecalDataManager { // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base if( read.getReadGroup().getPlatform().equalsIgnoreCase("SOLID") ) { if( read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null ) { // Haven't calculated the inconsistency array yet for this read - Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG); + final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG); if( attr != null ) { char[] colorSpace; if( attr instanceof String ) { @@ -309,7 +309,7 @@ public class RecalDataManager { // Loop over the read and calculate first the infered bases from the color and then check if it is consistent with the read byte[] readBases = read.getReadBases(); - byte[] colorImpliedBases = readBases.clone(); + final byte[] colorImpliedBases = readBases.clone(); char[] refBasesDirRead = refBases; if( read.getReadNegativeStrandFlag() ) { readBases = BaseUtils.simpleReverseComplement( read.getReadBases() ); @@ -489,7 +489,7 @@ public class RecalDataManager { return inconsistency[offset] != 0; } - // This block of code is if you want to check both the offset and the next base for color space inconsistency + // This block of code is for if you want to check both the offset and the next base for color space inconsistency //if( read.getReadNegativeStrandFlag() ) { // Negative direction // if( offset == 0 ) { // return inconsistency[0] != 0; @@ -503,6 +503,7 @@ public class RecalDataManager { // return (inconsistency[offset] != 0) || (inconsistency[offset + 1] != 0); // } //} + } else { return false; } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index 65ad18d84..901819f5f 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -16,10 +16,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariates1() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "e5b2d5a2f4283718dae678cbc84be847" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "89084b43b824f9e3c5e2afdfe0930542"); - e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7d6428a76e07ed4b99351aa4df89634d" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "6ede6fc840c4e5070a58a919b48e7504" ); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "e67a329112061beba0f774844e5f4635" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "a626fa8f9c3836ec9764a718c4b6a27f"); + e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "284939b3760a95f51c350c8340b99b31" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "c7c28128edf4a54e6ac9e752ebaca697" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -37,6 +37,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -cov CycleCovariate" + " -cov DinucCovariate" + " -cov TileCovariate" + + " -cov PairedReadOrderCovariate" + " --solid_recal_mode SET_Q_ZERO" + " -recalFile %s", 1, // just one output file @@ -49,10 +50,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibrator1() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "6c59d291c37d053e0f188b762f3060a5" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "e06f1397b9c40f75e96cd3df76730ee0"); - e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7ebdce416b72679e1cf88cc9886a5edc" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "48ddc93cae054f9423f3a7ed9f36540e" ); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "c0dbc69726e1827e1eabdc777d8435e2" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "b6a1749aa311f5c50097280579c638a3"); + e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b5bd41672805e15c6a5187c902c1ef9a" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "51aeb06db11b66d9e1199ebed9c0562c" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -80,7 +81,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorMaxQ70() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "665711dfb81d67582b28faea24e26160" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "df4f9bf64cf1a7aa890b28de043cac40" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -109,7 +110,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesVCF() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "9b9d21ffb70f15ef2aebad21f3fc05cb"); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "e11048d253a5ca719a373a9dd8e18f40"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -136,7 +137,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesVCFPlusDBsnp() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "cc1cc9c1ff184d388d81574fdccc608e"); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "33fa86ad0116f727dd6428bb5a2daf30"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -164,7 +165,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesNoReadGroups() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "a86c64f649b847b7f81ac50a808d3d45" ); + e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "e7031fdf3a88c27ce1b3d5d425860286" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -193,7 +194,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorNoReadGroups() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "32ad300e8c094ed2c1ec6c531180fe70" ); + e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "de1a08b45250f090f19e6a35d449c50c" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); diff --git a/packages/GenomeAnalysisTK.xml b/packages/GenomeAnalysisTK.xml index 9a32eb94d..4c917ad2b 100644 --- a/packages/GenomeAnalysisTK.xml +++ b/packages/GenomeAnalysisTK.xml @@ -26,6 +26,7 @@ org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate org.broadinstitute.sting.gatk.walkers.recalibration.HomopolymerCovariate org.broadinstitute.sting.gatk.walkers.recalibration.TileCovariate + org.broadinstitute.sting.gatk.walkers.recalibration.PairedReadOrderCovariate org.broadinstitute.sting.gatk.walkers.indels.CleanedReadInjector org.broadinstitute.sting.gatk.walkers.indels.IndelIntervalWalker @@ -65,6 +66,7 @@ org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate org.broadinstitute.sting.gatk.walkers.recalibration.HomopolymerCovariate org.broadinstitute.sting.gatk.walkers.recalibration.TileCovariate + org.broadinstitute.sting.gatk.walkers.recalibration.PairedReadOrderCovariate