diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index 24445fef6..cbf75a758 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -265,7 +265,7 @@ public class CovariateCounterWalker extends LocusWalker { if( BaseUtils.isRegularBase( (char)(bases[offset]) ) ) { // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it - if( !read.getReadGroup().getPlatform().equalsIgnoreCase("SOLID") || RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || !RecalDataManager.isInconsistentColorSpace( read, offset ) ) { + if( !read.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || !RecalDataManager.isInconsistentColorSpace( read, offset ) ) { // This base finally passed all the checks for a good base, so add it to the big data hashmap updateDataFromRead( read, offset, refBase ); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java index 5826263f9..1d3cd5c38 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java @@ -51,7 +51,7 @@ public class CycleCovariate implements StandardCovariate { // Initialize any member variables using the command-line arguments passed to the walkers public void initialize( final RecalibrationArgumentCollection RAC ) { if( RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SLX" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ILLUMINA" ) || - RAC.DEFAULT_PLATFORM.contains( "454" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SOLID" ) ) { + RAC.DEFAULT_PLATFORM.contains( "454" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SOLID" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ABI_SOLID" ) ) { defaultPlatform = RAC.DEFAULT_PLATFORM; } else { throw new StingException( "The requested default platform (" + RAC.DEFAULT_PLATFORM +") is not a recognized platform. Implemented options are illumina, 454, and solid"); @@ -67,8 +67,8 @@ public class CycleCovariate implements StandardCovariate { // ILLUMINA and SOLID //----------------------------- - if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "SLX" ) || - read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) { // Some bams have "illumina" and others have "SLX" + if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "SLX" ) || // Some bams have "illumina" and others have "SLX" + read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "ABI_SOLID" )) { // Some bams have "solid" and others have "ABI_SOLID" cycle = offset + 1; if( read.getReadNegativeStrandFlag() ) { cycle = read.getReadLength() - offset; @@ -131,10 +131,10 @@ public class CycleCovariate implements StandardCovariate { if( !warnedUserBadPlatform ) { if( defaultPlatform != null) { // The user set a default platform Utils.warnUser( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " + - "Reverting to " + defaultPlatform + " definition of machine cycle." ); + "Reverting to platform = " + defaultPlatform + ". Users may set the default platform using the --default_platform argument." ); } else { // The user did not set a default platform Utils.warnUser( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " + - "Reverting to Illumina definition of machine cycle. Users may set the default platform using the --default_platform argument." ); + "Reverting to platform = Illumina. Users may set the default platform using the --default_platform argument." ); defaultPlatform = "Illumina"; } warnedUserBadPlatform = true; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index a17221614..114b2672b 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -57,7 +57,7 @@ public class RecalDataManager { private static boolean warnUserNullReadGroup = false; private static boolean warnUserNoColorSpace = false; - public static final String versionString = "v2.2.14"; // Major version, minor version, and build number + public static final String versionString = "v2.2.15"; // Major version, minor version, and build number RecalDataManager() { data = new NestedHashMap(); @@ -253,7 +253,7 @@ public class RecalDataManager { public static void parseColorSpace( final SAMRecord read ) { // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base - if( read.getReadGroup().getPlatform().equalsIgnoreCase("SOLID") ) { + if( read.getReadGroup().getPlatform().toUpperCase().contains("SOLID") ) { if( read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null ) { // Haven't calculated the inconsistency array yet for this read final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG); if( attr != null ) { @@ -360,7 +360,7 @@ public class RecalDataManager { final char[] refBases, final boolean setBaseN ) { final boolean negStrand = read.getReadNegativeStrandFlag(); - for( int iii = 1; iii < originalQualScores.length - 1; iii++ ) { + for( int iii = 1; iii < originalQualScores.length; iii++ ) { if( inconsistency[iii] == 1 ) { if( (char)readBases[iii] == refBases[iii] ) { if( negStrand ) { originalQualScores[originalQualScores.length-(iii+1)] = (byte)0; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index 807b7865f..c61557a0d 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -298,7 +298,7 @@ public class TableRecalibrationWalker extends ReadWalker { ///////////////////////////// // Command Line Arguments ///////////////////////////// - @Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false) private String OUTPUT_DIR = "analyzeAnnotations/"; @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is probably /broad/tools/apps/R-2.6.0/bin/Rscript", required = false) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java index 9f85af89a..ce903a8ee 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java @@ -8,6 +8,31 @@ import java.util.*; import java.io.PrintStream; import java.io.FileNotFoundException; +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + /** * Created by IntelliJ IDEA. * User: rpoplin diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDatum.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDatum.java index 32f3b4021..7512938eb 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDatum.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDatum.java @@ -4,6 +4,31 @@ import org.broadinstitute.sting.utils.StingException; import java.util.Comparator; +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + /** * Created by IntelliJ IDEA. * User: rpoplin @@ -11,6 +36,7 @@ import java.util.Comparator; */ public class AnnotationDatum implements Comparator { + public final float value; public int numTransitions; public int numTransversions; diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index e9aa37be2..64fe63872 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -12,6 +12,7 @@ import java.io.File; public class RecalibrationWalkersIntegrationTest extends WalkerTest { static HashMap paramsFiles = new HashMap(); static HashMap paramsFilesNoReadGroupTest = new HashMap(); + static HashMap paramsFilesSolidIndels = new HashMap(); @Test public void testCountCovariates1() { @@ -50,9 +51,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { public void testTableRecalibrator1() { HashMap e = new HashMap(); e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "6c59d291c37d053e0f188b762f3060a5" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "e06f1397b9c40f75e96cd3df76730ee0"); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "d0e902b071831bc10cc396e7e082b3c1"); e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7ebdce416b72679e1cf88cc9886a5edc" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "48ddc93cae054f9423f3a7ed9f36540e" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "467c7304cd049d1629c3675fdd61fc00" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -80,7 +81,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorMaxQ70() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "665711dfb81d67582b28faea24e26160" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "e7e6443bc4debc26e5e06b8765b60042" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -106,8 +107,67 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { } } - //TODO -- Add an integration test which tests SOLiD files that contain indels to make sure the Cigar string is processed correctly in the solid_recal_modes - // Currently we don't have any such data + + + @Test + public void testCountCovariatesSolidIndelsRemoveRefBias() { + HashMap e = new HashMap(); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "3889abcc7f6fe420f546fc049bfc2b5a" ); + + for ( Map.Entry entry : e.entrySet() ) { + String bam = entry.getKey(); + String md5 = entry.getValue(); + + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + oneKGLocation + "reference/human_b36_both.fasta" + + " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + + " -T CountCovariates" + + " -I " + bam + + " -cov ReadGroupCovariate" + + " -cov QualityScoreCovariate" + + " -cov CycleCovariate" + + " -cov DinucCovariate" + + " -U" + + " -L 1:10,000,000-20,000,000" + + " --solid_recal_mode REMOVE_REF_BIAS" + + " -recalFile %s", + 1, // just one output file + Arrays.asList(md5)); + List result = executeTest("testCountCovariatesSolidIndelsRemoveRefBias", spec).getFirst(); + paramsFilesSolidIndels.put(bam, result.get(0).getAbsolutePath()); + } + } + + @Test + public void testTableRecalibratorSolidIndelsRemoveRefBias() { + HashMap e = new HashMap(); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "a6eb2f8f531164b0a3cb19b4bb1d2f4f" ); + + for ( Map.Entry entry : e.entrySet() ) { + String bam = entry.getKey(); + String md5 = entry.getValue(); + String paramsFile = paramsFilesSolidIndels.get(bam); + System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile); + if ( paramsFile != null ) { + WalkerTestSpec spec = new WalkerTestSpec( + "-R " + oneKGLocation + "reference/human_b36_both.fasta" + + " -T TableRecalibration" + + " -I " + bam + + " -outputBam %s" + + " --no_pg_tag" + + " -U" + + " -L 1:10,000,000-20,000,000" + + " --solid_recal_mode REMOVE_REF_BIAS" + + " -recalFile " + paramsFile, + 1, // just one output file + Arrays.asList(md5)); + executeTest("testTableRecalibratorSolidIndelsRemoveRefBias", spec); + } + } + } + + + @Test public void testCountCovariatesVCF() { @@ -196,7 +256,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorNoReadGroups() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "32ad300e8c094ed2c1ec6c531180fe70" ); + e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "474e05b5a0f13776daebeb964a5e0e2b" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey();