Updated solid_recal_modes to work with bfast aligned data. Added an integration test that uses the BFAST file provided by TGen.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2630 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-01-19 21:18:02 +00:00
parent 53352e1bb4
commit c98df0a862
8 changed files with 127 additions and 17 deletions

View File

@ -265,7 +265,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
if( BaseUtils.isRegularBase( (char)(bases[offset]) ) ) {
// SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
if( !read.getReadGroup().getPlatform().equalsIgnoreCase("SOLID") || RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || !RecalDataManager.isInconsistentColorSpace( read, offset ) ) {
if( !read.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || !RecalDataManager.isInconsistentColorSpace( read, offset ) ) {
// This base finally passed all the checks for a good base, so add it to the big data hashmap
updateDataFromRead( read, offset, refBase );

View File

@ -51,7 +51,7 @@ public class CycleCovariate implements StandardCovariate {
// Initialize any member variables using the command-line arguments passed to the walkers
public void initialize( final RecalibrationArgumentCollection RAC ) {
if( RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SLX" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ILLUMINA" ) ||
RAC.DEFAULT_PLATFORM.contains( "454" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SOLID" ) ) {
RAC.DEFAULT_PLATFORM.contains( "454" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SOLID" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ABI_SOLID" ) ) {
defaultPlatform = RAC.DEFAULT_PLATFORM;
} else {
throw new StingException( "The requested default platform (" + RAC.DEFAULT_PLATFORM +") is not a recognized platform. Implemented options are illumina, 454, and solid");
@ -67,8 +67,8 @@ public class CycleCovariate implements StandardCovariate {
// ILLUMINA and SOLID
//-----------------------------
if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "SLX" ) ||
read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) { // Some bams have "illumina" and others have "SLX"
if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "SLX" ) || // Some bams have "illumina" and others have "SLX"
read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "ABI_SOLID" )) { // Some bams have "solid" and others have "ABI_SOLID"
cycle = offset + 1;
if( read.getReadNegativeStrandFlag() ) {
cycle = read.getReadLength() - offset;
@ -131,10 +131,10 @@ public class CycleCovariate implements StandardCovariate {
if( !warnedUserBadPlatform ) {
if( defaultPlatform != null) { // The user set a default platform
Utils.warnUser( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " +
"Reverting to " + defaultPlatform + " definition of machine cycle." );
"Reverting to platform = " + defaultPlatform + ". Users may set the default platform using the --default_platform <String> argument." );
} else { // The user did not set a default platform
Utils.warnUser( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " +
"Reverting to Illumina definition of machine cycle. Users may set the default platform using the --default_platform <String> argument." );
"Reverting to platform = Illumina. Users may set the default platform using the --default_platform <String> argument." );
defaultPlatform = "Illumina";
}
warnedUserBadPlatform = true;

View File

@ -57,7 +57,7 @@ public class RecalDataManager {
private static boolean warnUserNullReadGroup = false;
private static boolean warnUserNoColorSpace = false;
public static final String versionString = "v2.2.14"; // Major version, minor version, and build number
public static final String versionString = "v2.2.15"; // Major version, minor version, and build number
RecalDataManager() {
data = new NestedHashMap();
@ -253,7 +253,7 @@ public class RecalDataManager {
public static void parseColorSpace( final SAMRecord read ) {
// If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
if( read.getReadGroup().getPlatform().equalsIgnoreCase("SOLID") ) {
if( read.getReadGroup().getPlatform().toUpperCase().contains("SOLID") ) {
if( read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null ) { // Haven't calculated the inconsistency array yet for this read
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
if( attr != null ) {
@ -360,7 +360,7 @@ public class RecalDataManager {
final char[] refBases, final boolean setBaseN ) {
final boolean negStrand = read.getReadNegativeStrandFlag();
for( int iii = 1; iii < originalQualScores.length - 1; iii++ ) {
for( int iii = 1; iii < originalQualScores.length; iii++ ) {
if( inconsistency[iii] == 1 ) {
if( (char)readBases[iii] == refBases[iii] ) {
if( negStrand ) { originalQualScores[originalQualScores.length-(iii+1)] = (byte)0; }

View File

@ -298,7 +298,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
final byte[] recalQuals = originalQuals.clone();
final String platform = read.getReadGroup().getPlatform();
if( platform.equalsIgnoreCase("SOLID") && !RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") ) {
if( platform.toUpperCase().contains("SOLID") && !RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") ) {
originalQuals = RecalDataManager.calcColorSpace( read, originalQuals, RAC.SOLID_RECAL_MODE, coinFlip, refBases );
}

View File

@ -46,7 +46,6 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
/////////////////////////////
// Command Line Arguments
/////////////////////////////
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
private String OUTPUT_DIR = "analyzeAnnotations/";
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is probably /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)

View File

@ -8,6 +8,31 @@ import java.util.*;
import java.io.PrintStream;
import java.io.FileNotFoundException;
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* Created by IntelliJ IDEA.
* User: rpoplin

View File

@ -4,6 +4,31 @@ import org.broadinstitute.sting.utils.StingException;
import java.util.Comparator;
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* Created by IntelliJ IDEA.
* User: rpoplin
@ -11,6 +36,7 @@ import java.util.Comparator;
*/
public class AnnotationDatum implements Comparator<AnnotationDatum> {
public final float value;
public int numTransitions;
public int numTransversions;

View File

@ -12,6 +12,7 @@ import java.io.File;
public class RecalibrationWalkersIntegrationTest extends WalkerTest {
static HashMap<String, String> paramsFiles = new HashMap<String, String>();
static HashMap<String, String> paramsFilesNoReadGroupTest = new HashMap<String, String>();
static HashMap<String, String> paramsFilesSolidIndels = new HashMap<String, String>();
@Test
public void testCountCovariates1() {
@ -50,9 +51,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
public void testTableRecalibrator1() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "6c59d291c37d053e0f188b762f3060a5" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "e06f1397b9c40f75e96cd3df76730ee0");
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "d0e902b071831bc10cc396e7e082b3c1");
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7ebdce416b72679e1cf88cc9886a5edc" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "48ddc93cae054f9423f3a7ed9f36540e" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "467c7304cd049d1629c3675fdd61fc00" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -80,7 +81,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testTableRecalibratorMaxQ70() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "665711dfb81d67582b28faea24e26160" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "e7e6443bc4debc26e5e06b8765b60042" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -106,8 +107,67 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
}
}
//TODO -- Add an integration test which tests SOLiD files that contain indels to make sure the Cigar string is processed correctly in the solid_recal_modes
// Currently we don't have any such data
@Test
public void testCountCovariatesSolidIndelsRemoveRefBias() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "3889abcc7f6fe420f546fc049bfc2b5a" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
String md5 = entry.getValue();
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
" --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
" -T CountCovariates" +
" -I " + bam +
" -cov ReadGroupCovariate" +
" -cov QualityScoreCovariate" +
" -cov CycleCovariate" +
" -cov DinucCovariate" +
" -U" +
" -L 1:10,000,000-20,000,000" +
" --solid_recal_mode REMOVE_REF_BIAS" +
" -recalFile %s",
1, // just one output file
Arrays.asList(md5));
List<File> result = executeTest("testCountCovariatesSolidIndelsRemoveRefBias", spec).getFirst();
paramsFilesSolidIndels.put(bam, result.get(0).getAbsolutePath());
}
}
@Test
public void testTableRecalibratorSolidIndelsRemoveRefBias() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "a6eb2f8f531164b0a3cb19b4bb1d2f4f" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
String md5 = entry.getValue();
String paramsFile = paramsFilesSolidIndels.get(bam);
System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile);
if ( paramsFile != null ) {
WalkerTestSpec spec = new WalkerTestSpec(
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
" -T TableRecalibration" +
" -I " + bam +
" -outputBam %s" +
" --no_pg_tag" +
" -U" +
" -L 1:10,000,000-20,000,000" +
" --solid_recal_mode REMOVE_REF_BIAS" +
" -recalFile " + paramsFile,
1, // just one output file
Arrays.asList(md5));
executeTest("testTableRecalibratorSolidIndelsRemoveRefBias", spec);
}
}
}
@Test
public void testCountCovariatesVCF() {
@ -196,7 +256,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testTableRecalibratorNoReadGroups() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "32ad300e8c094ed2c1ec6c531180fe70" );
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "474e05b5a0f13776daebeb964a5e0e2b" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();