Updated solid_recal_modes to work with bfast aligned data. Added an integration test that uses the BFAST file provided by TGen.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2630 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
53352e1bb4
commit
c98df0a862
|
|
@ -265,7 +265,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
if( BaseUtils.isRegularBase( (char)(bases[offset]) ) ) {
|
||||
|
||||
// SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
|
||||
if( !read.getReadGroup().getPlatform().equalsIgnoreCase("SOLID") || RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || !RecalDataManager.isInconsistentColorSpace( read, offset ) ) {
|
||||
if( !read.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || !RecalDataManager.isInconsistentColorSpace( read, offset ) ) {
|
||||
|
||||
// This base finally passed all the checks for a good base, so add it to the big data hashmap
|
||||
updateDataFromRead( read, offset, refBase );
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
public void initialize( final RecalibrationArgumentCollection RAC ) {
|
||||
if( RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SLX" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ILLUMINA" ) ||
|
||||
RAC.DEFAULT_PLATFORM.contains( "454" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SOLID" ) ) {
|
||||
RAC.DEFAULT_PLATFORM.contains( "454" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SOLID" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ABI_SOLID" ) ) {
|
||||
defaultPlatform = RAC.DEFAULT_PLATFORM;
|
||||
} else {
|
||||
throw new StingException( "The requested default platform (" + RAC.DEFAULT_PLATFORM +") is not a recognized platform. Implemented options are illumina, 454, and solid");
|
||||
|
|
@ -67,8 +67,8 @@ public class CycleCovariate implements StandardCovariate {
|
|||
// ILLUMINA and SOLID
|
||||
//-----------------------------
|
||||
|
||||
if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "SLX" ) ||
|
||||
read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) { // Some bams have "illumina" and others have "SLX"
|
||||
if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "SLX" ) || // Some bams have "illumina" and others have "SLX"
|
||||
read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "ABI_SOLID" )) { // Some bams have "solid" and others have "ABI_SOLID"
|
||||
cycle = offset + 1;
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
cycle = read.getReadLength() - offset;
|
||||
|
|
@ -131,10 +131,10 @@ public class CycleCovariate implements StandardCovariate {
|
|||
if( !warnedUserBadPlatform ) {
|
||||
if( defaultPlatform != null) { // The user set a default platform
|
||||
Utils.warnUser( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " +
|
||||
"Reverting to " + defaultPlatform + " definition of machine cycle." );
|
||||
"Reverting to platform = " + defaultPlatform + ". Users may set the default platform using the --default_platform <String> argument." );
|
||||
} else { // The user did not set a default platform
|
||||
Utils.warnUser( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " +
|
||||
"Reverting to Illumina definition of machine cycle. Users may set the default platform using the --default_platform <String> argument." );
|
||||
"Reverting to platform = Illumina. Users may set the default platform using the --default_platform <String> argument." );
|
||||
defaultPlatform = "Illumina";
|
||||
}
|
||||
warnedUserBadPlatform = true;
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ public class RecalDataManager {
|
|||
private static boolean warnUserNullReadGroup = false;
|
||||
private static boolean warnUserNoColorSpace = false;
|
||||
|
||||
public static final String versionString = "v2.2.14"; // Major version, minor version, and build number
|
||||
public static final String versionString = "v2.2.15"; // Major version, minor version, and build number
|
||||
|
||||
RecalDataManager() {
|
||||
data = new NestedHashMap();
|
||||
|
|
@ -253,7 +253,7 @@ public class RecalDataManager {
|
|||
public static void parseColorSpace( final SAMRecord read ) {
|
||||
|
||||
// If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
|
||||
if( read.getReadGroup().getPlatform().equalsIgnoreCase("SOLID") ) {
|
||||
if( read.getReadGroup().getPlatform().toUpperCase().contains("SOLID") ) {
|
||||
if( read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null ) { // Haven't calculated the inconsistency array yet for this read
|
||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
|
||||
if( attr != null ) {
|
||||
|
|
@ -360,7 +360,7 @@ public class RecalDataManager {
|
|||
final char[] refBases, final boolean setBaseN ) {
|
||||
|
||||
final boolean negStrand = read.getReadNegativeStrandFlag();
|
||||
for( int iii = 1; iii < originalQualScores.length - 1; iii++ ) {
|
||||
for( int iii = 1; iii < originalQualScores.length; iii++ ) {
|
||||
if( inconsistency[iii] == 1 ) {
|
||||
if( (char)readBases[iii] == refBases[iii] ) {
|
||||
if( negStrand ) { originalQualScores[originalQualScores.length-(iii+1)] = (byte)0; }
|
||||
|
|
|
|||
|
|
@ -298,7 +298,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
final byte[] recalQuals = originalQuals.clone();
|
||||
|
||||
final String platform = read.getReadGroup().getPlatform();
|
||||
if( platform.equalsIgnoreCase("SOLID") && !RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") ) {
|
||||
if( platform.toUpperCase().contains("SOLID") && !RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") ) {
|
||||
originalQuals = RecalDataManager.calcColorSpace( read, originalQuals, RAC.SOLID_RECAL_MODE, coinFlip, refBases );
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -46,7 +46,6 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
|
|||
/////////////////////////////
|
||||
// Command Line Arguments
|
||||
/////////////////////////////
|
||||
|
||||
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
|
||||
private String OUTPUT_DIR = "analyzeAnnotations/";
|
||||
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is probably /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
|
||||
|
|
|
|||
|
|
@ -8,6 +8,31 @@ import java.util.*;
|
|||
import java.io.PrintStream;
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
|
|
|
|||
|
|
@ -4,6 +4,31 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
|
||||
import java.util.Comparator;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
|
|
@ -11,6 +36,7 @@ import java.util.Comparator;
|
|||
*/
|
||||
|
||||
public class AnnotationDatum implements Comparator<AnnotationDatum> {
|
||||
|
||||
public final float value;
|
||||
public int numTransitions;
|
||||
public int numTransversions;
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ import java.io.File;
|
|||
public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||
static HashMap<String, String> paramsFiles = new HashMap<String, String>();
|
||||
static HashMap<String, String> paramsFilesNoReadGroupTest = new HashMap<String, String>();
|
||||
static HashMap<String, String> paramsFilesSolidIndels = new HashMap<String, String>();
|
||||
|
||||
@Test
|
||||
public void testCountCovariates1() {
|
||||
|
|
@ -50,9 +51,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
public void testTableRecalibrator1() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "6c59d291c37d053e0f188b762f3060a5" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "e06f1397b9c40f75e96cd3df76730ee0");
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "d0e902b071831bc10cc396e7e082b3c1");
|
||||
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7ebdce416b72679e1cf88cc9886a5edc" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "48ddc93cae054f9423f3a7ed9f36540e" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "467c7304cd049d1629c3675fdd61fc00" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -80,7 +81,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testTableRecalibratorMaxQ70() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "665711dfb81d67582b28faea24e26160" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "e7e6443bc4debc26e5e06b8765b60042" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -106,8 +107,67 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
}
|
||||
}
|
||||
|
||||
//TODO -- Add an integration test which tests SOLiD files that contain indels to make sure the Cigar string is processed correctly in the solid_recal_modes
|
||||
// Currently we don't have any such data
|
||||
|
||||
|
||||
@Test
|
||||
public void testCountCovariatesSolidIndelsRemoveRefBias() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "3889abcc7f6fe420f546fc049bfc2b5a" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
String md5 = entry.getValue();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
|
||||
" --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
|
||||
" -T CountCovariates" +
|
||||
" -I " + bam +
|
||||
" -cov ReadGroupCovariate" +
|
||||
" -cov QualityScoreCovariate" +
|
||||
" -cov CycleCovariate" +
|
||||
" -cov DinucCovariate" +
|
||||
" -U" +
|
||||
" -L 1:10,000,000-20,000,000" +
|
||||
" --solid_recal_mode REMOVE_REF_BIAS" +
|
||||
" -recalFile %s",
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
List<File> result = executeTest("testCountCovariatesSolidIndelsRemoveRefBias", spec).getFirst();
|
||||
paramsFilesSolidIndels.put(bam, result.get(0).getAbsolutePath());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTableRecalibratorSolidIndelsRemoveRefBias() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "a6eb2f8f531164b0a3cb19b4bb1d2f4f" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
String md5 = entry.getValue();
|
||||
String paramsFile = paramsFilesSolidIndels.get(bam);
|
||||
System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile);
|
||||
if ( paramsFile != null ) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
|
||||
" -T TableRecalibration" +
|
||||
" -I " + bam +
|
||||
" -outputBam %s" +
|
||||
" --no_pg_tag" +
|
||||
" -U" +
|
||||
" -L 1:10,000,000-20,000,000" +
|
||||
" --solid_recal_mode REMOVE_REF_BIAS" +
|
||||
" -recalFile " + paramsFile,
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
executeTest("testTableRecalibratorSolidIndelsRemoveRefBias", spec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void testCountCovariatesVCF() {
|
||||
|
|
@ -196,7 +256,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testTableRecalibratorNoReadGroups() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "32ad300e8c094ed2c1ec6c531180fe70" );
|
||||
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "474e05b5a0f13776daebeb964a5e0e2b" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
|
|||
Loading…
Reference in New Issue