CycleCovariate is now one-based so that 0 and -0 don't collide with each other. The SOLiD recalibration modes now change only the inconsistent base and the base preceding it (along the direction of the read), instead of the bases both before and after it. Removed estimatedNumberOfBins from the Covariate interface because it wasn't being used.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2498 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ed2fff13aa
commit
0a6bd5a270
|
|
@ -84,7 +84,6 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
|
|||
|
||||
int lineNumber = 0;
|
||||
boolean foundAllCovariates = false;
|
||||
int estimatedCapacity = 1; // Capacity is multiplicative so this starts at one
|
||||
|
||||
// Read in the covariates that were used from the input file
|
||||
requestedCovariates = new ArrayList<Covariate>();
|
||||
|
|
@ -108,7 +107,6 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
|
|||
try {
|
||||
Covariate covariate = (Covariate)covClass.newInstance();
|
||||
requestedCovariates.add( covariate );
|
||||
estimatedCapacity *= covariate.estimatedNumberOfBins();
|
||||
|
||||
} catch ( InstantiationException e ) {
|
||||
throw new RuntimeException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) );
|
||||
|
|
@ -135,11 +133,6 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
|
|||
throw new RuntimeException( "Malformed input recalibration file. Covariate names can't be found in file: " + RECAL_FILE );
|
||||
}
|
||||
|
||||
// Don't want to crash with out of heap space exception
|
||||
if( estimatedCapacity > 300 * 40 * 200 || estimatedCapacity < 0 ) { // Could be negative if overflowed
|
||||
estimatedCapacity = 300 * 40 * 200;
|
||||
}
|
||||
|
||||
// Initialize any covariate member variables using the shared argument collection
|
||||
for( Covariate cov : requestedCovariates ) {
|
||||
cov.initialize( new RecalibrationArgumentCollection() );
|
||||
|
|
|
|||
|
|
@ -41,7 +41,6 @@ public interface Covariate {
|
|||
public void initialize( RecalibrationArgumentCollection RAC ); // Initialize any member variables using the command-line arguments passed to the walkers
|
||||
public Comparable getValue( SAMRecord read, int offset ); // Used to pick out the covariate's value from attributes of the read
|
||||
public Comparable getValue( String str ); // Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||
public int estimatedNumberOfBins(); // Used to estimate the amount of space required for the full data HashMap
|
||||
}
|
||||
|
||||
interface RequiredCovariate extends Covariate {
|
||||
|
|
|
|||
|
|
@ -60,16 +60,16 @@ public class CycleCovariate implements StandardCovariate {
|
|||
// Used to pick out the covariate's value from attributes of the read
|
||||
public final Comparable getValue( final SAMRecord read, final int offset ) {
|
||||
|
||||
int cycle = 0;
|
||||
int cycle = 1;
|
||||
|
||||
//-----------------------------
|
||||
// ILLUMINA and SOLID
|
||||
//-----------------------------
|
||||
|
||||
if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) {
|
||||
cycle = offset;
|
||||
cycle = offset + 1;
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
cycle = read.getReadLength() - (offset + 1);
|
||||
cycle = read.getReadLength() - offset;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -114,9 +114,9 @@ public class CycleCovariate implements StandardCovariate {
|
|||
|
||||
//else if( read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) {
|
||||
// // The ligation cycle according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
|
||||
// int pos = offset;
|
||||
// int pos = offset + 1;
|
||||
// if( read.getReadNegativeStrandFlag() ) {
|
||||
// pos = read.getReadLength() - (offset + 1);
|
||||
// pos = read.getReadLength() - offset;
|
||||
// }
|
||||
// cycle = pos / 5; // integer division
|
||||
//}
|
||||
|
|
@ -153,9 +153,4 @@ public class CycleCovariate implements StandardCovariate {
|
|||
public final Comparable getValue( final String str ) {
|
||||
return Integer.parseInt( str );
|
||||
}
|
||||
|
||||
// Used to estimate the amount of space required for the full data HashMap
|
||||
public final int estimatedNumberOfBins() {
|
||||
return 80;
|
||||
}
|
||||
}
|
||||
|
|
@ -103,8 +103,4 @@ public class DinucCovariate implements StandardCovariate {
|
|||
|
||||
}
|
||||
|
||||
// Used to estimate the amount of space required for the full data HashMap
|
||||
public final int estimatedNumberOfBins() {
|
||||
return 16;
|
||||
}
|
||||
}
|
||||
|
|
@ -95,8 +95,4 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
|
|||
return Integer.parseInt( str );
|
||||
}
|
||||
|
||||
// Used to estimate the amount of space required for the full data HashMap
|
||||
public final int estimatedNumberOfBins() {
|
||||
return numBack + 1;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,8 +51,4 @@ public class MappingQualityCovariate implements ExperimentalCovariate {
|
|||
return Integer.parseInt( str );
|
||||
}
|
||||
|
||||
// Used to estimate the amount of space required for the full data HashMap
|
||||
public final int estimatedNumberOfBins() {
|
||||
return 100;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -66,8 +66,4 @@ public class MinimumNQSCovariate implements ExperimentalCovariate {
|
|||
return Integer.parseInt( str );
|
||||
}
|
||||
|
||||
// Used to estimate the amount of space required for the full data HashMap
|
||||
public final int estimatedNumberOfBins() {
|
||||
return 40;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,8 +21,4 @@ public class PairedReadOrderCovariate implements ExperimentalCovariate{
|
|||
return str;
|
||||
}
|
||||
|
||||
// Used to estimate the amount of space required for the full data HashMap
|
||||
public final int estimatedNumberOfBins() {
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,8 +56,4 @@ public class PositionCovariate implements ExperimentalCovariate {
|
|||
return Integer.parseInt( str );
|
||||
}
|
||||
|
||||
// Used to estimate the amount of space required for the full data HashMap
|
||||
public final int estimatedNumberOfBins() {
|
||||
return 100;
|
||||
}
|
||||
}
|
||||
|
|
@ -62,8 +62,4 @@ public class PrimerRoundCovariate implements ExperimentalCovariate {
|
|||
return Integer.parseInt( str );
|
||||
}
|
||||
|
||||
// Used to estimate the amount of space required for the full data HashMap
|
||||
public final int estimatedNumberOfBins() {
|
||||
return 5;
|
||||
}
|
||||
}
|
||||
|
|
@ -51,8 +51,4 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
|||
return Integer.parseInt( str );
|
||||
}
|
||||
|
||||
// Used to estimate the amount of space required for the full data HashMap
|
||||
public final int estimatedNumberOfBins() {
|
||||
return 40;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,10 +53,6 @@ public class ReadGroupCovariate implements RequiredCovariate{
|
|||
return str;
|
||||
}
|
||||
|
||||
// Used to estimate the amount of space required for the full data HashMap
|
||||
public final int estimatedNumberOfBins() {
|
||||
return 60;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ import net.sf.samtools.SAMReadGroupRecord;
|
|||
*
|
||||
* This helper class holds the data HashMap as well as submaps that represent the marginal distributions collapsed over all needed dimensions.
|
||||
* It also has static methods that are used to perform the various solid recalibration modes that attempt to correct the reference bias.
|
||||
* This class holds the parsing methods that are shared between CountCovariates and TableRecalibration.
|
||||
*/
|
||||
|
||||
public class RecalDataManager {
|
||||
|
|
@ -347,10 +348,10 @@ public class RecalDataManager {
|
|||
originalQualScores[iii-1] = (byte)0;
|
||||
if( setBaseN ) { readBases[iii-1] = (byte)'N'; }
|
||||
}
|
||||
if( !isMappedToRef || (char)readBases[iii+1] == refBases[iii+1] ) {
|
||||
originalQualScores[iii+1] = (byte)0;
|
||||
if( setBaseN ) { readBases[iii+1] = (byte)'N'; }
|
||||
}
|
||||
//if( !isMappedToRef || (char)readBases[iii+1] == refBases[iii+1] ) {
|
||||
// originalQualScores[iii+1] = (byte)0;
|
||||
// if( setBaseN ) { readBases[iii+1] = (byte)'N'; }
|
||||
//}
|
||||
}
|
||||
}
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
|
|
@ -382,9 +383,9 @@ public class RecalDataManager {
|
|||
throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG, read.getReadName()));
|
||||
}
|
||||
|
||||
for( int iii = 1; iii < inconsistency.length - 2; iii++ ) {
|
||||
for( int iii = 1; iii < inconsistency.length - 1; iii++ ) {
|
||||
if( inconsistency[iii] == 1 ) {
|
||||
for( int jjj = iii - 1; jjj <= iii + 1; jjj++ ) { // Correct this base and the one before it along the direction of the read
|
||||
for( int jjj = iii - 1; jjj <= iii; jjj++ ) { // Correct this base and the one before it along the direction of the read
|
||||
if( !isMappedToRef || (char)readBases[jjj] == refBases[jjj] ) {
|
||||
if( colorSpaceQuals[jjj] == colorSpaceQuals[jjj+1] ) { // Equal evidence for the color implied base and the reference base, so flip a coin
|
||||
int rand = coinFlip.nextInt( 2 );
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
||||
private static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
|
||||
private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
|
||||
private static final String versionString = "v2.2.1"; // Major version, minor version, and build number
|
||||
private static final String versionString = "v2.2.2"; // Major version, minor version, and build number
|
||||
private SAMFileWriter OUTPUT_BAM = null;// The File Writer that will write out the recalibrated bam
|
||||
private Random coinFlip; // Random number generator is used to remove reference bias in solid bams
|
||||
private static final long RANDOM_SEED = 1032861495;
|
||||
|
|
@ -123,7 +123,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
int lineNumber = 0;
|
||||
boolean foundAllCovariates = false;
|
||||
int estimatedCapacity = 1; // Capacity is multiplicative so this starts at one
|
||||
|
||||
// Warn the user if a dbSNP file was specified since it isn't being used here
|
||||
boolean foundDBSNP = false;
|
||||
|
|
@ -161,7 +160,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
try {
|
||||
Covariate covariate = (Covariate)covClass.newInstance();
|
||||
requestedCovariates.add( covariate );
|
||||
estimatedCapacity *= covariate.estimatedNumberOfBins();
|
||||
|
||||
} catch ( InstantiationException e ) {
|
||||
throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) );
|
||||
|
|
@ -187,10 +185,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
throw new StingException( "Malformed input recalibration file. Covariate names can't be found in file: " + RAC.RECAL_FILE );
|
||||
}
|
||||
|
||||
// Don't want to crash with out of heap space exception
|
||||
if( estimatedCapacity > 300 * 40 * 200 || estimatedCapacity < 0 ) { // Could be negative if overflowed
|
||||
estimatedCapacity = 300 * 40 * 200;
|
||||
}
|
||||
final boolean createCollapsedTables = true;
|
||||
|
||||
// Initialize any covariate member variables using the shared argument collection
|
||||
|
|
|
|||
|
|
@ -50,8 +50,4 @@ public class TileCovariate implements ExperimentalCovariate {
|
|||
return Integer.parseInt( str );
|
||||
}
|
||||
|
||||
// Used to estimate the amount of space required for the full data HashMap
|
||||
public int estimatedNumberOfBins() {
|
||||
return 120;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,10 +16,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCountCovariates1() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "c1b54d4221fb4fa88e0231a74310708e" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "337ea30c4dcc2fe6a9adc442ffd0706b");
|
||||
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "ffbfd38b1720cfb67ba1bb63d4308552" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "60e227ea8c3409fa85b92cae7ea6574f" );
|
||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "604c0d898c9df9acbeeade9979707546" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f03f6186f54bc3f841639a206d424d97");
|
||||
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "fe873886e3efa3566f08a6ae26d71b43" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "996450b7c2e9ef68b9e76d57f2d5288a" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -48,10 +48,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testTableRecalibrator1() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "f7749792ffffbb86aec66e92a3bddf7f" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f1780e3c3e12f07527e0468149312f10");
|
||||
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "c54a67a1687a4139a8ae19762217987f" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "d9ddbacdafc621d830a1db637973d795" );
|
||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "d1a9b38f782af3edf223908cb71c7205" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "27b3eaf3c02ffc5fb3d7815468d9958e");
|
||||
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b7f4d3bfb761f29531a37336615046ff" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "1faadda11d5c0278575d2f0368a65f14" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -79,7 +79,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCountCovariatesVCF() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "3ee0e811682c0f29951128204765ece9");
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "d90342547ed228cf446caf594586f4b0");
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -106,7 +106,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCountCovariatesNoReadGroups() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "f49bc79225bffbf8b64590b65a4b4305" );
|
||||
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "7e3045dcb2da1f4b305db7fa72bd1b51" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -135,7 +135,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testTableRecalibratorNoReadGroups() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "62413fdbfe99cd6e24992de4234de5bc" );
|
||||
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "ad345fcfb2faaf97eb0291ffa61b3228" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
|
|||
Loading…
Reference in New Issue