CycleCovariate is now one-based so that the cycle values 0 and -0 no longer collide. The SOLID recalibration modes now change only the inconsistent base and the previous base (along the direction of the read), instead of the bases both before and after it. Removed estimatedNumberOfBins from the Covariate interface because it was not being used.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2498 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-01-04 20:52:15 +00:00
parent ed2fff13aa
commit 0a6bd5a270
16 changed files with 24 additions and 82 deletions

View File

@ -84,7 +84,6 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
int lineNumber = 0;
boolean foundAllCovariates = false;
int estimatedCapacity = 1; // Capacity is multiplicitive so this starts at one
// Read in the covariates that were used from the input file
requestedCovariates = new ArrayList<Covariate>();
@ -108,7 +107,6 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
try {
Covariate covariate = (Covariate)covClass.newInstance();
requestedCovariates.add( covariate );
estimatedCapacity *= covariate.estimatedNumberOfBins();
} catch ( InstantiationException e ) {
throw new RuntimeException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) );
@ -135,11 +133,6 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
throw new RuntimeException( "Malformed input recalibration file. Covariate names can't be found in file: " + RECAL_FILE );
}
// Don't want to crash with out of heap space exception
if( estimatedCapacity > 300 * 40 * 200 || estimatedCapacity < 0 ) { // Could be negative if overflowed
estimatedCapacity = 300 * 40 * 200;
}
// Initialize any covariate member variables using the shared argument collection
for( Covariate cov : requestedCovariates ) {
cov.initialize( new RecalibrationArgumentCollection() );

View File

@ -41,7 +41,6 @@ public interface Covariate {
public void initialize( RecalibrationArgumentCollection RAC ); // Initialize any member variables using the command-line arguments passed to the walkers
public Comparable getValue( SAMRecord read, int offset ); // Used to pick out the covariate's value from attributes of the read
public Comparable getValue( String str ); // Used to get the covariate's value from input csv file in TableRecalibrationWalker
public int estimatedNumberOfBins(); // Used to estimate the amount space required for the full data HashMap
}
interface RequiredCovariate extends Covariate {

View File

@ -60,16 +60,16 @@ public class CycleCovariate implements StandardCovariate {
// Used to pick out the covariate's value from attributes of the read
public final Comparable getValue( final SAMRecord read, final int offset ) {
int cycle = 0;
int cycle = 1;
//-----------------------------
// ILLUMINA and SOLID
//-----------------------------
if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) {
cycle = offset;
cycle = offset + 1;
if( read.getReadNegativeStrandFlag() ) {
cycle = read.getReadLength() - (offset + 1);
cycle = read.getReadLength() - offset;
}
}
@ -114,9 +114,9 @@ public class CycleCovariate implements StandardCovariate {
//else if( read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) {
// // The ligation cycle according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
// int pos = offset;
// int pos = offset + 1;
// if( read.getReadNegativeStrandFlag() ) {
// pos = read.getReadLength() - (offset + 1);
// pos = read.getReadLength() - offset;
// }
// cycle = pos / 5; // integer division
//}
@ -153,9 +153,4 @@ public class CycleCovariate implements StandardCovariate {
public final Comparable getValue( final String str ) {
return Integer.parseInt( str );
}
// Used to estimate the amount space required for the full data HashMap
public final int estimatedNumberOfBins() {
return 80;
}
}

View File

@ -103,8 +103,4 @@ public class DinucCovariate implements StandardCovariate {
}
// Used to estimate the amount space required for the full data HashMap
public final int estimatedNumberOfBins() {
return 16;
}
}

View File

@ -95,8 +95,4 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
return Integer.parseInt( str );
}
// Used to estimate the amount space required for the full data HashMap
public final int estimatedNumberOfBins() {
return numBack + 1;
}
}

View File

@ -51,8 +51,4 @@ public class MappingQualityCovariate implements ExperimentalCovariate {
return Integer.parseInt( str );
}
// Used to estimate the amount space required for the full data HashMap
public final int estimatedNumberOfBins() {
return 100;
}
}

View File

@ -66,8 +66,4 @@ public class MinimumNQSCovariate implements ExperimentalCovariate {
return Integer.parseInt( str );
}
// Used to estimate the amount space required for the full data HashMap
public final int estimatedNumberOfBins() {
return 40;
}
}

View File

@ -21,8 +21,4 @@ public class PairedReadOrderCovariate implements ExperimentalCovariate{
return str;
}
// Used to estimate the amount space required for the full data HashMap
public final int estimatedNumberOfBins() {
return 4;
}
}

View File

@ -56,8 +56,4 @@ public class PositionCovariate implements ExperimentalCovariate {
return Integer.parseInt( str );
}
// Used to estimate the amount space required for the full data HashMap
public final int estimatedNumberOfBins() {
return 100;
}
}

View File

@ -62,8 +62,4 @@ public class PrimerRoundCovariate implements ExperimentalCovariate {
return Integer.parseInt( str );
}
// Used to estimate the amount space required for the full data HashMap
public final int estimatedNumberOfBins() {
return 5;
}
}

View File

@ -51,8 +51,4 @@ public class QualityScoreCovariate implements RequiredCovariate {
return Integer.parseInt( str );
}
// Used to estimate the amount space required for the full data HashMap
public final int estimatedNumberOfBins() {
return 40;
}
}

View File

@ -53,10 +53,6 @@ public class ReadGroupCovariate implements RequiredCovariate{
return str;
}
// Used to estimate the amount space required for the full data HashMap
public final int estimatedNumberOfBins() {
return 60;
}
}

View File

@ -40,6 +40,7 @@ import net.sf.samtools.SAMReadGroupRecord;
*
* This helper class holds the data HashMap as well as submaps that represent the marginal distributions collapsed over all needed dimensions.
* It also has static methods that are used to perform the various solid recalibration modes that attempt to correct the reference bias.
* This class holds the parsing methods that are shared between CountCovariates and TableRecalibration.
*/
public class RecalDataManager {
@ -347,10 +348,10 @@ public class RecalDataManager {
originalQualScores[iii-1] = (byte)0;
if( setBaseN ) { readBases[iii-1] = (byte)'N'; }
}
if( !isMappedToRef || (char)readBases[iii+1] == refBases[iii+1] ) {
originalQualScores[iii+1] = (byte)0;
if( setBaseN ) { readBases[iii+1] = (byte)'N'; }
}
//if( !isMappedToRef || (char)readBases[iii+1] == refBases[iii+1] ) {
// originalQualScores[iii+1] = (byte)0;
// if( setBaseN ) { readBases[iii+1] = (byte)'N'; }
//}
}
}
if( read.getReadNegativeStrandFlag() ) {
@ -382,9 +383,9 @@ public class RecalDataManager {
throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG, read.getReadName()));
}
for( int iii = 1; iii < inconsistency.length - 2; iii++ ) {
for( int iii = 1; iii < inconsistency.length - 1; iii++ ) {
if( inconsistency[iii] == 1 ) {
for( int jjj = iii - 1; jjj <= iii + 1; jjj++ ) { // Correct this base and the one before it along the direction of the read
for( int jjj = iii - 1; jjj <= iii; jjj++ ) { // Correct this base and the one before it along the direction of the read
if( !isMappedToRef || (char)readBases[jjj] == refBases[jjj] ) {
if( colorSpaceQuals[jjj] == colorSpaceQuals[jjj+1] ) { // Equal evidence for the color implied base and the reference base, so flip a coin
int rand = coinFlip.nextInt( 2 );

View File

@ -92,7 +92,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
private static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
private static final String versionString = "v2.2.1"; // Major version, minor version, and build number
private static final String versionString = "v2.2.2"; // Major version, minor version, and build number
private SAMFileWriter OUTPUT_BAM = null;// The File Writer that will write out the recalibrated bam
private Random coinFlip; // Random number generator is used to remove reference bias in solid bams
private static final long RANDOM_SEED = 1032861495;
@ -123,7 +123,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
int lineNumber = 0;
boolean foundAllCovariates = false;
int estimatedCapacity = 1; // Capacity is multiplicitive so this starts at one
// Warn the user if a dbSNP file was specified since it isn't being used here
boolean foundDBSNP = false;
@ -161,7 +160,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
try {
Covariate covariate = (Covariate)covClass.newInstance();
requestedCovariates.add( covariate );
estimatedCapacity *= covariate.estimatedNumberOfBins();
} catch ( InstantiationException e ) {
throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) );
@ -187,10 +185,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
throw new StingException( "Malformed input recalibration file. Covariate names can't be found in file: " + RAC.RECAL_FILE );
}
// Don't want to crash with out of heap space exception
if( estimatedCapacity > 300 * 40 * 200 || estimatedCapacity < 0 ) { // Could be negative if overflowed
estimatedCapacity = 300 * 40 * 200;
}
final boolean createCollapsedTables = true;
// Initialize any covariate member variables using the shared argument collection

View File

@ -50,8 +50,4 @@ public class TileCovariate implements ExperimentalCovariate {
return Integer.parseInt( str );
}
// Used to estimate the amount space required for the full data HashMap
public int estimatedNumberOfBins() {
return 120;
}
}

View File

@ -16,10 +16,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariates1() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "c1b54d4221fb4fa88e0231a74310708e" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "337ea30c4dcc2fe6a9adc442ffd0706b");
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "ffbfd38b1720cfb67ba1bb63d4308552" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "60e227ea8c3409fa85b92cae7ea6574f" );
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "604c0d898c9df9acbeeade9979707546" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f03f6186f54bc3f841639a206d424d97");
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "fe873886e3efa3566f08a6ae26d71b43" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "996450b7c2e9ef68b9e76d57f2d5288a" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -48,10 +48,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testTableRecalibrator1() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "f7749792ffffbb86aec66e92a3bddf7f" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f1780e3c3e12f07527e0468149312f10");
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "c54a67a1687a4139a8ae19762217987f" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "d9ddbacdafc621d830a1db637973d795" );
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "d1a9b38f782af3edf223908cb71c7205" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "27b3eaf3c02ffc5fb3d7815468d9958e");
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b7f4d3bfb761f29531a37336615046ff" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "1faadda11d5c0278575d2f0368a65f14" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -79,7 +79,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesVCF() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "3ee0e811682c0f29951128204765ece9");
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "d90342547ed228cf446caf594586f4b0");
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -106,7 +106,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesNoReadGroups() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "f49bc79225bffbf8b64590b65a4b4305" );
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "7e3045dcb2da1f4b305db7fa72bd1b51" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -135,7 +135,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testTableRecalibratorNoReadGroups() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "62413fdbfe99cd6e24992de4234de5bc" );
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "ad345fcfb2faaf97eb0291ffa61b3228" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();