Fixing another bug in solid recal regarding negative strand reads. The isInconsistentColorSpace method incorrectly used the inconsistent tag added by parseColorSpace, the inconsistent tag is in the direction of the read like the color space tag, and not in the direction of the reference like everything else. This affects the recalibrated quality scores but the improvment in SNP calling performance is minor when using the default UG settings (min base quality 10).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2553 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-01-11 14:28:52 +00:00
parent 971834ca90
commit 62dd2fa5be
5 changed files with 57 additions and 30 deletions

View File

@ -103,7 +103,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
private long solidInsertedReferenceBases = 0; // Number of bases where we believe SOLID has inserted the reference because the color space is inconsistent with the read base
private long otherColorSpaceInconsistency = 0; // Number of bases where the color space is inconsistent with the read but the reference wasn't inserted.
private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site
private static final String versionString = "v2.2.3"; // Major version, minor version, and build number
private static final String versionString = "v2.2.4"; // Major version, minor version, and build number
private Pair<Long, Long> dbSNP_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for dbSNP loci
private Pair<Long, Long> novel_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for non-dbSNP loci
private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878)
@ -170,7 +170,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
if( USE_STANDARD_COVARIATES ) {
for( Class<?> covClass : standardClasses ) {
try {
Covariate covariate = (Covariate)covClass.newInstance();
final Covariate covariate = (Covariate)covClass.newInstance();
requestedCovariates.add( covariate );
} catch ( InstantiationException e ) {
throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) );
@ -189,7 +189,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
if( !requiredClasses.contains( covClass ) && (!USE_STANDARD_COVARIATES || !standardClasses.contains( covClass )) ) {
try {
// Now that we've found a matching class, try to instantiate it
Covariate covariate = (Covariate)covClass.newInstance();
final Covariate covariate = (Covariate)covClass.newInstance();
requestedCovariates.add( covariate );
} catch ( InstantiationException e ) {
throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) );

View File

@ -45,10 +45,10 @@ import net.sf.samtools.SAMReadGroupRecord;
public class RecalDataManager {
public NestedHashMap data; // The full dataset
private NestedHashMap dataCollapsedReadGroup; // Table where everything except read group has been collapsed
private NestedHashMap dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
private ArrayList<NestedHashMap> dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed
public final NestedHashMap data; // The full dataset
private final NestedHashMap dataCollapsedReadGroup; // Table where everything except read group has been collapsed
private final NestedHashMap dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
private final ArrayList<NestedHashMap> dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
@ -59,10 +59,14 @@ public class RecalDataManager {
RecalDataManager() {
data = new NestedHashMap();
dataCollapsedReadGroup = null;
dataCollapsedQualityScore = null;
dataCollapsedByCovariate = null;
}
RecalDataManager( final boolean createCollapsedTables, final int numCovariates ) {
if( createCollapsedTables ) { // Initialize all the collapsed tables, only used by TableRecalibrationWalker
data = null;
dataCollapsedReadGroup = new NestedHashMap();
dataCollapsedQualityScore = new NestedHashMap();
dataCollapsedByCovariate = new ArrayList<NestedHashMap>();
@ -71,6 +75,9 @@ public class RecalDataManager {
}
} else {
data = new NestedHashMap();
dataCollapsedReadGroup = null;
dataCollapsedQualityScore = null;
dataCollapsedByCovariate = null;
}
}
@ -321,10 +328,10 @@ public class RecalDataManager {
// Now that we have the inconsistency array apply the desired correction to the inconsistent bases
if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ) { // Set inconsistent bases and the one before it to Q0
boolean setBaseN = false;
final boolean setBaseN = false;
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, isMappedToReference, setBaseN);
} else if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") ) {
boolean setBaseN = true;
final boolean setBaseN = true;
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, isMappedToReference, setBaseN);
} else if( SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") ) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases
solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead, isMappedToReference, coinFlip);
@ -469,13 +476,33 @@ public class RecalDataManager {
* Check if this base is inconsistent with its color space. If it is then SOLID inserted the reference here and we should reduce the quality
* @param read The read which contains the color space to check against
* @param offset The offset in the read at which to check
* @return Returns true if the base is inconsistent with the color space
* @return Returns true if the base was inconsistent with the color space
*/
public static boolean isInconsistentColorSpace( final SAMRecord read, final int offset ) {
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG);
if( attr != null ) {
final byte[] colorSpace = (byte[])attr;
return colorSpace[offset] != 0;
final byte[] inconsistency = (byte[])attr;
// NOTE: The inconsistency array is in the direction of the read, not aligned to the reference!
if( read.getReadNegativeStrandFlag() ) { // Negative direction
return inconsistency[inconsistency.length - offset - 1] != 0;
} else { // Forward direction
return inconsistency[offset] != 0;
}
// This block of code is if you want to check both the offset and the next base for color space inconsistency
//if( read.getReadNegativeStrandFlag() ) { // Negative direction
// if( offset == 0 ) {
// return inconsistency[0] != 0;
// } else {
// return (inconsistency[inconsistency.length - offset - 1] != 0) || (inconsistency[inconsistency.length - offset] != 0);
// }
//} else { // Forward direction
// if( offset == inconsistency.length - 1 ) {
// return inconsistency[inconsistency.length - 1] != 0;
// } else {
// return (inconsistency[offset] != 0) || (inconsistency[offset + 1] != 0);
// }
//}
} else {
return false;
}

View File

@ -63,7 +63,7 @@ public class RecalibrationArgumentCollection {
public boolean EXCEPTION_IF_NO_TILE = false;
public boolean checkSolidRecalMode() {
public final boolean checkSolidRecalMode() {
return ( SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ||
SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") || SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") );
}

View File

@ -94,7 +94,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
private static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
private static final String versionString = "v2.2.6"; // Major version, minor version, and build number
private static final String versionString = "v2.2.7"; // Major version, minor version, and build number
private Random coinFlip; // Random number generator is used to remove reference bias in solid bams
private static final long RANDOM_SEED = 1032861495;
@ -120,7 +120,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
}
// Get a list of all available covariates
List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);
final List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);
int lineNumber = 0;
boolean foundAllCovariates = false;
@ -223,7 +223,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
// Take the header of the input SAM file and tweak it by adding in a new programRecord with the version number and list of covariates that were used
final SAMFileHeader header = getToolkit().getSAMFileHeader().clone();
if( !NO_PG_TAG ) {
SAMProgramRecord programRecord = new SAMProgramRecord( "GATK TableRecalibration" );
final SAMProgramRecord programRecord = new SAMProgramRecord( "GATK TableRecalibration" );
programRecord.setProgramVersion( versionString );
String commandLineString = "Covariates used = [";
for( Covariate cov : requestedCovariates ) {
@ -232,10 +232,10 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
commandLineString = commandLineString.substring(0, commandLineString.length() - 2); // trim off the trailing comma
commandLineString += "] ";
// Add all of the arguments from the recalibraiton argument collection to the command line string
Field[] fields = RAC.getClass().getFields();
final Field[] fields = RAC.getClass().getFields();
for( Field field : fields ) {
ArgumentTypeDescriptor atd = ArgumentTypeDescriptor.create(field.getType());
List<ArgumentDefinition> adList = atd.createArgumentDefinitions(new ArgumentSource(field.getType(), field));
final ArgumentTypeDescriptor atd = ArgumentTypeDescriptor.create(field.getType());
final List<ArgumentDefinition> adList = atd.createArgumentDefinitions(new ArgumentSource(field.getType(), field));
for( ArgumentDefinition ad : adList ) {
commandLineString += (ad.fullName + "=" + JVMUtils.getFieldValue(field, RAC) + ", ");
}
@ -355,8 +355,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
final RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0).get( readGroupCollapsedKey ));
double globalDeltaQ = 0.0;
if( globalRecalDatum != null ) {
double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
double aggregrateQReported = globalRecalDatum.getEstimatedQReported();
final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
final double aggregrateQReported = globalRecalDatum.getEstimatedQReported();
globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported;
}
@ -366,7 +366,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
final RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1).get( qualityScoreCollapsedKey ));
double deltaQReported = 0.0;
if( qReportedRecalDatum != null ) {
double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
}

View File

@ -17,9 +17,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
public void testCountCovariates1() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "e5b2d5a2f4283718dae678cbc84be847" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "ff1b3a18b67b09560cacc3b5dea0a034");
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "89084b43b824f9e3c5e2afdfe0930542");
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7d6428a76e07ed4b99351aa4df89634d" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "a93b0263acdc856b885f95848852140d" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "6ede6fc840c4e5070a58a919b48e7504" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -50,9 +50,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
public void testTableRecalibrator1() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "6c59d291c37d053e0f188b762f3060a5" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "0f4a882d78747380af2822317c7b6045");
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "e06f1397b9c40f75e96cd3df76730ee0");
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7ebdce416b72679e1cf88cc9886a5edc" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "41b0313bfcaf5afb3cb85f9830e79769" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "48ddc93cae054f9423f3a7ed9f36540e" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -80,7 +80,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testTableRecalibratorMaxQ70() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "8120b648a43b293b84a7cb6374513fb8" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "665711dfb81d67582b28faea24e26160" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -109,7 +109,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesVCF() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "d90342547ed228cf446caf594586f4b0");
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "9b9d21ffb70f15ef2aebad21f3fc05cb");
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -136,7 +136,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesNoReadGroups() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "7e3045dcb2da1f4b305db7fa72bd1b51" );
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "a86c64f649b847b7f81ac50a808d3d45" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -165,7 +165,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testTableRecalibratorNoReadGroups() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "d9d29f38f8ea31ac267f9d937cad9291" );
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "32ad300e8c094ed2c1ec6c531180fe70" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();