diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index 9eebf9223..b814db015 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -103,7 +103,7 @@ public class CovariateCounterWalker extends LocusWalker { private long solidInsertedReferenceBases = 0; // Number of bases where we believe SOLID has inserted the reference because the color space is inconsistent with the read base private long otherColorSpaceInconsistency = 0; // Number of bases where the color space is inconsistent with the read but the reference wasn't inserted. private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site - private static final String versionString = "v2.2.3"; // Major version, minor version, and build number + private static final String versionString = "v2.2.4"; // Major version, minor version, and build number private Pair dbSNP_counts = new Pair(0L, 0L); // mismatch/base counts for dbSNP loci private Pair novel_counts = new Pair(0L, 0L); // mismatch/base counts for non-dbSNP loci private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878) @@ -170,7 +170,7 @@ public class CovariateCounterWalker extends LocusWalker { if( USE_STANDARD_COVARIATES ) { for( Class covClass : standardClasses ) { try { - Covariate covariate = (Covariate)covClass.newInstance(); + final Covariate covariate = (Covariate)covClass.newInstance(); requestedCovariates.add( covariate ); } catch ( InstantiationException e ) { throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) ); @@ -189,7 +189,7 @@ public class CovariateCounterWalker extends LocusWalker { if( !requiredClasses.contains( covClass ) && (!USE_STANDARD_COVARIATES || !standardClasses.contains( covClass )) ) { try { // Now that we've found a matching class, try to instantiate it - Covariate covariate = (Covariate)covClass.newInstance(); + final Covariate covariate = (Covariate)covClass.newInstance(); requestedCovariates.add( covariate ); } catch ( InstantiationException e ) { throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) ); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index 07cd9a175..c8ea00bd1 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -45,10 +45,10 @@ import net.sf.samtools.SAMReadGroupRecord; public class RecalDataManager { - public NestedHashMap data; // The full dataset - private NestedHashMap dataCollapsedReadGroup; // Table where everything except read group has been collapsed - private NestedHashMap dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed - private ArrayList dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed + public final NestedHashMap data; // The full dataset + private final NestedHashMap dataCollapsedReadGroup; // Table where everything except read group has been collapsed + private final NestedHashMap dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed + private final ArrayList dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams @@ -59,10 +59,14 @@ public class RecalDataManager { RecalDataManager() { data = new NestedHashMap(); + dataCollapsedReadGroup = null; + dataCollapsedQualityScore = null; + dataCollapsedByCovariate = null; } RecalDataManager( final boolean createCollapsedTables, final int numCovariates ) { if( createCollapsedTables ) { // Initialize all the collapsed tables, only used by TableRecalibrationWalker + data = null; dataCollapsedReadGroup = new NestedHashMap(); dataCollapsedQualityScore = new NestedHashMap(); dataCollapsedByCovariate = new ArrayList(); @@ -71,6 +75,9 @@ public class RecalDataManager { } } else { data = new NestedHashMap(); + dataCollapsedReadGroup = null; + dataCollapsedQualityScore = null; + dataCollapsedByCovariate = null; } } @@ -321,10 +328,10 @@ public class RecalDataManager { // Now that we have the inconsistency array apply the desired correction to the inconsistent bases if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ) { // Set inconsistent bases and the one before it to Q0 - boolean setBaseN = false; + final boolean setBaseN = false; originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, isMappedToReference, setBaseN); } else if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") ) { - boolean setBaseN = true; + final boolean setBaseN = true; originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, isMappedToReference, setBaseN); } else if( SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") ) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead, isMappedToReference, coinFlip); @@ -469,13 +476,33 @@ public class RecalDataManager { * Check if this base is inconsistent with its color space. If it is then SOLID inserted the reference here and we should reduce the quality * @param read The read which contains the color space to check against * @param offset The offset in the read at which to check - * @return Returns true if the base is inconsistent with the color space + * @return Returns true if the base was inconsistent with the color space */ public static boolean isInconsistentColorSpace( final SAMRecord read, final int offset ) { final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG); if( attr != null ) { - final byte[] colorSpace = (byte[])attr; - return colorSpace[offset] != 0; + final byte[] inconsistency = (byte[])attr; + // NOTE: The inconsistency array is in the direction of the read, not aligned to the reference! + if( read.getReadNegativeStrandFlag() ) { // Negative direction + return inconsistency[inconsistency.length - offset - 1] != 0; + } else { // Forward direction + return inconsistency[offset] != 0; + } + + // This block of code is if you want to check both the offset and the next base for color space inconsistency + //if( read.getReadNegativeStrandFlag() ) { // Negative direction + // if( offset == 0 ) { + // return inconsistency[0] != 0; + // } else { + // return (inconsistency[inconsistency.length - offset - 1] != 0) || (inconsistency[inconsistency.length - offset] != 0); + // } + //} else { // Forward direction + // if( offset == inconsistency.length - 1 ) { + // return inconsistency[inconsistency.length - 1] != 0; + // } else { + // return (inconsistency[offset] != 0) || (inconsistency[offset + 1] != 0); + // } + //} } else { return false; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java index 6e307b705..ecc07afec 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java @@ -63,7 +63,7 @@ public class RecalibrationArgumentCollection { public boolean EXCEPTION_IF_NO_TILE = false; - public boolean checkSolidRecalMode() { + public final boolean checkSolidRecalMode() { return ( SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") || SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") ); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index 16bcdd2a3..40adfeab4 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -94,7 +94,7 @@ public class TableRecalibrationWalker extends ReadWalker> classes = PackageUtils.getClassesImplementingInterface(Covariate.class); + final List> classes = PackageUtils.getClassesImplementingInterface(Covariate.class); int lineNumber = 0; boolean foundAllCovariates = false; @@ -223,7 +223,7 @@ public class TableRecalibrationWalker extends ReadWalker adList = atd.createArgumentDefinitions(new ArgumentSource(field.getType(), field)); + final ArgumentTypeDescriptor atd = ArgumentTypeDescriptor.create(field.getType()); + final List adList = atd.createArgumentDefinitions(new ArgumentSource(field.getType(), field)); for( ArgumentDefinition ad : adList ) { commandLineString += (ad.fullName + "=" + JVMUtils.getFieldValue(field, RAC) + ", "); } @@ -355,8 +355,8 @@ public class TableRecalibrationWalker extends ReadWalker e = new HashMap(); e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "e5b2d5a2f4283718dae678cbc84be847" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "ff1b3a18b67b09560cacc3b5dea0a034"); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "89084b43b824f9e3c5e2afdfe0930542"); e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7d6428a76e07ed4b99351aa4df89634d" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "a93b0263acdc856b885f95848852140d" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "6ede6fc840c4e5070a58a919b48e7504" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -50,9 +50,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { public void testTableRecalibrator1() { HashMap e = new HashMap(); e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "6c59d291c37d053e0f188b762f3060a5" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "0f4a882d78747380af2822317c7b6045"); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "e06f1397b9c40f75e96cd3df76730ee0"); e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7ebdce416b72679e1cf88cc9886a5edc" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "41b0313bfcaf5afb3cb85f9830e79769" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "48ddc93cae054f9423f3a7ed9f36540e" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -80,7 +80,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorMaxQ70() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "8120b648a43b293b84a7cb6374513fb8" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "665711dfb81d67582b28faea24e26160" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -109,7 +109,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesVCF() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "d90342547ed228cf446caf594586f4b0"); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "9b9d21ffb70f15ef2aebad21f3fc05cb"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -136,7 +136,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesNoReadGroups() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "7e3045dcb2da1f4b305db7fa72bd1b51" ); + e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "a86c64f649b847b7f81ac50a808d3d45" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -165,7 +165,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorNoReadGroups() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "d9d29f38f8ea31ac267f9d937cad9291" ); + e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "32ad300e8c094ed2c1ec6c531180fe70" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey();