diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index 0770c358e..323a8d4e4 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -11,6 +11,7 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrde import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.genotype.Variation; import java.io.PrintStream; @@ -100,7 +101,7 @@ public class CovariateCounterWalker extends LocusWalker { private long countedBases = 0; // Number of bases used in the calculations, used for reporting in the output file private long skippedSites = 0; // Number of loci skipped because it was a dbSNP site, used for reporting in the output file private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site - private static final String versionString = "v2.0.9"; // Major version, minor version, and build number + private static final String versionString = "v2.0.10"; // Major version, minor version, and build number private Pair dbSNP_counts = new Pair(0L, 0L); // mismatch/base counts for dbSNP loci private Pair novel_counts = new Pair(0L, 0L); // mismatch/base counts for non-dbSNP loci private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878) @@ -122,6 +123,7 @@ public class CovariateCounterWalker extends LocusWalker { logger.info( "CovariateCounterWalker version: " + versionString ); if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; } if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; } + DBSNP_VALIDATION_CHECK_FREQUENCY *= PROCESS_EVERY_NTH_LOCUS; // Get a list of all available covariates final List> classes = PackageUtils.getClassesImplementingInterface( Covariate.class ); @@ -262,20 +264,16 @@ public class CovariateCounterWalker extends LocusWalker { if( !isSNP && ( ++numUnprocessed >= PROCESS_EVERY_NTH_LOCUS ) ) { numUnprocessed = 0; // Reset the counter because we are processing this very locus - // Long list of variables that are preallocated for use in for loop below - final List reads = context.getReads(); - final List offsets = context.getOffsets(); SAMRecord read; int offset; byte refBase; byte prevBase; byte[] colorSpaceQuals; - final int numReads = reads.size(); // For each read at this locus - for( int iii = 0; iii < numReads; iii++ ) { - read = reads.get(iii); - offset = offsets.get(iii); // offset is zero based so quals[offset] and bases[offset] is correct + for ( PileupElement p : context.getPileup() ) { + read = p.getRead(); + offset = p.getOffset(); RecalDataManager.parseSAMRecord(read, RAC); @@ -317,11 +315,9 @@ public class CovariateCounterWalker extends LocusWalker { } } countedSites++; - updateMismatchCounts(novel_counts, context, ref.getBase()); // For sanity check to ensure novel mismatch rate vs dnsnp mismatch rate is reasonable - } else { // We skipped over the dbSNP site, and we are only processing every Nth locus skippedSites++; - if( isSNP) { + if( isSNP ) { updateMismatchCounts(dbSNP_counts, context, ref.getBase());// For sanity check to ensure novel mismatch rate vs dnsnp mismatch rate is reasonable } } @@ -343,10 +339,8 @@ public class CovariateCounterWalker extends LocusWalker { * @param ref The reference base */ private static void updateMismatchCounts(Pair counts, AlignmentContext context, char ref) { - List reads = context.getReads(); - List offsets = context.getOffsets(); - for(int iii = 0; iii < reads.size(); iii++ ) { - char readChar = (char)(reads.get(iii).getReadBases()[offsets.get(iii)]); + for( PileupElement p : context.getPileup() ) { + char readChar = (char)(p.getBase()); int readCharBaseIndex = BaseUtils.simpleBaseToBaseIndex(readChar); int refCharBaseIndex = BaseUtils.simpleBaseToBaseIndex(ref); @@ -410,10 +404,13 @@ public class CovariateCounterWalker extends LocusWalker { // Need the bases to determine whether or not we have a mismatch byte base = read.getReadBases()[offset]; - + long curMismatches = datum.getNumMismatches(); + // Add one to the number of observations and potentially one to the number of mismatches datum.increment( (char)base, (char)refBase ); // Dangerous: If you don't cast to char than the bytes default to the (long, long) version of the increment method which is really bad countedBases++; + novel_counts.second++; + novel_counts.first += datum.getNumMismatches() - curMismatches; // For sanity check to ensure novel mismatch rate vs dnsnp mismatch rate is reasonable } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatum.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatum.java index 0a73a6a48..28251908c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatum.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatum.java @@ -129,6 +129,10 @@ public class RecalDatum { public final Long getNumObservations() { return numObservations; } + + public final Long getNumMismatches() { + return numMismatches; + } public String toString() { return String.format( "RecalDatum: %d,%d,%d", numObservations, numMismatches, (int)empiricalQualByte() );