diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index f56a43c0b..620a14e95 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -110,7 +110,7 @@ public class CovariateCounterWalker extends LocusWalker { private long countedBases = 0; // Number of bases used in the calculations, used for reporting in the output file private long skippedSites = 0; // Number of loci skipped because it was a dbSNP site, used for reporting in the output file private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site - private final String versionString = "v2.0.4"; // Major version, minor version, and build number + private final String versionString = "v2.0.5"; // Major version, minor version, and build number private Pair dbSNP_counts = new Pair(0L, 0L); // mismatch/base counts for dbSNP loci private Pair novel_counts = new Pair(0L, 0L); // mismatch/base counts for non-dbSNP loci private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878) @@ -301,7 +301,8 @@ public class CovariateCounterWalker extends LocusWalker { } // This read isn't in the hashMap yet so fill out the datum and add it to the map so that we never have to do the work again - // Lots of lines just pulling things out of the SAMRecord and stuffing into local variables + // Lots of lines just pulling things out of the SAMRecord and stuffing into local variables, many edge cases to worry about + // These lines should be exactly the same in TableRecalibrationWalker quals = read.getBaseQualities(); // Check if we need to use the original quality scores instead if ( USE_ORIGINAL_QUALS && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) { @@ -350,7 +351,7 @@ public class CovariateCounterWalker extends LocusWalker { refBase = (byte)ref.getBase(); prevBase = readDatum.bases[offset - 1]; - // Get the complement base strand if we are a negative strand read, DinucCovariate is responsible for getting the complement bases if needed + // DinucCovariate is responsible for getting the complement bases if needed if( readDatum.isNegStrand ) { prevBase = readDatum.bases[offset + 1]; } @@ -415,8 +416,9 @@ public class CovariateCounterWalker extends LocusWalker { int refCharBaseIndex = BaseUtils.simpleBaseToBaseIndex(ref); if ( readCharBaseIndex != -1 && refCharBaseIndex != -1 ) { - if ( readCharBaseIndex != refCharBaseIndex ) + if ( readCharBaseIndex != refCharBaseIndex ) { counts.first++; + } counts.second++; } } @@ -426,8 +428,9 @@ public class CovariateCounterWalker extends LocusWalker { * Validate the dbSNP mismatch rates. */ private void validateDbsnpMismatchRate() { - if ( novel_counts.second == 0 || dbSNP_counts.second == 0 ) + if ( novel_counts.second == 0 || dbSNP_counts.second == 0 ) { return; + } double fractionMM_novel = (double)novel_counts.first / (double)novel_counts.second; double fractionMM_dbsnp = (double)dbSNP_counts.first / (double)dbSNP_counts.second; @@ -464,7 +467,7 @@ public class CovariateCounterWalker extends LocusWalker { if( datum == null ) { // key doesn't exist yet in the map so make a new bucket and add it datum = new RecalDatum(); // initialized with zeros, will be incremented at end of method if( VALIDATE_OLD_RECALIBRATOR || SORTED_OUTPUT ) { - dataManager.data.myPut( key, datum ); + dataManager.data.sortedPut( key, datum ); } else { dataManager.data.put( key, datum ); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/NHashMap.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/NHashMap.java index 5580c8ab8..8ce436838 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/NHashMap.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/NHashMap.java @@ -35,7 +35,7 @@ import java.util.*; * User: rpoplin * Date: Oct 30, 2009 * - * A HashMap that maps a list of comparables to any object . + * A HashMap that maps a list of comparables to any Object . * There is functionality for the mappings to be given back to you in sorted order. */ @@ -55,9 +55,8 @@ public class NHashMap extends HashMap, T> { } - // This method is here only to help facilitate direct comparison of old and refactored recalibrator. - // The old recalibrator prints out its mappings in a sorted order but the refactored recalibrator doesn't need to. - public T myPut(List key, T value) { + // This method is here only to help facilitate outputting the mappings in sorted order + public T sortedPut(List key, T value) { if( keyLists == null ) { keyLists = new ArrayList>(); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index 230179740..016d794e4 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -20,6 +20,8 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "7842b33245a53b13c7b6cd4e4616ac11"); e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "cbe17568c5f03516a88ff0c5ce33a87b" ); e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "8806fb7f0e669d1abab61dc4b3d10d8f" ); + + for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); String md5 = entry.getValue(); @@ -30,7 +32,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -T CountCovariates" + " -I " + bam + ( bam.equals( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" ) - ? " -L 1:10,800,000-10,810,000" : " -L 1:10,000,000-10,200,000" ) + + ? " -L 1:10,800,000-10,810,000" : " -L 1:10,000,000-10,200,000" ) + " -cov ReadGroupCovariate" + " -cov QualityScoreCovariate" + " -cov CycleCovariate" + @@ -74,4 +76,32 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { } } } + + @Test + public void testCountCovariatesVCF() { + HashMap e = new HashMap(); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "d5962c39a53a511d7ec710d5343e7a37"); + + + for ( Map.Entry entry : e.entrySet() ) { + String bam = entry.getKey(); + String md5 = entry.getValue(); + + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R /broad/1KG/reference/human_b36_both.fasta" + + " -B dbsnp,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf" + + " -T CountCovariates" + + " -I " + bam + + " -L 1:10,000,000-10,200,000" + + " -cov ReadGroupCovariate" + + " -cov QualityScoreCovariate" + + " -cov CycleCovariate" + + " -cov DinucCovariate" + + " --sorted_output" + + " -recalFile %s", + 1, // just one output file + Arrays.asList(md5)); + executeTest("testCountCovariatesVCF", spec); + } + } }