diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java index c7c281943..c5aabc64d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java @@ -125,8 +125,8 @@ public class ContextCovariate implements StandardCovariate { */ private BitSet contextWith(byte[] bases, int offset, int contextSize) { BitSet result = null; - if (offset >= contextSize) { - String context = new String(Arrays.copyOfRange(bases, offset - contextSize, offset)); + if (offset - contextSize + 1 >= 0) { + String context = new String(Arrays.copyOfRange(bases, offset - contextSize + 1, offset + 1)); if (!context.contains("N")) result = BitSetUtils.bitSetFrom(context); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java old mode 100755 new mode 100644 index b3ea88d58..77e4cc8c7 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java @@ -76,7 +76,7 @@ public class Datum { final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT); final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT); double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations); - return Math.min(empiricalQual, (double) QualityUtils.MAX_QUAL_SCORE); + return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE); } byte empiricalQualByte() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java index cedff0a80..64dba0551 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java @@ -31,7 +31,6 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.collections.Pair; @@ -152,7 +151,7 @@ public class RecalDataManager { ArrayList optionalCovariatesToAdd = new ArrayList(); // initialize an empty array of optional covariates to create the first few tables for (Covariate covariate : requiredCovariates) { requiredCovariatesToAdd.add(covariate); - final Map recalTable = new HashMap(QualityUtils.MAX_QUAL_SCORE); // initializing a new recal table for each required covariate (cumulatively) + final Map recalTable = new HashMap(); // initializing a new recal table for each required covariate (cumulatively) final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java index c71a00a3a..2dac90252 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java @@ -74,7 +74,7 @@ public class RecalDatum extends Datum { } public final void calcCombinedEmpiricalQuality() { - this.empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again + this.empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again } public final void calcEstimatedReportedQuality() { @@ -102,7 +102,7 @@ public class RecalDatum extends Datum { @Override public String toString() { - return String.format("%d,%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()), (byte) Math.floor(getEstimatedQReported())); + return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality())); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java index 072962436..7a3b85567 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java @@ -20,10 +20,8 @@ import java.util.*; public class AlleleCount extends VariantStratifier { @Override public void initialize() { - List> evals = getVariantEvalWalker().getEvals(); - // we can only work with a single eval VCF, and it must have genotypes - if ( evals.size() != 1 ) + if ( getVariantEvalWalker().getEvals().size() != 1 && !getVariantEvalWalker().mergeEvals ) throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification only works with a single eval vcf"); // There are 2 x n sample chromosomes for diploids diff --git a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java index b5aa2598e..f53b439da 100755 --- a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java @@ -9,6 +9,7 @@ import net.sf.samtools.SAMUtils; * @author Kiran Garimella */ public class QualityUtils { + public final static byte MAX_RECALIBRATED_Q_SCORE = 50; public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE; public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE); diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index 70eb9426b..d85fb03cd 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -68,9 +68,9 @@ public class BaseRecalibration { /** * This constructor only exists for testing purposes. * - * @param quantizationInfo - * @param keysAndTablesMap - * @param requestedCovariates + * @param quantizationInfo the quantization info object + * @param keysAndTablesMap the map of key managers and recalibration tables + * @param requestedCovariates the list of requested covariates */ protected BaseRecalibration(QuantizationInfo quantizationInfo, LinkedHashMap> keysAndTablesMap, ArrayList requestedCovariates) { this.quantizationInfo = quantizationInfo; @@ -179,9 +179,8 @@ public class BaseRecalibration { } double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula - recalibratedQual = QualityUtils.boundQual((int) Math.round(recalibratedQual), QualityUtils.MAX_QUAL_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL + recalibratedQual = QualityUtils.boundQual((int) Math.round(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL - return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java index 2b4cb2ae3..4b384aac0 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java @@ -39,8 +39,8 @@ public class ContextCovariateUnitTest { private void verifyCovariateArray(BitSet[] values, int contextSize, String bases) { for (int i = 0; i < values.length; i++) { String expectedContext = null; - if (i >= contextSize) { - String context = bases.substring(i - contextSize, i); + if (i - contextSize + 1 >= 0) { + String context = bases.substring(i - contextSize + 1, i + 1); if (!context.contains("N")) expectedContext = context; } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 1ab7b679e..71c014f2c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -34,6 +34,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval"; private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf"; private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf"; + private static String fundamentalTestSNPsSplit1of2VCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.split_1_of_2.vcf"; + private static String fundamentalTestSNPsSplit2of2VCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.split_2_of_2.vcf"; private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.NA12045.vcf"; private static String cmdRoot = "-T VariantEval" + @@ -437,24 +439,69 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testAlleleCountStrat() { WalkerTestSpec spec = new WalkerTestSpec( - buildCommandLine( - "-T VariantEval", - "-R " + b37KGReference, - "--dbsnp " + b37dbSNP132, - "--eval " + fundamentalTestSNPsVCF, - "-noEV", - "-EV CountVariants", - "-noST", - "-ST AlleleCount", - "-L " + fundamentalTestSNPsVCF, - "-o %s" - ), - 1, - Arrays.asList("1198bfea6183bd43219071a84c79a386") - ); + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "--dbsnp " + b37dbSNP132, + "--eval " + fundamentalTestSNPsVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST AlleleCount", + "-L " + fundamentalTestSNPsVCF, + "-o %s" + ), + 1, + Arrays.asList("1198bfea6183bd43219071a84c79a386") + ); executeTest("testAlleleCountStrat", spec); } + @Test + public void testMultipleEvalTracksAlleleCountWithMerge() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "--dbsnp " + b37dbSNP132, + "--eval " + fundamentalTestSNPsSplit1of2VCF, + "--eval " + fundamentalTestSNPsSplit2of2VCF, + "--mergeEvals", + "-noEV", + "-EV CountVariants", + "-noST", + "-ST AlleleCount", + "-L " + fundamentalTestSNPsVCF, + "-o %s" + ), + 1, + Arrays.asList("1198bfea6183bd43219071a84c79a386") + ); + executeTest("testMultipleEvalTracksAlleleCountWithMerge", spec); + } + + @Test + public void testMultipleEvalTracksAlleleCountWithoutMerge() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "--dbsnp " + b37dbSNP132, + "--eval " + fundamentalTestSNPsSplit1of2VCF, + "--eval " + fundamentalTestSNPsSplit2of2VCF, + //"--mergeEvals", No merge with AC strat ==> error + "-noEV", + "-EV CountVariants", + "-noST", + "-ST AlleleCount", + "-L " + fundamentalTestSNPsVCF + ), + 0, + UserException.class + ); + executeTest("testMultipleEvalTracksAlleleCountWithoutMerge", spec); + } + @Test public void testIntervalStrat() { WalkerTestSpec spec = new WalkerTestSpec( diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java index 4f0d39991..1193b0aea 100644 --- a/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java @@ -24,10 +24,6 @@ public class BaseRecalibrationUnitTest { private org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager dataManager; private LinkedHashMap> keysAndTablesMap; - private BQSRKeyManager rgKeyManager; - private BQSRKeyManager qsKeyManager; - private BQSRKeyManager cvKeyManager; - private ReadGroupCovariate rgCovariate; private QualityScoreCovariate qsCovariate; private ContextCovariate cxCovariate; @@ -60,13 +56,13 @@ public class BaseRecalibrationUnitTest { rgCovariate = new ReadGroupCovariate(); rgCovariate.initialize(RAC); requiredCovariates.add(rgCovariate); - rgKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); + BQSRKeyManager rgKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); keysAndTablesMap.put(rgKeyManager, new HashMap()); qsCovariate = new QualityScoreCovariate(); qsCovariate.initialize(RAC); requiredCovariates.add(qsCovariate); - qsKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); + BQSRKeyManager qsKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); keysAndTablesMap.put(qsKeyManager, new HashMap()); cxCovariate = new ContextCovariate(); @@ -75,7 +71,7 @@ public class BaseRecalibrationUnitTest { cyCovariate = new CycleCovariate(); cyCovariate.initialize(RAC); optionalCovariates.add(cyCovariate); - cvKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); + BQSRKeyManager cvKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); keysAndTablesMap.put(cvKeyManager, new HashMap()); @@ -108,7 +104,7 @@ public class BaseRecalibrationUnitTest { updateCovariateWithKeySet(mapEntry.getValue(), key, newDatum); } } - dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_QUAL_SCORE); + dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_RECALIBRATED_Q_SCORE); List quantizedQuals = new ArrayList(); List qualCounts = new ArrayList(); @@ -179,7 +175,7 @@ public class BaseRecalibrationUnitTest { BitSet key = entry.getKey(); RecalDatum datum = entry.getValue(); List keySet = keyManager.keySetFrom(key); - System.out.println(String.format("%s => %s", Utils.join(",", keySet), datum)); + System.out.println(String.format("%s => %s", Utils.join(",", keySet), datum) + "," + datum.getEstimatedQReported()); } System.out.println(); } @@ -187,9 +183,9 @@ public class BaseRecalibrationUnitTest { } - private static void printNestedHashMap(Map table, String output) { + private static void printNestedHashMap(Map table, String output) { for (Object key : table.keySet()) { - String ret = ""; + String ret; if (output.isEmpty()) ret = "" + key; else @@ -199,7 +195,7 @@ public class BaseRecalibrationUnitTest { if (next instanceof org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum) System.out.println(ret + " => " + next); else - printNestedHashMap((Map) next, "" + ret); + printNestedHashMap((Map) next, "" + ret); } } @@ -269,7 +265,7 @@ public class BaseRecalibrationUnitTest { } final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; - return QualityUtils.boundQual((int) Math.round(newQuality), QualityUtils.MAX_QUAL_SCORE); + return QualityUtils.boundQual((int) Math.round(newQuality), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // Verbose printouts used to validate with old recalibrator //if(key.contains(null)) { @@ -289,6 +285,6 @@ public class BaseRecalibrationUnitTest { final double doubleMismatches = (double) (errors + smoothing); final double doubleObservations = (double) ( observations + smoothing ); double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations); - return Math.min(QualityUtils.MAX_QUAL_SCORE, empiricalQual); + return Math.min(QualityUtils.MAX_RECALIBRATED_Q_SCORE, empiricalQual); } }