diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index 942fe7d24..ccc9084b4 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -1,35 +1,126 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.util.Arrays; /** - * @author carneiro - * @since 3/27/12 + * @author ebanks + * @since 7/16/12 */ public class BQSRIntegrationTest extends WalkerTest { - @Test(enabled = true) - public void recalibrateTest() { - String REF = "public/testdata/exampleFASTA.fasta"; - String BAM = "public/testdata/exampleBAM.bam"; - String DBSNP = "public/testdata/exampleDBSNP.vcf"; - String base = String.format("-T BaseQualityScoreRecalibrator -R %s -I %s -knownSites %s", REF, BAM, DBSNP) + " -o %s "; - WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("583517f4c0b480268edf3c36755e07a8")); - executeTest("recalibrateTest", spec); + + private static class BQSRTest { + final String reference; + final String interval; + final String bam; + final String args; + final String md5; + + private BQSRTest(String reference, String bam, String interval, String args, String md5) { + this.reference = reference; + this.bam = bam; + this.interval = interval; + this.args = args; + this.md5 = md5; + } + + @Override + public String toString() { + return String.format("BQSR(bam='%s', args='%s')", bam, args); + } } - @Test(enabled = true) - public void onTheFlyRecalibrationTest() { - String REF = "public/testdata/exampleFASTA.fasta"; - String BAM = "public/testdata/exampleBAM.bam"; - String GRP = "public/testdata/exampleGRP.grp"; - String base = String.format("-T PrintReads -qq -1 -R %s -I %s -BQSR %s", REF, BAM, GRP) + " -o %s "; - WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("690c7ab414bd39de72a635aedc1fcd66")); - executeTest("onTheFlyRecalibrationTest", spec); + @DataProvider(name = "BQSRTest") + public Object[][] createBQSRTestData() { + String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam"; + String HiSeqInterval = "chr1:10,000,000-10,100,000"; + return new Object[][]{ + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "93c63489bf274f68e0378a6c296b2948")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "3bb9f93dc5a2eb879099402faf352a01")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "c1321f6548bd240b174a8afe62bb07b9")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "d9512cebf54ea120539059976b33d1ca")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "b29f83d42fc796b60ebe14a768e4f3af")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "afa916b4d3a57d62b9b3e4f64b97f428")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "5e305384496fb5ff64f7494092c939cf")}, + //{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "1:10,000,000-10,200,000", "", "")}, + //{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "")}, + //{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "1:10,000,000-10,200,000", "", "")}, + //{new BQSRTest(b36KGReference, validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "1:10,800,000-10,810,000", "", "")}, + //{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", "1:1-1,000", " -OQ", "")}, + //{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "")}, + //{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "")}, + //{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "recalibrationTest.bed", "")}, + }; } - // TODO -- add test for -qq and -NIQ arguments + @Test(dataProvider = "BQSRTest") + public void testBQSR(BQSRTest params) { + WalkerTestSpec spec = new WalkerTestSpec( + "-T BaseQualityScoreRecalibrator" + + " -R " + params.reference + + " -I " + params.bam + + " -L " + params.interval + + params.args + + " --no_plots" + + " -knownSites " + (params.reference.equals(b36KGReference) ? b36dbSNP129 : hg18dbSNP132) + + " -o %s", + Arrays.asList(params.md5)); + executeTest("testBQSR-"+params.args, spec).getFirst(); + } + @Test + public void testBQSRFailWithoutDBSNP() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b36KGReference + + " -T BaseQualityScoreRecalibrator" + + " -I " + validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam" + + " -L 1:10,000,000-10,200,000" + + " --no_plots" + + " -o %s", + 1, // just one output file + UserException.CommandLineException.class); + executeTest("testBQSRFailWithoutDBSNP", spec); + } + + private static class PRTest { + final String args; + final String md5; + + private PRTest(String args, String md5) { + this.args = args; + this.md5 = md5; + } + + @Override + public String toString() { + return String.format("PrintReads(args='%s')", args); + } + } + + @DataProvider(name = "PRTest") + public Object[][] createPRTestData() { + return new Object[][]{ + {new PRTest("", "d2d6ed8667cdba7e56f5db97d6262676")}, + {new PRTest(" -qq -1", "b7053d3d67aba6d8892f0a60f0ded338")}, + {new PRTest(" -qq 6", "bfbf0855185b2b70aa35237fb71e4487")}, + {new PRTest(" -NIQ", "66aa65223f192ee39c1773aa187fd493")} + }; + } + + @Test(dataProvider = "PRTest") + public void testPR(PRTest params) { + WalkerTestSpec spec = new WalkerTestSpec( + "-T PrintReads" + + " -R " + hg18Reference + + " -I " + privateTestDir + "HiSeq.1mb.1RG.bam" + + " -BQSR " + privateTestDir + "HiSeq.1mb.1RG.table" + + params.args + + " -o %s", + Arrays.asList(params.md5)); + executeTest("testPrintReads-"+params.args, spec).getFirst(); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java index 17da89f29..8bc58063d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java @@ -83,7 +83,7 @@ public class ContextCovariate implements StandardCovariate { public void recordValues(final GATKSAMRecord read, final ReadCovariates values) { // store the original bases and then write Ns over low quality ones - final byte[] originalBases = read.getReadBases(); + final byte[] originalBases = read.getReadBases().clone(); final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag(); diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java deleted file mode 100644 index 74d9420b2..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java +++ /dev/null @@ -1,218 +0,0 @@ -package org.broadinstitute.sting.utils.recalibration; - -import org.broadinstitute.sting.gatk.walkers.bqsr.*; -import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.collections.NestedIntegerArray; -import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.testng.Assert; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.Test; - -import java.util.*; - -/** - * Unit tests for on-the-fly recalibration. - * - * @author Mauricio Carneiro - * @since 3/16/12 - */ -public class BaseRecalibrationUnitTest { - - private org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager dataManager; - - private ReadGroupCovariate rgCovariate; - private QualityScoreCovariate qsCovariate; - private ContextCovariate cxCovariate; - private CycleCovariate cyCovariate; - - private GATKSAMRecord read = ReadUtils.createRandomRead(10000); - private BaseRecalibration baseRecalibration; - private ReadCovariates readCovariates; - - - @BeforeClass - public void init() { - GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("rg"); - rg.setPlatform("illumina"); - read.setReadGroup(rg); - - byte[] quals = new byte[read.getReadLength()]; - for (int i = 0; i < read.getReadLength(); i++) - quals[i] = 20; - read.setBaseQualities(quals); - - RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); - List requiredCovariates = new ArrayList(); - List optionalCovariates = new ArrayList(); - - dataManager = new org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager(true, 4); - - rgCovariate = new ReadGroupCovariate(); - rgCovariate.initialize(RAC); - requiredCovariates.add(rgCovariate); - - qsCovariate = new QualityScoreCovariate(); - qsCovariate.initialize(RAC); - requiredCovariates.add(qsCovariate); - - cxCovariate = new ContextCovariate(); - cxCovariate.initialize(RAC); - optionalCovariates.add(cxCovariate); - cyCovariate = new CycleCovariate(); - cyCovariate.initialize(RAC); - optionalCovariates.add(cyCovariate); - - final Covariate[] requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()]; - int covariateIndex = 0; - for (final Covariate cov : requiredCovariates) - requestedCovariates[covariateIndex++] = cov; - for (final Covariate cov : optionalCovariates) - requestedCovariates[covariateIndex++] = cov; - - readCovariates = RecalDataManager.computeCovariates(read, requestedCovariates); - - RecalibrationTables recalibrationTables = new RecalibrationTables(requestedCovariates); - final NestedIntegerArray rgTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); - final NestedIntegerArray qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); - - for (int i=0; i covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j); - covTable.put(newDatum, bitKeys[0], bitKeys[1], j, bitKeys[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j], EventType.BASE_SUBSTITUTION.index); - } - } - - dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_RECALIBRATED_Q_SCORE); - - List quantizedQuals = new ArrayList(); - List qualCounts = new ArrayList(); - for (byte i = 0; i <= QualityUtils.MAX_QUAL_SCORE; i++) { - quantizedQuals.add(i); - qualCounts.add(1L); - } - QuantizationInfo quantizationInfo = new QuantizationInfo(quantizedQuals, qualCounts); - quantizationInfo.noQuantization(); - baseRecalibration = new BaseRecalibration(quantizationInfo, recalibrationTables, requestedCovariates); - - } - - - @Test(enabled=false) - public void testGoldStandardComparison() { - for (int i = 0; i < read.getReadLength(); i++) { - int [] bitKey = readCovariates.getKeySet(i, EventType.BASE_SUBSTITUTION); - Object [] objKey = buildObjectKey(bitKey); - byte v2 = baseRecalibration.performSequentialQualityCalculation(bitKey, EventType.BASE_SUBSTITUTION); - byte v1 = goldStandardSequentialCalculation(objKey); - Assert.assertEquals(v2, v1); - } - } - - private Object[] buildObjectKey(final int[] bitKey) { - Object[] key = new Object[bitKey.length]; - key[0] = rgCovariate.formatKey(bitKey[0]); - key[1] = qsCovariate.formatKey(bitKey[1]); - key[2] = cxCovariate.formatKey(bitKey[2]); - key[3] = cyCovariate.formatKey(bitKey[3]); - return key; - } - - /** - * Implements a serial recalibration of the reads using the combinational table. - * First, we perform a positional recalibration, and then a subsequent dinuc correction. - * - * Given the full recalibration table, we perform the following preprocessing steps: - * - * - calculate the global quality score shift across all data [DeltaQ] - * - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift - * -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual - * - The final shift equation is: - * - * Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... ) - * - * @param key The list of Comparables that were calculated from the covariates - * @return A recalibrated quality score as a byte - */ - private byte goldStandardSequentialCalculation(final Object... key) { - - final byte qualFromRead = (byte) Integer.parseInt(key[1].toString()); - final Object[] readGroupCollapsedKey = new Object[1]; - final Object[] qualityScoreCollapsedKey = new Object[2]; - final Object[] covariateCollapsedKey = new Object[3]; - - // The global quality shift (over the read group only) - readGroupCollapsedKey[0] = key[0]; - final org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum globalRecalDatum = ((org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum) dataManager.getCollapsedTable(0).get(readGroupCollapsedKey)); - double globalDeltaQ = 0.0; - if (globalRecalDatum != null) { - final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality(); - final double aggregrateQReported = globalRecalDatum.getEstimatedQReported(); - globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported; - } - - // The shift in quality between reported and empirical - qualityScoreCollapsedKey[0] = key[0]; - qualityScoreCollapsedKey[1] = key[1]; - final org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum qReportedRecalDatum = ((org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum) dataManager.getCollapsedTable(1).get(qualityScoreCollapsedKey)); - double deltaQReported = 0.0; - if (qReportedRecalDatum != null) { - final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality(); - deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ; - } - - // The shift in quality due to each covariate by itself in turn - double deltaQCovariates = 0.0; - double deltaQCovariateEmpirical; - covariateCollapsedKey[0] = key[0]; - covariateCollapsedKey[1] = key[1]; - for (int iii = 2; iii < key.length; iii++) { - covariateCollapsedKey[2] = key[iii]; // The given covariate - final org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum covariateRecalDatum = ((org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum) dataManager.getCollapsedTable(iii).get(covariateCollapsedKey)); - if (covariateRecalDatum != null) { - deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality(); - deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported)); - } - } - - final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; - return QualityUtils.boundQual((int) Math.round(newQuality), QualityUtils.MAX_RECALIBRATED_Q_SCORE); - - // Verbose printouts used to validate with old recalibrator - //if(key.contains(null)) { - // System.out.println( key + String.format(" => %d + %.2f + %.2f + %.2f + %.2f = %d", - // qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte)); - //} - //else { - // System.out.println( String.format("%s %s %s %s => %d + %.2f + %.2f + %.2f + %.2f = %d", - // key.get(0).toString(), key.get(3).toString(), key.get(2).toString(), key.get(1).toString(), qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte) ); - //} - - //return newQualityByte; - } - - public static double calcEmpiricalQual(final int observations, final int errors) { - final int smoothing = 1; - final double doubleMismatches = (double) (errors + smoothing); - final double doubleObservations = (double) ( observations + smoothing ); - double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations); - return Math.min(QualityUtils.MAX_RECALIBRATED_Q_SCORE, empiricalQual); - } -}