Complete overhaul of the BQSRv2 integration tests. Much more comprehensive. Still need to deal with a few tests that need some modifications before I'm done, but I'll take care of that sometime tomorrow.
This commit is contained in:
parent
a003148d50
commit
f657b8bda8
|
|
@ -1,35 +1,126 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* @author carneiro
|
||||
* @since 3/27/12
|
||||
* @author ebanks
|
||||
* @since 7/16/12
|
||||
*/
|
||||
public class BQSRIntegrationTest extends WalkerTest {
|
||||
@Test(enabled = true)
|
||||
public void recalibrateTest() {
|
||||
String REF = "public/testdata/exampleFASTA.fasta";
|
||||
String BAM = "public/testdata/exampleBAM.bam";
|
||||
String DBSNP = "public/testdata/exampleDBSNP.vcf";
|
||||
String base = String.format("-T BaseQualityScoreRecalibrator -R %s -I %s -knownSites %s", REF, BAM, DBSNP) + " -o %s ";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("583517f4c0b480268edf3c36755e07a8"));
|
||||
executeTest("recalibrateTest", spec);
|
||||
|
||||
private static class BQSRTest {
|
||||
final String reference;
|
||||
final String interval;
|
||||
final String bam;
|
||||
final String args;
|
||||
final String md5;
|
||||
|
||||
private BQSRTest(String reference, String bam, String interval, String args, String md5) {
|
||||
this.reference = reference;
|
||||
this.bam = bam;
|
||||
this.interval = interval;
|
||||
this.args = args;
|
||||
this.md5 = md5;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("BQSR(bam='%s', args='%s')", bam, args);
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void onTheFlyRecalibrationTest() {
|
||||
String REF = "public/testdata/exampleFASTA.fasta";
|
||||
String BAM = "public/testdata/exampleBAM.bam";
|
||||
String GRP = "public/testdata/exampleGRP.grp";
|
||||
String base = String.format("-T PrintReads -qq -1 -R %s -I %s -BQSR %s", REF, BAM, GRP) + " -o %s ";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("690c7ab414bd39de72a635aedc1fcd66"));
|
||||
executeTest("onTheFlyRecalibrationTest", spec);
|
||||
@DataProvider(name = "BQSRTest")
|
||||
public Object[][] createBQSRTestData() {
|
||||
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
|
||||
String HiSeqInterval = "chr1:10,000,000-10,100,000";
|
||||
return new Object[][]{
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "93c63489bf274f68e0378a6c296b2948")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "3bb9f93dc5a2eb879099402faf352a01")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "c1321f6548bd240b174a8afe62bb07b9")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "d9512cebf54ea120539059976b33d1ca")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "b29f83d42fc796b60ebe14a768e4f3af")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "afa916b4d3a57d62b9b3e4f64b97f428")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "5e305384496fb5ff64f7494092c939cf")},
|
||||
//{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "1:10,000,000-10,200,000", "", "")},
|
||||
//{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "")},
|
||||
//{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "1:10,000,000-10,200,000", "", "")},
|
||||
//{new BQSRTest(b36KGReference, validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "1:10,800,000-10,810,000", "", "")},
|
||||
//{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", "1:1-1,000", " -OQ", "")},
|
||||
//{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "")},
|
||||
//{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "")},
|
||||
//{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "recalibrationTest.bed", "")},
|
||||
};
|
||||
}
|
||||
|
||||
// TODO -- add test for -qq and -NIQ arguments
|
||||
@Test(dataProvider = "BQSRTest")
|
||||
public void testBQSR(BQSRTest params) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T BaseQualityScoreRecalibrator" +
|
||||
" -R " + params.reference +
|
||||
" -I " + params.bam +
|
||||
" -L " + params.interval +
|
||||
params.args +
|
||||
" --no_plots" +
|
||||
" -knownSites " + (params.reference.equals(b36KGReference) ? b36dbSNP129 : hg18dbSNP132) +
|
||||
" -o %s",
|
||||
Arrays.asList(params.md5));
|
||||
executeTest("testBQSR-"+params.args, spec).getFirst();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBQSRFailWithoutDBSNP() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" -T BaseQualityScoreRecalibrator" +
|
||||
" -I " + validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam" +
|
||||
" -L 1:10,000,000-10,200,000" +
|
||||
" --no_plots" +
|
||||
" -o %s",
|
||||
1, // just one output file
|
||||
UserException.CommandLineException.class);
|
||||
executeTest("testBQSRFailWithoutDBSNP", spec);
|
||||
}
|
||||
|
||||
private static class PRTest {
|
||||
final String args;
|
||||
final String md5;
|
||||
|
||||
private PRTest(String args, String md5) {
|
||||
this.args = args;
|
||||
this.md5 = md5;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("PrintReads(args='%s')", args);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "PRTest")
|
||||
public Object[][] createPRTestData() {
|
||||
return new Object[][]{
|
||||
{new PRTest("", "d2d6ed8667cdba7e56f5db97d6262676")},
|
||||
{new PRTest(" -qq -1", "b7053d3d67aba6d8892f0a60f0ded338")},
|
||||
{new PRTest(" -qq 6", "bfbf0855185b2b70aa35237fb71e4487")},
|
||||
{new PRTest(" -NIQ", "66aa65223f192ee39c1773aa187fd493")}
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "PRTest")
|
||||
public void testPR(PRTest params) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T PrintReads" +
|
||||
" -R " + hg18Reference +
|
||||
" -I " + privateTestDir + "HiSeq.1mb.1RG.bam" +
|
||||
" -BQSR " + privateTestDir + "HiSeq.1mb.1RG.table" +
|
||||
params.args +
|
||||
" -o %s",
|
||||
Arrays.asList(params.md5));
|
||||
executeTest("testPrintReads-"+params.args, spec).getFirst();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ public class ContextCovariate implements StandardCovariate {
|
|||
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||
|
||||
// store the original bases and then write Ns over low quality ones
|
||||
final byte[] originalBases = read.getReadBases();
|
||||
final byte[] originalBases = read.getReadBases().clone();
|
||||
final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context
|
||||
|
||||
final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag();
|
||||
|
|
|
|||
|
|
@ -1,218 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Unit tests for on-the-fly recalibration.
|
||||
*
|
||||
* @author Mauricio Carneiro
|
||||
* @since 3/16/12
|
||||
*/
|
||||
public class BaseRecalibrationUnitTest {
|
||||
|
||||
private org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager dataManager;
|
||||
|
||||
private ReadGroupCovariate rgCovariate;
|
||||
private QualityScoreCovariate qsCovariate;
|
||||
private ContextCovariate cxCovariate;
|
||||
private CycleCovariate cyCovariate;
|
||||
|
||||
private GATKSAMRecord read = ReadUtils.createRandomRead(10000);
|
||||
private BaseRecalibration baseRecalibration;
|
||||
private ReadCovariates readCovariates;
|
||||
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("rg");
|
||||
rg.setPlatform("illumina");
|
||||
read.setReadGroup(rg);
|
||||
|
||||
byte[] quals = new byte[read.getReadLength()];
|
||||
for (int i = 0; i < read.getReadLength(); i++)
|
||||
quals[i] = 20;
|
||||
read.setBaseQualities(quals);
|
||||
|
||||
RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||
List<Covariate> requiredCovariates = new ArrayList<Covariate>();
|
||||
List<Covariate> optionalCovariates = new ArrayList<Covariate>();
|
||||
|
||||
dataManager = new org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager(true, 4);
|
||||
|
||||
rgCovariate = new ReadGroupCovariate();
|
||||
rgCovariate.initialize(RAC);
|
||||
requiredCovariates.add(rgCovariate);
|
||||
|
||||
qsCovariate = new QualityScoreCovariate();
|
||||
qsCovariate.initialize(RAC);
|
||||
requiredCovariates.add(qsCovariate);
|
||||
|
||||
cxCovariate = new ContextCovariate();
|
||||
cxCovariate.initialize(RAC);
|
||||
optionalCovariates.add(cxCovariate);
|
||||
cyCovariate = new CycleCovariate();
|
||||
cyCovariate.initialize(RAC);
|
||||
optionalCovariates.add(cyCovariate);
|
||||
|
||||
final Covariate[] requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()];
|
||||
int covariateIndex = 0;
|
||||
for (final Covariate cov : requiredCovariates)
|
||||
requestedCovariates[covariateIndex++] = cov;
|
||||
for (final Covariate cov : optionalCovariates)
|
||||
requestedCovariates[covariateIndex++] = cov;
|
||||
|
||||
readCovariates = RecalDataManager.computeCovariates(read, requestedCovariates);
|
||||
|
||||
RecalibrationTables recalibrationTables = new RecalibrationTables(requestedCovariates);
|
||||
final NestedIntegerArray<RecalDatum> rgTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
||||
final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
|
||||
|
||||
for (int i=0; i<read.getReadLength(); i++) {
|
||||
final int[] bitKeys = readCovariates.getMismatchesKeySet(i);
|
||||
final Object[] objKey = buildObjectKey(bitKeys);
|
||||
|
||||
Random random = new Random();
|
||||
final int nObservations = random.nextInt(10000);
|
||||
final int nErrors = random.nextInt(10);
|
||||
final byte estimatedQReported = 30;
|
||||
double empiricalQuality = calcEmpiricalQual(nObservations, nErrors);
|
||||
|
||||
org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum oldDatum = new org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum(nObservations, nErrors, estimatedQReported, empiricalQuality);
|
||||
dataManager.addToAllTables(objKey, oldDatum, QualityUtils.MIN_USABLE_Q_SCORE);
|
||||
|
||||
RecalDatum newDatum = new RecalDatum(nObservations, nErrors, estimatedQReported);
|
||||
|
||||
rgTable.put(newDatum, bitKeys[0], EventType.BASE_SUBSTITUTION.index);
|
||||
qualTable.put(newDatum, bitKeys[0], bitKeys[1], EventType.BASE_SUBSTITUTION.index);
|
||||
for (int j = 0; j < optionalCovariates.size(); j++) {
|
||||
final NestedIntegerArray<RecalDatum> covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j);
|
||||
covTable.put(newDatum, bitKeys[0], bitKeys[1], j, bitKeys[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j], EventType.BASE_SUBSTITUTION.index);
|
||||
}
|
||||
}
|
||||
|
||||
dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||
|
||||
List<Byte> quantizedQuals = new ArrayList<Byte>();
|
||||
List<Long> qualCounts = new ArrayList<Long>();
|
||||
for (byte i = 0; i <= QualityUtils.MAX_QUAL_SCORE; i++) {
|
||||
quantizedQuals.add(i);
|
||||
qualCounts.add(1L);
|
||||
}
|
||||
QuantizationInfo quantizationInfo = new QuantizationInfo(quantizedQuals, qualCounts);
|
||||
quantizationInfo.noQuantization();
|
||||
baseRecalibration = new BaseRecalibration(quantizationInfo, recalibrationTables, requestedCovariates);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test(enabled=false)
|
||||
public void testGoldStandardComparison() {
|
||||
for (int i = 0; i < read.getReadLength(); i++) {
|
||||
int [] bitKey = readCovariates.getKeySet(i, EventType.BASE_SUBSTITUTION);
|
||||
Object [] objKey = buildObjectKey(bitKey);
|
||||
byte v2 = baseRecalibration.performSequentialQualityCalculation(bitKey, EventType.BASE_SUBSTITUTION);
|
||||
byte v1 = goldStandardSequentialCalculation(objKey);
|
||||
Assert.assertEquals(v2, v1);
|
||||
}
|
||||
}
|
||||
|
||||
private Object[] buildObjectKey(final int[] bitKey) {
|
||||
Object[] key = new Object[bitKey.length];
|
||||
key[0] = rgCovariate.formatKey(bitKey[0]);
|
||||
key[1] = qsCovariate.formatKey(bitKey[1]);
|
||||
key[2] = cxCovariate.formatKey(bitKey[2]);
|
||||
key[3] = cyCovariate.formatKey(bitKey[3]);
|
||||
return key;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements a serial recalibration of the reads using the combinational table.
|
||||
* First, we perform a positional recalibration, and then a subsequent dinuc correction.
|
||||
*
|
||||
* Given the full recalibration table, we perform the following preprocessing steps:
|
||||
*
|
||||
* - calculate the global quality score shift across all data [DeltaQ]
|
||||
* - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
|
||||
* -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
|
||||
* - The final shift equation is:
|
||||
*
|
||||
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
|
||||
*
|
||||
* @param key The list of Comparables that were calculated from the covariates
|
||||
* @return A recalibrated quality score as a byte
|
||||
*/
|
||||
private byte goldStandardSequentialCalculation(final Object... key) {
|
||||
|
||||
final byte qualFromRead = (byte) Integer.parseInt(key[1].toString());
|
||||
final Object[] readGroupCollapsedKey = new Object[1];
|
||||
final Object[] qualityScoreCollapsedKey = new Object[2];
|
||||
final Object[] covariateCollapsedKey = new Object[3];
|
||||
|
||||
// The global quality shift (over the read group only)
|
||||
readGroupCollapsedKey[0] = key[0];
|
||||
final org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum globalRecalDatum = ((org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum) dataManager.getCollapsedTable(0).get(readGroupCollapsedKey));
|
||||
double globalDeltaQ = 0.0;
|
||||
if (globalRecalDatum != null) {
|
||||
final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
|
||||
final double aggregrateQReported = globalRecalDatum.getEstimatedQReported();
|
||||
globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported;
|
||||
}
|
||||
|
||||
// The shift in quality between reported and empirical
|
||||
qualityScoreCollapsedKey[0] = key[0];
|
||||
qualityScoreCollapsedKey[1] = key[1];
|
||||
final org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum qReportedRecalDatum = ((org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum) dataManager.getCollapsedTable(1).get(qualityScoreCollapsedKey));
|
||||
double deltaQReported = 0.0;
|
||||
if (qReportedRecalDatum != null) {
|
||||
final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
|
||||
deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
|
||||
}
|
||||
|
||||
// The shift in quality due to each covariate by itself in turn
|
||||
double deltaQCovariates = 0.0;
|
||||
double deltaQCovariateEmpirical;
|
||||
covariateCollapsedKey[0] = key[0];
|
||||
covariateCollapsedKey[1] = key[1];
|
||||
for (int iii = 2; iii < key.length; iii++) {
|
||||
covariateCollapsedKey[2] = key[iii]; // The given covariate
|
||||
final org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum covariateRecalDatum = ((org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum) dataManager.getCollapsedTable(iii).get(covariateCollapsedKey));
|
||||
if (covariateRecalDatum != null) {
|
||||
deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
|
||||
deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
|
||||
}
|
||||
}
|
||||
|
||||
final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
|
||||
return QualityUtils.boundQual((int) Math.round(newQuality), QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||
|
||||
// Verbose printouts used to validate with old recalibrator
|
||||
//if(key.contains(null)) {
|
||||
// System.out.println( key + String.format(" => %d + %.2f + %.2f + %.2f + %.2f = %d",
|
||||
// qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte));
|
||||
//}
|
||||
//else {
|
||||
// System.out.println( String.format("%s %s %s %s => %d + %.2f + %.2f + %.2f + %.2f = %d",
|
||||
// key.get(0).toString(), key.get(3).toString(), key.get(2).toString(), key.get(1).toString(), qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte) );
|
||||
//}
|
||||
|
||||
//return newQualityByte;
|
||||
}
|
||||
|
||||
public static double calcEmpiricalQual(final int observations, final int errors) {
|
||||
final int smoothing = 1;
|
||||
final double doubleMismatches = (double) (errors + smoothing);
|
||||
final double doubleObservations = (double) ( observations + smoothing );
|
||||
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
|
||||
return Math.min(QualityUtils.MAX_RECALIBRATED_Q_SCORE, empiricalQual);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue