From f31bf37a6fd1f2ae3c0755e3c1ce4071a6e29310 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Sun, 3 Feb 2013 15:31:30 -0500 Subject: [PATCH] First step in better BQSR unit tests for covariates (not done yet): more test coverage in basic covariates, test logging several read groups/read lengths and more combinations simultaneously. Add basic Javadocs headers for PerReadAlleleLikehoodMap. --- .../recalibration/ReadCovariatesUnitTest.java | 75 +++++++++++-------- .../genotyper/PerReadAlleleLikelihoodMap.java | 40 ++++++++++ 2 files changed, 84 insertions(+), 31 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java index 8d94c4c4a..58e143fc2 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java @@ -54,6 +54,8 @@ import org.broadinstitute.sting.utils.sam.ReadUtils; import org.testng.Assert; import org.testng.annotations.Test; +import java.util.Random; + /** * @author carneiro * @since 4/21/12 @@ -62,16 +64,8 @@ public class ReadCovariatesUnitTest { @Test(enabled = false) public void testCovariateGeneration() { + final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); final String RGID = "id"; - final int length = 10; - final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); - GATKSAMRecord read = ReadUtils.createRandomRead(length, false); - GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(RGID); - rg.setPlatform("illumina"); - read.setReadGroup(rg); - final byte[] mQuals = read.getBaseQualities(EventType.BASE_SUBSTITUTION); - final byte[] iQuals = read.getBaseQualities(EventType.BASE_INSERTION); - final byte[] dQuals = read.getBaseQualities(EventType.BASE_DELETION); ReadGroupCovariate rgCov = new ReadGroupCovariate(); QualityScoreCovariate qsCov = new QualityScoreCovariate(); @@ -89,33 +83,52 @@ public class ReadCovariatesUnitTest { requestedCovariates[2] = coCov; requestedCovariates[3] = cyCov; - ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates); + final int NUM_READS = 100; + final Random rnd = new Random(); - // check that the length is correct - Assert.assertEquals(rc.getMismatchesKeySet().length, length); - Assert.assertEquals(rc.getInsertionsKeySet().length, length); - Assert.assertEquals(rc.getDeletionsKeySet().length, length); + final String[] readGroups = {"RG1", "RG2", "RGbla"}; + for (int idx = 0; idx < NUM_READS; idx++) { + for (final String rgs : readGroups) { + final int length = 10 + rnd.nextInt(100); // random read length, at least 10 bp long + final GATKSAMRecord read = ReadUtils.createRandomRead(length, false); + final GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(rgs); + rg.setPlatform("illumina"); + read.setReadGroup(rg); + read.setReadNegativeStrandFlag(rnd.nextBoolean()); + final byte[] mQuals = read.getBaseQualities(EventType.BASE_SUBSTITUTION); + final byte[] iQuals = read.getBaseQualities(EventType.BASE_INSERTION); + final byte[] dQuals = read.getBaseQualities(EventType.BASE_DELETION); + ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates); - for (int i = 0; i < length; i++) { - // check that read group is always the same - Assert.assertEquals(rgCov.formatKey(rc.getMismatchesKeySet(i)[0]), RGID); - Assert.assertEquals(rgCov.formatKey(rc.getInsertionsKeySet(i)[0]), RGID); - Assert.assertEquals(rgCov.formatKey(rc.getDeletionsKeySet(i)[0]), RGID); + // check that the length is correct + Assert.assertEquals(rc.getMismatchesKeySet().length, length); + Assert.assertEquals(rc.getInsertionsKeySet().length, length); + Assert.assertEquals(rc.getDeletionsKeySet().length, length); - // check quality score - Assert.assertEquals(qsCov.formatKey(rc.getMismatchesKeySet(i)[1]), "" + mQuals[i]); - Assert.assertEquals(qsCov.formatKey(rc.getInsertionsKeySet(i)[1]), "" + iQuals[i]); - Assert.assertEquals(qsCov.formatKey(rc.getDeletionsKeySet(i)[1]), "" + dQuals[i]); + for (int i = 0; i < length; i++) { + // check that read group is always the same + Assert.assertEquals(rgCov.formatKey(rc.getMismatchesKeySet(i)[0]), rgs); + Assert.assertEquals(rgCov.formatKey(rc.getInsertionsKeySet(i)[0]), rgs); + Assert.assertEquals(rgCov.formatKey(rc.getDeletionsKeySet(i)[0]), rgs); - // check context - Assert.assertEquals(coCov.formatKey(rc.getMismatchesKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.MISMATCHES_CONTEXT_SIZE)); - Assert.assertEquals(coCov.formatKey(rc.getInsertionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE)); - Assert.assertEquals(coCov.formatKey(rc.getDeletionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE)); + // check quality score + Assert.assertEquals(qsCov.formatKey(rc.getMismatchesKeySet(i)[1]), "" + mQuals[i]); + Assert.assertEquals(qsCov.formatKey(rc.getInsertionsKeySet(i)[1]), "" + iQuals[i]); + Assert.assertEquals(qsCov.formatKey(rc.getDeletionsKeySet(i)[1]), "" + dQuals[i]); + + // check context + Assert.assertEquals(coCov.formatKey(rc.getMismatchesKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.MISMATCHES_CONTEXT_SIZE)); + Assert.assertEquals(coCov.formatKey(rc.getInsertionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE)); + Assert.assertEquals(coCov.formatKey(rc.getDeletionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE)); + + // check cycle + Assert.assertEquals(cyCov.formatKey(rc.getMismatchesKeySet(i)[3]), "" + (i+1)); + Assert.assertEquals(cyCov.formatKey(rc.getInsertionsKeySet(i)[3]), "" + (i+1)); + Assert.assertEquals(cyCov.formatKey(rc.getDeletionsKeySet(i)[3]), "" + (i+1)); + } + + } - // check cycle - Assert.assertEquals(cyCov.formatKey(rc.getMismatchesKeySet(i)[3]), "" + (i+1)); - Assert.assertEquals(cyCov.formatKey(rc.getInsertionsKeySet(i)[3]), "" + (i+1)); - Assert.assertEquals(cyCov.formatKey(rc.getDeletionsKeySet(i)[3]), "" + (i+1)); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java index 46a3f58d0..85d77d946 100644 --- a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java +++ b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.genotyper; +import com.google.java.contract.Ensures; import org.broadinstitute.sting.gatk.downsampling.AlleleBiasedDownsamplingUtils; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -35,8 +36,13 @@ import org.broadinstitute.variant.variantcontext.Allele; import java.io.PrintStream; import java.util.*; +/** + * Wrapper class that holds a set of maps of the form (Read -> Map(Allele->Double)) + * For each read, this holds underlying alleles represented by an aligned read, and corresponding relative likelihood. + */ public class PerReadAlleleLikelihoodMap { + public static final double INFORMATIVE_LIKELIHOOD_THRESHOLD = 0.2; protected List alleles; @@ -47,6 +53,12 @@ public class PerReadAlleleLikelihoodMap { alleles = new ArrayList(); } + /** + * Adds a read, allele and corresponding likelihood to map + * @param read SAM record to add + * @param a corresponding allele + * @param likelihood corresponding likelihood + */ public void add(GATKSAMRecord read, Allele a, Double likelihood) { Map likelihoodMap; if (likelihoodReadMap.containsKey(read)){ @@ -97,15 +109,33 @@ public class PerReadAlleleLikelihoodMap { likelihoodReadMap.remove(read); } + @Ensures("result >=0") public int size() { return likelihoodReadMap.size(); } + /** + * Helper function to add the read underneath a pileup element to the map + * @param p Pileup element + * @param a Corresponding allele + * @param likelihood Allele likelihood + */ public void add(PileupElement p, Allele a, Double likelihood) { + if (p==null || p.getRead()==null || a == null ) + throw new IllegalArgumentException("Invalid parameters passed to PerReadAlleleLikelihoodMap.add"); add(p.getRead(), a, likelihood); } + /** + * Does the current map contain the key associated with a particular SAM record in pileup? + * @param p Pileup element + * @return + */ + @Ensures("result != null") public boolean containsPileupElement(PileupElement p) { + if (p==null ) + throw new IllegalArgumentException("Invalid pileup element"); + return likelihoodReadMap.containsKey(p.getRead()); } @@ -140,11 +170,21 @@ public class PerReadAlleleLikelihoodMap { return likelihoodReadMap.get(p.getRead()); } + + /** + * For a given alleleMap, return most likely allele, i.e. the one with highest associated likelihood + * @param alleleMap Underlying allele map + * @return Most likely allele. If all alleles are equally likely, returns a no-call allele. + */ + @Ensures("result != null") public static Allele getMostLikelyAllele( final Map alleleMap ) { double maxLike = Double.NEGATIVE_INFINITY; double prevMaxLike = Double.NEGATIVE_INFINITY; Allele mostLikelyAllele = Allele.NO_CALL; + if (alleleMap==null) + throw new IllegalArgumentException("alleleMap in getMostLikelyAllele() method can't be null"); + for (final Map.Entry el : alleleMap.entrySet()) { if (el.getValue() > maxLike) { prevMaxLike = maxLike;