First step in better BQSR unit tests for covariates (not done yet): more test coverage in basic covariates, test logging several read groups/read lengths and more combinations simultaneously.

Add basic Javadocs headers for PerReadAlleleLikehoodMap.
This commit is contained in:
Guillermo del Angel 2013-02-03 15:31:30 -05:00
parent e504806b91
commit f31bf37a6f
2 changed files with 84 additions and 31 deletions

View File

@ -54,6 +54,8 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.util.Random;
/**
* @author carneiro
* @since 4/21/12
@ -62,16 +64,8 @@ public class ReadCovariatesUnitTest {
@Test(enabled = false)
public void testCovariateGeneration() {
final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
final String RGID = "id";
final int length = 10;
final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
GATKSAMRecord read = ReadUtils.createRandomRead(length, false);
GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(RGID);
rg.setPlatform("illumina");
read.setReadGroup(rg);
final byte[] mQuals = read.getBaseQualities(EventType.BASE_SUBSTITUTION);
final byte[] iQuals = read.getBaseQualities(EventType.BASE_INSERTION);
final byte[] dQuals = read.getBaseQualities(EventType.BASE_DELETION);
ReadGroupCovariate rgCov = new ReadGroupCovariate();
QualityScoreCovariate qsCov = new QualityScoreCovariate();
@ -89,33 +83,52 @@ public class ReadCovariatesUnitTest {
requestedCovariates[2] = coCov;
requestedCovariates[3] = cyCov;
ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates);
final int NUM_READS = 100;
final Random rnd = new Random();
// check that the length is correct
Assert.assertEquals(rc.getMismatchesKeySet().length, length);
Assert.assertEquals(rc.getInsertionsKeySet().length, length);
Assert.assertEquals(rc.getDeletionsKeySet().length, length);
final String[] readGroups = {"RG1", "RG2", "RGbla"};
for (int idx = 0; idx < NUM_READS; idx++) {
for (final String rgs : readGroups) {
final int length = 10 + rnd.nextInt(100); // random read length, at least 10 bp long
final GATKSAMRecord read = ReadUtils.createRandomRead(length, false);
final GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(rgs);
rg.setPlatform("illumina");
read.setReadGroup(rg);
read.setReadNegativeStrandFlag(rnd.nextBoolean());
final byte[] mQuals = read.getBaseQualities(EventType.BASE_SUBSTITUTION);
final byte[] iQuals = read.getBaseQualities(EventType.BASE_INSERTION);
final byte[] dQuals = read.getBaseQualities(EventType.BASE_DELETION);
ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates);
for (int i = 0; i < length; i++) {
// check that read group is always the same
Assert.assertEquals(rgCov.formatKey(rc.getMismatchesKeySet(i)[0]), RGID);
Assert.assertEquals(rgCov.formatKey(rc.getInsertionsKeySet(i)[0]), RGID);
Assert.assertEquals(rgCov.formatKey(rc.getDeletionsKeySet(i)[0]), RGID);
// check that the length is correct
Assert.assertEquals(rc.getMismatchesKeySet().length, length);
Assert.assertEquals(rc.getInsertionsKeySet().length, length);
Assert.assertEquals(rc.getDeletionsKeySet().length, length);
// check quality score
Assert.assertEquals(qsCov.formatKey(rc.getMismatchesKeySet(i)[1]), "" + mQuals[i]);
Assert.assertEquals(qsCov.formatKey(rc.getInsertionsKeySet(i)[1]), "" + iQuals[i]);
Assert.assertEquals(qsCov.formatKey(rc.getDeletionsKeySet(i)[1]), "" + dQuals[i]);
for (int i = 0; i < length; i++) {
// check that read group is always the same
Assert.assertEquals(rgCov.formatKey(rc.getMismatchesKeySet(i)[0]), rgs);
Assert.assertEquals(rgCov.formatKey(rc.getInsertionsKeySet(i)[0]), rgs);
Assert.assertEquals(rgCov.formatKey(rc.getDeletionsKeySet(i)[0]), rgs);
// check context
Assert.assertEquals(coCov.formatKey(rc.getMismatchesKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.MISMATCHES_CONTEXT_SIZE));
Assert.assertEquals(coCov.formatKey(rc.getInsertionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE));
Assert.assertEquals(coCov.formatKey(rc.getDeletionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE));
// check quality score
Assert.assertEquals(qsCov.formatKey(rc.getMismatchesKeySet(i)[1]), "" + mQuals[i]);
Assert.assertEquals(qsCov.formatKey(rc.getInsertionsKeySet(i)[1]), "" + iQuals[i]);
Assert.assertEquals(qsCov.formatKey(rc.getDeletionsKeySet(i)[1]), "" + dQuals[i]);
// check context
Assert.assertEquals(coCov.formatKey(rc.getMismatchesKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.MISMATCHES_CONTEXT_SIZE));
Assert.assertEquals(coCov.formatKey(rc.getInsertionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE));
Assert.assertEquals(coCov.formatKey(rc.getDeletionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE));
// check cycle
Assert.assertEquals(cyCov.formatKey(rc.getMismatchesKeySet(i)[3]), "" + (i+1));
Assert.assertEquals(cyCov.formatKey(rc.getInsertionsKeySet(i)[3]), "" + (i+1));
Assert.assertEquals(cyCov.formatKey(rc.getDeletionsKeySet(i)[3]), "" + (i+1));
}
}
// check cycle
Assert.assertEquals(cyCov.formatKey(rc.getMismatchesKeySet(i)[3]), "" + (i+1));
Assert.assertEquals(cyCov.formatKey(rc.getInsertionsKeySet(i)[3]), "" + (i+1));
Assert.assertEquals(cyCov.formatKey(rc.getDeletionsKeySet(i)[3]), "" + (i+1));
}
}

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.utils.genotyper;
import com.google.java.contract.Ensures;
import org.broadinstitute.sting.gatk.downsampling.AlleleBiasedDownsamplingUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -35,8 +36,13 @@ import org.broadinstitute.variant.variantcontext.Allele;
import java.io.PrintStream;
import java.util.*;
/**
* Wrapper class that holds a set of maps of the form (Read -> Map(Allele->Double))
* For each read, this holds underlying alleles represented by an aligned read, and corresponding relative likelihood.
*/
public class PerReadAlleleLikelihoodMap {
public static final double INFORMATIVE_LIKELIHOOD_THRESHOLD = 0.2;
protected List<Allele> alleles;
@ -47,6 +53,12 @@ public class PerReadAlleleLikelihoodMap {
alleles = new ArrayList<Allele>();
}
/**
* Adds a read, allele and corresponding likelihood to map
* @param read SAM record to add
* @param a corresponding allele
* @param likelihood corresponding likelihood
*/
public void add(GATKSAMRecord read, Allele a, Double likelihood) {
Map<Allele,Double> likelihoodMap;
if (likelihoodReadMap.containsKey(read)){
@ -97,15 +109,33 @@ public class PerReadAlleleLikelihoodMap {
likelihoodReadMap.remove(read);
}
@Ensures("result >=0")
public int size() {
return likelihoodReadMap.size();
}
/**
* Helper function to add the read underneath a pileup element to the map
* @param p Pileup element
* @param a Corresponding allele
* @param likelihood Allele likelihood
*/
public void add(PileupElement p, Allele a, Double likelihood) {
if (p==null || p.getRead()==null || a == null )
throw new IllegalArgumentException("Invalid parameters passed to PerReadAlleleLikelihoodMap.add");
add(p.getRead(), a, likelihood);
}
/**
* Does the current map contain the key associated with a particular SAM record in pileup?
* @param p Pileup element
* @return
*/
@Ensures("result != null")
public boolean containsPileupElement(PileupElement p) {
if (p==null )
throw new IllegalArgumentException("Invalid pileup element");
return likelihoodReadMap.containsKey(p.getRead());
}
@ -140,11 +170,21 @@ public class PerReadAlleleLikelihoodMap {
return likelihoodReadMap.get(p.getRead());
}
/**
* For a given alleleMap, return most likely allele, i.e. the one with highest associated likelihood
* @param alleleMap Underlying allele map
* @return Most likely allele. If all alleles are equally likely, returns a no-call allele.
*/
@Ensures("result != null")
public static Allele getMostLikelyAllele( final Map<Allele,Double> alleleMap ) {
double maxLike = Double.NEGATIVE_INFINITY;
double prevMaxLike = Double.NEGATIVE_INFINITY;
Allele mostLikelyAllele = Allele.NO_CALL;
if (alleleMap==null)
throw new IllegalArgumentException("alleleMap in getMostLikelyAllele() method can't be null");
for (final Map.Entry<Allele,Double> el : alleleMap.entrySet()) {
if (el.getValue() > maxLike) {
prevMaxLike = maxLike;