First step in better BQSR unit tests for covariates (not done yet): more test coverage in basic covariates, test logging several read groups/read lengths and more combinations simultaneously.
Add basic Javadoc headers for PerReadAlleleLikelihoodMap.
This commit is contained in:
parent
e504806b91
commit
f31bf37a6f
|
|
@ -54,6 +54,8 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
|
|||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
/**
|
||||
* @author carneiro
|
||||
* @since 4/21/12
|
||||
|
|
@ -62,16 +64,8 @@ public class ReadCovariatesUnitTest {
|
|||
|
||||
@Test(enabled = false)
|
||||
public void testCovariateGeneration() {
|
||||
final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||
final String RGID = "id";
|
||||
final int length = 10;
|
||||
final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||
GATKSAMRecord read = ReadUtils.createRandomRead(length, false);
|
||||
GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(RGID);
|
||||
rg.setPlatform("illumina");
|
||||
read.setReadGroup(rg);
|
||||
final byte[] mQuals = read.getBaseQualities(EventType.BASE_SUBSTITUTION);
|
||||
final byte[] iQuals = read.getBaseQualities(EventType.BASE_INSERTION);
|
||||
final byte[] dQuals = read.getBaseQualities(EventType.BASE_DELETION);
|
||||
|
||||
ReadGroupCovariate rgCov = new ReadGroupCovariate();
|
||||
QualityScoreCovariate qsCov = new QualityScoreCovariate();
|
||||
|
|
@ -89,33 +83,52 @@ public class ReadCovariatesUnitTest {
|
|||
requestedCovariates[2] = coCov;
|
||||
requestedCovariates[3] = cyCov;
|
||||
|
||||
ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates);
|
||||
final int NUM_READS = 100;
|
||||
final Random rnd = new Random();
|
||||
|
||||
// check that the length is correct
|
||||
Assert.assertEquals(rc.getMismatchesKeySet().length, length);
|
||||
Assert.assertEquals(rc.getInsertionsKeySet().length, length);
|
||||
Assert.assertEquals(rc.getDeletionsKeySet().length, length);
|
||||
final String[] readGroups = {"RG1", "RG2", "RGbla"};
|
||||
for (int idx = 0; idx < NUM_READS; idx++) {
|
||||
for (final String rgs : readGroups) {
|
||||
final int length = 10 + rnd.nextInt(100); // random read length, at least 10 bp long
|
||||
final GATKSAMRecord read = ReadUtils.createRandomRead(length, false);
|
||||
final GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(rgs);
|
||||
rg.setPlatform("illumina");
|
||||
read.setReadGroup(rg);
|
||||
read.setReadNegativeStrandFlag(rnd.nextBoolean());
|
||||
final byte[] mQuals = read.getBaseQualities(EventType.BASE_SUBSTITUTION);
|
||||
final byte[] iQuals = read.getBaseQualities(EventType.BASE_INSERTION);
|
||||
final byte[] dQuals = read.getBaseQualities(EventType.BASE_DELETION);
|
||||
ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates);
|
||||
|
||||
for (int i = 0; i < length; i++) {
|
||||
// check that read group is always the same
|
||||
Assert.assertEquals(rgCov.formatKey(rc.getMismatchesKeySet(i)[0]), RGID);
|
||||
Assert.assertEquals(rgCov.formatKey(rc.getInsertionsKeySet(i)[0]), RGID);
|
||||
Assert.assertEquals(rgCov.formatKey(rc.getDeletionsKeySet(i)[0]), RGID);
|
||||
// check that the length is correct
|
||||
Assert.assertEquals(rc.getMismatchesKeySet().length, length);
|
||||
Assert.assertEquals(rc.getInsertionsKeySet().length, length);
|
||||
Assert.assertEquals(rc.getDeletionsKeySet().length, length);
|
||||
|
||||
// check quality score
|
||||
Assert.assertEquals(qsCov.formatKey(rc.getMismatchesKeySet(i)[1]), "" + mQuals[i]);
|
||||
Assert.assertEquals(qsCov.formatKey(rc.getInsertionsKeySet(i)[1]), "" + iQuals[i]);
|
||||
Assert.assertEquals(qsCov.formatKey(rc.getDeletionsKeySet(i)[1]), "" + dQuals[i]);
|
||||
for (int i = 0; i < length; i++) {
|
||||
// check that read group is always the same
|
||||
Assert.assertEquals(rgCov.formatKey(rc.getMismatchesKeySet(i)[0]), rgs);
|
||||
Assert.assertEquals(rgCov.formatKey(rc.getInsertionsKeySet(i)[0]), rgs);
|
||||
Assert.assertEquals(rgCov.formatKey(rc.getDeletionsKeySet(i)[0]), rgs);
|
||||
|
||||
// check context
|
||||
Assert.assertEquals(coCov.formatKey(rc.getMismatchesKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.MISMATCHES_CONTEXT_SIZE));
|
||||
Assert.assertEquals(coCov.formatKey(rc.getInsertionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE));
|
||||
Assert.assertEquals(coCov.formatKey(rc.getDeletionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE));
|
||||
// check quality score
|
||||
Assert.assertEquals(qsCov.formatKey(rc.getMismatchesKeySet(i)[1]), "" + mQuals[i]);
|
||||
Assert.assertEquals(qsCov.formatKey(rc.getInsertionsKeySet(i)[1]), "" + iQuals[i]);
|
||||
Assert.assertEquals(qsCov.formatKey(rc.getDeletionsKeySet(i)[1]), "" + dQuals[i]);
|
||||
|
||||
// check context
|
||||
Assert.assertEquals(coCov.formatKey(rc.getMismatchesKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.MISMATCHES_CONTEXT_SIZE));
|
||||
Assert.assertEquals(coCov.formatKey(rc.getInsertionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE));
|
||||
Assert.assertEquals(coCov.formatKey(rc.getDeletionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE));
|
||||
|
||||
// check cycle
|
||||
Assert.assertEquals(cyCov.formatKey(rc.getMismatchesKeySet(i)[3]), "" + (i+1));
|
||||
Assert.assertEquals(cyCov.formatKey(rc.getInsertionsKeySet(i)[3]), "" + (i+1));
|
||||
Assert.assertEquals(cyCov.formatKey(rc.getDeletionsKeySet(i)[3]), "" + (i+1));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// check cycle
|
||||
Assert.assertEquals(cyCov.formatKey(rc.getMismatchesKeySet(i)[3]), "" + (i+1));
|
||||
Assert.assertEquals(cyCov.formatKey(rc.getInsertionsKeySet(i)[3]), "" + (i+1));
|
||||
Assert.assertEquals(cyCov.formatKey(rc.getDeletionsKeySet(i)[3]), "" + (i+1));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.sting.utils.genotyper;
|
||||
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import org.broadinstitute.sting.gatk.downsampling.AlleleBiasedDownsamplingUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -35,8 +36,13 @@ import org.broadinstitute.variant.variantcontext.Allele;
|
|||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Wrapper class that holds a set of maps of the form (Read -> Map(Allele->Double))
|
||||
* For each read, this holds underlying alleles represented by an aligned read, and corresponding relative likelihood.
|
||||
*/
|
||||
public class PerReadAlleleLikelihoodMap {
|
||||
|
||||
|
||||
public static final double INFORMATIVE_LIKELIHOOD_THRESHOLD = 0.2;
|
||||
|
||||
protected List<Allele> alleles;
|
||||
|
|
@ -47,6 +53,12 @@ public class PerReadAlleleLikelihoodMap {
|
|||
alleles = new ArrayList<Allele>();
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a read, allele and corresponding likelihood to map
|
||||
* @param read SAM record to add
|
||||
* @param a corresponding allele
|
||||
* @param likelihood corresponding likelihood
|
||||
*/
|
||||
public void add(GATKSAMRecord read, Allele a, Double likelihood) {
|
||||
Map<Allele,Double> likelihoodMap;
|
||||
if (likelihoodReadMap.containsKey(read)){
|
||||
|
|
@ -97,15 +109,33 @@ public class PerReadAlleleLikelihoodMap {
|
|||
likelihoodReadMap.remove(read);
|
||||
}
|
||||
|
||||
@Ensures("result >=0")
|
||||
public int size() {
|
||||
return likelihoodReadMap.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to add the read underneath a pileup element to the map
|
||||
* @param p Pileup element
|
||||
* @param a Corresponding allele
|
||||
* @param likelihood Allele likelihood
|
||||
*/
|
||||
public void add(PileupElement p, Allele a, Double likelihood) {
|
||||
if (p==null || p.getRead()==null || a == null )
|
||||
throw new IllegalArgumentException("Invalid parameters passed to PerReadAlleleLikelihoodMap.add");
|
||||
add(p.getRead(), a, likelihood);
|
||||
}
|
||||
|
||||
/**
|
||||
* Does the current map contain the key associated with a particular SAM record in pileup?
|
||||
* @param p Pileup element
|
||||
* @return true if the underlying likelihood map contains the read of the given pileup element
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public boolean containsPileupElement(PileupElement p) {
|
||||
if (p==null )
|
||||
throw new IllegalArgumentException("Invalid pileup element");
|
||||
|
||||
return likelihoodReadMap.containsKey(p.getRead());
|
||||
}
|
||||
|
||||
|
|
@ -140,11 +170,21 @@ public class PerReadAlleleLikelihoodMap {
|
|||
return likelihoodReadMap.get(p.getRead());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* For a given alleleMap, return most likely allele, i.e. the one with highest associated likelihood
|
||||
* @param alleleMap Underlying allele map
|
||||
* @return Most likely allele. If all alleles are equally likely, returns a no-call allele.
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public static Allele getMostLikelyAllele( final Map<Allele,Double> alleleMap ) {
|
||||
double maxLike = Double.NEGATIVE_INFINITY;
|
||||
double prevMaxLike = Double.NEGATIVE_INFINITY;
|
||||
Allele mostLikelyAllele = Allele.NO_CALL;
|
||||
|
||||
if (alleleMap==null)
|
||||
throw new IllegalArgumentException("alleleMap in getMostLikelyAllele() method can't be null");
|
||||
|
||||
for (final Map.Entry<Allele,Double> el : alleleMap.entrySet()) {
|
||||
if (el.getValue() > maxLike) {
|
||||
prevMaxLike = maxLike;
|
||||
|
|
|
|||
Loading…
Reference in New Issue