First step in better BQSR unit tests for covariates (not done yet): more test coverage in basic covariates, test logging several read groups/read lengths and more combinations simultaneously.
Add basic Javadocs headers for PerReadAlleleLikehoodMap.
This commit is contained in:
parent
e504806b91
commit
f31bf37a6f
|
|
@ -54,6 +54,8 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author carneiro
|
* @author carneiro
|
||||||
* @since 4/21/12
|
* @since 4/21/12
|
||||||
|
|
@ -62,16 +64,8 @@ public class ReadCovariatesUnitTest {
|
||||||
|
|
||||||
@Test(enabled = false)
|
@Test(enabled = false)
|
||||||
public void testCovariateGeneration() {
|
public void testCovariateGeneration() {
|
||||||
final String RGID = "id";
|
|
||||||
final int length = 10;
|
|
||||||
final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||||
GATKSAMRecord read = ReadUtils.createRandomRead(length, false);
|
final String RGID = "id";
|
||||||
GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(RGID);
|
|
||||||
rg.setPlatform("illumina");
|
|
||||||
read.setReadGroup(rg);
|
|
||||||
final byte[] mQuals = read.getBaseQualities(EventType.BASE_SUBSTITUTION);
|
|
||||||
final byte[] iQuals = read.getBaseQualities(EventType.BASE_INSERTION);
|
|
||||||
final byte[] dQuals = read.getBaseQualities(EventType.BASE_DELETION);
|
|
||||||
|
|
||||||
ReadGroupCovariate rgCov = new ReadGroupCovariate();
|
ReadGroupCovariate rgCov = new ReadGroupCovariate();
|
||||||
QualityScoreCovariate qsCov = new QualityScoreCovariate();
|
QualityScoreCovariate qsCov = new QualityScoreCovariate();
|
||||||
|
|
@ -89,6 +83,21 @@ public class ReadCovariatesUnitTest {
|
||||||
requestedCovariates[2] = coCov;
|
requestedCovariates[2] = coCov;
|
||||||
requestedCovariates[3] = cyCov;
|
requestedCovariates[3] = cyCov;
|
||||||
|
|
||||||
|
final int NUM_READS = 100;
|
||||||
|
final Random rnd = new Random();
|
||||||
|
|
||||||
|
final String[] readGroups = {"RG1", "RG2", "RGbla"};
|
||||||
|
for (int idx = 0; idx < NUM_READS; idx++) {
|
||||||
|
for (final String rgs : readGroups) {
|
||||||
|
final int length = 10 + rnd.nextInt(100); // random read length, at least 10 bp long
|
||||||
|
final GATKSAMRecord read = ReadUtils.createRandomRead(length, false);
|
||||||
|
final GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(rgs);
|
||||||
|
rg.setPlatform("illumina");
|
||||||
|
read.setReadGroup(rg);
|
||||||
|
read.setReadNegativeStrandFlag(rnd.nextBoolean());
|
||||||
|
final byte[] mQuals = read.getBaseQualities(EventType.BASE_SUBSTITUTION);
|
||||||
|
final byte[] iQuals = read.getBaseQualities(EventType.BASE_INSERTION);
|
||||||
|
final byte[] dQuals = read.getBaseQualities(EventType.BASE_DELETION);
|
||||||
ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates);
|
ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates);
|
||||||
|
|
||||||
// check that the length is correct
|
// check that the length is correct
|
||||||
|
|
@ -98,9 +107,9 @@ public class ReadCovariatesUnitTest {
|
||||||
|
|
||||||
for (int i = 0; i < length; i++) {
|
for (int i = 0; i < length; i++) {
|
||||||
// check that read group is always the same
|
// check that read group is always the same
|
||||||
Assert.assertEquals(rgCov.formatKey(rc.getMismatchesKeySet(i)[0]), RGID);
|
Assert.assertEquals(rgCov.formatKey(rc.getMismatchesKeySet(i)[0]), rgs);
|
||||||
Assert.assertEquals(rgCov.formatKey(rc.getInsertionsKeySet(i)[0]), RGID);
|
Assert.assertEquals(rgCov.formatKey(rc.getInsertionsKeySet(i)[0]), rgs);
|
||||||
Assert.assertEquals(rgCov.formatKey(rc.getDeletionsKeySet(i)[0]), RGID);
|
Assert.assertEquals(rgCov.formatKey(rc.getDeletionsKeySet(i)[0]), rgs);
|
||||||
|
|
||||||
// check quality score
|
// check quality score
|
||||||
Assert.assertEquals(qsCov.formatKey(rc.getMismatchesKeySet(i)[1]), "" + mQuals[i]);
|
Assert.assertEquals(qsCov.formatKey(rc.getMismatchesKeySet(i)[1]), "" + mQuals[i]);
|
||||||
|
|
@ -120,4 +129,8 @@ public class ReadCovariatesUnitTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@
|
||||||
package org.broadinstitute.sting.utils.genotyper;
|
package org.broadinstitute.sting.utils.genotyper;
|
||||||
|
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
import org.broadinstitute.sting.gatk.downsampling.AlleleBiasedDownsamplingUtils;
|
import org.broadinstitute.sting.gatk.downsampling.AlleleBiasedDownsamplingUtils;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
@ -35,8 +36,13 @@ import org.broadinstitute.variant.variantcontext.Allele;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wrapper class that holds a set of maps of the form (Read -> Map(Allele->Double))
|
||||||
|
* For each read, this holds underlying alleles represented by an aligned read, and corresponding relative likelihood.
|
||||||
|
*/
|
||||||
public class PerReadAlleleLikelihoodMap {
|
public class PerReadAlleleLikelihoodMap {
|
||||||
|
|
||||||
|
|
||||||
public static final double INFORMATIVE_LIKELIHOOD_THRESHOLD = 0.2;
|
public static final double INFORMATIVE_LIKELIHOOD_THRESHOLD = 0.2;
|
||||||
|
|
||||||
protected List<Allele> alleles;
|
protected List<Allele> alleles;
|
||||||
|
|
@ -47,6 +53,12 @@ public class PerReadAlleleLikelihoodMap {
|
||||||
alleles = new ArrayList<Allele>();
|
alleles = new ArrayList<Allele>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds a read, allele and corresponding likelihood to map
|
||||||
|
* @param read SAM record to add
|
||||||
|
* @param a corresponding allele
|
||||||
|
* @param likelihood corresponding likelihood
|
||||||
|
*/
|
||||||
public void add(GATKSAMRecord read, Allele a, Double likelihood) {
|
public void add(GATKSAMRecord read, Allele a, Double likelihood) {
|
||||||
Map<Allele,Double> likelihoodMap;
|
Map<Allele,Double> likelihoodMap;
|
||||||
if (likelihoodReadMap.containsKey(read)){
|
if (likelihoodReadMap.containsKey(read)){
|
||||||
|
|
@ -97,15 +109,33 @@ public class PerReadAlleleLikelihoodMap {
|
||||||
likelihoodReadMap.remove(read);
|
likelihoodReadMap.remove(read);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Ensures("result >=0")
|
||||||
public int size() {
|
public int size() {
|
||||||
return likelihoodReadMap.size();
|
return likelihoodReadMap.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper function to add the read underneath a pileup element to the map
|
||||||
|
* @param p Pileup element
|
||||||
|
* @param a Corresponding allele
|
||||||
|
* @param likelihood Allele likelihood
|
||||||
|
*/
|
||||||
public void add(PileupElement p, Allele a, Double likelihood) {
|
public void add(PileupElement p, Allele a, Double likelihood) {
|
||||||
|
if (p==null || p.getRead()==null || a == null )
|
||||||
|
throw new IllegalArgumentException("Invalid parameters passed to PerReadAlleleLikelihoodMap.add");
|
||||||
add(p.getRead(), a, likelihood);
|
add(p.getRead(), a, likelihood);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Does the current map contain the key associated with a particular SAM record in pileup?
|
||||||
|
* @param p Pileup element
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
@Ensures("result != null")
|
||||||
public boolean containsPileupElement(PileupElement p) {
|
public boolean containsPileupElement(PileupElement p) {
|
||||||
|
if (p==null )
|
||||||
|
throw new IllegalArgumentException("Invalid pileup element");
|
||||||
|
|
||||||
return likelihoodReadMap.containsKey(p.getRead());
|
return likelihoodReadMap.containsKey(p.getRead());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -140,11 +170,21 @@ public class PerReadAlleleLikelihoodMap {
|
||||||
return likelihoodReadMap.get(p.getRead());
|
return likelihoodReadMap.get(p.getRead());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For a given alleleMap, return most likely allele, i.e. the one with highest associated likelihood
|
||||||
|
* @param alleleMap Underlying allele map
|
||||||
|
* @return Most likely allele. If all alleles are equally likely, returns a no-call allele.
|
||||||
|
*/
|
||||||
|
@Ensures("result != null")
|
||||||
public static Allele getMostLikelyAllele( final Map<Allele,Double> alleleMap ) {
|
public static Allele getMostLikelyAllele( final Map<Allele,Double> alleleMap ) {
|
||||||
double maxLike = Double.NEGATIVE_INFINITY;
|
double maxLike = Double.NEGATIVE_INFINITY;
|
||||||
double prevMaxLike = Double.NEGATIVE_INFINITY;
|
double prevMaxLike = Double.NEGATIVE_INFINITY;
|
||||||
Allele mostLikelyAllele = Allele.NO_CALL;
|
Allele mostLikelyAllele = Allele.NO_CALL;
|
||||||
|
|
||||||
|
if (alleleMap==null)
|
||||||
|
throw new IllegalArgumentException("alleleMap in getMostLikelyAllele() method can't be null");
|
||||||
|
|
||||||
for (final Map.Entry<Allele,Double> el : alleleMap.entrySet()) {
|
for (final Map.Entry<Allele,Double> el : alleleMap.entrySet()) {
|
||||||
if (el.getValue() > maxLike) {
|
if (el.getValue() > maxLike) {
|
||||||
prevMaxLike = maxLike;
|
prevMaxLike = maxLike;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue