Improvements to indel calling in pool caller: a) Compute per-read likelihoods in reference sample to determine wheter a read is informative or not. b) Fixed bugs in unit tests. c) Fixed padding-related bugs when computing matches/mismatches in ErrorModel, d) Added a couple of more integration tests to increase test coverage, including testing odd ploidy
This commit is contained in:
parent
1e8610b2c6
commit
2ae890155c
|
|
@ -1,6 +1,10 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||||
|
import org.broadinstitute.sting.utils.Haplotype;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
@ -9,6 +13,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -30,24 +35,26 @@ public class ErrorModel {
|
||||||
private static final boolean compressRange = false;
|
private static final boolean compressRange = false;
|
||||||
|
|
||||||
private static final double log10MinusE = Math.log10(Math.exp(1.0));
|
private static final double log10MinusE = Math.log10(Math.exp(1.0));
|
||||||
|
private static final boolean DEBUG = false;
|
||||||
/**
|
/**
|
||||||
* Calculates the probability of the data (reference sample reads) given the phred scaled site quality score.
|
* Calculates the probability of the data (reference sample reads) given the phred scaled site quality score.
|
||||||
*
|
*
|
||||||
* @param minQualityScore Minimum site quality score to evaluate
|
* @param UAC Argument Collection
|
||||||
* @param maxQualityScore Maximum site quality score to evaluate
|
|
||||||
* @param phredScaledPrior Prior for site quality
|
|
||||||
* @param refSamplePileup Reference sample pileup
|
* @param refSamplePileup Reference sample pileup
|
||||||
* @param refSampleVC VC with True alleles in reference sample pileup
|
* @param refSampleVC VC with True alleles in reference sample pileup
|
||||||
* @param minPower Minimum power
|
|
||||||
*/
|
*/
|
||||||
public ErrorModel (byte minQualityScore, byte maxQualityScore, byte phredScaledPrior,
|
public ErrorModel (final UnifiedArgumentCollection UAC,
|
||||||
ReadBackedPileup refSamplePileup, VariantContext refSampleVC, double minPower) {
|
final ReadBackedPileup refSamplePileup,
|
||||||
this.maxQualityScore = maxQualityScore;
|
VariantContext refSampleVC, final ReferenceContext refContext) {
|
||||||
this.minQualityScore = minQualityScore;
|
this.maxQualityScore = UAC.maxQualityScore;
|
||||||
this.phredScaledPrior = phredScaledPrior;
|
this.minQualityScore = UAC.minQualityScore;
|
||||||
log10minPower = Math.log10(minPower);
|
this.phredScaledPrior = UAC.phredScaledPrior;
|
||||||
|
log10minPower = Math.log10(UAC.minPower);
|
||||||
|
|
||||||
|
PairHMMIndelErrorModel pairModel = null;
|
||||||
|
LinkedHashMap<Allele, Haplotype> haplotypeMap = null;
|
||||||
|
HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap = null;
|
||||||
|
double[][] perReadLikelihoods = null;
|
||||||
|
|
||||||
double[] model = new double[maxQualityScore+1];
|
double[] model = new double[maxQualityScore+1];
|
||||||
Arrays.fill(model,Double.NEGATIVE_INFINITY);
|
Arrays.fill(model,Double.NEGATIVE_INFINITY);
|
||||||
|
|
@ -61,11 +68,17 @@ public class ErrorModel {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (refSampleVC.isIndel()) {
|
||||||
|
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
|
||||||
|
UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
|
||||||
|
haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
|
||||||
|
indelLikelihoodMap = new HashMap<PileupElement, LinkedHashMap<Allele, Double>>();
|
||||||
|
IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(refSampleVC.getAlleles(), refContext, refContext.getLocus(), haplotypeMap); // will update haplotypeMap adding elements
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore));
|
||||||
if (refSamplePileup == null || refSampleVC == null || !hasCalledAlleles) {
|
if (refSamplePileup == null || refSampleVC == null || !hasCalledAlleles) {
|
||||||
double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore));
|
|
||||||
for (byte q=minQualityScore; q<=maxQualityScore; q++) {
|
for (byte q=minQualityScore; q<=maxQualityScore; q++) {
|
||||||
// maximum uncertainty if there's no ref data at site
|
// maximum uncertainty if there's no ref data at site
|
||||||
model[q] = p;
|
model[q] = p;
|
||||||
|
|
@ -75,23 +88,47 @@ public class ErrorModel {
|
||||||
else {
|
else {
|
||||||
hasData = true;
|
hasData = true;
|
||||||
int matches = 0;
|
int matches = 0;
|
||||||
int coverage = refSamplePileup.getNumberOfElements();
|
int coverage = 0;
|
||||||
|
|
||||||
Allele refAllele = refSampleVC.getReference();
|
Allele refAllele = refSampleVC.getReference();
|
||||||
|
|
||||||
|
if (refSampleVC.isIndel()) {
|
||||||
|
final int readCounts[] = new int[refSamplePileup.getNumberOfElements()];
|
||||||
|
//perReadLikelihoods = new double[readCounts.length][refSampleVC.getAlleles().size()];
|
||||||
|
final int eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(refSampleVC.getAlleles());
|
||||||
|
perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts);
|
||||||
|
}
|
||||||
|
int idx = 0;
|
||||||
for (PileupElement refPileupElement : refSamplePileup) {
|
for (PileupElement refPileupElement : refSamplePileup) {
|
||||||
|
if (DEBUG)
|
||||||
|
System.out.println(refPileupElement.toString());
|
||||||
boolean isMatch = false;
|
boolean isMatch = false;
|
||||||
for (Allele allele : refSampleVC.getAlleles())
|
for (Allele allele : refSampleVC.getAlleles()) {
|
||||||
isMatch |= pileupElementMatches(refPileupElement, allele, refAllele);
|
boolean m = pileupElementMatches(refPileupElement, allele, refAllele, refContext.getBase());
|
||||||
|
if (DEBUG) System.out.println(m);
|
||||||
|
isMatch |= m;
|
||||||
|
}
|
||||||
|
if (refSampleVC.isIndel()) {
|
||||||
|
// ignore match/mismatch if reads, as determined by their likelihood, are not informative
|
||||||
|
double[] perAlleleLikelihoods = perReadLikelihoods[idx++];
|
||||||
|
if (!isInformativeElement(perAlleleLikelihoods))
|
||||||
|
matches++;
|
||||||
|
else
|
||||||
|
matches += (isMatch?1:0);
|
||||||
|
|
||||||
matches += (isMatch?1:0);
|
} else {
|
||||||
// System.out.format("MATCH:%b\n",isMatch);
|
matches += (isMatch?1:0);
|
||||||
|
}
|
||||||
|
coverage++;
|
||||||
}
|
}
|
||||||
|
|
||||||
int mismatches = coverage - matches;
|
int mismatches = coverage - matches;
|
||||||
//System.out.format("Cov:%d match:%d mismatch:%d\n",coverage, matches, mismatches);
|
//System.out.format("Cov:%d match:%d mismatch:%d\n",coverage, matches, mismatches);
|
||||||
for (byte q=minQualityScore; q<=maxQualityScore; q++) {
|
for (byte q=minQualityScore; q<=maxQualityScore; q++) {
|
||||||
model[q] = log10PoissonProbabilitySiteGivenQual(q,coverage, mismatches);
|
if (coverage==0)
|
||||||
|
model[q] = p;
|
||||||
|
else
|
||||||
|
model[q] = log10PoissonProbabilitySiteGivenQual(q,coverage, mismatches);
|
||||||
}
|
}
|
||||||
this.refDepth = coverage;
|
this.refDepth = coverage;
|
||||||
}
|
}
|
||||||
|
|
@ -101,6 +138,17 @@ public class ErrorModel {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Requires("likelihoods.length>0")
|
||||||
|
private boolean isInformativeElement(double[] likelihoods) {
|
||||||
|
// if likelihoods are the same, they're not informative
|
||||||
|
final double thresh = 0.1;
|
||||||
|
int maxIdx = MathUtils.maxElementIndex(likelihoods);
|
||||||
|
int minIdx = MathUtils.minElementIndex(likelihoods);
|
||||||
|
if (likelihoods[maxIdx]-likelihoods[minIdx]< thresh)
|
||||||
|
return false;
|
||||||
|
else
|
||||||
|
return true;
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Simple constructor that just takes a given log-probability vector as error model.
|
* Simple constructor that just takes a given log-probability vector as error model.
|
||||||
* Only intended for unit testing, not general usage.
|
* Only intended for unit testing, not general usage.
|
||||||
|
|
@ -115,23 +163,27 @@ public class ErrorModel {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean pileupElementMatches(PileupElement pileupElement, Allele allele, Allele refAllele) {
|
public static boolean pileupElementMatches(PileupElement pileupElement, Allele allele, Allele refAllele, byte refBase) {
|
||||||
/* System.out.format("PE: base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d Allele:%s RefAllele:%s\n",
|
if (DEBUG)
|
||||||
|
System.out.format("PE: base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d Allele:%s RefAllele:%s\n",
|
||||||
pileupElement.getBase(), pileupElement.isBeforeDeletionStart(),
|
pileupElement.getBase(), pileupElement.isBeforeDeletionStart(),
|
||||||
pileupElement.isBeforeInsertion(),pileupElement.getEventBases(),pileupElement.getEventLength(), allele.toString(), refAllele.toString());
|
pileupElement.isBeforeInsertion(),pileupElement.getEventBases(),pileupElement.getEventLength(), allele.toString(), refAllele.toString());
|
||||||
*/
|
|
||||||
|
|
||||||
|
//pileupElement.
|
||||||
// if test allele is ref, any base mismatch, or any insertion/deletion at start of pileup count as mismatch
|
// if test allele is ref, any base mismatch, or any insertion/deletion at start of pileup count as mismatch
|
||||||
if (allele.isReference()) {
|
if (allele.isReference()) {
|
||||||
// for a ref allele, any base mismatch or new indel is a mismatch.
|
// for a ref allele, any base mismatch or new indel is a mismatch.
|
||||||
if(allele.getBases().length>0 && allele.getBases().length == refAllele.getBases().length ) // SNP/MNP case
|
if(allele.getBases().length>0 )
|
||||||
return (/*!pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart() &&*/ pileupElement.getBase() == allele.getBases()[0]);
|
// todo - can't check vs. allele because allele is not padded so it doesn't include the reference base at this location
|
||||||
|
// could clean up/simplify this when unpadding is removed
|
||||||
|
return (pileupElement.getBase() == refBase);
|
||||||
else
|
else
|
||||||
// either null allele to compare, or ref/alt lengths are different (indel by definition).
|
// either null allele to compare, or ref/alt lengths are different (indel by definition).
|
||||||
// if we have an indel that we are comparing against a REF allele, any indel presence (of any length/content) is a mismatch
|
// if we have an indel that we are comparing against a REF allele, any indel presence (of any length/content) is a mismatch
|
||||||
return (!pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart());
|
return (!pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// for non-ref alleles to compare:
|
||||||
if (refAllele.getBases().length == allele.getBases().length)
|
if (refAllele.getBases().length == allele.getBases().length)
|
||||||
// alleles have the same length (eg snp or mnp)
|
// alleles have the same length (eg snp or mnp)
|
||||||
return pileupElement.getBase() == allele.getBases()[0];
|
return pileupElement.getBase() == allele.getBases()[0];
|
||||||
|
|
|
||||||
|
|
@ -247,7 +247,8 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
|
||||||
|
|
||||||
// find the alternate allele(s) that we should be using
|
// find the alternate allele(s) that we should be using
|
||||||
final List<Allele> alleles = getFinalAllelesToUse(tracker, ref, allAllelesToUse, GLs);
|
final List<Allele> alleles = getFinalAllelesToUse(tracker, ref, allAllelesToUse, GLs);
|
||||||
|
if (alleles == null || alleles.isEmpty())
|
||||||
|
return null;
|
||||||
// start making the VariantContext
|
// start making the VariantContext
|
||||||
final GenomeLoc loc = ref.getLocus();
|
final GenomeLoc loc = ref.getLocus();
|
||||||
final int endLoc = getEndLocation(tracker, ref, alleles);
|
final int endLoc = getEndLocation(tracker, ref, alleles);
|
||||||
|
|
@ -313,7 +314,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
|
||||||
refLanePileup = refPileup.getPileupForLane(laneID);
|
refLanePileup = refPileup.getPileupForLane(laneID);
|
||||||
|
|
||||||
//ReferenceSample referenceSample = new ReferenceSample(UAC.referenceSampleName, refLanePileup, trueReferenceAlleles);
|
//ReferenceSample referenceSample = new ReferenceSample(UAC.referenceSampleName, refLanePileup, trueReferenceAlleles);
|
||||||
perLaneErrorModels.put(laneID, new ErrorModel(UAC.minQualityScore, UAC.maxQualityScore, UAC.phredScaledPrior, refLanePileup, refVC, UAC.minPower));
|
perLaneErrorModels.put(laneID, new ErrorModel(UAC, refLanePileup, refVC, ref));
|
||||||
}
|
}
|
||||||
return perLaneErrorModels;
|
return perLaneErrorModels;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,8 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
|
||||||
final int eventLength;
|
final int eventLength;
|
||||||
double[][] readHaplotypeLikelihoods;
|
double[][] readHaplotypeLikelihoods;
|
||||||
|
|
||||||
|
final byte refBase;
|
||||||
|
|
||||||
public PoolIndelGenotypeLikelihoods(final List<Allele> alleles,
|
public PoolIndelGenotypeLikelihoods(final List<Allele> alleles,
|
||||||
final double[] logLikelihoods,
|
final double[] logLikelihoods,
|
||||||
final int ploidy,
|
final int ploidy,
|
||||||
|
|
@ -38,6 +40,8 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
|
||||||
this.haplotypeMap = haplotypeMap;
|
this.haplotypeMap = haplotypeMap;
|
||||||
this.refContext = referenceContext;
|
this.refContext = referenceContext;
|
||||||
this.eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(alleles);
|
this.eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(alleles);
|
||||||
|
// todo - not needed if indel alleles have base at current position
|
||||||
|
this.refBase = referenceContext.getBase();
|
||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------
|
||||||
|
|
@ -141,7 +145,7 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
|
||||||
final int numHaplotypes = haplotypeMap.size();
|
final int numHaplotypes = haplotypeMap.size();
|
||||||
|
|
||||||
final int readCounts[] = new int[pileup.getNumberOfElements()];
|
final int readCounts[] = new int[pileup.getNumberOfElements()];
|
||||||
readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, PoolIndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts);
|
readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts);
|
||||||
n = readHaplotypeLikelihoods.length;
|
n = readHaplotypeLikelihoods.length;
|
||||||
} else {
|
} else {
|
||||||
Allele refAllele = null;
|
Allele refAllele = null;
|
||||||
|
|
@ -161,7 +165,7 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
|
||||||
int idx =0;
|
int idx =0;
|
||||||
for (Allele allele : alleles) {
|
for (Allele allele : alleles) {
|
||||||
int cnt = numSeenBases.get(idx);
|
int cnt = numSeenBases.get(idx);
|
||||||
numSeenBases.set(idx++,cnt + (ErrorModel.pileupElementMatches(elt, allele, refAllele)?1:0));
|
numSeenBases.set(idx++,cnt + (ErrorModel.pileupElementMatches(elt, allele, refAllele, refBase)?1:0));
|
||||||
}
|
}
|
||||||
|
|
||||||
n++;
|
n++;
|
||||||
|
|
|
||||||
|
|
@ -70,12 +70,6 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static HashMap<PileupElement, LinkedHashMap<Allele, Double>> getIndelLikelihoodMap() {
|
|
||||||
return IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
|
protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
|
||||||
final double[] logLikelihoods,
|
final double[] logLikelihoods,
|
||||||
final int ploidy,
|
final int ploidy,
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,13 @@ public class PoolCallerIntegrationTest extends WalkerTest {
|
||||||
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) {
|
||||||
|
final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s -glm %s -ignoreLane -pnrm POOL",
|
||||||
|
REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s";
|
||||||
|
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||||
|
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBOTH_GGA_Pools() {
|
public void testBOTH_GGA_Pools() {
|
||||||
PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLBOTH","36b8db57f65be1cc3d2d9d7f9f3f26e4");
|
PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLBOTH","36b8db57f65be1cc3d2d9d7f9f3f26e4");
|
||||||
|
|
@ -44,7 +51,17 @@ public class PoolCallerIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testINDEL_GGA_Pools() {
|
public void testINDEL_GGA_Pools() {
|
||||||
PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLINDEL","d1339990291648495bfcf4404f051478");
|
PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","POOLINDEL","d1339990291648495bfcf4404f051478");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testINDEL_maxAlleles2_ploidy3_Pools_noRef() {
|
||||||
|
PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","POOLINDEL","b66e7150603310fd57ee7bf9fc590706");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testINDEL_maxAlleles2_ploidy1_Pools_noRef() {
|
||||||
|
PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","POOLINDEL","ccdae3fc4d2c922f956a186aaad51c29");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
import net.sf.samtools.SAMUtils;
|
import net.sf.samtools.SAMUtils;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
|
@ -49,6 +50,13 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
||||||
private static final boolean SIMULATE_NOISY_PILEUP = false;
|
private static final boolean SIMULATE_NOISY_PILEUP = false;
|
||||||
private static final int NUM_SIMULATED_OBS = 10;
|
private static final int NUM_SIMULATED_OBS = 10;
|
||||||
|
|
||||||
|
void PoolGenotypeLikelihoodsUnitTest() {
|
||||||
|
UAC.minQualityScore = 5;
|
||||||
|
UAC.maxQualityScore = 40;
|
||||||
|
UAC.phredScaledPrior = (byte)20;
|
||||||
|
UAC.minPower = 0.0;
|
||||||
|
|
||||||
|
}
|
||||||
@Test
|
@Test
|
||||||
public void testStoringLikelihoodElements() {
|
public void testStoringLikelihoodElements() {
|
||||||
|
|
||||||
|
|
@ -251,8 +259,6 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
||||||
@Test
|
@Test
|
||||||
public void testErrorModel() {
|
public void testErrorModel() {
|
||||||
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
|
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
|
||||||
final byte minQ = 5;
|
|
||||||
final byte maxQ = 40;
|
|
||||||
final byte refByte = refPileupTestProvider.getRefByte();
|
final byte refByte = refPileupTestProvider.getRefByte();
|
||||||
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
|
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
|
||||||
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
|
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
|
||||||
|
|
@ -270,7 +276,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
||||||
// get artificial alignment context for ref sample - no noise
|
// get artificial alignment context for ref sample - no noise
|
||||||
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(0, new String(new byte[]{altByte}), new int[]{matches, mismatches}, false, 30);
|
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(0, new String(new byte[]{altByte}), new int[]{matches, mismatches}, false, 30);
|
||||||
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
|
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
|
||||||
final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refVC, 0.0);
|
final ErrorModel emodel = new ErrorModel(UAC, refPileup, refVC, refPileupTestProvider.getReferenceContext());
|
||||||
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
|
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
|
||||||
|
|
||||||
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
|
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
|
||||||
|
|
@ -287,8 +293,6 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
||||||
@Test
|
@Test
|
||||||
public void testIndelErrorModel() {
|
public void testIndelErrorModel() {
|
||||||
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
|
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
|
||||||
final byte minQ = 5;
|
|
||||||
final byte maxQ = 40;
|
|
||||||
final byte refByte = refPileupTestProvider.getRefByte();
|
final byte refByte = refPileupTestProvider.getRefByte();
|
||||||
final String altBases = "TCA";
|
final String altBases = "TCA";
|
||||||
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
|
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
|
||||||
|
|
@ -313,7 +317,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
||||||
// Ref sample has TC insertion but pileup will have TCA inserted instead to test mismatches
|
// Ref sample has TC insertion but pileup will have TCA inserted instead to test mismatches
|
||||||
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(altBases.length(), altBases, new int[]{matches, mismatches}, false, 30);
|
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(altBases.length(), altBases, new int[]{matches, mismatches}, false, 30);
|
||||||
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
|
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
|
||||||
final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refInsertionVC, 0.0);
|
final ErrorModel emodel = new ErrorModel(UAC, refPileup, refInsertionVC, refPileupTestProvider.getReferenceContext());
|
||||||
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
|
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
|
||||||
|
|
||||||
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
|
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
|
||||||
|
|
@ -343,7 +347,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
||||||
// Ref sample has 4bp deletion but pileup will have 3 bp deletion instead to test mismatches
|
// Ref sample has 4bp deletion but pileup will have 3 bp deletion instead to test mismatches
|
||||||
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(-delLength+1, altBases, new int[]{matches, mismatches}, false, 30);
|
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(-delLength+1, altBases, new int[]{matches, mismatches}, false, 30);
|
||||||
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
|
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
|
||||||
final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refDeletionVC, 0.0);
|
final ErrorModel emodel = new ErrorModel(UAC, refPileup, refDeletionVC, refPileupTestProvider.getReferenceContext());
|
||||||
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
|
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
|
||||||
|
|
||||||
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
|
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.indels;
|
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.utils.Haplotype;
|
import org.broadinstitute.sting.utils.Haplotype;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
|
@ -175,6 +176,7 @@ public class PairHMMIndelErrorModel {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Ensures("result != null && result.length == pileup.getNumberOfElements()")
|
||||||
public synchronized double[][] computeGeneralReadHaplotypeLikelihoods(final ReadBackedPileup pileup,
|
public synchronized double[][] computeGeneralReadHaplotypeLikelihoods(final ReadBackedPileup pileup,
|
||||||
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
|
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
|
||||||
final ReferenceContext ref,
|
final ReferenceContext ref,
|
||||||
|
|
|
||||||
|
|
@ -90,7 +90,7 @@ public class ArtificialReadPileupTestProvider {
|
||||||
return sampleNames;
|
return sampleNames;
|
||||||
}
|
}
|
||||||
public byte getRefByte() {
|
public byte getRefByte() {
|
||||||
return refBases.substring(offset,offset+1).getBytes()[0];
|
return referenceContext.getBase();
|
||||||
}
|
}
|
||||||
|
|
||||||
public ReferenceContext getReferenceContext() { return referenceContext;}
|
public ReferenceContext getReferenceContext() { return referenceContext;}
|
||||||
|
|
@ -107,7 +107,7 @@ public class ArtificialReadPileupTestProvider {
|
||||||
ArrayList<Allele> vcAlleles = new ArrayList<Allele>();
|
ArrayList<Allele> vcAlleles = new ArrayList<Allele>();
|
||||||
Allele refAllele, altAllele;
|
Allele refAllele, altAllele;
|
||||||
if (eventLength == 0) {// SNP case
|
if (eventLength == 0) {// SNP case
|
||||||
refAllele =Allele.create(refBases.substring(offset,offset+1),true);
|
refAllele =Allele.create(referenceContext.getBase(),true);
|
||||||
altAllele = Allele.create(altBases.substring(0,1), false);
|
altAllele = Allele.create(altBases.substring(0,1), false);
|
||||||
|
|
||||||
} else if (eventLength>0){
|
} else if (eventLength>0){
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue