Bug fix for indel HMM: protect against situation where long reads (e.g. Sanger) in a pileup can lead to a read starting after the haplotype end for a given haplotype.
This commit is contained in:
parent
406adb8d44
commit
574d5b467f
|
|
@ -240,8 +240,8 @@ public class AFCalcPerformanceTest {
|
|||
if ( a.isNonReference() ) {
|
||||
final String warningmeMLE = call.originalCall.getAlleleCountAtMLE(a) != result.getAlleleCountAtMLE(a) ? " DANGER-MLE-DIFFERENT" : "";
|
||||
logger.info("\t\t MLE " + a + ": " + call.originalCall.getAlleleCountAtMLE(a) + " vs " + result.getAlleleCountAtMLE(a) + warningmeMLE);
|
||||
final String warningmePost = call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) == 0 && result.getLog10PosteriorOfAFGt0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : "";
|
||||
logger.info("\t\t Posterior " + a + ": " + call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFGt0ForAllele(a) + warningmePost);
|
||||
final String warningmePost = call.originalCall.getLog10PosteriorOfAFEq0ForAllele(a) == 0 && result.getLog10PosteriorOfAFEq0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : "";
|
||||
logger.info("\t\t Posterior " + a + ": " + call.originalCall.getLog10PosteriorOfAFEq0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFEq0ForAllele(a) + warningmePost);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -83,8 +83,8 @@ public class AFCalcResultUnitTest extends BaseTest {
|
|||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
final List<Double> pValues = new LinkedList<Double>();
|
||||
for ( final double p : Arrays.asList(0.01, 0.1, 0.9, 0.99, 0.999) )
|
||||
for ( final double espilon : Arrays.asList(-1e-5, 0.0, 1e-5) )
|
||||
for ( final double p : Arrays.asList(0.01, 0.1, 0.9, 0.99, 0.999, 1 - 1e-4, 1 - 1e-5, 1 - 1e-6) )
|
||||
for ( final double espilon : Arrays.asList(-1e-7, 0.0, 1e-7) )
|
||||
pValues.add(p + espilon);
|
||||
|
||||
for ( final double pNonRef : pValues ) {
|
||||
|
|
@ -106,7 +106,7 @@ public class AFCalcResultUnitTest extends BaseTest {
|
|||
alleles,
|
||||
MathUtils.normalizeFromLog10(new double[]{1 - pNonRef, pNonRef}, true, false),
|
||||
log10Even,
|
||||
Collections.singletonMap(C, Math.log10(pNonRef)));
|
||||
Collections.singletonMap(C, Math.log10(1 - pNonRef)));
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "TestIsPolymorphic")
|
||||
|
|
|
|||
|
|
@ -681,7 +681,7 @@ public class AFCalcUnitTest extends BaseTest {
|
|||
|
||||
// must be getCalledChrCount because we cannot ensure that the VC made has our desired ACs
|
||||
Assert.assertEquals(result.getAlleleCountAtMLE(alt), vc.getCalledChrCount(alt));
|
||||
Assert.assertEquals(result.isPolymorphic(alt, -1), (boolean)expectedPoly.get(i), "isPolymorphic for allele " + alt + " " + result.getLog10PosteriorOfAFGt0ForAllele(alt));
|
||||
Assert.assertEquals(result.isPolymorphic(alt, -1), (boolean)expectedPoly.get(i), "isPolymorphic for allele " + alt + " " + result.getLog10PosteriorOfAFEq0ForAllele(alt));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -148,7 +148,7 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
|
|||
for ( int i = 0; i < log10LAlleles.size(); i++ ) {
|
||||
final double log10LAllele1 = log10LAlleles.get(i);
|
||||
final double[] L1 = MathUtils.normalizeFromLog10(new double[]{log10LAllele1, 0.0}, true);
|
||||
final AFCalcResult result1 = new AFCalcResult(new int[]{1}, 1, Arrays.asList(A, C), L1, rawPriors, Collections.singletonMap(C, 0.0));
|
||||
final AFCalcResult result1 = new AFCalcResult(new int[]{1}, 1, Arrays.asList(A, C), L1, rawPriors, Collections.singletonMap(C, -10000.0));
|
||||
originalPriors.add(result1);
|
||||
pNonRefN.add(log10pNonRef*(i+1));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -343,7 +343,7 @@ public class GATKReport {
|
|||
|
||||
GATKReportTable table = tables.firstEntry().getValue();
|
||||
if ( table.getNumColumns() != values.length )
|
||||
throw new ReviewedStingException("The number of arguments in writeRow() " + values.length + " must match the number of columns in the table" + table.getNumColumns());
|
||||
throw new ReviewedStingException("The number of arguments in writeRow (" + values.length + ") must match the number of columns in the table (" + table.getNumColumns() + ")" );
|
||||
|
||||
final int rowIndex = table.getNumRows();
|
||||
for ( int i = 0; i < values.length; i++ )
|
||||
|
|
|
|||
|
|
@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
|
|
@ -52,7 +51,7 @@ public class AFCalcResult {
|
|||
private final double[] log10PriorsOfAC;
|
||||
private final double[] log10PosteriorsOfAC;
|
||||
|
||||
private final Map<Allele, Double> log10pNonRefByAllele;
|
||||
private final Map<Allele, Double> log10pRefByAllele;
|
||||
|
||||
/**
|
||||
* The AC values for all ALT alleles at the MLE
|
||||
|
|
@ -74,16 +73,16 @@ public class AFCalcResult {
|
|||
final List<Allele> allelesUsedInGenotyping,
|
||||
final double[] log10LikelihoodsOfAC,
|
||||
final double[] log10PriorsOfAC,
|
||||
final Map<Allele, Double> log10pNonRefByAllele) {
|
||||
final Map<Allele, Double> log10pRefByAllele) {
|
||||
if ( allelesUsedInGenotyping == null || allelesUsedInGenotyping.size() < 1 ) throw new IllegalArgumentException("allelesUsedInGenotyping must be non-null list of at least 1 value " + allelesUsedInGenotyping);
|
||||
if ( alleleCountsOfMLE == null ) throw new IllegalArgumentException("alleleCountsOfMLE cannot be null");
|
||||
if ( alleleCountsOfMLE.length != allelesUsedInGenotyping.size() - 1) throw new IllegalArgumentException("alleleCountsOfMLE.length " + alleleCountsOfMLE.length + " != allelesUsedInGenotyping.size() " + allelesUsedInGenotyping.size());
|
||||
if ( nEvaluations < 0 ) throw new IllegalArgumentException("nEvaluations must be >= 0 but saw " + nEvaluations);
|
||||
if ( log10LikelihoodsOfAC.length != 2 ) throw new IllegalArgumentException("log10LikelihoodsOfAC must have length equal 2");
|
||||
if ( log10PriorsOfAC.length != 2 ) throw new IllegalArgumentException("log10PriorsOfAC must have length equal 2");
|
||||
if ( log10pNonRefByAllele == null ) throw new IllegalArgumentException("log10pNonRefByAllele cannot be null");
|
||||
if ( log10pNonRefByAllele.size() != allelesUsedInGenotyping.size() - 1 ) throw new IllegalArgumentException("log10pNonRefByAllele has the wrong number of elements: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
|
||||
if ( ! allelesUsedInGenotyping.containsAll(log10pNonRefByAllele.keySet()) ) throw new IllegalArgumentException("log10pNonRefByAllele doesn't contain all of the alleles used in genotyping: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
|
||||
if ( log10pRefByAllele == null ) throw new IllegalArgumentException("log10pRefByAllele cannot be null");
|
||||
if ( log10pRefByAllele.size() != allelesUsedInGenotyping.size() - 1 ) throw new IllegalArgumentException("log10pRefByAllele has the wrong number of elements: log10pRefByAllele " + log10pRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
|
||||
if ( ! allelesUsedInGenotyping.containsAll(log10pRefByAllele.keySet()) ) throw new IllegalArgumentException("log10pRefByAllele doesn't contain all of the alleles used in genotyping: log10pRefByAllele " + log10pRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
|
||||
if ( ! MathUtils.goodLog10ProbVector(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES, false) ) throw new IllegalArgumentException("log10LikelihoodsOfAC are bad " + Utils.join(",", log10LikelihoodsOfAC));
|
||||
if ( ! MathUtils.goodLog10ProbVector(log10PriorsOfAC, LOG_10_ARRAY_SIZES, true) ) throw new IllegalArgumentException("log10priors are bad " + Utils.join(",", log10PriorsOfAC));
|
||||
|
||||
|
|
@ -94,7 +93,7 @@ public class AFCalcResult {
|
|||
this.log10LikelihoodsOfAC = Arrays.copyOf(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES);
|
||||
this.log10PriorsOfAC = Arrays.copyOf(log10PriorsOfAC, LOG_10_ARRAY_SIZES);
|
||||
this.log10PosteriorsOfAC = computePosteriors(log10LikelihoodsOfAC, log10PriorsOfAC);
|
||||
this.log10pNonRefByAllele = new HashMap<Allele, Double>(log10pNonRefByAllele);
|
||||
this.log10pRefByAllele = new HashMap<Allele, Double>(log10pRefByAllele);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -104,7 +103,7 @@ public class AFCalcResult {
|
|||
* @return
|
||||
*/
|
||||
public AFCalcResult withNewPriors(final double[] log10PriorsOfAC) {
|
||||
return new AFCalcResult(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pNonRefByAllele);
|
||||
return new AFCalcResult(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -219,7 +218,7 @@ public class AFCalcResult {
|
|||
public String toString() {
|
||||
final List<String> byAllele = new LinkedList<String>();
|
||||
for ( final Allele a : getAllelesUsedInGenotyping() )
|
||||
if ( a.isNonReference() ) byAllele.add(String.format("%s => MLE %d / posterior %.2f", a, getAlleleCountAtMLE(a), getLog10PosteriorOfAFGt0ForAllele(a)));
|
||||
if ( a.isNonReference() ) byAllele.add(String.format("%s => MLE %d / posterior %.2f", a, getAlleleCountAtMLE(a), getLog10PosteriorOfAFEq0ForAllele(a)));
|
||||
return String.format("AFCalc%n\t\tlog10PosteriorOfAFGT0=%.2f%n\t\t%s", getLog10LikelihoodOfAFGT0(), Utils.join("\n\t\t", byAllele));
|
||||
}
|
||||
|
||||
|
|
@ -237,7 +236,7 @@ public class AFCalcResult {
|
|||
*/
|
||||
@Requires("MathUtils.goodLog10Probability(log10minPNonRef)")
|
||||
public boolean isPolymorphic(final Allele allele, final double log10minPNonRef) {
|
||||
return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef;
|
||||
return getLog10PosteriorOfAFEq0ForAllele(allele) < log10minPNonRef;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -245,7 +244,7 @@ public class AFCalcResult {
|
|||
*/
|
||||
public boolean isPolymorphicPhredScaledQual(final Allele allele, final double minPNonRefPhredScaledQual) {
|
||||
if ( minPNonRefPhredScaledQual < 0 ) throw new IllegalArgumentException("phredScaledQual " + minPNonRefPhredScaledQual + " < 0 ");
|
||||
final double log10Threshold = Math.log10(QualityUtils.qualToProb(minPNonRefPhredScaledQual));
|
||||
final double log10Threshold = minPNonRefPhredScaledQual / -10;
|
||||
return isPolymorphic(allele, log10Threshold);
|
||||
}
|
||||
|
||||
|
|
@ -263,7 +262,16 @@ public class AFCalcResult {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the log10 probability that allele is segregating
|
||||
* Returns the log10 probability that allele is not segregating
|
||||
*
|
||||
* Note that this function is p not segregating so that we can store
|
||||
* internally the log10 value of AF == 0, which grows very quickly
|
||||
* negative and yet has sufficient resolution for high confidence tests.
|
||||
* For example, if log10pRef == -100, not an unreasonably high number,
|
||||
* if we tried to store log10pNonRef we'd be looking at 1 - 10^-100, which
|
||||
* quickly underflows to 1. So the logic here is backward from what
|
||||
* you really want (the p of segregating) but we do that for numerical
|
||||
* reasons
|
||||
*
|
||||
* Unlike the sites-level annotation, this calculation is specific to allele, and can be
|
||||
* used to separately determine how much evidence there is that allele is independently
|
||||
|
|
@ -272,11 +280,11 @@ public class AFCalcResult {
|
|||
* evidence for one allele but not so much for any other allele
|
||||
*
|
||||
* @param allele the allele we're interested in, must be in getAllelesUsedInGenotyping
|
||||
* @return the log10 probability that allele is segregating at this site
|
||||
* @return the log10 probability that allele is not segregating at this site
|
||||
*/
|
||||
@Ensures("MathUtils.goodLog10Probability(result)")
|
||||
public double getLog10PosteriorOfAFGt0ForAllele(final Allele allele) {
|
||||
final Double log10pNonRef = log10pNonRefByAllele.get(allele);
|
||||
public double getLog10PosteriorOfAFEq0ForAllele(final Allele allele) {
|
||||
final Double log10pNonRef = log10pRefByAllele.get(allele);
|
||||
if ( log10pNonRef == null ) throw new IllegalArgumentException("Unknown allele " + allele);
|
||||
return log10pNonRef;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ public class ExactCallLogger implements Cloneable {
|
|||
for ( final Allele allele : result.getAllelesUsedInGenotyping() ) {
|
||||
if ( allele.isNonReference() ) {
|
||||
printCallElement(vc, "MLE", allele, result.getAlleleCountAtMLE(allele));
|
||||
printCallElement(vc, "pNonRefByAllele", allele, result.getLog10PosteriorOfAFGt0ForAllele(allele));
|
||||
printCallElement(vc, "pRefByAllele", allele, result.getLog10PosteriorOfAFEq0ForAllele(allele));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -123,7 +123,7 @@ public class ExactCallLogger implements Cloneable {
|
|||
final double[] posteriors = new double[2];
|
||||
final double[] priors = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true);
|
||||
final List<Integer> mle = new ArrayList<Integer>();
|
||||
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>();
|
||||
final Map<Allele, Double> log10pRefByAllele = new HashMap<Allele, Double>();
|
||||
long runtimeNano = -1;
|
||||
|
||||
GenomeLoc currentLoc = null;
|
||||
|
|
@ -148,7 +148,7 @@ public class ExactCallLogger implements Cloneable {
|
|||
builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
|
||||
builder.genotypes(genotypes);
|
||||
final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[]{}));
|
||||
final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele);
|
||||
final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pRefByAllele);
|
||||
calls.add(new ExactCall(builder.make(), runtimeNano, result));
|
||||
}
|
||||
break;
|
||||
|
|
@ -165,9 +165,9 @@ public class ExactCallLogger implements Cloneable {
|
|||
posteriors[1] = Double.valueOf(value);
|
||||
} else if (variable.equals("MLE")) {
|
||||
mle.add(Integer.valueOf(value));
|
||||
} else if (variable.equals("pNonRefByAllele")) {
|
||||
} else if (variable.equals("pRefByAllele")) {
|
||||
final Allele a = Allele.create(key);
|
||||
log10pNonRefByAllele.put(a, Double.valueOf(value));
|
||||
log10pRefByAllele.put(a, Double.valueOf(value));
|
||||
} else if (variable.equals("runtime.nano")) {
|
||||
runtimeNano = Long.valueOf(value);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -125,8 +125,8 @@ import java.util.*;
|
|||
*/
|
||||
final List<AFCalcResult> supporting;
|
||||
|
||||
private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pNonRefByAllele, List<AFCalcResult> supporting) {
|
||||
super(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pNonRefByAllele);
|
||||
private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pRefByAllele, List<AFCalcResult> supporting) {
|
||||
super(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele);
|
||||
this.supporting = supporting;
|
||||
}
|
||||
}
|
||||
|
|
@ -323,7 +323,7 @@ import java.util.*;
|
|||
final int nAltAlleles = sortedResultsWithThetaNPriors.size();
|
||||
final int[] alleleCountsOfMLE = new int[nAltAlleles];
|
||||
final double[] log10PriorsOfAC = new double[2];
|
||||
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(nAltAlleles);
|
||||
final Map<Allele, Double> log10pRefByAllele = new HashMap<Allele, Double>(nAltAlleles);
|
||||
|
||||
// the sum of the log10 posteriors for AF == 0 and AF > 0 to determine joint probs
|
||||
double log10PosteriorOfACEq0Sum = 0.0;
|
||||
|
|
@ -348,7 +348,7 @@ import java.util.*;
|
|||
log10PosteriorOfACGt0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0();
|
||||
|
||||
// bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior
|
||||
log10pNonRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0());
|
||||
log10pRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFEq0());
|
||||
|
||||
// trivial -- update the number of evaluations
|
||||
nEvaluations += sortedResultWithThetaNPriors.nEvaluations;
|
||||
|
|
@ -384,6 +384,6 @@ import java.util.*;
|
|||
MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true),
|
||||
// priors incorporate multiple alt alleles, must be normalized
|
||||
MathUtils.normalizeFromLog10(log10PriorsOfAC, true),
|
||||
log10pNonRefByAllele, sortedResultsWithThetaNPriors);
|
||||
log10pRefByAllele, sortedResultsWithThetaNPriors);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,13 +30,13 @@ class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
|
|||
final double[] log10Priors = new double[]{log10AlleleFrequencyPriors[0], MathUtils.log10sumLog10(log10AlleleFrequencyPriors, 1)};
|
||||
final double[] log10Posteriors = MathUtils.vectorSum(log10Likelihoods, log10Priors);
|
||||
|
||||
final double log10PNonRef = log10Posteriors[1] > log10Posteriors[0] ? 0.0 : MathUtils.LOG10_P_OF_ZERO;
|
||||
final Map<Allele, Double> log10pNonRefByAllele = Collections.singletonMap(vc.getAlternateAllele(0), log10PNonRef);
|
||||
final double log10PRef = log10Posteriors[1] > log10Posteriors[0] ? MathUtils.LOG10_P_OF_ZERO : 0.0;
|
||||
final Map<Allele, Double> log10pRefByAllele = Collections.singletonMap(vc.getAlternateAllele(0), log10PRef);
|
||||
|
||||
return new AFCalcResult(new int[]{mleK}, 0, vc.getAlleles(),
|
||||
MathUtils.normalizeFromLog10(log10Likelihoods, true),
|
||||
MathUtils.normalizeFromLog10(log10Priors, true),
|
||||
log10pNonRefByAllele);
|
||||
log10pRefByAllele);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -165,14 +165,14 @@ final class StateTracker {
|
|||
final double[] log10Likelihoods = MathUtils.normalizeFromLog10(new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero()}, true);
|
||||
final double[] log10Priors = MathUtils.normalizeFromLog10(new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)}, true);
|
||||
|
||||
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(allelesUsedInGenotyping.size());
|
||||
final Map<Allele, Double> log10pRefByAllele = new HashMap<Allele, Double>(allelesUsedInGenotyping.size());
|
||||
for ( int i = 0; i < subACOfMLE.length; i++ ) {
|
||||
final Allele allele = allelesUsedInGenotyping.get(i+1);
|
||||
final double log10PNonRef = alleleCountsOfMAP[i] > 0 ? 0 : -10000; // TODO -- a total hack but in effect what the old behavior was
|
||||
log10pNonRefByAllele.put(allele, log10PNonRef);
|
||||
final double log10PRef = alleleCountsOfMAP[i] > 0 ? -10000 : 0; // TODO -- a total hack but in effect what the old behavior was
|
||||
log10pRefByAllele.put(allele, log10PRef);
|
||||
}
|
||||
|
||||
return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pNonRefByAllele);
|
||||
return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pRefByAllele);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -287,6 +287,9 @@ public class PairHMMIndelErrorModel {
|
|||
if (startLocationInRefForHaplotypes < ref.getWindow().getStart()) {
|
||||
startLocationInRefForHaplotypes = ref.getWindow().getStart(); // read starts before haplotype: read will have to be cut numStartSoftClippedBases += ref.getWindow().getStart() - startLocationInRefForHaplotypes;
|
||||
}
|
||||
else if (startLocationInRefForHaplotypes > ref.getWindow().getStop()) {
|
||||
startLocationInRefForHaplotypes = ref.getWindow().getStop(); // read starts after haplotype: read will have to be clipped completely;
|
||||
}
|
||||
|
||||
if (stopLocationInRefForHaplotypes > ref.getWindow().getStop()) {
|
||||
stopLocationInRefForHaplotypes = ref.getWindow().getStop(); // check also if end of read will go beyond reference context
|
||||
|
|
@ -338,6 +341,8 @@ public class PairHMMIndelErrorModel {
|
|||
|
||||
if (startLocationInRefForHaplotypes < haplotype.getStartPosition())
|
||||
startLocationInRefForHaplotypes = haplotype.getStartPosition();
|
||||
else if (startLocationInRefForHaplotypes > haplotype.getStopPosition())
|
||||
startLocationInRefForHaplotypes = haplotype.getStopPosition();
|
||||
|
||||
final long indStart = startLocationInRefForHaplotypes - haplotype.getStartPosition();
|
||||
final long indStop = stopLocationInRefForHaplotypes - haplotype.getStartPosition();
|
||||
|
|
@ -347,8 +352,6 @@ public class PairHMMIndelErrorModel {
|
|||
System.out.format("indStart: %d indStop: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d C:%s\n",
|
||||
indStart, indStop, ref.getWindow().getStart(), ref.getWindow().getStop(), startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, read.getReadLength(), read.getCigar().toString());
|
||||
|
||||
|
||||
|
||||
final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(),
|
||||
(int)indStart, (int)indStop);
|
||||
|
||||
|
|
|
|||
|
|
@ -109,12 +109,16 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
dictionary = reference.getSequenceDictionary();
|
||||
genomeLocParser = new GenomeLocParser(dictionary);
|
||||
|
||||
// TODO: test shard boundaries
|
||||
|
||||
intervals = new ArrayList<GenomeLoc>();
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20));
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 1, 999));
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000));
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
|
||||
intervals.add(genomeLocParser.createGenomeLoc("2", 1, 100));
|
||||
intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||
intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList();
|
||||
|
||||
|
|
@ -126,6 +130,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
reads.add(buildSAMRecord("boundary_unequal", "1", 1990, 2008));
|
||||
reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
|
||||
reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
|
||||
reads.add(buildSAMRecord("end_of_chr1", "1", 249250600, 249250700));
|
||||
reads.add(buildSAMRecord("simple20", "20", 10025, 10075));
|
||||
|
||||
// required by LocusIteratorByState, and I prefer to list them in test case order above
|
||||
|
|
@ -139,8 +144,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
List<GenomeLoc> activeIntervals = getIsActiveIntervals(walker, intervals);
|
||||
// Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call
|
||||
verifyEqualIntervals(intervals, activeIntervals);
|
||||
|
||||
// TODO: more tests and edge cases
|
||||
}
|
||||
|
||||
private List<GenomeLoc> getIsActiveIntervals(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
|
||||
|
|
@ -171,8 +174,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
|
||||
Collection<ActiveRegion> activeRegions = getActiveRegions(walker, intervals).values();
|
||||
verifyActiveRegionCoverage(intervals, activeRegions);
|
||||
|
||||
// TODO: more tests and edge cases
|
||||
}
|
||||
|
||||
private void verifyActiveRegionCoverage(List<GenomeLoc> intervals, Collection<ActiveRegion> activeRegions) {
|
||||
|
|
@ -231,6 +232,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||
// outside_intervals: none
|
||||
// end_of_chr1: Primary in 1:249250600-249250621
|
||||
// simple20: Primary in 20:10000-10100
|
||||
|
||||
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
||||
|
|
@ -245,6 +247,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
||||
|
|
@ -256,6 +259,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
getRead(region, "boundary_unequal");
|
||||
getRead(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||
|
|
@ -267,6 +271,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
|
||||
|
||||
verifyReadNotPlaced(region, "simple");
|
||||
verifyReadNotPlaced(region, "overlap_equal");
|
||||
verifyReadNotPlaced(region, "overlap_unequal");
|
||||
verifyReadNotPlaced(region, "boundary_equal");
|
||||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
getRead(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||
|
|
@ -278,9 +295,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
getRead(region, "simple20");
|
||||
|
||||
// TODO: more tests and edge cases
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -300,6 +316,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||
// outside_intervals: none
|
||||
// end_of_chr1: Primary in 1:249250600-249250621
|
||||
// simple20: Primary in 20:10000-10100
|
||||
|
||||
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
||||
|
|
@ -314,6 +331,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
getRead(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
||||
|
|
@ -325,6 +343,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
getRead(region, "boundary_unequal");
|
||||
getRead(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||
|
|
@ -336,6 +355,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
getRead(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
|
||||
|
||||
verifyReadNotPlaced(region, "simple");
|
||||
verifyReadNotPlaced(region, "overlap_equal");
|
||||
verifyReadNotPlaced(region, "overlap_unequal");
|
||||
verifyReadNotPlaced(region, "boundary_equal");
|
||||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
getRead(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||
|
|
@ -347,9 +379,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
getRead(region, "simple20");
|
||||
|
||||
// TODO: more tests and edge cases
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -370,6 +401,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||
// outside_intervals: none
|
||||
// end_of_chr1: Primary in 1:249250600-249250621
|
||||
// simple20: Primary in 20:10000-10100
|
||||
|
||||
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
||||
|
|
@ -384,6 +416,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
getRead(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
||||
|
|
@ -395,6 +428,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
getRead(region, "boundary_unequal");
|
||||
getRead(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||
|
|
@ -406,6 +440,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
getRead(region, "boundary_unequal");
|
||||
getRead(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
|
||||
|
||||
verifyReadNotPlaced(region, "simple");
|
||||
verifyReadNotPlaced(region, "overlap_equal");
|
||||
verifyReadNotPlaced(region, "overlap_unequal");
|
||||
verifyReadNotPlaced(region, "boundary_equal");
|
||||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
getRead(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||
|
|
@ -417,9 +464,13 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
getRead(region, "simple20");
|
||||
}
|
||||
|
||||
// TODO: more tests and edge cases
|
||||
@Test
|
||||
public void testUnmappedReads() {
|
||||
// TODO
|
||||
}
|
||||
|
||||
private void verifyReadNotPlaced(ActiveRegion region, String readName) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue