Bug fix for indel HMM: protect against situation where long reads (e.g. Sanger) in a pileup can lead to a read starting after the haplotype end for a given haplotype.

This commit is contained in:
Eric Banks 2012-12-09 02:09:34 -05:00
parent 406adb8d44
commit 574d5b467f
12 changed files with 113 additions and 51 deletions

View File

@ -240,8 +240,8 @@ public class AFCalcPerformanceTest {
if ( a.isNonReference() ) {
final String warningmeMLE = call.originalCall.getAlleleCountAtMLE(a) != result.getAlleleCountAtMLE(a) ? " DANGER-MLE-DIFFERENT" : "";
logger.info("\t\t MLE " + a + ": " + call.originalCall.getAlleleCountAtMLE(a) + " vs " + result.getAlleleCountAtMLE(a) + warningmeMLE);
final String warningmePost = call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) == 0 && result.getLog10PosteriorOfAFGt0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : "";
logger.info("\t\t Posterior " + a + ": " + call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFGt0ForAllele(a) + warningmePost);
final String warningmePost = call.originalCall.getLog10PosteriorOfAFEq0ForAllele(a) == 0 && result.getLog10PosteriorOfAFEq0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : "";
logger.info("\t\t Posterior " + a + ": " + call.originalCall.getLog10PosteriorOfAFEq0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFEq0ForAllele(a) + warningmePost);
}
}
}

View File

@ -83,8 +83,8 @@ public class AFCalcResultUnitTest extends BaseTest {
List<Object[]> tests = new ArrayList<Object[]>();
final List<Double> pValues = new LinkedList<Double>();
for ( final double p : Arrays.asList(0.01, 0.1, 0.9, 0.99, 0.999) )
for ( final double espilon : Arrays.asList(-1e-5, 0.0, 1e-5) )
for ( final double p : Arrays.asList(0.01, 0.1, 0.9, 0.99, 0.999, 1 - 1e-4, 1 - 1e-5, 1 - 1e-6) )
for ( final double espilon : Arrays.asList(-1e-7, 0.0, 1e-7) )
pValues.add(p + espilon);
for ( final double pNonRef : pValues ) {
@ -106,7 +106,7 @@ public class AFCalcResultUnitTest extends BaseTest {
alleles,
MathUtils.normalizeFromLog10(new double[]{1 - pNonRef, pNonRef}, true, false),
log10Even,
Collections.singletonMap(C, Math.log10(pNonRef)));
Collections.singletonMap(C, Math.log10(1 - pNonRef)));
}
@Test(enabled = true, dataProvider = "TestIsPolymorphic")

View File

@ -681,7 +681,7 @@ public class AFCalcUnitTest extends BaseTest {
// must be getCalledChrCount because we cannot ensure that the VC made has our desired ACs
Assert.assertEquals(result.getAlleleCountAtMLE(alt), vc.getCalledChrCount(alt));
Assert.assertEquals(result.isPolymorphic(alt, -1), (boolean)expectedPoly.get(i), "isPolymorphic for allele " + alt + " " + result.getLog10PosteriorOfAFGt0ForAllele(alt));
Assert.assertEquals(result.isPolymorphic(alt, -1), (boolean)expectedPoly.get(i), "isPolymorphic for allele " + alt + " " + result.getLog10PosteriorOfAFEq0ForAllele(alt));
}
}
}

View File

@ -148,7 +148,7 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
for ( int i = 0; i < log10LAlleles.size(); i++ ) {
final double log10LAllele1 = log10LAlleles.get(i);
final double[] L1 = MathUtils.normalizeFromLog10(new double[]{log10LAllele1, 0.0}, true);
final AFCalcResult result1 = new AFCalcResult(new int[]{1}, 1, Arrays.asList(A, C), L1, rawPriors, Collections.singletonMap(C, 0.0));
final AFCalcResult result1 = new AFCalcResult(new int[]{1}, 1, Arrays.asList(A, C), L1, rawPriors, Collections.singletonMap(C, -10000.0));
originalPriors.add(result1);
pNonRefN.add(log10pNonRef*(i+1));
}

View File

@ -343,7 +343,7 @@ public class GATKReport {
GATKReportTable table = tables.firstEntry().getValue();
if ( table.getNumColumns() != values.length )
throw new ReviewedStingException("The number of arguments in writeRow() " + values.length + " must match the number of columns in the table" + table.getNumColumns());
throw new ReviewedStingException("The number of arguments in writeRow (" + values.length + ") must match the number of columns in the table (" + table.getNumColumns() + ")" );
final int rowIndex = table.getNumRows();
for ( int i = 0; i < values.length; i++ )

View File

@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
@ -52,7 +51,7 @@ public class AFCalcResult {
private final double[] log10PriorsOfAC;
private final double[] log10PosteriorsOfAC;
private final Map<Allele, Double> log10pNonRefByAllele;
private final Map<Allele, Double> log10pRefByAllele;
/**
* The AC values for all ALT alleles at the MLE
@ -74,16 +73,16 @@ public class AFCalcResult {
final List<Allele> allelesUsedInGenotyping,
final double[] log10LikelihoodsOfAC,
final double[] log10PriorsOfAC,
final Map<Allele, Double> log10pNonRefByAllele) {
final Map<Allele, Double> log10pRefByAllele) {
if ( allelesUsedInGenotyping == null || allelesUsedInGenotyping.size() < 1 ) throw new IllegalArgumentException("allelesUsedInGenotyping must be non-null list of at least 1 value " + allelesUsedInGenotyping);
if ( alleleCountsOfMLE == null ) throw new IllegalArgumentException("alleleCountsOfMLE cannot be null");
if ( alleleCountsOfMLE.length != allelesUsedInGenotyping.size() - 1) throw new IllegalArgumentException("alleleCountsOfMLE.length " + alleleCountsOfMLE.length + " != allelesUsedInGenotyping.size() " + allelesUsedInGenotyping.size());
if ( nEvaluations < 0 ) throw new IllegalArgumentException("nEvaluations must be >= 0 but saw " + nEvaluations);
if ( log10LikelihoodsOfAC.length != 2 ) throw new IllegalArgumentException("log10LikelihoodsOfAC must have length equal 2");
if ( log10PriorsOfAC.length != 2 ) throw new IllegalArgumentException("log10PriorsOfAC must have length equal 2");
if ( log10pNonRefByAllele == null ) throw new IllegalArgumentException("log10pNonRefByAllele cannot be null");
if ( log10pNonRefByAllele.size() != allelesUsedInGenotyping.size() - 1 ) throw new IllegalArgumentException("log10pNonRefByAllele has the wrong number of elements: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
if ( ! allelesUsedInGenotyping.containsAll(log10pNonRefByAllele.keySet()) ) throw new IllegalArgumentException("log10pNonRefByAllele doesn't contain all of the alleles used in genotyping: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
if ( log10pRefByAllele == null ) throw new IllegalArgumentException("log10pRefByAllele cannot be null");
if ( log10pRefByAllele.size() != allelesUsedInGenotyping.size() - 1 ) throw new IllegalArgumentException("log10pRefByAllele has the wrong number of elements: log10pRefByAllele " + log10pRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
if ( ! allelesUsedInGenotyping.containsAll(log10pRefByAllele.keySet()) ) throw new IllegalArgumentException("log10pRefByAllele doesn't contain all of the alleles used in genotyping: log10pRefByAllele " + log10pRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
if ( ! MathUtils.goodLog10ProbVector(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES, false) ) throw new IllegalArgumentException("log10LikelihoodsOfAC are bad " + Utils.join(",", log10LikelihoodsOfAC));
if ( ! MathUtils.goodLog10ProbVector(log10PriorsOfAC, LOG_10_ARRAY_SIZES, true) ) throw new IllegalArgumentException("log10priors are bad " + Utils.join(",", log10PriorsOfAC));
@ -94,7 +93,7 @@ public class AFCalcResult {
this.log10LikelihoodsOfAC = Arrays.copyOf(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES);
this.log10PriorsOfAC = Arrays.copyOf(log10PriorsOfAC, LOG_10_ARRAY_SIZES);
this.log10PosteriorsOfAC = computePosteriors(log10LikelihoodsOfAC, log10PriorsOfAC);
this.log10pNonRefByAllele = new HashMap<Allele, Double>(log10pNonRefByAllele);
this.log10pRefByAllele = new HashMap<Allele, Double>(log10pRefByAllele);
}
/**
@ -104,7 +103,7 @@ public class AFCalcResult {
* @return
*/
public AFCalcResult withNewPriors(final double[] log10PriorsOfAC) {
return new AFCalcResult(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pNonRefByAllele);
return new AFCalcResult(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele);
}
/**
@ -219,7 +218,7 @@ public class AFCalcResult {
public String toString() {
final List<String> byAllele = new LinkedList<String>();
for ( final Allele a : getAllelesUsedInGenotyping() )
if ( a.isNonReference() ) byAllele.add(String.format("%s => MLE %d / posterior %.2f", a, getAlleleCountAtMLE(a), getLog10PosteriorOfAFGt0ForAllele(a)));
if ( a.isNonReference() ) byAllele.add(String.format("%s => MLE %d / posterior %.2f", a, getAlleleCountAtMLE(a), getLog10PosteriorOfAFEq0ForAllele(a)));
return String.format("AFCalc%n\t\tlog10PosteriorOfAFGT0=%.2f%n\t\t%s", getLog10LikelihoodOfAFGT0(), Utils.join("\n\t\t", byAllele));
}
@ -237,7 +236,7 @@ public class AFCalcResult {
*/
@Requires("MathUtils.goodLog10Probability(log10minPNonRef)")
public boolean isPolymorphic(final Allele allele, final double log10minPNonRef) {
return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef;
return getLog10PosteriorOfAFEq0ForAllele(allele) < log10minPNonRef;
}
/**
@ -245,7 +244,7 @@ public class AFCalcResult {
*/
public boolean isPolymorphicPhredScaledQual(final Allele allele, final double minPNonRefPhredScaledQual) {
if ( minPNonRefPhredScaledQual < 0 ) throw new IllegalArgumentException("phredScaledQual " + minPNonRefPhredScaledQual + " < 0 ");
final double log10Threshold = Math.log10(QualityUtils.qualToProb(minPNonRefPhredScaledQual));
final double log10Threshold = minPNonRefPhredScaledQual / -10;
return isPolymorphic(allele, log10Threshold);
}
@ -263,7 +262,16 @@ public class AFCalcResult {
}
/**
* Returns the log10 probability that allele is segregating
* Returns the log10 probability that allele is not segregating
*
* Note that this function is p not segregating so that we can store
* internally the log10 value of AF == 0, which grows very quickly
* negative and yet has sufficient resolution for high confidence tests.
* For example, if log10pRef == -100, not an unreasonably high number,
* if we tried to store log10pNonRef we'd be looking at 1 - 10^-100, which
* quickly underflows to 1. So the logic here is backward from what
* you really want (the p of segregating) but we do that for numerical
* reasons
*
* Unlike the sites-level annotation, this calculation is specific to allele, and can be
* used to separately determine how much evidence there is that allele is independently
@ -272,11 +280,11 @@ public class AFCalcResult {
* evidence for one allele but not so much for any other allele
*
* @param allele the allele we're interested in, must be in getAllelesUsedInGenotyping
* @return the log10 probability that allele is segregating at this site
* @return the log10 probability that allele is not segregating at this site
*/
@Ensures("MathUtils.goodLog10Probability(result)")
public double getLog10PosteriorOfAFGt0ForAllele(final Allele allele) {
final Double log10pNonRef = log10pNonRefByAllele.get(allele);
public double getLog10PosteriorOfAFEq0ForAllele(final Allele allele) {
final Double log10pNonRef = log10pRefByAllele.get(allele);
if ( log10pNonRef == null ) throw new IllegalArgumentException("Unknown allele " + allele);
return log10pNonRef;
}

View File

@ -77,7 +77,7 @@ public class ExactCallLogger implements Cloneable {
for ( final Allele allele : result.getAllelesUsedInGenotyping() ) {
if ( allele.isNonReference() ) {
printCallElement(vc, "MLE", allele, result.getAlleleCountAtMLE(allele));
printCallElement(vc, "pNonRefByAllele", allele, result.getLog10PosteriorOfAFGt0ForAllele(allele));
printCallElement(vc, "pRefByAllele", allele, result.getLog10PosteriorOfAFEq0ForAllele(allele));
}
}
@ -123,7 +123,7 @@ public class ExactCallLogger implements Cloneable {
final double[] posteriors = new double[2];
final double[] priors = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true);
final List<Integer> mle = new ArrayList<Integer>();
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>();
final Map<Allele, Double> log10pRefByAllele = new HashMap<Allele, Double>();
long runtimeNano = -1;
GenomeLoc currentLoc = null;
@ -148,7 +148,7 @@ public class ExactCallLogger implements Cloneable {
builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
builder.genotypes(genotypes);
final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[]{}));
final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele);
final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pRefByAllele);
calls.add(new ExactCall(builder.make(), runtimeNano, result));
}
break;
@ -165,9 +165,9 @@ public class ExactCallLogger implements Cloneable {
posteriors[1] = Double.valueOf(value);
} else if (variable.equals("MLE")) {
mle.add(Integer.valueOf(value));
} else if (variable.equals("pNonRefByAllele")) {
} else if (variable.equals("pRefByAllele")) {
final Allele a = Allele.create(key);
log10pNonRefByAllele.put(a, Double.valueOf(value));
log10pRefByAllele.put(a, Double.valueOf(value));
} else if (variable.equals("runtime.nano")) {
runtimeNano = Long.valueOf(value);
} else {

View File

@ -125,8 +125,8 @@ import java.util.*;
*/
final List<AFCalcResult> supporting;
private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pNonRefByAllele, List<AFCalcResult> supporting) {
super(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pNonRefByAllele);
private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pRefByAllele, List<AFCalcResult> supporting) {
super(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele);
this.supporting = supporting;
}
}
@ -323,7 +323,7 @@ import java.util.*;
final int nAltAlleles = sortedResultsWithThetaNPriors.size();
final int[] alleleCountsOfMLE = new int[nAltAlleles];
final double[] log10PriorsOfAC = new double[2];
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(nAltAlleles);
final Map<Allele, Double> log10pRefByAllele = new HashMap<Allele, Double>(nAltAlleles);
// the sum of the log10 posteriors for AF == 0 and AF > 0 to determine joint probs
double log10PosteriorOfACEq0Sum = 0.0;
@ -348,7 +348,7 @@ import java.util.*;
log10PosteriorOfACGt0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0();
// bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior
log10pNonRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0());
log10pRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFEq0());
// trivial -- update the number of evaluations
nEvaluations += sortedResultWithThetaNPriors.nEvaluations;
@ -384,6 +384,6 @@ import java.util.*;
MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true),
// priors incorporate multiple alt alleles, must be normalized
MathUtils.normalizeFromLog10(log10PriorsOfAC, true),
log10pNonRefByAllele, sortedResultsWithThetaNPriors);
log10pRefByAllele, sortedResultsWithThetaNPriors);
}
}

View File

@ -30,13 +30,13 @@ class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
final double[] log10Priors = new double[]{log10AlleleFrequencyPriors[0], MathUtils.log10sumLog10(log10AlleleFrequencyPriors, 1)};
final double[] log10Posteriors = MathUtils.vectorSum(log10Likelihoods, log10Priors);
final double log10PNonRef = log10Posteriors[1] > log10Posteriors[0] ? 0.0 : MathUtils.LOG10_P_OF_ZERO;
final Map<Allele, Double> log10pNonRefByAllele = Collections.singletonMap(vc.getAlternateAllele(0), log10PNonRef);
final double log10PRef = log10Posteriors[1] > log10Posteriors[0] ? MathUtils.LOG10_P_OF_ZERO : 0.0;
final Map<Allele, Double> log10pRefByAllele = Collections.singletonMap(vc.getAlternateAllele(0), log10PRef);
return new AFCalcResult(new int[]{mleK}, 0, vc.getAlleles(),
MathUtils.normalizeFromLog10(log10Likelihoods, true),
MathUtils.normalizeFromLog10(log10Priors, true),
log10pNonRefByAllele);
log10pRefByAllele);
}
/**

View File

@ -165,14 +165,14 @@ final class StateTracker {
final double[] log10Likelihoods = MathUtils.normalizeFromLog10(new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero()}, true);
final double[] log10Priors = MathUtils.normalizeFromLog10(new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)}, true);
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(allelesUsedInGenotyping.size());
final Map<Allele, Double> log10pRefByAllele = new HashMap<Allele, Double>(allelesUsedInGenotyping.size());
for ( int i = 0; i < subACOfMLE.length; i++ ) {
final Allele allele = allelesUsedInGenotyping.get(i+1);
final double log10PNonRef = alleleCountsOfMAP[i] > 0 ? 0 : -10000; // TODO -- a total hack but in effect what the old behavior was
log10pNonRefByAllele.put(allele, log10PNonRef);
final double log10PRef = alleleCountsOfMAP[i] > 0 ? -10000 : 0; // TODO -- a total hack but in effect what the old behavior was
log10pRefByAllele.put(allele, log10PRef);
}
return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pNonRefByAllele);
return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pRefByAllele);
}
// --------------------------------------------------------------------------------

View File

@ -287,6 +287,9 @@ public class PairHMMIndelErrorModel {
if (startLocationInRefForHaplotypes < ref.getWindow().getStart()) {
startLocationInRefForHaplotypes = ref.getWindow().getStart(); // read starts before haplotype: read will have to be cut numStartSoftClippedBases += ref.getWindow().getStart() - startLocationInRefForHaplotypes;
}
else if (startLocationInRefForHaplotypes > ref.getWindow().getStop()) {
startLocationInRefForHaplotypes = ref.getWindow().getStop(); // read starts after haplotype: read will have to be clipped completely;
}
if (stopLocationInRefForHaplotypes > ref.getWindow().getStop()) {
stopLocationInRefForHaplotypes = ref.getWindow().getStop(); // check also if end of read will go beyond reference context
@ -338,6 +341,8 @@ public class PairHMMIndelErrorModel {
if (startLocationInRefForHaplotypes < haplotype.getStartPosition())
startLocationInRefForHaplotypes = haplotype.getStartPosition();
else if (startLocationInRefForHaplotypes > haplotype.getStopPosition())
startLocationInRefForHaplotypes = haplotype.getStopPosition();
final long indStart = startLocationInRefForHaplotypes - haplotype.getStartPosition();
final long indStop = stopLocationInRefForHaplotypes - haplotype.getStartPosition();
@ -347,8 +352,6 @@ public class PairHMMIndelErrorModel {
System.out.format("indStart: %d indStop: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d C:%s\n",
indStart, indStop, ref.getWindow().getStart(), ref.getWindow().getStop(), startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, read.getReadLength(), read.getCigar().toString());
final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(),
(int)indStart, (int)indStop);

View File

@ -109,12 +109,16 @@ public class TraverseActiveRegionsTest extends BaseTest {
dictionary = reference.getSequenceDictionary();
genomeLocParser = new GenomeLocParser(dictionary);
// TODO: test shard boundaries
intervals = new ArrayList<GenomeLoc>();
intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20));
intervals.add(genomeLocParser.createGenomeLoc("1", 1, 999));
intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000));
intervals.add(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
intervals.add(genomeLocParser.createGenomeLoc("2", 1, 100));
intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 10100));
intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList();
@ -126,6 +130,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
reads.add(buildSAMRecord("boundary_unequal", "1", 1990, 2008));
reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
reads.add(buildSAMRecord("end_of_chr1", "1", 249250600, 249250700));
reads.add(buildSAMRecord("simple20", "20", 10025, 10075));
// required by LocusIteratorByState, and I prefer to list them in test case order above
@ -139,8 +144,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
List<GenomeLoc> activeIntervals = getIsActiveIntervals(walker, intervals);
// Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call
verifyEqualIntervals(intervals, activeIntervals);
// TODO: more tests and edge cases
}
private List<GenomeLoc> getIsActiveIntervals(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
@ -171,8 +174,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
Collection<ActiveRegion> activeRegions = getActiveRegions(walker, intervals).values();
verifyActiveRegionCoverage(intervals, activeRegions);
// TODO: more tests and edge cases
}
private void verifyActiveRegionCoverage(List<GenomeLoc> intervals, Collection<ActiveRegion> activeRegions) {
@ -231,6 +232,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// end_of_chr1: Primary in 1:249250600-249250621
// simple20: Primary in 20:10000-10100
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@ -245,6 +247,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
@ -256,6 +259,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
getRead(region, "boundary_unequal");
getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
@ -267,6 +271,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
getRead(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
@ -278,9 +295,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
getRead(region, "simple20");
// TODO: more tests and edge cases
}
@Test
@ -300,6 +316,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// end_of_chr1: Primary in 1:249250600-249250621
// simple20: Primary in 20:10000-10100
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@ -314,6 +331,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
verifyReadNotPlaced(region, "boundary_unequal");
getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
@ -325,6 +343,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
getRead(region, "boundary_unequal");
getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
@ -336,6 +355,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
getRead(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
getRead(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
@ -347,9 +379,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
getRead(region, "simple20");
// TODO: more tests and edge cases
}
@Test
@ -370,6 +401,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// end_of_chr1: Primary in 1:249250600-249250621
// simple20: Primary in 20:10000-10100
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@ -384,6 +416,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
verifyReadNotPlaced(region, "boundary_unequal");
getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
@ -395,6 +428,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
getRead(region, "boundary_unequal");
getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
@ -406,6 +440,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
getRead(region, "boundary_unequal");
getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
getRead(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
@ -417,9 +464,13 @@ public class TraverseActiveRegionsTest extends BaseTest {
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
getRead(region, "simple20");
}
// TODO: more tests and edge cases
@Test
public void testUnmappedReads() {
// TODO
}
private void verifyReadNotPlaced(ActiveRegion region, String readName) {