diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 24b3309f1..d194e2620 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -45,6 +45,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState; import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.codecs.vcf.*; @@ -295,9 +296,15 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Override public boolean includeReadsWithDeletionAtLoci() { return true; } - // enable non primary reads in the active region + // enable non primary and extended reads in the active region @Override - public boolean wantsNonPrimaryReads() { return true; } + public EnumSet desiredReadStates() { + return EnumSet.of( + ActiveRegionReadState.PRIMARY, + ActiveRegionReadState.NONPRIMARY, + ActiveRegionReadState.EXTENDED + ); + } @Override @Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"}) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index d3e77e002..9d12b0ded 100755 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("704888987baacff8c7b273b8ab9938d0")); + Arrays.asList("d20c7a143b899f0239bf64b652ad3edb")); executeTest("test Multiple SNP alleles", spec); } @@ -197,7 +197,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testOutputParameterAllSites() { - testOutputParameters("--output_mode EMIT_ALL_SITES", "81fff490c0f59890f1e75dc290833434"); + testOutputParameters("--output_mode EMIT_ALL_SITES", "8b26088a035e579c4afd3b46737291e4"); } private void testOutputParameters(final String args, final String md5) { @@ -345,7 +345,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSampleIndels1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("a4761d7f25e7a62f34494801c98a0da7")); + Arrays.asList("69df7a00f800204564ca3726e1871132")); List result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst(); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java index cbe2eb268..96e055e92 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java @@ -2,16 +2,14 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; +import java.util.*; public class AFCalcResultUnitTest extends BaseTest { private static class MyTest { @@ -79,4 +77,54 @@ public class AFCalcResultUnitTest extends BaseTest { final double[] actualPosteriors = new double[]{result.getLog10PosteriorOfAFEq0(), result.getLog10PosteriorOfAFGT0()}; Assert.assertEquals(MathUtils.sumLog10(actualPosteriors), 1.0, 1e-3, "Posteriors don't sum to 1 with 1e-3 precision"); } + + @DataProvider(name = "TestIsPolymorphic") + public Object[][] makeTestIsPolymorphic() { + List tests = new ArrayList(); + + final List pValues = new LinkedList(); + for ( final double p : Arrays.asList(0.01, 0.1, 0.9, 0.99, 0.999) ) + for ( final double espilon : Arrays.asList(-1e-5, 0.0, 1e-5) ) + pValues.add(p + espilon); + + for ( final double pNonRef : pValues ) { + for ( final double pThreshold : pValues ) { + final boolean shouldBePoly = pNonRef >= pThreshold; + if ( pNonRef != pThreshold) + // let's not deal with numerical instability + tests.add(new Object[]{ pNonRef, pThreshold, shouldBePoly }); + } + } + + return tests.toArray(new Object[][]{}); + } + + private AFCalcResult makePolymorphicTestData(final double pNonRef) { + return new AFCalcResult( + new int[]{0}, + 1, + alleles, + MathUtils.normalizeFromLog10(new double[]{1 - pNonRef, pNonRef}, true, false), + log10Even, + Collections.singletonMap(C, Math.log10(pNonRef))); + } + + @Test(enabled = true, dataProvider = "TestIsPolymorphic") + private void testIsPolymorphic(final double pNonRef, final double pThreshold, final boolean shouldBePoly) { + final AFCalcResult result = makePolymorphicTestData(pNonRef); + final boolean actualIsPoly = result.isPolymorphic(C, Math.log10(pThreshold)); + Assert.assertEquals(actualIsPoly, shouldBePoly, + "isPolymorphic with pNonRef " + pNonRef + " and threshold " + pThreshold + " returned " + + actualIsPoly + " but the expected result is " + shouldBePoly); + } + + @Test(enabled = true, dataProvider = "TestIsPolymorphic") + private void testIsPolymorphicQual(final double pNonRef, final double pThreshold, final boolean shouldBePoly) { + final AFCalcResult result = makePolymorphicTestData(pNonRef); + final double qual = QualityUtils.phredScaleCorrectRate(pThreshold); + final boolean actualIsPoly = result.isPolymorphicPhredScaledQual(C, qual); + Assert.assertEquals(actualIsPoly, shouldBePoly, + "isPolymorphic with pNonRef " + pNonRef + " and threshold " + pThreshold + " returned " + + actualIsPoly + " but the expected result is " + shouldBePoly); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 3f20db0af..06fc01232 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -258,13 +258,23 @@ public class TraverseActiveRegions extends TraversalEngine extends Walker desiredReadStates() { + return EnumSet.of(ActiveRegionReadState.PRIMARY); + } + + public final boolean wantsNonPrimaryReads() { + return desiredReadStates().contains(ActiveRegionReadState.NONPRIMARY); + } + + public boolean wantsExtendedReads() { + return desiredReadStates().contains(ActiveRegionReadState.EXTENDED); + } + + public boolean wantsUnmappedReads() { + return desiredReadStates().contains(ActiveRegionReadState.UNMAPPED); } // Determine probability of active status over the AlignmentContext diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 80bc04845..cc086b148 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -382,11 +382,9 @@ public class UnifiedGenotyperEngine { if ( alternateAllele.isReference() ) continue; - // we are non-ref if the probability of being non-ref > the emit confidence. - // the emit confidence is phred-scaled, say 30 => 10^-3. - // the posterior AF > 0 is log10: -5 => 10^-5 - // we are non-ref if 10^-5 < 10^-3 => -5 < -3 - final boolean isNonRef = AFresult.isPolymorphic(alternateAllele, UAC.STANDARD_CONFIDENCE_FOR_EMITTING / -10.0); + // Compute if the site is considered polymorphic with sufficient confidence relative to our + // phred-scaled emission QUAL + final boolean isNonRef = AFresult.isPolymorphicPhredScaledQual(alternateAllele, UAC.STANDARD_CONFIDENCE_FOR_EMITTING); // if the most likely AC is not 0, then this is a good alternate allele to use if ( isNonRef ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java index a65772444..dbb0e8cdd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java @@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.variantcontext.Allele; @@ -234,10 +235,20 @@ public class AFCalcResult { * * @return true if there's enough confidence (relative to log10minPNonRef) to reject AF == 0 */ + @Requires("MathUtils.goodLog10Probability(log10minPNonRef)") public boolean isPolymorphic(final Allele allele, final double log10minPNonRef) { return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef; } + /** + * Same as #isPolymorphic but takes a phred-scaled quality score as input + */ + public boolean isPolymorphicPhredScaledQual(final Allele allele, final double minPNonRefPhredScaledQual) { + if ( minPNonRefPhredScaledQual < 0 ) throw new IllegalArgumentException("phredScaledQual " + minPNonRefPhredScaledQual + " < 0 "); + final double log10Threshold = Math.log10(QualityUtils.qualToProb(minPNonRefPhredScaledQual)); + return isPolymorphic(allele, log10Threshold); + } + /** * Are any of the alleles polymorphic w.r.t. #isPolymorphic? * diff --git a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java index 1242e5b00..848beccb8 100755 --- a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java @@ -9,7 +9,7 @@ import net.sf.samtools.SAMUtils; * @author Kiran Garimella */ public class QualityUtils { - public final static byte MAX_RECALIBRATED_Q_SCORE = 93; + public final static byte MAX_RECALIBRATED_Q_SCORE = SAMUtils.MAX_PHRED_SCORE; public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE; public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE); diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java new file mode 100644 index 000000000..00e491eb0 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java @@ -0,0 +1,16 @@ +package org.broadinstitute.sting.utils.activeregion; + +/** + * Created with IntelliJ IDEA. + * User: thibault + * Date: 11/26/12 + * Time: 2:35 PM + * + * Describes how a read relates to an assigned ActiveRegion + */ +public enum ActiveRegionReadState { + PRIMARY, // This is the read's primary region + NONPRIMARY, // This region overlaps the read, but it is not primary + EXTENDED, // This region would overlap the read if it were extended + UNMAPPED // This read is not mapped +} diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java index cf4d699ee..9ad1bf773 100644 --- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java +++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java @@ -406,10 +406,15 @@ public class BAQ { // so BQi = Qi - BAQi + 64 byte[] bqTag = new byte[baq.length]; for ( int i = 0; i < bqTag.length; i++) { - int bq = (int)read.getBaseQualities()[i] + 64; - int baq_i = (int)baq[i]; - int tag = bq - baq_i; - if ( tag < 0 ) throw new ReviewedStingException("BAQ tag calculation error. BAQ value above base quality at " + read); + final int bq = (int)read.getBaseQualities()[i] + 64; + final int baq_i = (int)baq[i]; + final int tag = bq - baq_i; + // problem with the calculation of the correction factor; this is our problem + if ( tag < 0 ) + throw new ReviewedStingException("BAQ tag calculation error. BAQ value above base quality at " + read); + // the original quality is too high, almost certainly due to using the wrong encoding in the BAM file + if ( tag > Byte.MAX_VALUE ) + throw new UserException.MalformedBAM(read, "we encountered an extremely high quality score (" + (bq - 64) + ") with BAQ correction factor of " + baq_i + "; the BAM file appears to be using the wrong encoding for quality scores"); bqTag[i] = (byte)tag; } return new String(bqTag); diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatLengthCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatLengthCovariate.java new file mode 100644 index 000000000..d4e4ab65e --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatLengthCovariate.java @@ -0,0 +1,64 @@ +package org.broadinstitute.sting.utils.recalibration.covariates; + +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.TandemRepeat; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; + +import java.util.Arrays; + +/** + * Created with IntelliJ IDEA. + * User: rpoplin + * Date: 11/3/12 + */ + +public class RepeatLengthCovariate implements ExperimentalCovariate { + final int MAX_REPEAT_LENGTH = 20; + + // Initialize any member variables using the command-line arguments passed to the walkers + @Override + public void initialize(final RecalibrationArgumentCollection RAC) {} + + @Override + public void recordValues(final GATKSAMRecord read, final ReadCovariates values) { + byte[] readBytes = read.getReadBases(); + for (int i = 0; i < readBytes.length; i++) { + int maxRL = 0; + for (int str = 1; str <= 8; str++) { + if (i + str <= readBytes.length) { + maxRL = Math.max(maxRL, VariantContextUtils.findNumberofRepetitions( + Arrays.copyOfRange(readBytes,i,i + str), + Arrays.copyOfRange(readBytes,i,readBytes.length) + )); + } + } + if(maxRL > MAX_REPEAT_LENGTH) { maxRL = MAX_REPEAT_LENGTH; } + values.addCovariate(maxRL, maxRL, maxRL, i); + } + } + + // Used to get the covariate's value from input csv file during on-the-fly recalibration + @Override + public final Object getValue(final String str) { + return Byte.parseByte(str); + } + + @Override + public String formatKey(final int key) { + return String.format("%d", key); + } + + @Override + public int keyFromValue(final Object value) { + return (value instanceof String) ? Integer.parseInt((String) value) : (Integer) value; + } + + @Override + public int maximumKeyValue() { + return MAX_REPEAT_LENGTH + 1; + } + +} diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 1f1867f75..b3e3cf8df 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -1267,7 +1267,7 @@ public class VariantContextUtils { * @param testString String to test * @return Number of repetitions (0 if testString is not a concatenation of n repeatUnit's */ - protected static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) { + public static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) { int numRepeats = 0; for (int start = 0; start < testString.length; start += repeatUnit.length) { int end = start + repeatUnit.length; diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java index 66504da11..a65b0cb45 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.traversals; import com.google.java.contract.PreconditionError; import net.sf.samtools.*; +import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -24,6 +25,7 @@ import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.ReadUtils; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -46,6 +48,8 @@ public class TraverseActiveRegionsTest extends BaseTest { private class DummyActiveRegionWalker extends ActiveRegionWalker { private final double prob; + private EnumSet states = super.desiredReadStates(); + protected List isActiveCalls = new ArrayList(); protected Map mappedActiveRegions = new HashMap(); @@ -57,6 +61,16 @@ public class TraverseActiveRegionsTest extends BaseTest { this.prob = constProb; } + public DummyActiveRegionWalker(EnumSet wantStates) { + this.prob = 1.0; + this.states = wantStates; + } + + @Override + public EnumSet desiredReadStates() { + return states; + } + @Override public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { isActiveCalls.add(ref.getLocus()); @@ -87,7 +101,7 @@ public class TraverseActiveRegionsTest extends BaseTest { private GenomeLocParser genomeLocParser; private List intervals; - private List reads; + private List reads; @BeforeClass private void init() throws FileNotFoundException { @@ -104,16 +118,18 @@ public class TraverseActiveRegionsTest extends BaseTest { intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 10100)); intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList(); - reads = new ArrayList(); + reads = new ArrayList(); reads.add(buildSAMRecord("simple", "1", 100, 200)); reads.add(buildSAMRecord("overlap_equal", "1", 10, 20)); reads.add(buildSAMRecord("overlap_unequal", "1", 10, 21)); reads.add(buildSAMRecord("boundary_equal", "1", 1990, 2009)); reads.add(buildSAMRecord("boundary_unequal", "1", 1990, 2008)); - reads.add(buildSAMRecord("extended_only", "1", 3000, 3100)); reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990)); reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000)); reads.add(buildSAMRecord("simple20", "20", 10025, 10075)); + + // required by LocusIteratorByState, and I prefer to list them in test case order above + ReadUtils.sortReadsByCoordinate(reads); } @Test @@ -202,12 +218,148 @@ public class TraverseActiveRegionsTest extends BaseTest { } @Test - public void testReadMapping() { + public void testPrimaryReadMapping() { DummyActiveRegionWalker walker = new DummyActiveRegionWalker(); // Contract: Each read has the Primary state in a single region (or none) // This is the region of maximum overlap for the read (earlier if tied) + // simple: Primary in 1:1-999 + // overlap_equal: Primary in 1:1-999 + // overlap_unequal: Primary in 1:1-999 + // boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999 + // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999 + // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999 + // outside_intervals: none + // simple20: Primary in 20:10000-10100 + + Map activeRegions = getActiveRegions(walker, intervals); + ActiveRegion region; + + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999)); + + getRead(region, "simple"); + getRead(region, "overlap_equal"); + getRead(region, "overlap_unequal"); + verifyReadNotPlaced(region, "boundary_equal"); + verifyReadNotPlaced(region, "boundary_unequal"); + verifyReadNotPlaced(region, "extended_and_np"); + verifyReadNotPlaced(region, "outside_intervals"); + verifyReadNotPlaced(region, "simple20"); + + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999)); + + verifyReadNotPlaced(region, "simple"); + verifyReadNotPlaced(region, "overlap_equal"); + verifyReadNotPlaced(region, "overlap_unequal"); + verifyReadNotPlaced(region, "boundary_equal"); + getRead(region, "boundary_unequal"); + getRead(region, "extended_and_np"); + verifyReadNotPlaced(region, "outside_intervals"); + verifyReadNotPlaced(region, "simple20"); + + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999)); + + verifyReadNotPlaced(region, "simple"); + verifyReadNotPlaced(region, "overlap_equal"); + verifyReadNotPlaced(region, "overlap_unequal"); + getRead(region, "boundary_equal"); + verifyReadNotPlaced(region, "boundary_unequal"); + verifyReadNotPlaced(region, "extended_and_np"); + verifyReadNotPlaced(region, "outside_intervals"); + verifyReadNotPlaced(region, "simple20"); + + region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100)); + + verifyReadNotPlaced(region, "simple"); + verifyReadNotPlaced(region, "overlap_equal"); + verifyReadNotPlaced(region, "overlap_unequal"); + verifyReadNotPlaced(region, "boundary_equal"); + verifyReadNotPlaced(region, "boundary_unequal"); + verifyReadNotPlaced(region, "extended_and_np"); + verifyReadNotPlaced(region, "outside_intervals"); + getRead(region, "simple20"); + + // TODO: more tests and edge cases + } + + @Test + public void testNonPrimaryReadMapping() { + DummyActiveRegionWalker walker = new DummyActiveRegionWalker( + EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY)); + + // Contract: Each read has the Primary state in a single region (or none) + // This is the region of maximum overlap for the read (earlier if tied) + + // Contract: Each read has the Non-Primary state in all other regions it overlaps + + // simple: Primary in 1:1-999 + // overlap_equal: Primary in 1:1-999 + // overlap_unequal: Primary in 1:1-999 + // boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999 + // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999 + // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999 + // outside_intervals: none + // simple20: Primary in 20:10000-10100 + + Map activeRegions = getActiveRegions(walker, intervals); + ActiveRegion region; + + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999)); + + getRead(region, "simple"); + getRead(region, "overlap_equal"); + getRead(region, "overlap_unequal"); + verifyReadNotPlaced(region, "boundary_equal"); + verifyReadNotPlaced(region, "boundary_unequal"); + getRead(region, "extended_and_np"); + verifyReadNotPlaced(region, "outside_intervals"); + verifyReadNotPlaced(region, "simple20"); + + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999)); + + verifyReadNotPlaced(region, "simple"); + verifyReadNotPlaced(region, "overlap_equal"); + verifyReadNotPlaced(region, "overlap_unequal"); + getRead(region, "boundary_equal"); + getRead(region, "boundary_unequal"); + getRead(region, "extended_and_np"); + verifyReadNotPlaced(region, "outside_intervals"); + verifyReadNotPlaced(region, "simple20"); + + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999)); + + verifyReadNotPlaced(region, "simple"); + verifyReadNotPlaced(region, "overlap_equal"); + verifyReadNotPlaced(region, "overlap_unequal"); + getRead(region, "boundary_equal"); + getRead(region, "boundary_unequal"); + verifyReadNotPlaced(region, "extended_and_np"); + verifyReadNotPlaced(region, "outside_intervals"); + verifyReadNotPlaced(region, "simple20"); + + region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100)); + + verifyReadNotPlaced(region, "simple"); + verifyReadNotPlaced(region, "overlap_equal"); + verifyReadNotPlaced(region, "overlap_unequal"); + verifyReadNotPlaced(region, "boundary_equal"); + verifyReadNotPlaced(region, "boundary_unequal"); + verifyReadNotPlaced(region, "extended_and_np"); + verifyReadNotPlaced(region, "outside_intervals"); + getRead(region, "simple20"); + + // TODO: more tests and edge cases + } + + @Test + public void testExtendedReadMapping() { + DummyActiveRegionWalker walker = new DummyActiveRegionWalker( + EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY, ActiveRegionReadState.EXTENDED)); + + // Contract: Each read has the Primary state in a single region (or none) + // This is the region of maximum overlap for the read (earlier if tied) + // Contract: Each read has the Non-Primary state in all other regions it overlaps // Contract: Each read has the Extended state in regions where it only overlaps if the region is extended @@ -216,7 +368,6 @@ public class TraverseActiveRegionsTest extends BaseTest { // overlap_unequal: Primary in 1:1-999 // boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999 // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999 - // extended_only: Extended in 1:2000-2999 // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999 // outside_intervals: none // simple20: Primary in 20:10000-10100 @@ -226,13 +377,12 @@ public class TraverseActiveRegionsTest extends BaseTest { region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999)); - verifyReadPrimary(region, "simple"); - verifyReadPrimary(region, "overlap_equal"); - verifyReadPrimary(region, "overlap_unequal"); + getRead(region, "simple"); + getRead(region, "overlap_equal"); + getRead(region, "overlap_unequal"); verifyReadNotPlaced(region, "boundary_equal"); verifyReadNotPlaced(region, "boundary_unequal"); - verifyReadNotPlaced(region, "extended_only"); - // TODO: fail verifyReadNonPrimary(region, "extended_and_np"); + getRead(region, "extended_and_np"); verifyReadNotPlaced(region, "outside_intervals"); verifyReadNotPlaced(region, "simple20"); @@ -241,10 +391,9 @@ public class TraverseActiveRegionsTest extends BaseTest { verifyReadNotPlaced(region, "simple"); verifyReadNotPlaced(region, "overlap_equal"); verifyReadNotPlaced(region, "overlap_unequal"); - // TODO: fail verifyReadNonPrimary(region, "boundary_equal"); - verifyReadPrimary(region, "boundary_unequal"); - verifyReadNotPlaced(region, "extended_only"); - // TODO: fail verifyReadPrimary(region, "extended_and_np"); + getRead(region, "boundary_equal"); + getRead(region, "boundary_unequal"); + getRead(region, "extended_and_np"); verifyReadNotPlaced(region, "outside_intervals"); verifyReadNotPlaced(region, "simple20"); @@ -253,10 +402,9 @@ public class TraverseActiveRegionsTest extends BaseTest { verifyReadNotPlaced(region, "simple"); verifyReadNotPlaced(region, "overlap_equal"); verifyReadNotPlaced(region, "overlap_unequal"); - verifyReadPrimary(region, "boundary_equal"); - // TODO: fail verifyReadNonPrimary(region, "boundary_unequal"); - // TODO: fail verifyReadExtended(region, "extended_only"); - // TODO: fail verifyReadExtended(region, "extended_and_np"); + getRead(region, "boundary_equal"); + getRead(region, "boundary_unequal"); + getRead(region, "extended_and_np"); verifyReadNotPlaced(region, "outside_intervals"); verifyReadNotPlaced(region, "simple20"); @@ -267,28 +415,13 @@ public class TraverseActiveRegionsTest extends BaseTest { verifyReadNotPlaced(region, "overlap_unequal"); verifyReadNotPlaced(region, "boundary_equal"); verifyReadNotPlaced(region, "boundary_unequal"); - verifyReadNotPlaced(region, "extended_only"); verifyReadNotPlaced(region, "extended_and_np"); verifyReadNotPlaced(region, "outside_intervals"); - verifyReadPrimary(region, "simple20"); + getRead(region, "simple20"); // TODO: more tests and edge cases } - private void verifyReadPrimary(ActiveRegion region, String readName) { - SAMRecord read = getRead(region, readName); - Assert.assertFalse(read.getNotPrimaryAlignmentFlag(), "Read " + read + " not primary in active region " + region); - } - - private void verifyReadNonPrimary(ActiveRegion region, String readName) { - SAMRecord read = getRead(region, readName); - Assert.assertTrue(read.getNotPrimaryAlignmentFlag(), "Read " + read + " primary in active region " + region); - } - - private void verifyReadExtended(ActiveRegion region, String readName) { - Assert.fail("The Extended read state has not been implemented"); - } - private void verifyReadNotPlaced(ActiveRegion region, String readName) { for (SAMRecord read : region.getReads()) { if (read.getReadName().equals(readName)) @@ -302,7 +435,7 @@ public class TraverseActiveRegionsTest extends BaseTest { return read; } - Assert.fail("Read " + readName + " not found in active region " + region); + Assert.fail("Read " + readName + " not assigned to active region " + region); return null; } @@ -375,7 +508,7 @@ public class TraverseActiveRegionsTest extends BaseTest { engine.setGenomeLocParser(genomeLocParser); t.initialize(engine); - StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(reads); + StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList(reads)); Shard shard = new MockLocusShard(genomeLocParser, intervals); List providers = new ArrayList();