Add ActiveRegionReadState handling

This commit is contained in:
Joel Thibault 2012-11-26 15:01:13 -05:00
parent f0395b457a
commit 198923b597
5 changed files with 230 additions and 40 deletions

View File

@ -45,6 +45,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.codecs.vcf.*;
@ -295,9 +296,15 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@Override
public boolean includeReadsWithDeletionAtLoci() { return true; }
// enable non primary reads in the active region
// enable non primary and extended reads in the active region
@Override
public boolean wantsNonPrimaryReads() { return true; }
public EnumSet<ActiveRegionReadState> desiredReadStates() {
return EnumSet.of(
ActiveRegionReadState.PRIMARY,
ActiveRegionReadState.NONPRIMARY,
ActiveRegionReadState.EXTENDED
);
}
@Override
@Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})

View File

@ -258,13 +258,23 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
activeRegion.add( read );
}
for( final ActiveRegion otherRegionToTest : workQueue ) {
if( !bestRegion.equals(otherRegionToTest) && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
otherRegionToTest.add( read );
if( !bestRegion.equals(otherRegionToTest) ) {
// check for non-primary vs. extended
if ( otherRegionToTest.getLocation().overlapsP( readLoc ) ) {
otherRegionToTest.add( read );
} else if ( walker.wantsExtendedReads() && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
otherRegionToTest.add( read );
}
}
}
}
placedReads.add( read );
} else if( activeRegion.getExtendedLoc().overlapsP( readLoc ) && walker.wantsNonPrimaryReads() ) {
// check for non-primary vs. extended
} else if( activeRegion.getLocation().overlapsP( readLoc ) ) {
if ( walker.wantsNonPrimaryReads() ) {
activeRegion.add( read );
}
} else if( walker.wantsExtendedReads() && activeRegion.getExtendedLoc().overlapsP( readLoc )) {
activeRegion.add( read );
}
}

View File

@ -14,14 +14,14 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import java.util.*;
/**
* Base class for all the Active Region Walkers.
@ -71,8 +71,20 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
return true; // We are keeping all the reads
}
public boolean wantsNonPrimaryReads() {
return false;
public EnumSet<ActiveRegionReadState> desiredReadStates() {
return EnumSet.of(ActiveRegionReadState.PRIMARY);
}
public final boolean wantsNonPrimaryReads() {
return desiredReadStates().contains(ActiveRegionReadState.NONPRIMARY);
}
public boolean wantsExtendedReads() {
return desiredReadStates().contains(ActiveRegionReadState.EXTENDED);
}
public boolean wantsUnmappedReads() {
return desiredReadStates().contains(ActiveRegionReadState.UNMAPPED);
}
// Determine probability of active status over the AlignmentContext

View File

@ -0,0 +1,16 @@
package org.broadinstitute.sting.utils.activeregion;
/**
* Created with IntelliJ IDEA.
* User: thibault
* Date: 11/26/12
* Time: 2:35 PM
*
* Describes how a read relates to an assigned ActiveRegion
*/
public enum ActiveRegionReadState {
PRIMARY, // This is the read's primary region
NONPRIMARY, // This region overlaps the read, but it is not primary
EXTENDED, // This region would overlap the read if it were extended
UNMAPPED // This read is not mapped
}

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.traversals;
import com.google.java.contract.PreconditionError;
import net.sf.samtools.*;
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -46,6 +47,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
private final double prob;
private EnumSet<ActiveRegionReadState> states = super.desiredReadStates();
protected List<GenomeLoc> isActiveCalls = new ArrayList<GenomeLoc>();
protected Map<GenomeLoc, ActiveRegion> mappedActiveRegions = new HashMap<GenomeLoc, ActiveRegion>();
@ -57,6 +60,16 @@ public class TraverseActiveRegionsTest extends BaseTest {
this.prob = constProb;
}
public DummyActiveRegionWalker(EnumSet<ActiveRegionReadState> wantStates) {
this.prob = 1.0;
this.states = wantStates;
}
@Override
public EnumSet<ActiveRegionReadState> desiredReadStates() {
return states;
}
@Override
public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
isActiveCalls.add(ref.getLocus());
@ -202,12 +215,158 @@ public class TraverseActiveRegionsTest extends BaseTest {
}
@Test
public void testReadMapping() {
public void testPrimaryReadMapping() {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
// Contract: Each read has the Primary state in a single region (or none)
// This is the region of maximum overlap for the read (earlier if tied)
// simple: Primary in 1:1-999
// overlap_equal: Primary in 1:1-999
// overlap_unequal: Primary in 1:1-999
// boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
// extended_only: Extended in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// simple20: Primary in 20:10000-10100
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
ActiveRegion region;
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
getRead(region, "simple");
getRead(region, "overlap_equal");
getRead(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_only");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
getRead(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_only");
// TODO: fail getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
getRead(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_only");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_only");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
getRead(region, "simple20");
// TODO: more tests and edge cases
}
@Test
public void testNonPrimaryReadMapping() {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker(
EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY));
// Contract: Each read has the Primary state in a single region (or none)
// This is the region of maximum overlap for the read (earlier if tied)
// Contract: Each read has the Non-Primary state in all other regions it overlaps
// simple: Primary in 1:1-999
// overlap_equal: Primary in 1:1-999
// overlap_unequal: Primary in 1:1-999
// boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
// extended_only: Extended in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// simple20: Primary in 20:10000-10100
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
ActiveRegion region;
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
getRead(region, "simple");
getRead(region, "overlap_equal");
getRead(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_only");
// TODO: fail getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
getRead(region, "boundary_equal");
getRead(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_only");
// TODO: fail getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
getRead(region, "boundary_equal");
getRead(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_only");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_only");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
getRead(region, "simple20");
// TODO: more tests and edge cases
}
@Test
public void testExtendedReadMapping() {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker(
EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY, ActiveRegionReadState.EXTENDED));
// Contract: Each read has the Primary state in a single region (or none)
// This is the region of maximum overlap for the read (earlier if tied)
// Contract: Each read has the Non-Primary state in all other regions it overlaps
// Contract: Each read has the Extended state in regions where it only overlaps if the region is extended
@ -226,13 +385,13 @@ public class TraverseActiveRegionsTest extends BaseTest {
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
verifyReadPrimary(region, "simple");
verifyReadPrimary(region, "overlap_equal");
verifyReadPrimary(region, "overlap_unequal");
getRead(region, "simple");
getRead(region, "overlap_equal");
getRead(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_only");
// TODO: fail verifyReadNonPrimary(region, "extended_and_np");
// TODO: fail getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
@ -241,10 +400,10 @@ public class TraverseActiveRegionsTest extends BaseTest {
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
// TODO: fail verifyReadNonPrimary(region, "boundary_equal");
verifyReadPrimary(region, "boundary_unequal");
getRead(region, "boundary_equal");
getRead(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_only");
// TODO: fail verifyReadPrimary(region, "extended_and_np");
// TODO: fail getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
@ -253,10 +412,10 @@ public class TraverseActiveRegionsTest extends BaseTest {
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
verifyReadPrimary(region, "boundary_equal");
// TODO: fail verifyReadNonPrimary(region, "boundary_unequal");
// TODO: fail verifyReadExtended(region, "extended_only");
// TODO: fail verifyReadExtended(region, "extended_and_np");
getRead(region, "boundary_equal");
getRead(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_only");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
@ -267,33 +426,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
verifyReadNotPlaced(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_only");
verifyReadNotPlaced(region, "extended_and_np");
// TODO: fail getRead(region, "extended_only");
// TODO: fail getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadPrimary(region, "simple20");
getRead(region, "simple20");
// TODO: more tests and edge cases
}
private void verifyReadPrimary(ActiveRegion region, String readName) {
SAMRecord read = getRead(region, readName);
Assert.assertFalse(read.getNotPrimaryAlignmentFlag(), "Read " + read + " not primary in active region " + region);
}
private void verifyReadNonPrimary(ActiveRegion region, String readName) {
SAMRecord read = getRead(region, readName);
Assert.assertTrue(read.getNotPrimaryAlignmentFlag(), "Read " + read + " primary in active region " + region);
}
private void verifyReadExtended(ActiveRegion region, String readName) {
Assert.fail("The Extended read state has not been implemented");
}
private void verifyReadNotPlaced(ActiveRegion region, String readName) {
for (SAMRecord read : region.getReads()) {
if (read.getReadName().equals(readName))
Assert.fail("Read " + readName + " found in active region " + region);
}
}
}
private SAMRecord getRead(ActiveRegion region, String readName) {
@ -302,7 +447,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
return read;
}
Assert.fail("Read " + readName + " not found in active region " + region);
Assert.fail("Read " + readName + " not assigned to active region " + region);
return null;
}