Add ActiveRegionReadState handling
This commit is contained in:
parent
f0395b457a
commit
198923b597
|
|
@ -45,6 +45,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection
|
|||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
|
|
@ -295,9 +296,15 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
@Override
|
||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||
|
||||
// enable non primary reads in the active region
|
||||
// enable non primary and extended reads in the active region
|
||||
@Override
|
||||
public boolean wantsNonPrimaryReads() { return true; }
|
||||
public EnumSet<ActiveRegionReadState> desiredReadStates() {
|
||||
return EnumSet.of(
|
||||
ActiveRegionReadState.PRIMARY,
|
||||
ActiveRegionReadState.NONPRIMARY,
|
||||
ActiveRegionReadState.EXTENDED
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
@Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})
|
||||
|
|
|
|||
|
|
@ -258,13 +258,23 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
activeRegion.add( read );
|
||||
}
|
||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||
if( !bestRegion.equals(otherRegionToTest) && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
|
||||
otherRegionToTest.add( read );
|
||||
if( !bestRegion.equals(otherRegionToTest) ) {
|
||||
// check for non-primary vs. extended
|
||||
if ( otherRegionToTest.getLocation().overlapsP( readLoc ) ) {
|
||||
otherRegionToTest.add( read );
|
||||
} else if ( walker.wantsExtendedReads() && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
|
||||
otherRegionToTest.add( read );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
placedReads.add( read );
|
||||
} else if( activeRegion.getExtendedLoc().overlapsP( readLoc ) && walker.wantsNonPrimaryReads() ) {
|
||||
// check for non-primary vs. extended
|
||||
} else if( activeRegion.getLocation().overlapsP( readLoc ) ) {
|
||||
if ( walker.wantsNonPrimaryReads() ) {
|
||||
activeRegion.add( read );
|
||||
}
|
||||
} else if( walker.wantsExtendedReads() && activeRegion.getExtendedLoc().overlapsP( readLoc )) {
|
||||
activeRegion.add( read );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,14 +14,14 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Base class for all the Active Region Walkers.
|
||||
|
|
@ -71,8 +71,20 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
|
|||
return true; // We are keeping all the reads
|
||||
}
|
||||
|
||||
public boolean wantsNonPrimaryReads() {
|
||||
return false;
|
||||
public EnumSet<ActiveRegionReadState> desiredReadStates() {
|
||||
return EnumSet.of(ActiveRegionReadState.PRIMARY);
|
||||
}
|
||||
|
||||
public final boolean wantsNonPrimaryReads() {
|
||||
return desiredReadStates().contains(ActiveRegionReadState.NONPRIMARY);
|
||||
}
|
||||
|
||||
public boolean wantsExtendedReads() {
|
||||
return desiredReadStates().contains(ActiveRegionReadState.EXTENDED);
|
||||
}
|
||||
|
||||
public boolean wantsUnmappedReads() {
|
||||
return desiredReadStates().contains(ActiveRegionReadState.UNMAPPED);
|
||||
}
|
||||
|
||||
// Determine probability of active status over the AlignmentContext
|
||||
|
|
|
|||
|
|
@ -0,0 +1,16 @@
|
|||
package org.broadinstitute.sting.utils.activeregion;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: thibault
|
||||
* Date: 11/26/12
|
||||
* Time: 2:35 PM
|
||||
*
|
||||
* Describes how a read relates to an assigned ActiveRegion
|
||||
*/
|
||||
public enum ActiveRegionReadState {
|
||||
PRIMARY, // This is the read's primary region
|
||||
NONPRIMARY, // This region overlaps the read, but it is not primary
|
||||
EXTENDED, // This region would overlap the read if it were extended
|
||||
UNMAPPED // This read is not mapped
|
||||
}
|
||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.traversals;
|
|||
|
||||
import com.google.java.contract.PreconditionError;
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -46,6 +47,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
|
||||
private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
|
||||
private final double prob;
|
||||
private EnumSet<ActiveRegionReadState> states = super.desiredReadStates();
|
||||
|
||||
protected List<GenomeLoc> isActiveCalls = new ArrayList<GenomeLoc>();
|
||||
protected Map<GenomeLoc, ActiveRegion> mappedActiveRegions = new HashMap<GenomeLoc, ActiveRegion>();
|
||||
|
||||
|
|
@ -57,6 +60,16 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
this.prob = constProb;
|
||||
}
|
||||
|
||||
public DummyActiveRegionWalker(EnumSet<ActiveRegionReadState> wantStates) {
|
||||
this.prob = 1.0;
|
||||
this.states = wantStates;
|
||||
}
|
||||
|
||||
@Override
|
||||
public EnumSet<ActiveRegionReadState> desiredReadStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
isActiveCalls.add(ref.getLocus());
|
||||
|
|
@ -202,12 +215,158 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testReadMapping() {
|
||||
public void testPrimaryReadMapping() {
|
||||
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
|
||||
|
||||
// Contract: Each read has the Primary state in a single region (or none)
|
||||
// This is the region of maximum overlap for the read (earlier if tied)
|
||||
|
||||
// simple: Primary in 1:1-999
|
||||
// overlap_equal: Primary in 1:1-999
|
||||
// overlap_unequal: Primary in 1:1-999
|
||||
// boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
|
||||
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||
// extended_only: Extended in 1:2000-2999
|
||||
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||
// outside_intervals: none
|
||||
// simple20: Primary in 20:10000-10100
|
||||
|
||||
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
||||
ActiveRegion region;
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
|
||||
|
||||
getRead(region, "simple");
|
||||
getRead(region, "overlap_equal");
|
||||
getRead(region, "overlap_unequal");
|
||||
verifyReadNotPlaced(region, "boundary_equal");
|
||||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_only");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
||||
|
||||
verifyReadNotPlaced(region, "simple");
|
||||
verifyReadNotPlaced(region, "overlap_equal");
|
||||
verifyReadNotPlaced(region, "overlap_unequal");
|
||||
verifyReadNotPlaced(region, "boundary_equal");
|
||||
getRead(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_only");
|
||||
// TODO: fail getRead(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||
|
||||
verifyReadNotPlaced(region, "simple");
|
||||
verifyReadNotPlaced(region, "overlap_equal");
|
||||
verifyReadNotPlaced(region, "overlap_unequal");
|
||||
getRead(region, "boundary_equal");
|
||||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_only");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||
|
||||
verifyReadNotPlaced(region, "simple");
|
||||
verifyReadNotPlaced(region, "overlap_equal");
|
||||
verifyReadNotPlaced(region, "overlap_unequal");
|
||||
verifyReadNotPlaced(region, "boundary_equal");
|
||||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_only");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
getRead(region, "simple20");
|
||||
|
||||
// TODO: more tests and edge cases
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNonPrimaryReadMapping() {
|
||||
DummyActiveRegionWalker walker = new DummyActiveRegionWalker(
|
||||
EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY));
|
||||
|
||||
// Contract: Each read has the Primary state in a single region (or none)
|
||||
// This is the region of maximum overlap for the read (earlier if tied)
|
||||
|
||||
// Contract: Each read has the Non-Primary state in all other regions it overlaps
|
||||
|
||||
// simple: Primary in 1:1-999
|
||||
// overlap_equal: Primary in 1:1-999
|
||||
// overlap_unequal: Primary in 1:1-999
|
||||
// boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
|
||||
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||
// extended_only: Extended in 1:2000-2999
|
||||
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||
// outside_intervals: none
|
||||
// simple20: Primary in 20:10000-10100
|
||||
|
||||
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
||||
ActiveRegion region;
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
|
||||
|
||||
getRead(region, "simple");
|
||||
getRead(region, "overlap_equal");
|
||||
getRead(region, "overlap_unequal");
|
||||
verifyReadNotPlaced(region, "boundary_equal");
|
||||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_only");
|
||||
// TODO: fail getRead(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
||||
|
||||
verifyReadNotPlaced(region, "simple");
|
||||
verifyReadNotPlaced(region, "overlap_equal");
|
||||
verifyReadNotPlaced(region, "overlap_unequal");
|
||||
getRead(region, "boundary_equal");
|
||||
getRead(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_only");
|
||||
// TODO: fail getRead(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||
|
||||
verifyReadNotPlaced(region, "simple");
|
||||
verifyReadNotPlaced(region, "overlap_equal");
|
||||
verifyReadNotPlaced(region, "overlap_unequal");
|
||||
getRead(region, "boundary_equal");
|
||||
getRead(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_only");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||
|
||||
verifyReadNotPlaced(region, "simple");
|
||||
verifyReadNotPlaced(region, "overlap_equal");
|
||||
verifyReadNotPlaced(region, "overlap_unequal");
|
||||
verifyReadNotPlaced(region, "boundary_equal");
|
||||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_only");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
getRead(region, "simple20");
|
||||
|
||||
// TODO: more tests and edge cases
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExtendedReadMapping() {
|
||||
DummyActiveRegionWalker walker = new DummyActiveRegionWalker(
|
||||
EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY, ActiveRegionReadState.EXTENDED));
|
||||
|
||||
// Contract: Each read has the Primary state in a single region (or none)
|
||||
// This is the region of maximum overlap for the read (earlier if tied)
|
||||
|
||||
// Contract: Each read has the Non-Primary state in all other regions it overlaps
|
||||
// Contract: Each read has the Extended state in regions where it only overlaps if the region is extended
|
||||
|
||||
|
|
@ -226,13 +385,13 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
|
||||
|
||||
verifyReadPrimary(region, "simple");
|
||||
verifyReadPrimary(region, "overlap_equal");
|
||||
verifyReadPrimary(region, "overlap_unequal");
|
||||
getRead(region, "simple");
|
||||
getRead(region, "overlap_equal");
|
||||
getRead(region, "overlap_unequal");
|
||||
verifyReadNotPlaced(region, "boundary_equal");
|
||||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_only");
|
||||
// TODO: fail verifyReadNonPrimary(region, "extended_and_np");
|
||||
// TODO: fail getRead(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
|
|
@ -241,10 +400,10 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
verifyReadNotPlaced(region, "simple");
|
||||
verifyReadNotPlaced(region, "overlap_equal");
|
||||
verifyReadNotPlaced(region, "overlap_unequal");
|
||||
// TODO: fail verifyReadNonPrimary(region, "boundary_equal");
|
||||
verifyReadPrimary(region, "boundary_unequal");
|
||||
getRead(region, "boundary_equal");
|
||||
getRead(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_only");
|
||||
// TODO: fail verifyReadPrimary(region, "extended_and_np");
|
||||
// TODO: fail getRead(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
|
|
@ -253,10 +412,10 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
verifyReadNotPlaced(region, "simple");
|
||||
verifyReadNotPlaced(region, "overlap_equal");
|
||||
verifyReadNotPlaced(region, "overlap_unequal");
|
||||
verifyReadPrimary(region, "boundary_equal");
|
||||
// TODO: fail verifyReadNonPrimary(region, "boundary_unequal");
|
||||
// TODO: fail verifyReadExtended(region, "extended_only");
|
||||
// TODO: fail verifyReadExtended(region, "extended_and_np");
|
||||
getRead(region, "boundary_equal");
|
||||
getRead(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_only");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
|
|
@ -267,33 +426,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
verifyReadNotPlaced(region, "overlap_unequal");
|
||||
verifyReadNotPlaced(region, "boundary_equal");
|
||||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_only");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
// TODO: fail getRead(region, "extended_only");
|
||||
// TODO: fail getRead(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadPrimary(region, "simple20");
|
||||
getRead(region, "simple20");
|
||||
|
||||
// TODO: more tests and edge cases
|
||||
}
|
||||
|
||||
private void verifyReadPrimary(ActiveRegion region, String readName) {
|
||||
SAMRecord read = getRead(region, readName);
|
||||
Assert.assertFalse(read.getNotPrimaryAlignmentFlag(), "Read " + read + " not primary in active region " + region);
|
||||
}
|
||||
|
||||
private void verifyReadNonPrimary(ActiveRegion region, String readName) {
|
||||
SAMRecord read = getRead(region, readName);
|
||||
Assert.assertTrue(read.getNotPrimaryAlignmentFlag(), "Read " + read + " primary in active region " + region);
|
||||
}
|
||||
|
||||
private void verifyReadExtended(ActiveRegion region, String readName) {
|
||||
Assert.fail("The Extended read state has not been implemented");
|
||||
}
|
||||
|
||||
private void verifyReadNotPlaced(ActiveRegion region, String readName) {
|
||||
for (SAMRecord read : region.getReads()) {
|
||||
if (read.getReadName().equals(readName))
|
||||
Assert.fail("Read " + readName + " found in active region " + region);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private SAMRecord getRead(ActiveRegion region, String readName) {
|
||||
|
|
@ -302,7 +447,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
return read;
|
||||
}
|
||||
|
||||
Assert.fail("Read " + readName + " not found in active region " + region);
|
||||
Assert.fail("Read " + readName + " not assigned to active region " + region);
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue