ActiveRegionWalker's isActive function returns a results object now instead of just a double.

This commit is contained in:
Ryan Poplin 2012-07-27 11:01:39 -04:00
parent 35e803e110
commit a0890126a8
8 changed files with 96 additions and 38 deletions

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineGATK;
@ -303,8 +304,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
public boolean wantsNonPrimaryReads() { return true; }
@Override
@Ensures({"result >= 0.0", "result <= 1.0"})
public double isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) {
@Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})
public ActivityProfileResult isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) {
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
for( final VariantContext vc : tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()) ) {
@ -313,15 +314,15 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
}
}
if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) {
return 1.0;
return new ActivityProfileResult(1.0);
}
}
if( USE_ALLELES_TRIGGER ) {
return ( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
return new ActivityProfileResult( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
}
if( context == null ) { return 0.0; }
if( context == null ) { return new ActivityProfileResult(0.0); }
final List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
noCall.add(Allele.NO_CALL);
@ -362,7 +363,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
alleles.add( FAKE_REF_ALLELE );
alleles.add( FAKE_ALT_ALLELE );
final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL);
return ( vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ) );
return new ActivityProfileResult( vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ) );
}
//---------------------------------------------------------------------------------------------------------------

View File

@ -279,7 +279,6 @@ public class LocusIteratorByState extends LocusIterator {
*/
private void lazyLoadNextAlignmentContext() {
while (nextAlignmentContext == null && readStates.hasNext()) {
// this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref:
readStates.collectPendingReads();
final GenomeLoc location = getLocation();
@ -378,7 +377,7 @@ public class LocusIteratorByState extends LocusIterator {
CigarOperator op = state.stepForwardOnGenome();
if (op == null) {
// we discard the read only when we are past its end AND indel at the end of the read (if any) was
// already processed. Keeping the read state that retunred null upon stepForwardOnGenome() is safe
// already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
it.remove(); // we've stepped off the end of the object
}

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -69,8 +70,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
for(int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++ ) {
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
final double isActiveProb = ( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 );
profile.add(fakeLoc, isActiveProb);
profile.add(fakeLoc, new ActivityProfileResult( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ));
}
}
}
@ -86,8 +86,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
// Call the walkers isActive function for this locus and add them to the list to be integrated later
if( initialIntervals == null || initialIntervals.overlaps( location ) ) {
final double isActiveProb = walkerActiveProb(walker, tracker, refContext, locus, location);
profile.add(location, isActiveProb);
profile.add(location, walkerActiveProb(walker, tracker, refContext, locus, location));
}
// Grab all the previously unseen reads from this pileup and add them to the massive read list
@ -144,11 +143,11 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
//
// --------------------------------------------------------------------------------
private final double walkerActiveProb(final ActiveRegionWalker<M,T> walker,
private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker<M,T> walker,
final RefMetaDataTracker tracker, final ReferenceContext refContext,
final AlignmentContext locus, final GenomeLoc location) {
if ( walker.hasPresetActiveRegions() ) {
return walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0;
return new ActivityProfileResult(walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
} else {
return walker.isActive( tracker, refContext, locus );
}

View File

@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
@ -72,7 +73,7 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
}
// Determine probability of active status over the AlignmentContext
public abstract double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
// Map over the ActiveRegion
public abstract MapType map(final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker);

View File

@ -1596,7 +1596,17 @@ public class MathUtils {
result += v1[k].doubleValue() * v2[k].doubleValue();
return result;
}
public static double dotProduct(double[] v1, double[] v2) {
if (v1.length != v2.length)
throw new UserException("BUG: vectors v1, v2 of different size in vectorSum()");
double result = 0.0;
for (int k = 0; k < v1.length; k++)
result += v1[k] * v2[k];
return result;
}
public static double[] vectorLog10(double v1[]) {

View File

@ -31,7 +31,6 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@ -46,13 +45,13 @@ public class ActivityProfile {
final GenomeLocParser parser;
final boolean presetRegions;
GenomeLoc regionStartLoc = null;
final List<Double> isActiveList;
final List<ActivityProfileResult> isActiveList;
private GenomeLoc lastLoc = null;
private static final int FILTER_SIZE = 65;
private static final Double[] GaussianKernel;
private static final double[] GaussianKernel;
static {
GaussianKernel = new Double[2*FILTER_SIZE + 1];
GaussianKernel = new double[2*FILTER_SIZE + 1];
for( int iii = 0; iii < 2*FILTER_SIZE + 1; iii++ ) {
GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 40.0, iii);
}
@ -63,22 +62,22 @@ public class ActivityProfile {
// todo -- add unit tests
// TODO -- own preset regions
public ActivityProfile(final GenomeLocParser parser, final boolean presetRegions) {
this(parser, presetRegions, new ArrayList<Double>(), null);
this(parser, presetRegions, new ArrayList<ActivityProfileResult>(), null);
}
protected ActivityProfile(final GenomeLocParser parser, final boolean presetRegions, final List<Double> isActiveList, final GenomeLoc regionStartLoc) {
protected ActivityProfile(final GenomeLocParser parser, final boolean presetRegions, final List<ActivityProfileResult> isActiveList, final GenomeLoc regionStartLoc) {
this.parser = parser;
this.presetRegions = presetRegions;
this.isActiveList = isActiveList;
this.regionStartLoc = regionStartLoc;
}
public void add(final GenomeLoc loc, final double score) {
public void add(final GenomeLoc loc, final ActivityProfileResult result) {
if ( loc.size() != 1 )
throw new ReviewedStingException("Bad add call to ActivityProfile: loc " + loc + " size != 1" );
if ( lastLoc != null && loc.getStart() != lastLoc.getStop() + 1 )
throw new ReviewedStingException("Bad add call to ActivityProfile: lastLoc added " + lastLoc + " and next is " + loc);
isActiveList.add(score);
isActiveList.add(result);
if( regionStartLoc == null ) {
regionStartLoc = loc;
}
@ -93,22 +92,33 @@ public class ActivityProfile {
* @return a new ActivityProfile that's the band-pass filtered version of this profile
*/
public ActivityProfile bandPassFilter() {
final Double[] activeProbArray = isActiveList.toArray(new Double[isActiveList.size()]);
final Double[] filteredProbArray = new Double[activeProbArray.length];
final double[] activeProbArray = new double[isActiveList.size()];
int iii = 0;
for( final ActivityProfileResult result : isActiveList ) {
activeProbArray[iii++] = result.isActiveProb;
}
final double[] filteredProbArray = new double[activeProbArray.length];
if( !presetRegions ) {
for( int iii = 0; iii < activeProbArray.length; iii++ ) {
final Double[] kernel = (Double[]) ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii));
final Double[] activeProbSubArray = (Double[]) ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1));
for( iii = 0; iii < activeProbArray.length; iii++ ) {
final double[] kernel = ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii));
final double[] activeProbSubArray = ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1));
filteredProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, kernel);
}
}
return new ActivityProfile(parser, presetRegions, Arrays.asList(filteredProbArray), regionStartLoc);
iii = 0;
for( final double prob : filteredProbArray ) {
final ActivityProfileResult result = isActiveList.get(iii++);
result.isActiveProb = prob;
result.resultState = ActivityProfileResult.ActivityProfileResultState.NONE;
result.resultValue = null;
}
return new ActivityProfile(parser, presetRegions, isActiveList, regionStartLoc);
}
/**
* Partition this profile into active regions
* @param activeRegionExtension
* @return
* @param activeRegionExtension the amount of margin overlap in the active region
* @return the list of active regions
*/
public List<ActiveRegion> createActiveRegions( final int activeRegionExtension, final int maxRegionSize ) {
final double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author
@ -119,14 +129,14 @@ public class ActivityProfile {
return Collections.emptyList();
} else if( isActiveList.size() == 1 ) {
// there's a single element, it's either active or inactive
boolean isActive = isActiveList.get(0) > ACTIVE_PROB_THRESHOLD;
boolean isActive = isActiveList.get(0).isActiveProb > ACTIVE_PROB_THRESHOLD;
returnList.addAll(createActiveRegion(isActive, 0, 0, activeRegionExtension, maxRegionSize));
} else {
// there are 2+ elements, divide these up into regions
boolean isActive = isActiveList.get(0) > ACTIVE_PROB_THRESHOLD;
boolean isActive = isActiveList.get(0).isActiveProb > ACTIVE_PROB_THRESHOLD;
int curStart = 0;
for(int iii = 1; iii < isActiveList.size(); iii++ ) {
final boolean thisStatus = isActiveList.get(iii) > ACTIVE_PROB_THRESHOLD;
final boolean thisStatus = isActiveList.get(iii).isActiveProb > ACTIVE_PROB_THRESHOLD;
if( isActive != thisStatus ) {
returnList.addAll(createActiveRegion(isActive, curStart, iii - 1, activeRegionExtension, maxRegionSize));
isActive = thisStatus;
@ -143,7 +153,7 @@ public class ActivityProfile {
* @param isActive should the region be active?
* @param curStart offset (0-based) from the start of this region
* @param curEnd offset (0-based) from the start of this region
* @param activeRegionExtension
* @param activeRegionExtension the amount of margin overlap in the active region
* @return a fully initialized ActiveRegion with the above properties
*/
private final List<ActiveRegion> createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize) {
@ -161,7 +171,7 @@ public class ActivityProfile {
final int size = curEnd - curStart + 1;
for( int iii = curStart + (int)(size*0.25); iii < curEnd - (int)(size*0.25); iii++ ) {
if( isActiveList.get(iii) < minProb ) { minProb = isActiveList.get(iii); cutPoint = iii; }
if( isActiveList.get(iii).isActiveProb < minProb ) { minProb = isActiveList.get(iii).isActiveProb; cutPoint = iii; }
}
final List<ActiveRegion> leftList = createActiveRegion(isActive, curStart, cutPoint, activeRegionExtension, maxRegionSize, new ArrayList<ActiveRegion>());
final List<ActiveRegion> rightList = createActiveRegion(isActive, cutPoint+1, curEnd, activeRegionExtension, maxRegionSize, new ArrayList<ActiveRegion>());

View File

@ -0,0 +1,31 @@
package org.broadinstitute.sting.utils.activeregion;
/**
* Created with IntelliJ IDEA.
* User: rpoplin
* Date: 7/27/12
*/
public class ActivityProfileResult {
public double isActiveProb;
public ActivityProfileResultState resultState;
public Number resultValue;
public enum ActivityProfileResultState {
NONE,
HIGH_QUALITY_SOFT_CLIPS
}
public ActivityProfileResult( final double isActiveProb ) {
this.isActiveProb = isActiveProb;
this.resultState = ActivityProfileResultState.NONE;
this.resultValue = null;
}
public ActivityProfileResult( final double isActiveProb, final ActivityProfileResultState resultState, final Number resultValue ) {
this.isActiveProb = isActiveProb;
this.resultState = resultState;
this.resultValue = resultValue;
}
}

View File

@ -123,12 +123,12 @@ public class ActivityProfileUnitTest extends BaseTest {
for ( int i = 0; i < cfg.probs.size(); i++ ) {
double p = cfg.probs.get(i);
GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i);
profile.add(loc, p);
profile.add(loc, new ActivityProfileResult(p));
}
Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() ));
Assert.assertEquals(profile.size(), cfg.probs.size());
Assert.assertEquals(profile.isActiveList, cfg.probs);
assertProbsAreEqual(profile.isActiveList, cfg.probs);
assertRegionsAreEqual(profile.createActiveRegions(0, 100), cfg.expectedRegions);
}
@ -140,5 +140,12 @@ public class ActivityProfileUnitTest extends BaseTest {
}
}
private void assertProbsAreEqual(List<ActivityProfileResult> actual, List<Double> expected) {
Assert.assertEquals(actual.size(), expected.size());
for ( int i = 0; i < actual.size(); i++ ) {
Assert.assertEquals(actual.get(i).isActiveProb, expected.get(i));
}
}
// todo -- test extensions
}