Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
252b830aa8
|
|
@ -344,8 +344,12 @@ public class GATKRunReport {
|
||||||
@Element(required = false, name = "is-user-exception")
|
@Element(required = false, name = "is-user-exception")
|
||||||
Boolean isUserException;
|
Boolean isUserException;
|
||||||
|
|
||||||
|
@Element(required = false, name = "exception-class")
|
||||||
|
Class exceptionClass;
|
||||||
|
|
||||||
public ExceptionToXML(Throwable e) {
|
public ExceptionToXML(Throwable e) {
|
||||||
message = e.getMessage();
|
message = e.getMessage();
|
||||||
|
exceptionClass = e.getClass();
|
||||||
isUserException = e instanceof UserException;
|
isUserException = e instanceof UserException;
|
||||||
for (StackTraceElement element : e.getStackTrace()) {
|
for (StackTraceElement element : e.getStackTrace()) {
|
||||||
stackTrace.add(element.toString());
|
stackTrace.add(element.toString());
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.traversals;
|
package org.broadinstitute.sting.gatk.traversals;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
|
@ -10,6 +11,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||||
|
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
|
|
@ -42,38 +44,31 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));
|
logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));
|
||||||
|
|
||||||
final LocusView locusView = getLocusView( walker, dataProvider );
|
final LocusView locusView = getLocusView( walker, dataProvider );
|
||||||
final GenomeLocSortedSet initialIntervals = engine.getIntervals(); // BUGBUG: unfortunate inefficiency that needs to be removed
|
final GenomeLocSortedSet initialIntervals = engine.getIntervals();
|
||||||
|
|
||||||
final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
||||||
final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
||||||
|
|
||||||
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
|
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
|
||||||
|
|
||||||
int minStart = Integer.MAX_VALUE;
|
int minStart = Integer.MAX_VALUE;
|
||||||
final ArrayList<Double> isActiveList = new ArrayList<Double>();
|
ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
|
||||||
GenomeLoc firstIsActiveStart = null;
|
|
||||||
|
|
||||||
//ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
|
ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView);
|
||||||
ReferenceOrderedView referenceOrderedDataView = null;
|
|
||||||
if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
|
|
||||||
referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider );
|
|
||||||
else
|
|
||||||
referenceOrderedDataView = (RodLocusView)locusView;
|
|
||||||
|
|
||||||
// We keep processing while the next reference location is within the interval
|
// We keep processing while the next reference location is within the interval
|
||||||
GenomeLoc prevLoc = null;
|
GenomeLoc prevLoc = null;
|
||||||
while( locusView.hasNext() ) {
|
while( locusView.hasNext() ) {
|
||||||
final AlignmentContext locus = locusView.next();
|
final AlignmentContext locus = locusView.next();
|
||||||
GenomeLoc location = locus.getLocation();
|
GenomeLoc location = locus.getLocation();
|
||||||
|
|
||||||
if(prevLoc != null) {
|
if(prevLoc != null) {
|
||||||
for(int iii = prevLoc.getStart() + 1; iii < location.getStart(); iii++ ) {
|
// fill in the active / inactive labels from the stop of the previous location to the start of this location
|
||||||
|
// TODO refactor to separate function
|
||||||
|
for(int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++ ) {
|
||||||
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
|
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
|
||||||
if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
|
if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
|
||||||
final double isActiveProb = ( walker.presetActiveRegions == null ? 0.0 : ( walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ) );
|
final double isActiveProb = ( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 );
|
||||||
isActiveList.add( isActiveProb );
|
profile.add(fakeLoc, isActiveProb);
|
||||||
if( firstIsActiveStart == null ) {
|
|
||||||
firstIsActiveStart = fakeLoc;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -89,12 +84,8 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
|
|
||||||
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
||||||
if( initialIntervals == null || initialIntervals.overlaps( location ) ) {
|
if( initialIntervals == null || initialIntervals.overlaps( location ) ) {
|
||||||
final double isActiveProb = ( walker.presetActiveRegions == null ? walker.isActive( tracker, refContext, locus )
|
final double isActiveProb = walkerActiveProb(walker, tracker, refContext, locus, location);
|
||||||
: ( walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0 ) );
|
profile.add(location, isActiveProb);
|
||||||
isActiveList.add( isActiveProb );
|
|
||||||
if( firstIsActiveStart == null ) {
|
|
||||||
firstIsActiveStart = location;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Grab all the previously unseen reads from this pileup and add them to the massive read list
|
// Grab all the previously unseen reads from this pileup and add them to the massive read list
|
||||||
|
|
@ -103,57 +94,105 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
if( !myReads.contains(read) ) {
|
if( !myReads.contains(read) ) {
|
||||||
myReads.add(read);
|
myReads.add(read);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// If this is the last pileup for this shard calculate the minimum alignment start so that we know
|
// If this is the last pileup for this shard calculate the minimum alignment start so that we know
|
||||||
// which active regions in the work queue are now safe to process
|
// which active regions in the work queue are now safe to process
|
||||||
if( !locusView.hasNext() ) {
|
minStart = Math.min(minStart, read.getAlignmentStart());
|
||||||
for( final PileupElement p : locus.getBasePileup() ) {
|
|
||||||
final GATKSAMRecord read = p.getRead();
|
|
||||||
if( !myReads.contains(read) ) {
|
|
||||||
myReads.add(read);
|
|
||||||
}
|
|
||||||
if( read.getAlignmentStart() < minStart ) { minStart = read.getAlignmentStart(); }
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
prevLoc = location;
|
prevLoc = location;
|
||||||
|
|
||||||
printProgress(dataProvider.getShard(), locus.getLocation());
|
printProgress(dataProvider.getShard(), locus.getLocation());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Take the individual isActive calls and integrate them into contiguous active regions and
|
// Take the individual isActive calls and integrate them into contiguous active regions and
|
||||||
// add these blocks of work to the work queue
|
// add these blocks of work to the work queue
|
||||||
final ArrayList<ActiveRegion> activeRegions = integrateActiveList( isActiveList, firstIsActiveStart, activeRegionExtension, walker.presetActiveRegions != null );
|
// band-pass filter the list of isActive probabilities and turn into active regions
|
||||||
logger.debug("Integrated " + isActiveList.size() + " isActive calls into " + activeRegions.size() + " regions." );
|
final ActivityProfile bandPassFiltered = profile.bandPassFilter();
|
||||||
if( walker.activeRegionOutStream == null ) {
|
final List<ActiveRegion> activeRegions = bandPassFiltered.createActiveRegions( activeRegionExtension );
|
||||||
|
|
||||||
|
// add active regions to queue of regions to process
|
||||||
workQueue.addAll( activeRegions );
|
workQueue.addAll( activeRegions );
|
||||||
} else { // Just want to output the active regions to a file, not actually process them
|
logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." );
|
||||||
for( final ActiveRegion activeRegion : activeRegions ) {
|
|
||||||
|
// now go and process all of the active regions
|
||||||
|
sum = processActiveRegions(walker, sum, minStart, dataProvider.getLocus().getContig());
|
||||||
|
}
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// simple utility functions
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private final double walkerActiveProb(final ActiveRegionWalker<M,T> walker,
|
||||||
|
final RefMetaDataTracker tracker, final ReferenceContext refContext,
|
||||||
|
final AlignmentContext locus, final GenomeLoc location) {
|
||||||
|
if ( walker.hasPresetActiveRegions() ) {
|
||||||
|
return walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0;
|
||||||
|
} else {
|
||||||
|
return walker.isActive( tracker, refContext, locus );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private ReferenceOrderedView getReferenceOrderedView( final ActiveRegionWalker<M,T> walker,
|
||||||
|
final LocusShardDataProvider dataProvider,
|
||||||
|
final LocusView locusView) {
|
||||||
|
if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
|
||||||
|
return new ManagingReferenceOrderedView( dataProvider );
|
||||||
|
else
|
||||||
|
return (RodLocusView)locusView;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// code to handle processing active regions
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private T processActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, final int minStart, final String currentContig ) {
|
||||||
|
if( walker.activeRegionOutStream != null ) {
|
||||||
|
writeActiveRegionsToStream(walker);
|
||||||
|
return sum;
|
||||||
|
} else {
|
||||||
|
return callWalkerMapOnActiveRegions(walker, sum, minStart, currentContig);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write out each active region to the walker activeRegionOutStream
|
||||||
|
*
|
||||||
|
* @param walker
|
||||||
|
*/
|
||||||
|
private void writeActiveRegionsToStream( final ActiveRegionWalker<M,T> walker ) {
|
||||||
|
// Just want to output the active regions to a file, not actually process them
|
||||||
|
for( final ActiveRegion activeRegion : workQueue ) {
|
||||||
if( activeRegion.isActive ) {
|
if( activeRegion.isActive ) {
|
||||||
walker.activeRegionOutStream.println( activeRegion.getLocation() );
|
walker.activeRegionOutStream.println( activeRegion.getLocation() );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private T callWalkerMapOnActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, final int minStart, final String currentContig ) {
|
||||||
// Since we've traversed sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them
|
// Since we've traversed sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them
|
||||||
while( workQueue.peek() != null && (workQueue.peek().getExtendedLoc().getStop() < minStart || !workQueue.peek().getExtendedLoc().getContig().equals(dataProvider.getLocus().getContig())) ) {
|
// TODO can implement parallel traversal here
|
||||||
|
while( workQueue.peek() != null ) {
|
||||||
|
final GenomeLoc extendedLoc = workQueue.peek().getExtendedLoc();
|
||||||
|
if ( extendedLoc.getStop() < minStart || (currentContig != null && !workQueue.peek().getExtendedLoc().getContig().equals(currentContig))) {
|
||||||
final ActiveRegion activeRegion = workQueue.remove();
|
final ActiveRegion activeRegion = workQueue.remove();
|
||||||
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
|
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Special function called in LinearMicroScheduler to empty out the work queue. Ugly for now but will be cleaned up when we push this functionality more into the engine
|
|
||||||
public T endTraversal( final Walker<M,T> walker, T sum) {
|
|
||||||
while( workQueue.peek() != null ) {
|
|
||||||
final ActiveRegion activeRegion = workQueue.remove();
|
|
||||||
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, (ActiveRegionWalker<M,T>) walker );
|
|
||||||
}
|
|
||||||
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
private T processActiveRegion( final ActiveRegion activeRegion, final LinkedHashSet<GATKSAMRecord> reads, final Queue<ActiveRegion> workQueue, final T sum, final ActiveRegionWalker<M,T> walker ) {
|
private T processActiveRegion( final ActiveRegion activeRegion, final LinkedHashSet<GATKSAMRecord> reads, final Queue<ActiveRegion> workQueue, final T sum, final ActiveRegionWalker<M,T> walker ) {
|
||||||
final ArrayList<GATKSAMRecord> placedReads = new ArrayList<GATKSAMRecord>();
|
final ArrayList<GATKSAMRecord> placedReads = new ArrayList<GATKSAMRecord>();
|
||||||
for( final GATKSAMRecord read : reads ) {
|
for( final GATKSAMRecord read : reads ) {
|
||||||
|
|
@ -193,6 +232,12 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
return walker.reduce( x, sum );
|
return walker.reduce( x, sum );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// engine interaction code
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the best view of loci for this walker given the available data.
|
* Gets the best view of loci for this walker given the available data.
|
||||||
* @param walker walker to interrogate.
|
* @param walker walker to interrogate.
|
||||||
|
|
@ -211,48 +256,11 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
|
throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
|
||||||
}
|
}
|
||||||
|
|
||||||
// band-pass filter the list of isActive probabilities and turn into active regions
|
/**
|
||||||
private ArrayList<ActiveRegion> integrateActiveList( final ArrayList<Double> activeList, final GenomeLoc firstIsActiveStart, final int activeRegionExtension, final boolean presetRegions ) {
|
* Special function called in LinearMicroScheduler to empty out the work queue.
|
||||||
|
* Ugly for now but will be cleaned up when we push this functionality more into the engine
|
||||||
final double ACTIVE_PROB_THRESHOLD = 0.2; // BUGBUG: needs to be set-able by the walker author
|
*/
|
||||||
final ArrayList<ActiveRegion> returnList = new ArrayList<ActiveRegion>();
|
public T endTraversal( final Walker<M,T> walker, T sum) {
|
||||||
if( activeList.size() == 0 ) {
|
return processActiveRegions((ActiveRegionWalker<M,T>)walker, sum, Integer.MAX_VALUE, null);
|
||||||
return returnList;
|
|
||||||
} else if( activeList.size() == 1 ) {
|
|
||||||
returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart(), firstIsActiveStart.getStart()),
|
|
||||||
activeList.get(0) > ACTIVE_PROB_THRESHOLD, engine.getGenomeLocParser(), activeRegionExtension ) );
|
|
||||||
return returnList;
|
|
||||||
} else {
|
|
||||||
final Double[] activeProbArray = activeList.toArray(new Double[activeList.size()]);
|
|
||||||
final double[] filteredProbArray = new double[activeProbArray.length];
|
|
||||||
final int FILTER_SIZE = ( presetRegions ? 0 : 50 ); // BUGBUG: needs to be set-able by the walker author
|
|
||||||
final int MAX_ACTIVE_REGION = ( presetRegions ? 16001 : 425 ); // BUGBUG: needs to be set-able by the walker author
|
|
||||||
for( int iii = 0; iii < activeProbArray.length; iii++ ) {
|
|
||||||
double maxVal = 0;
|
|
||||||
for( int jjj = Math.max(0, iii-FILTER_SIZE); jjj < Math.min(activeList.size(), iii+FILTER_SIZE+1); jjj++ ) {
|
|
||||||
if( activeProbArray[jjj] > maxVal ) { maxVal = activeProbArray[jjj]; }
|
|
||||||
}
|
|
||||||
filteredProbArray[iii] = maxVal;
|
|
||||||
}
|
|
||||||
|
|
||||||
boolean curStatus = filteredProbArray[0] > ACTIVE_PROB_THRESHOLD;
|
|
||||||
int curStart = 0;
|
|
||||||
for(int iii = 1; iii < filteredProbArray.length; iii++ ) {
|
|
||||||
final boolean thisStatus = filteredProbArray[iii] > ACTIVE_PROB_THRESHOLD;
|
|
||||||
if( curStatus != thisStatus || (iii-curStart) > MAX_ACTIVE_REGION ) {
|
|
||||||
returnList.add( new ActiveRegion(
|
|
||||||
engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (iii-1)),
|
|
||||||
curStatus, engine.getGenomeLocParser(), activeRegionExtension ) );
|
|
||||||
curStatus = thisStatus;
|
|
||||||
curStart = iii;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if( curStart != filteredProbArray.length-1 ) {
|
|
||||||
returnList.add( new ActiveRegion(
|
|
||||||
engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (filteredProbArray.length-1)),
|
|
||||||
curStatus, engine.getGenomeLocParser(), activeRegionExtension ) );
|
|
||||||
}
|
|
||||||
return returnList;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,10 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
|
||||||
|
|
||||||
public GenomeLocSortedSet presetActiveRegions = null;
|
public GenomeLocSortedSet presetActiveRegions = null;
|
||||||
|
|
||||||
|
public boolean hasPresetActiveRegions() {
|
||||||
|
return presetActiveRegions != null;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
if( activeRegionBindings == null ) { return; }
|
if( activeRegionBindings == null ) { return; }
|
||||||
|
|
|
||||||
|
|
@ -117,6 +117,13 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
||||||
@Argument(doc="Only output valid sequences.",fullName="onlyOutputValidAmplicons",required=false)
|
@Argument(doc="Only output valid sequences.",fullName="onlyOutputValidAmplicons",required=false)
|
||||||
boolean onlyOutputValidAmplicons = false;
|
boolean onlyOutputValidAmplicons = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If ignoreComplexEvents is true, the output fasta file will contain only sequences coming from SNPs and Indels.
|
||||||
|
* Complex substitutions will be ignored.
|
||||||
|
*/
|
||||||
|
@Argument(doc="Ignore complex genomic records.",fullName="ignoreComplexEvents",required=false)
|
||||||
|
boolean ignoreComplexEvents = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* BWA single-end alignment is used as a primer specificity proxy. Low-complexity regions (that don't align back to themselves as a best hit) are lowercased.
|
* BWA single-end alignment is used as a primer specificity proxy. Low-complexity regions (that don't align back to themselves as a best hit) are lowercased.
|
||||||
* This changes the size of the k-mer used for alignment.
|
* This changes the size of the k-mer used for alignment.
|
||||||
|
|
@ -146,6 +153,7 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
||||||
StringBuilder rawSequence;
|
StringBuilder rawSequence;
|
||||||
boolean sequenceInvalid;
|
boolean sequenceInvalid;
|
||||||
boolean isSiteSNP;
|
boolean isSiteSNP;
|
||||||
|
boolean isSiteIndel;
|
||||||
List<String> invReason;
|
List<String> invReason;
|
||||||
int indelCounter;
|
int indelCounter;
|
||||||
|
|
||||||
|
|
@ -244,6 +252,7 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
||||||
} else if ( validate != null ) {
|
} else if ( validate != null ) {
|
||||||
// record variant type in case it's needed in output format
|
// record variant type in case it's needed in output format
|
||||||
isSiteSNP = (validate.isSNP());
|
isSiteSNP = (validate.isSNP());
|
||||||
|
isSiteIndel = (validate.isIndel());
|
||||||
// doesn't matter if there's a mask here too -- this is what we want to validate
|
// doesn't matter if there's a mask here too -- this is what we want to validate
|
||||||
if ( validate.isFiltered() ) {
|
if ( validate.isFiltered() ) {
|
||||||
logger.warn("You are attempting to validate a filtered site. Why are you attempting to validate a filtered site? You should not be attempting to validate a filtered site.");
|
logger.warn("You are attempting to validate a filtered site. Why are you attempting to validate a filtered site? You should not be attempting to validate a filtered site.");
|
||||||
|
|
@ -504,6 +513,9 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (ignoreComplexEvents && !isSiteIndel && !isSiteSNP)
|
||||||
|
return;
|
||||||
|
|
||||||
if (!onlyOutputValidAmplicons || !sequenceInvalid) {
|
if (!onlyOutputValidAmplicons || !sequenceInvalid) {
|
||||||
String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D');
|
String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D');
|
||||||
if (sequenomOutput) {
|
if (sequenomOutput) {
|
||||||
|
|
@ -512,7 +524,7 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
||||||
out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity);
|
out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity);
|
||||||
}
|
}
|
||||||
else if (ilmnOutput) {
|
else if (ilmnOutput) {
|
||||||
String type = isSiteSNP?"SNP":"INDEL";
|
String type = isSiteSNP?"SNP":(isSiteIndel?"INDEL":"OTHER");
|
||||||
seqIdentity = seqIdentity.replace("*",""); // no * in ref allele
|
seqIdentity = seqIdentity.replace("*",""); // no * in ref allele
|
||||||
out.printf("%s,%s,%s,%s,%d,37,1000G,ExomePhase1,Forward,Plus,FALSE%n",probeName,type,seqIdentity,allelePos.getContig(),allelePos.getStart());
|
out.printf("%s,%s,%s,%s,%d,37,1000G,ExomePhase1,Forward,Plus,FALSE%n",probeName,type,seqIdentity,allelePos.getContig(),allelePos.getStart());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,11 @@ public class ActiveRegion implements HasGenomeLocation {
|
||||||
fullExtentReferenceLoc = extendedLoc;
|
fullExtentReferenceLoc = extendedLoc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "ActiveRegion " + activeRegionLoc.toString();
|
||||||
|
}
|
||||||
|
|
||||||
// add each read to the bin and extend the reference genome activeRegionLoc if needed
|
// add each read to the bin and extend the reference genome activeRegionLoc if needed
|
||||||
public void add( final GATKSAMRecord read ) {
|
public void add( final GATKSAMRecord read ) {
|
||||||
fullExtentReferenceLoc = fullExtentReferenceLoc.union( genomeLocParser.createGenomeLoc( read ) );
|
fullExtentReferenceLoc = fullExtentReferenceLoc.union( genomeLocParser.createGenomeLoc( read ) );
|
||||||
|
|
@ -78,4 +83,13 @@ public class ActiveRegion implements HasGenomeLocation {
|
||||||
public void clearReads() { reads.clear(); }
|
public void clearReads() { reads.clear(); }
|
||||||
public void remove( final GATKSAMRecord read ) { reads.remove( read ); }
|
public void remove( final GATKSAMRecord read ) { reads.remove( read ); }
|
||||||
public void removeAll( final ArrayList<GATKSAMRecord> readsToRemove ) { reads.removeAll( readsToRemove ); }
|
public void removeAll( final ArrayList<GATKSAMRecord> readsToRemove ) { reads.removeAll( readsToRemove ); }
|
||||||
|
|
||||||
|
public boolean equalExceptReads(final ActiveRegion other) {
|
||||||
|
if ( ! activeRegionLoc.equals(other.activeRegionLoc)) return false;
|
||||||
|
if ( isActive != other.isActive ) return false;
|
||||||
|
if ( genomeLocParser != other.genomeLocParser ) return false;
|
||||||
|
if ( extension != other.extension ) return false;
|
||||||
|
if ( ! extendedLoc.equals(other.extendedLoc) ) return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -0,0 +1,150 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.activeregion;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class holding information about per-base activity scores for the
|
||||||
|
* active region traversal
|
||||||
|
*
|
||||||
|
* @author Mark DePristo
|
||||||
|
* @since Date created
|
||||||
|
*/
|
||||||
|
public class ActivityProfile {
|
||||||
|
final GenomeLocParser parser;
|
||||||
|
final boolean presetRegions;
|
||||||
|
GenomeLoc regionStartLoc = null;
|
||||||
|
final List<Double> isActiveList;
|
||||||
|
|
||||||
|
private GenomeLoc lastLoc = null;
|
||||||
|
|
||||||
|
// todo -- add upfront the start and stop of the intervals
|
||||||
|
// todo -- check that no regions are unexpectedly missing
|
||||||
|
// todo -- add unit tests
|
||||||
|
// TODO -- own preset regions
|
||||||
|
public ActivityProfile(final GenomeLocParser parser, final boolean presetRegions) {
|
||||||
|
this(parser, presetRegions, new ArrayList<Double>(), null);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected ActivityProfile(final GenomeLocParser parser, final boolean presetRegions, final List<Double> isActiveList, final GenomeLoc regionStartLoc) {
|
||||||
|
this.parser = parser;
|
||||||
|
this.presetRegions = presetRegions;
|
||||||
|
this.isActiveList = isActiveList;
|
||||||
|
this.regionStartLoc = regionStartLoc;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void add(final GenomeLoc loc, final double score) {
|
||||||
|
if ( loc.size() != 1 )
|
||||||
|
throw new ReviewedStingException("Bad add call to ActivityProfile: loc " + loc + " size != 1" );
|
||||||
|
if ( lastLoc != null && loc.getStart() != lastLoc.getStop() + 1 )
|
||||||
|
throw new ReviewedStingException("Bad add call to ActivityProfile: lastLoc added " + lastLoc + " and next is " + loc);
|
||||||
|
isActiveList.add(score);
|
||||||
|
if( regionStartLoc == null ) {
|
||||||
|
regionStartLoc = loc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
return isActiveList.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Band pass this ActivityProfile, producing a new profile that's band pass filtered
|
||||||
|
* @return a new ActivityProfile that's the band-pass filtered version of this profile
|
||||||
|
*/
|
||||||
|
public ActivityProfile bandPassFilter() {
|
||||||
|
final Double[] activeProbArray = isActiveList.toArray(new Double[isActiveList.size()]);
|
||||||
|
final Double[] filteredProbArray = new Double[activeProbArray.length];
|
||||||
|
final int FILTER_SIZE = ( presetRegions ? 0 : 50 ); // TODO: needs to be set-able by the walker author
|
||||||
|
for( int iii = 0; iii < activeProbArray.length; iii++ ) {
|
||||||
|
double maxVal = 0;
|
||||||
|
for( int jjj = Math.max(0, iii-FILTER_SIZE); jjj < Math.min(isActiveList.size(), iii+FILTER_SIZE+1); jjj++ ) {
|
||||||
|
if( activeProbArray[jjj] > maxVal ) { maxVal = activeProbArray[jjj]; }
|
||||||
|
}
|
||||||
|
filteredProbArray[iii] = maxVal;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new ActivityProfile(parser, presetRegions, Arrays.asList(filteredProbArray), regionStartLoc);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Partition this profile into active regions
|
||||||
|
* @param activeRegionExtension
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public List<ActiveRegion> createActiveRegions( final int activeRegionExtension ) {
|
||||||
|
final int MAX_ACTIVE_REGION = ( presetRegions ? 16001 : 425 ); // TODO: needs to be set-able by the walker author
|
||||||
|
final double ACTIVE_PROB_THRESHOLD = 0.2; // TODO: needs to be set-able by the walker author
|
||||||
|
|
||||||
|
if( isActiveList.size() == 0 ) {
|
||||||
|
// no elements in the active list, just return an empty one
|
||||||
|
return Collections.emptyList();
|
||||||
|
} else if( isActiveList.size() == 1 ) {
|
||||||
|
// there's a single element, it's either active or inactive
|
||||||
|
boolean isActive = isActiveList.get(0) > ACTIVE_PROB_THRESHOLD;
|
||||||
|
final ActiveRegion region = createActiveRegion(isActive, 0, 0, activeRegionExtension );
|
||||||
|
return Collections.singletonList(region);
|
||||||
|
} else {
|
||||||
|
// there are 2+ elements, divide these up into regions
|
||||||
|
final ArrayList<ActiveRegion> returnList = new ArrayList<ActiveRegion>();
|
||||||
|
boolean isActive = isActiveList.get(0) > ACTIVE_PROB_THRESHOLD;
|
||||||
|
int curStart = 0;
|
||||||
|
for(int iii = 1; iii < isActiveList.size(); iii++ ) {
|
||||||
|
final boolean thisStatus = isActiveList.get(iii) > ACTIVE_PROB_THRESHOLD;
|
||||||
|
if( isActive != thisStatus || (iii-curStart) > MAX_ACTIVE_REGION ) {
|
||||||
|
returnList.add( createActiveRegion(isActive, curStart, iii-1, activeRegionExtension) );
|
||||||
|
isActive = thisStatus;
|
||||||
|
curStart = iii;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if( curStart != isActiveList.size()-1 ) {
|
||||||
|
returnList.add( createActiveRegion(isActive, curStart, isActiveList.size()-1, activeRegionExtension) );
|
||||||
|
}
|
||||||
|
return returnList;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper routine to create an active region based on our current start and end offsets
|
||||||
|
* @param isActive should the region be active?
|
||||||
|
* @param curStart offset (0-based) from the start of this region
|
||||||
|
* @param curEnd offset (0-based) from the start of this region
|
||||||
|
* @param activeRegionExtension
|
||||||
|
* @return a fully initialized ActiveRegion with the above properties
|
||||||
|
*/
|
||||||
|
private final ActiveRegion createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension) {
|
||||||
|
final GenomeLoc loc = parser.createGenomeLoc(regionStartLoc.getContig(), regionStartLoc.getStart() + curStart, regionStartLoc.getStart() + curEnd);
|
||||||
|
return new ActiveRegion( loc, isActive, parser, activeRegionExtension );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,44 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.activeregionqc;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.WalkerTest;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests CountReadsInActiveRegions
|
||||||
|
*/
|
||||||
|
public class CountReadsInActiveRegionsIntegrationTest extends WalkerTest {
|
||||||
|
@Test
|
||||||
|
public void basicTest() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T CountReadsInActiveRegions -R " + b37KGReference + " -I " + b37GoodNA12878BAM + " -L 20:10,000,000-10,200,000 -o %s",
|
||||||
|
1,
|
||||||
|
Arrays.asList("fcd581aa6befe85c7297509fa7b34edf"));
|
||||||
|
executeTest("CountReadsInActiveRegions:", spec);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,149 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// our package
|
||||||
|
package org.broadinstitute.sting.utils.activeregion;
|
||||||
|
|
||||||
|
|
||||||
|
// the imports for unit testing.
|
||||||
|
|
||||||
|
|
||||||
|
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
|
import org.broadinstitute.sting.utils.recalibration.QualQuantizer;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.BeforeClass;
|
||||||
|
import org.testng.annotations.BeforeSuite;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.lang.reflect.Array;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
|
public class ActivityProfileUnitTest extends BaseTest {
|
||||||
|
private GenomeLocParser genomeLocParser;
|
||||||
|
private GenomeLoc startLoc;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public void init() throws FileNotFoundException {
|
||||||
|
// sequence
|
||||||
|
ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(hg18Reference));
|
||||||
|
genomeLocParser = new GenomeLocParser(seq);
|
||||||
|
startLoc = genomeLocParser.createGenomeLoc("chr1", 1, 1, 100);
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Basic tests Provider
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class BasicActivityProfileTestProvider extends TestDataProvider {
|
||||||
|
List<Double> probs;
|
||||||
|
List<ActiveRegion> expectedRegions;
|
||||||
|
int extension = 0;
|
||||||
|
GenomeLoc regionStart = startLoc;
|
||||||
|
|
||||||
|
public BasicActivityProfileTestProvider(final List<Double> probs, final List<ActiveRegion> expectedRegions) {
|
||||||
|
super(BasicActivityProfileTestProvider.class);
|
||||||
|
this.probs = probs;
|
||||||
|
this.expectedRegions = expectedRegions;
|
||||||
|
setName(getName());
|
||||||
|
}
|
||||||
|
|
||||||
|
public BasicActivityProfileTestProvider(final List<Double> probs, boolean startActive, int ... startsAndStops) {
|
||||||
|
super(BasicActivityProfileTestProvider.class);
|
||||||
|
this.probs = probs;
|
||||||
|
this.expectedRegions = toRegions(startActive, startsAndStops);
|
||||||
|
setName(getName());
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getName() {
|
||||||
|
return String.format("probs=%s expectedRegions=%s", Utils.join(",", probs), Utils.join(",", expectedRegions));
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<ActiveRegion> toRegions(boolean isActive, int[] startsAndStops) {
|
||||||
|
List<ActiveRegion> l = new ArrayList<ActiveRegion>();
|
||||||
|
for ( int i = 0; i < startsAndStops.length - 1; i++) {
|
||||||
|
int start = regionStart.getStart() + startsAndStops[i];
|
||||||
|
int end = regionStart.getStart() + startsAndStops[i+1] - 1;
|
||||||
|
GenomeLoc activeLoc = genomeLocParser.createGenomeLoc(regionStart.getContig(), start, end);
|
||||||
|
ActiveRegion r = new ActiveRegion(activeLoc, isActive, genomeLocParser, extension);
|
||||||
|
l.add(r);
|
||||||
|
isActive = ! isActive;
|
||||||
|
}
|
||||||
|
return l;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "BasicActivityProfileTestProvider")
|
||||||
|
public Object[][] makeQualIntervalTestProvider() {
|
||||||
|
new BasicActivityProfileTestProvider(Arrays.asList(1.0), true, 0, 1);
|
||||||
|
// TODO -- RYAN THESE ALL EXHIBIT AN OFF-BY-ONE ERROR. SORRY I HAVE TO GO BUT I CANNOT FIX NOW
|
||||||
|
//new BasicActivityProfileTestProvider(Arrays.asList(1.0, 0.0), true, 0, 1, 2);
|
||||||
|
//new BasicActivityProfileTestProvider(Arrays.asList(0.0, 1.0), false, 0, 1, 2);
|
||||||
|
//new BasicActivityProfileTestProvider(Arrays.asList(1.0, 0.0, 1.0), true, 0, 1, 2, 3);
|
||||||
|
new BasicActivityProfileTestProvider(Arrays.asList(1.0, 1.0, 1.0), true, 0, 3);
|
||||||
|
|
||||||
|
return BasicActivityProfileTestProvider.getTests(BasicActivityProfileTestProvider.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "BasicActivityProfileTestProvider")
|
||||||
|
public void testBasicActivityProfile(BasicActivityProfileTestProvider cfg) {
|
||||||
|
ActivityProfile profile = new ActivityProfile(genomeLocParser, false);
|
||||||
|
|
||||||
|
Assert.assertEquals(profile.parser, genomeLocParser);
|
||||||
|
|
||||||
|
for ( int i = 0; i < cfg.probs.size(); i++ ) {
|
||||||
|
double p = cfg.probs.get(i);
|
||||||
|
GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i);
|
||||||
|
profile.add(loc, p);
|
||||||
|
}
|
||||||
|
Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() ));
|
||||||
|
|
||||||
|
Assert.assertEquals(profile.size(), cfg.probs.size());
|
||||||
|
Assert.assertEquals(profile.isActiveList, cfg.probs);
|
||||||
|
|
||||||
|
assertRegionsAreEqual(profile.createActiveRegions(0), cfg.expectedRegions);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertRegionsAreEqual(List<ActiveRegion> actual, List<ActiveRegion> expected) {
|
||||||
|
Assert.assertEquals(actual.size(), expected.size());
|
||||||
|
for ( int i = 0; i < actual.size(); i++ ) {
|
||||||
|
Assert.assertTrue(actual.get(i).equalExceptReads(expected.get(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// todo -- test extensions
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue