Haven't heard any complaints, so I'm deleting the original implementation of TraverseByLociByReference. All TbyLbyR's will now go through the new sharding code.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@637 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
bae4256574
commit
a5154d99a3
|
|
@ -245,17 +245,8 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
||||||
Utils.scareUser(String.format("Analysis %s doesn't support SAM/BAM reads, but a read file %s was provided", Analysis_Name, INPUT_FILES));
|
Utils.scareUser(String.format("Analysis %s doesn't support SAM/BAM reads, but a read file %s was provided", Analysis_Name, INPUT_FILES));
|
||||||
|
|
||||||
if ( WALK_ALL_LOCI ) {
|
if ( WALK_ALL_LOCI ) {
|
||||||
// TODO: Temporary debugging code. Activate the new debugging code only when the MicroScheduler
|
microScheduler = MicroScheduler.create( walker, INPUT_FILES, REF_FILE_ARG, rods, numThreads );
|
||||||
// is not filtered.
|
this.engine = microScheduler.getTraversalEngine();
|
||||||
if( !DISABLE_THREADING ) {
|
|
||||||
logger.warn("Preliminary threading support ENABLED");
|
|
||||||
microScheduler = MicroScheduler.create( walker, INPUT_FILES, REF_FILE_ARG, rods, numThreads );
|
|
||||||
this.engine = microScheduler.getTraversalEngine();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
logger.warn("Preliminary threading support DISABLED");
|
|
||||||
this.engine = new TraverseByLociByReference(INPUT_FILES, REF_FILE_ARG, rods);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
this.engine = new TraverseByLoci(INPUT_FILES, REF_FILE_ARG, rods);
|
this.engine = new TraverseByLoci(INPUT_FILES, REF_FILE_ARG, rods);
|
||||||
|
|
|
||||||
|
|
@ -1,122 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.traversals;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByHanger;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.io.File;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
|
||||||
import edu.mit.broad.picard.filter.FilteringIterator;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A simple, short-term solution to iterating over all reference positions over a series of
|
|
||||||
* genomic locations. Simply overloads the superclass traverse function to go over the entire
|
|
||||||
* interval's reference positions.
|
|
||||||
*/
|
|
||||||
public class TraverseByLociByReference extends TraverseByLoci {
|
|
||||||
|
|
||||||
public TraverseByLociByReference(List<File> reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
|
||||||
super(reads, ref, rods);
|
|
||||||
}
|
|
||||||
|
|
||||||
public <M,T> T traverse(Walker<M,T> walker, ArrayList<GenomeLoc> locations) {
|
|
||||||
//if ( locations.isEmpty() )
|
|
||||||
// Utils.scareUser("Requested all locations be processed without providing locations to be processed!");
|
|
||||||
|
|
||||||
return super.traverse(walker, locations);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected <M, T> T carryWalkerOverInterval( LocusWalker<M, T> walker,
|
|
||||||
Iterator<SAMRecord> readIter,
|
|
||||||
T sum,
|
|
||||||
GenomeLoc interval ) {
|
|
||||||
logger.debug(String.format("TraverseByLociByReference.carryWalkerOverInterval Genomic interval is %s", interval));
|
|
||||||
|
|
||||||
boolean done = false;
|
|
||||||
|
|
||||||
List<SAMRecord> NO_READS = new ArrayList<SAMRecord>();
|
|
||||||
List<Integer> NO_OFFSETS = new ArrayList<Integer>();
|
|
||||||
|
|
||||||
FilteringIterator filterIter = new FilteringIterator(readIter, new locusStreamFilterFunc());
|
|
||||||
LocusIterator locusIter = new LocusIteratorByHanger(filterIter); // prepare the iterator by loci from reads
|
|
||||||
|
|
||||||
ReferenceIterator refSite = null;
|
|
||||||
LocusContext locusFromReads = null;
|
|
||||||
|
|
||||||
if( interval != null ) {
|
|
||||||
refSite = refIter.seekForward(interval); // jump to the first reference site
|
|
||||||
locusFromReads = advanceReadsToLoc(locusIter, interval); // load up the next locus by reads
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
refSite = refIter.next();
|
|
||||||
locusFromReads = locusIter.next();
|
|
||||||
}
|
|
||||||
|
|
||||||
// We keep processing while the next reference location is within the interval
|
|
||||||
while ( (interval == null || interval.containsP(refSite.getLocation())) && ! done ) {
|
|
||||||
logger.debug(String.format(" LocusFromReads is %s", locusFromReads == null ? null : locusFromReads.getLocation()));
|
|
||||||
|
|
||||||
TraversalStatistics.nRecords++;
|
|
||||||
GenomeLoc current = refSite.getLocation();
|
|
||||||
|
|
||||||
// Iterate forward to get all reference ordered data covering this locus
|
|
||||||
final RefMetaDataTracker tracker = getReferenceOrderedDataAtLocus(current);
|
|
||||||
|
|
||||||
LocusContext locus = null;
|
|
||||||
|
|
||||||
if ( locusFromReads != null && locusFromReads.getLocation().compareTo(current) == 0 ) { // we are at the same site
|
|
||||||
locus = locusFromReads;
|
|
||||||
if ( locusIter.hasNext() )
|
|
||||||
locusFromReads = locusIter.next(); // advance the iterator
|
|
||||||
}
|
|
||||||
else
|
|
||||||
locus = new LocusContext(current, NO_READS, NO_OFFSETS); // make the empty locus that has no reads
|
|
||||||
|
|
||||||
locus.setReferenceContig(refSite.getCurrentContig());
|
|
||||||
if ( DOWNSAMPLE_BY_COVERAGE )
|
|
||||||
locus.downsampleToCoverage(downsamplingCoverage);
|
|
||||||
sum = walkAtLocus( walker, sum, locus, refSite, tracker );
|
|
||||||
|
|
||||||
if (this.maxReads > 0 && TraversalStatistics.nRecords > this.maxReads) {
|
|
||||||
logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + TraversalStatistics.nRecords));
|
|
||||||
done = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
printProgress("loci", locus.getLocation());
|
|
||||||
refSite = refIter.next(); // update our location
|
|
||||||
}
|
|
||||||
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private LocusContext advanceReadsToLoc(LocusIterator locusIter, GenomeLoc target) {
|
|
||||||
if ( ! locusIter.hasNext() )
|
|
||||||
return null;
|
|
||||||
|
|
||||||
LocusContext locus = locusIter.next();
|
|
||||||
|
|
||||||
while ( ! target.containsP(locus.getLocation()) && locusIter.hasNext() ) {
|
|
||||||
logger.debug(String.format(" current locus is %s vs %s => %d", locus.getLocation(), target, locus.getLocation().compareTo(target)));
|
|
||||||
locus = locusIter.next();
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.debug(String.format(" returning %s", locus.getLocation()));
|
|
||||||
return locus;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Loading…
Reference in New Issue