Cleanup in datasources.providers namespace. Make it easier for others writing traversal engines to use.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@803 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
02fc4f145f
commit
2a5be1debe
|
|
@ -0,0 +1,87 @@
|
||||||
|
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||||
|
|
||||||
|
import java.util.NoSuchElementException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
/**
|
||||||
|
* User: hanna
|
||||||
|
* Date: May 13, 2009
|
||||||
|
* Time: 3:32:30 PM
|
||||||
|
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||||
|
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||||
|
* All rights are reserved.
|
||||||
|
*
|
||||||
|
* Users acknowledge that this software is supplied without any warranty or support.
|
||||||
|
* The Broad Institute is not responsible for its use, misuse, or
|
||||||
|
* functionality.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A LocusView over which the user can iterate.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class AllLocusView extends LocusView {
|
||||||
|
private GenomeLocusIterator locusIterator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the current position in the view.
|
||||||
|
*/
|
||||||
|
private GenomeLoc nextPosition = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* What's the context for the last locus accessed?
|
||||||
|
* @param provider
|
||||||
|
*/
|
||||||
|
private LocusContext nextLocusContext = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new queue of locus contexts.
|
||||||
|
* @param provider
|
||||||
|
*/
|
||||||
|
public AllLocusView(ShardDataProvider provider) {
|
||||||
|
super( provider );
|
||||||
|
// Seed the state tracking members with the first possible seek position and the first possible locus context.
|
||||||
|
locusIterator = new GenomeLocusIterator( provider.getShard().getGenomeLoc() );
|
||||||
|
if( locusIterator.hasNext() ) {
|
||||||
|
nextPosition = locusIterator.next();
|
||||||
|
nextLocusContext = hasNextLocusContext() ? nextLocusContext() : createEmptyLocusContext(nextPosition);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasNext() {
|
||||||
|
return nextPosition != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public LocusContext next() {
|
||||||
|
GenomeLoc currentPosition = nextPosition;
|
||||||
|
if( currentPosition == null )
|
||||||
|
throw new NoSuchElementException("No next is available in the all locus view");
|
||||||
|
|
||||||
|
// Determine the next locus.
|
||||||
|
nextPosition = locusIterator.hasNext() ? locusIterator.next() : null;
|
||||||
|
|
||||||
|
// Crank the iterator to (if possible) or past the next context.
|
||||||
|
while( nextLocusContext != null && nextLocusContext.getLocation().isBefore(currentPosition) && hasNextLocusContext() )
|
||||||
|
nextLocusContext = nextLocusContext();
|
||||||
|
|
||||||
|
// If actual data is present, return it. Otherwise, return empty data.
|
||||||
|
if( nextLocusContext != null && nextLocusContext.getLocation().equals(currentPosition) )
|
||||||
|
return nextLocusContext;
|
||||||
|
else
|
||||||
|
return createEmptyLocusContext( currentPosition );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a blank locus context at the specified location.
|
||||||
|
* @param site Site at which to create the blank locus context.
|
||||||
|
* @return empty context.
|
||||||
|
*/
|
||||||
|
private LocusContext createEmptyLocusContext( GenomeLoc site ) {
|
||||||
|
return new LocusContext(site, new ArrayList<SAMRecord>(), new ArrayList<Integer>());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,64 @@
|
||||||
|
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.LocusContextIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.LocusContextIteratorByHanger;
|
||||||
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
|
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||||
|
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import edu.mit.broad.picard.filter.FilteringIterator;
|
||||||
|
/**
|
||||||
|
* User: hanna
|
||||||
|
* Date: May 12, 2009
|
||||||
|
* Time: 11:24:42 AM
|
||||||
|
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||||
|
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||||
|
* All rights are reserved.
|
||||||
|
*
|
||||||
|
* Users acknowledge that this software is supplied without any warranty or support.
|
||||||
|
* The Broad Institute is not responsible for its use, misuse, or
|
||||||
|
* functionality.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A queue of locus contexts. Provides unidirectional seek. Stripped down
|
||||||
|
* implementation of java.util.Queue interface.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class CoveredLocusView extends LocusView {
|
||||||
|
/**
|
||||||
|
* Gets the position to which the last seek was requested.
|
||||||
|
*/
|
||||||
|
private GenomeLoc seekPoint;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* What's the context for the last locus accessed?
|
||||||
|
* @param provider
|
||||||
|
*/
|
||||||
|
private LocusContext nextLocusContext = null;
|
||||||
|
|
||||||
|
private static Logger logger = Logger.getLogger(CoveredLocusView.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new queue of locus contexts.
|
||||||
|
* @param provider
|
||||||
|
*/
|
||||||
|
public CoveredLocusView(ShardDataProvider provider) {
|
||||||
|
super(provider);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasNext() {
|
||||||
|
return hasNextLocusContext();
|
||||||
|
}
|
||||||
|
|
||||||
|
public LocusContext next() {
|
||||||
|
return nextLocusContext();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,119 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
|
||||||
|
|
||||||
import java.util.NoSuchElementException;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
/**
|
|
||||||
* User: hanna
|
|
||||||
* Date: May 13, 2009
|
|
||||||
* Time: 3:32:30 PM
|
|
||||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
|
||||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
|
||||||
* All rights are reserved.
|
|
||||||
*
|
|
||||||
* Users acknowledge that this software is supplied without any warranty or support.
|
|
||||||
* The Broad Institute is not responsible for its use, misuse, or
|
|
||||||
* functionality.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A LocusContextQueue over which the user can iterate.
|
|
||||||
*/
|
|
||||||
|
|
||||||
public class IterableLocusContextQueue extends LocusContextQueue implements LocusIterator {
|
|
||||||
/**
|
|
||||||
* What's the context for the last locus accessed?
|
|
||||||
* @param provider
|
|
||||||
*/
|
|
||||||
private LocusContext prefetched = null;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Has this prefetch been consumed? If this flag is set,
|
|
||||||
* the prefetch will skip to the next argument in the system.
|
|
||||||
*/
|
|
||||||
private boolean prefetchConsumed = true;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new queue of locus contexts.
|
|
||||||
* @param provider
|
|
||||||
*/
|
|
||||||
public IterableLocusContextQueue(ShardDataProvider provider) {
|
|
||||||
super( provider );
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Is there another locus present in this iterator.
|
|
||||||
* @return True if another locus present in this iterator. Otherwise, false.
|
|
||||||
*/
|
|
||||||
public boolean hasNext() {
|
|
||||||
prefetchLocusContext();
|
|
||||||
return prefetched != null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Retrieves the next element in the queue.
|
|
||||||
* @return Next element in the queue.
|
|
||||||
*/
|
|
||||||
public GenomeLoc next() {
|
|
||||||
prefetchLocusContext();
|
|
||||||
prefetchConsumed = true;
|
|
||||||
// Signal that the prefetcher needs to grab another entry off the queue.
|
|
||||||
return prefetched.getLocation();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Find the next locus context within the bounds of a member variable and store
|
|
||||||
* it in the prefetched member variable. When the prefetch is consumed, the 'consumer'
|
|
||||||
* should signal it as such by marking prefetchConsumed = true.
|
|
||||||
*/
|
|
||||||
private void prefetchLocusContext() {
|
|
||||||
if( !prefetchConsumed )
|
|
||||||
return;
|
|
||||||
|
|
||||||
prefetched = null;
|
|
||||||
prefetchConsumed = false;
|
|
||||||
|
|
||||||
// If another locus context bounded by this shard exists, find it.
|
|
||||||
boolean prefetchOutOfBounds = true;
|
|
||||||
while( hasNextLocusContext() && prefetchOutOfBounds ) {
|
|
||||||
prefetched = getNextLocusContext();
|
|
||||||
prefetchOutOfBounds = (prefetched.getLocation().isBefore(shard.getGenomeLoc()) ||
|
|
||||||
prefetched.getLocation().isPast(shard.getGenomeLoc()));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Can't find a valid prefetch? Set prefetch to null. If prefetched == null and
|
|
||||||
// prefetchConsumed == false, the queue is out of entries.
|
|
||||||
if( prefetchOutOfBounds )
|
|
||||||
prefetched = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Unsupported.
|
|
||||||
*/
|
|
||||||
public void remove() {
|
|
||||||
throw new UnsupportedOperationException("Unable to remove elements from this queue.");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Peek at the next locus context in the chain.
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public LocusContext peek() {
|
|
||||||
if( prefetched == null )
|
|
||||||
throw new NoSuchElementException("No more elements remaining in queue");
|
|
||||||
return prefetched;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Seek to the specified position in the contig.
|
|
||||||
* @param seekPoint
|
|
||||||
*/
|
|
||||||
public LocusContextQueue seek( GenomeLoc seekPoint ) {
|
|
||||||
if( prefetched == null || !seekPoint.equals(prefetched.getLocation()) )
|
|
||||||
throw new IllegalArgumentException("IterableLocusContextQueue doesn't support seeking and iterator is in the wrong position.");
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -1,118 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
|
||||||
import org.broadinstitute.sting.gatk.Reads;
|
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusContextIteratorByHanger;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusContextIterator;
|
|
||||||
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
|
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
import edu.mit.broad.picard.filter.FilteringIterator;
|
|
||||||
import edu.mit.broad.picard.filter.SamRecordFilter;
|
|
||||||
/**
|
|
||||||
* User: hanna
|
|
||||||
* Date: May 13, 2009
|
|
||||||
* Time: 3:30:16 PM
|
|
||||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
|
||||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
|
||||||
* All rights are reserved.
|
|
||||||
*
|
|
||||||
* Users acknowledge that this software is supplied without any warranty or support.
|
|
||||||
* The Broad Institute is not responsible for its use, misuse, or
|
|
||||||
* functionality.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A queue of locus context entries.
|
|
||||||
*/
|
|
||||||
|
|
||||||
public abstract class LocusContextQueue {
|
|
||||||
protected Shard shard;
|
|
||||||
|
|
||||||
private Reads sourceInfo;
|
|
||||||
private LocusContextIterator loci;
|
|
||||||
|
|
||||||
public LocusContextQueue(ShardDataProvider provider) {
|
|
||||||
Iterator<SAMRecord> reads = new FilteringIterator(provider.getReadIterator(), new LocusStreamFilterFunc());
|
|
||||||
this.loci = new LocusContextIteratorByHanger(reads);
|
|
||||||
this.sourceInfo = provider.getReadIterator().getSourceInfo();
|
|
||||||
this.shard = provider.getShard();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the locus context at the given position.
|
|
||||||
* @return Locus context, or null if no locus context exists at this position.
|
|
||||||
*/
|
|
||||||
public abstract LocusContext peek();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Seek to the given point the queue of locus contexts.
|
|
||||||
* @param target Target base pair to which to seek. Must be a single base pair.
|
|
||||||
* @return an instance of itself for parameter chaining.
|
|
||||||
*/
|
|
||||||
public abstract LocusContextQueue seek(GenomeLoc target);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the next locus context, applying filtering as necessary.
|
|
||||||
* @return Locus context to work with.
|
|
||||||
*/
|
|
||||||
protected LocusContext getNextLocusContext() {
|
|
||||||
LocusContext next = loci.next();
|
|
||||||
if( sourceInfo.getDownsampleToCoverage() != null )
|
|
||||||
next.downsampleToCoverage( sourceInfo.getDownsampleToCoverage() );
|
|
||||||
return next;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* hasNext()-style iterator for base iterator.
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
protected boolean hasNextLocusContext() {
|
|
||||||
return loci.hasNext();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Class to filter out un-handle-able reads from the stream. We currently are skipping
|
|
||||||
* unmapped reads, non-primary reads, unaligned reads, and duplicate reads.
|
|
||||||
*/
|
|
||||||
private static class LocusStreamFilterFunc implements SamRecordFilter {
|
|
||||||
SAMRecord lastRead = null;
|
|
||||||
public boolean filterOut(SAMRecord rec) {
|
|
||||||
boolean result = false;
|
|
||||||
String why = "";
|
|
||||||
if (rec.getReadUnmappedFlag()) {
|
|
||||||
TraversalStatistics.nUnmappedReads++;
|
|
||||||
result = true;
|
|
||||||
why = "Unmapped";
|
|
||||||
} else if (rec.getNotPrimaryAlignmentFlag()) {
|
|
||||||
TraversalStatistics.nNotPrimary++;
|
|
||||||
result = true;
|
|
||||||
why = "Not Primary";
|
|
||||||
} else if (rec.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) {
|
|
||||||
TraversalStatistics.nBadAlignments++;
|
|
||||||
result = true;
|
|
||||||
why = "No alignment start";
|
|
||||||
} else if (rec.getDuplicateReadFlag()) {
|
|
||||||
TraversalStatistics.nDuplicates++;
|
|
||||||
result = true;
|
|
||||||
why = "Duplicate reads";
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
result = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (result) {
|
|
||||||
TraversalStatistics.nSkippedReads++;
|
|
||||||
//System.out.printf(" [filter] %s => %b %s", rec.getReadName(), result, why);
|
|
||||||
} else {
|
|
||||||
TraversalStatistics.nReads++;
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,70 @@
|
||||||
|
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||||
|
import net.sf.samtools.util.StringUtil;
|
||||||
|
/**
|
||||||
|
* User: hanna
|
||||||
|
* Date: May 22, 2009
|
||||||
|
* Time: 12:24:23 PM
|
||||||
|
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||||
|
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||||
|
* All rights are reserved.
|
||||||
|
*
|
||||||
|
* Users acknowledge that this software is supplied without any warranty or support.
|
||||||
|
* The Broad Institute is not responsible for its use, misuse, or
|
||||||
|
* functionality.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides access to the portion of the reference covering a single locus.
|
||||||
|
*/
|
||||||
|
public class LocusReferenceView extends ReferenceView {
|
||||||
|
/**
|
||||||
|
* Bound the reference view to make sure all accesses are within the shard.
|
||||||
|
*/
|
||||||
|
private final GenomeLoc bounds;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Track the reference sequence and the last point accessed. Used to
|
||||||
|
* track state when traversing over the reference.
|
||||||
|
*/
|
||||||
|
private ReferenceSequence referenceSequence;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new locus reference view.
|
||||||
|
* @param provider source for locus data.
|
||||||
|
*/
|
||||||
|
public LocusReferenceView( ShardDataProvider provider ) {
|
||||||
|
super( provider );
|
||||||
|
bounds = provider.getShard().getGenomeLoc();
|
||||||
|
this.referenceSequence = reference.getSubsequenceAt( bounds.getContig(),
|
||||||
|
bounds.getStart(),
|
||||||
|
bounds.getStop() );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the reference base associated with this particular point on the genome.
|
||||||
|
* @param genomeLoc Region for which to retrieve the base. GenomeLoc must represent a 1-base region.
|
||||||
|
* @return The base at the position represented by this genomeLoc.
|
||||||
|
*/
|
||||||
|
public char getReferenceBase( GenomeLoc genomeLoc ) {
|
||||||
|
validateLocation( genomeLoc );
|
||||||
|
int offset = (int)(genomeLoc.getStart() - bounds.getStart());
|
||||||
|
return StringUtil.bytesToString( referenceSequence.getBases(), offset, 1 ).charAt(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validates that the genomeLoc is one base wide and is in the reference sequence.
|
||||||
|
* @param genomeLoc location to verify.
|
||||||
|
*/
|
||||||
|
private void validateLocation( GenomeLoc genomeLoc ) throws InvalidPositionException {
|
||||||
|
//
|
||||||
|
if( !genomeLoc.isSingleBP() )
|
||||||
|
throw new InvalidPositionException(
|
||||||
|
String.format("Requested position larger than one base; start = %d, stop = %d", genomeLoc.getStart(), genomeLoc.getStop()));
|
||||||
|
if( !bounds.containsP(genomeLoc) )
|
||||||
|
throw new InvalidPositionException(
|
||||||
|
String.format("Requested position %s not within interval %s", genomeLoc, bounds));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,201 @@
|
||||||
|
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
|
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.LocusContextIteratorByHanger;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.LocusContextIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.NoSuchElementException;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import edu.mit.broad.picard.filter.FilteringIterator;
|
||||||
|
import edu.mit.broad.picard.filter.SamRecordFilter;
|
||||||
|
/**
|
||||||
|
* User: hanna
|
||||||
|
* Date: May 13, 2009
|
||||||
|
* Time: 3:30:16 PM
|
||||||
|
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||||
|
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||||
|
* All rights are reserved.
|
||||||
|
*
|
||||||
|
* Users acknowledge that this software is supplied without any warranty or support.
|
||||||
|
* The Broad Institute is not responsible for its use, misuse, or
|
||||||
|
* functionality.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A queue of locus context entries.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public abstract class LocusView extends LocusContextIterator implements View {
|
||||||
|
/**
|
||||||
|
* The shard bounding this view.
|
||||||
|
*/
|
||||||
|
protected Shard shard;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Source info for this view. Informs the class about downsampling requirements.
|
||||||
|
*/
|
||||||
|
private Reads sourceInfo;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The actual locus context iterator.
|
||||||
|
*/
|
||||||
|
private LocusContextIterator loci;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The next locus context from the iterator. This value must always be within
|
||||||
|
* the shard; if its null, there's nothing for the consumer to look at.
|
||||||
|
*/
|
||||||
|
private LocusContext nextLocusContext = null;
|
||||||
|
|
||||||
|
public LocusView(ShardDataProvider provider) {
|
||||||
|
this.shard = provider.getShard();
|
||||||
|
|
||||||
|
Iterator<SAMRecord> reads = new FilteringIterator(provider.getReadIterator(), new LocusStreamFilterFunc());
|
||||||
|
this.sourceInfo = provider.getReadIterator().getSourceInfo();
|
||||||
|
|
||||||
|
this.loci = new LocusContextIteratorByHanger(reads);
|
||||||
|
seedNextLocusContext();
|
||||||
|
|
||||||
|
provider.register(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Only one view of the locus is supported at any given time.
|
||||||
|
* @return A list consisting of all other locus views.
|
||||||
|
*/
|
||||||
|
public Collection<Class<? extends View>> getConflictingViews() {
|
||||||
|
return Arrays.<Class<? extends View>>asList(LocusView.class,ReadView.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close this view.
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
// Set everything to null with the hope of failing fast.
|
||||||
|
shard = null;
|
||||||
|
sourceInfo = null;
|
||||||
|
loci = null;
|
||||||
|
|
||||||
|
super.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is there another covered locus context bounded by this view.
|
||||||
|
* @return True if another covered locus context exists. False otherwise.
|
||||||
|
*/
|
||||||
|
public abstract boolean hasNext();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the next covered locus context in the shard.
|
||||||
|
* @return Next covered locus context in the shard.
|
||||||
|
* @throw NoSuchElementException if no such element exists.
|
||||||
|
*/
|
||||||
|
public abstract LocusContext next();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unsupported.
|
||||||
|
* @throw UnsupportedOperationException always.
|
||||||
|
*/
|
||||||
|
public void remove() {
|
||||||
|
throw new UnsupportedOperationException("Unable to remove elements from this queue.");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is there another locus context bounded by this shard.
|
||||||
|
* @return True if another locus context is bounded by this shard.
|
||||||
|
*/
|
||||||
|
protected boolean hasNextLocusContext() {
|
||||||
|
return nextLocusContext != null && !nextLocusContext.getLocation().isPast(shard.getGenomeLoc());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the next locus context bounded by this shard.
|
||||||
|
* @return Next locus context bounded by this shard.
|
||||||
|
* @throw NoSuchElementException if the next element is missing.
|
||||||
|
*/
|
||||||
|
protected LocusContext nextLocusContext() {
|
||||||
|
if( nextLocusContext == null || nextLocusContext.getLocation().isPast(shard.getGenomeLoc()) )
|
||||||
|
throw new NoSuchElementException("No more elements remain in locus context queue.");
|
||||||
|
|
||||||
|
// Cache the current and apply filtering.
|
||||||
|
LocusContext current = nextLocusContext;
|
||||||
|
|
||||||
|
// Find the next.
|
||||||
|
if( loci.hasNext() ) {
|
||||||
|
nextLocusContext = loci.next();
|
||||||
|
if( sourceInfo.getDownsampleToCoverage() != null )
|
||||||
|
current.downsampleToCoverage( sourceInfo.getDownsampleToCoverage() );
|
||||||
|
if( nextLocusContext.getLocation().isPast(shard.getGenomeLoc()) )
|
||||||
|
nextLocusContext = null;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
nextLocusContext = null;
|
||||||
|
|
||||||
|
return current;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Seed the nextLocusContext variable with the contents of the next locus context (if one exists).
|
||||||
|
*/
|
||||||
|
private void seedNextLocusContext() {
|
||||||
|
if( loci.hasNext() )
|
||||||
|
nextLocusContext = loci.next();
|
||||||
|
|
||||||
|
// Iterate past cruft at the beginning to the first locus in the shard.
|
||||||
|
while( nextLocusContext != null && nextLocusContext.getLocation().isBefore(shard.getGenomeLoc()) && loci.hasNext() )
|
||||||
|
nextLocusContext = loci.next();
|
||||||
|
|
||||||
|
// If nothing in the shard was found, indicate that by setting nextLocusContext to null.
|
||||||
|
if( nextLocusContext != null && nextLocusContext.getLocation().isBefore(shard.getGenomeLoc()) )
|
||||||
|
nextLocusContext = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class to filter out un-handle-able reads from the stream. We currently are skipping
|
||||||
|
* unmapped reads, non-primary reads, unaligned reads, and duplicate reads.
|
||||||
|
*/
|
||||||
|
private static class LocusStreamFilterFunc implements SamRecordFilter {
|
||||||
|
SAMRecord lastRead = null;
|
||||||
|
public boolean filterOut(SAMRecord rec) {
|
||||||
|
boolean result = false;
|
||||||
|
String why = "";
|
||||||
|
if (rec.getReadUnmappedFlag()) {
|
||||||
|
TraversalStatistics.nUnmappedReads++;
|
||||||
|
result = true;
|
||||||
|
why = "Unmapped";
|
||||||
|
} else if (rec.getNotPrimaryAlignmentFlag()) {
|
||||||
|
TraversalStatistics.nNotPrimary++;
|
||||||
|
result = true;
|
||||||
|
why = "Not Primary";
|
||||||
|
} else if (rec.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) {
|
||||||
|
TraversalStatistics.nBadAlignments++;
|
||||||
|
result = true;
|
||||||
|
why = "No alignment start";
|
||||||
|
} else if (rec.getDuplicateReadFlag()) {
|
||||||
|
TraversalStatistics.nDuplicates++;
|
||||||
|
result = true;
|
||||||
|
why = "Duplicate reads";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
result = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result) {
|
||||||
|
TraversalStatistics.nSkippedReads++;
|
||||||
|
//System.out.printf(" [filter] %s => %b %s", rec.getReadName(), result, why);
|
||||||
|
} else {
|
||||||
|
TraversalStatistics.nReads++;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,56 @@
|
||||||
|
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
import net.sf.samtools.util.StringUtil;
|
||||||
|
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||||
|
/**
|
||||||
|
* User: hanna
|
||||||
|
* Date: May 22, 2009
|
||||||
|
* Time: 12:36:14 PM
|
||||||
|
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||||
|
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||||
|
* All rights are reserved.
|
||||||
|
*
|
||||||
|
* Users acknowledge that this software is supplied without any warranty or support.
|
||||||
|
* The Broad Institute is not responsible for its use, misuse, or
|
||||||
|
* functionality.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides access to the reference over a single read.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class ReadReferenceView extends ReferenceView {
|
||||||
|
/**
|
||||||
|
* Create a view of the reference with respect to a single read.
|
||||||
|
* @param provider
|
||||||
|
*/
|
||||||
|
public ReadReferenceView( ShardDataProvider provider ) {
|
||||||
|
super( provider );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the bases of the reference that are aligned to the given read.
|
||||||
|
* @param read the read for which to extract reference information.
|
||||||
|
* @return The bases corresponding to this read, or null if the read is unmapped.
|
||||||
|
* If the alignment goes off the end of the contig, return just the portion
|
||||||
|
* mapped to the reference.
|
||||||
|
*/
|
||||||
|
public char[] getReferenceBases( SAMRecord read ) {
|
||||||
|
if( read.getReadUnmappedFlag() )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
String contig = read.getReferenceName();
|
||||||
|
int start = read.getAlignmentStart();
|
||||||
|
int stop = read.getAlignmentEnd();
|
||||||
|
|
||||||
|
SAMSequenceRecord sequenceRecord = reference.getSequenceDictionary().getSequence(contig);
|
||||||
|
if( stop > sequenceRecord.getSequenceLength() )
|
||||||
|
stop = sequenceRecord.getSequenceLength();
|
||||||
|
|
||||||
|
ReferenceSequence alignmentToReference = reference.getSubsequenceAt( contig, start, stop );
|
||||||
|
return StringUtil.bytesToString(alignmentToReference.getBases()).toCharArray();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,64 @@
|
||||||
|
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
/**
|
||||||
|
* User: hanna
|
||||||
|
* Date: May 22, 2009
|
||||||
|
* Time: 12:06:54 PM
|
||||||
|
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||||
|
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||||
|
* All rights are reserved.
|
||||||
|
*
|
||||||
|
* Users acknowledge that this software is supplied without any warranty or support.
|
||||||
|
* The Broad Institute is not responsible for its use, misuse, or
|
||||||
|
* functionality.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A view into the reads that a provider can provide. Iterating over this view
|
||||||
|
* hands back the read iterator obtained from the ShardDataProvider at construction.
|
||||||
|
*/
|
||||||
|
public class ReadView implements View, Iterable<SAMRecord> {
|
||||||
|
/**
|
||||||
|
* The iterator into the reads supplied by this provider. Set to null by close().
|
||||||
|
*/
|
||||||
|
private StingSAMIterator reads;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new view of the reads given the current data set.
|
||||||
|
* NOTE(review): unlike ReferenceOrderedView, this constructor does not call
|
||||||
|
* provider.register(this), so view conflicts are not checked here -- confirm this is intended.
|
||||||
|
* @param provider Source for the data.
|
||||||
|
*/
|
||||||
|
public ReadView( ShardDataProvider provider ) {
|
||||||
|
reads = provider.getReadIterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Other reads and loci conflict with this view.
|
||||||
|
* @return Collection containing ReadView.class and LocusView.class.
|
||||||
|
*/
|
||||||
|
public Collection<Class<? extends View>> getConflictingViews() {
|
||||||
|
return Arrays.<Class<? extends View>>asList(ReadView.class, LocusView.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close the view over these reads. Note that this method closes just
|
||||||
|
* the view into the reads, not the reads themselves.
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
// Don't close the reads. The provider is responsible for this.
|
||||||
|
// Just dispose of the pointer.
|
||||||
|
reads = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets an iterator into the reads supplied by this provider. Every call returns the
|
||||||
|
* same iterator instance captured at construction, not a fresh one.
|
||||||
|
* @return Iterator into the reads that this provider covers, or null once close() has been called.
|
||||||
|
*/
|
||||||
|
public StingSAMIterator iterator() {
|
||||||
|
return reads;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -7,6 +7,8 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
* Date: May 21, 2009
|
* Date: May 21, 2009
|
||||||
|
|
@ -43,9 +45,11 @@ public class ReferenceOrderedView implements View {
|
||||||
for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() )
|
for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() )
|
||||||
states.add( new ReferenceOrderedDataState( dataSource, (ReferenceOrderedData.RODIterator)dataSource.seek(provider.getShard()) ) );
|
states.add( new ReferenceOrderedDataState( dataSource, (ReferenceOrderedData.RODIterator)dataSource.seek(provider.getShard()) ) );
|
||||||
|
|
||||||
provider.register(this);
|
provider.register(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Collection<Class<? extends View>> getConflictingViews() { return Collections.emptyList(); }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets an object which can track the reference-ordered data at every locus.
|
* Gets an object which can track the reference-ordered data at every locus.
|
||||||
* @param loc Locus at which to track.
|
* @param loc Locus at which to track.
|
||||||
|
|
|
||||||
|
|
@ -1,102 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
|
||||||
import edu.mit.broad.picard.reference.ReferenceSequence;
|
|
||||||
import net.sf.samtools.util.StringUtil;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: hanna
|
|
||||||
* Date: Apr 8, 2009
|
|
||||||
* Time: 5:01:37 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class ReferenceProvider {
|
|
||||||
private IndexedFastaSequenceFile sequenceFile;
|
|
||||||
private Shard shard;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Track the reference sequence and the last point accessed. Used to
|
|
||||||
* track state when traversing over the reference.
|
|
||||||
*/
|
|
||||||
private ReferenceSequence referenceSequence;
|
|
||||||
private GenomeLoc referenceInterval;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new reference provider supplying data from the given reference.
|
|
||||||
* @param sequenceFile Reference file to use.
|
|
||||||
* @param shard Shard over which to retrieve data.
|
|
||||||
*/
|
|
||||||
public ReferenceProvider( IndexedFastaSequenceFile sequenceFile, Shard shard ) {
|
|
||||||
this.sequenceFile = sequenceFile;
|
|
||||||
this.shard = shard;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the reference base at a single point.
|
|
||||||
* @param genomeLoc The location at which to fetch the reference base.
|
|
||||||
* @return The character representing the reference base.
|
|
||||||
* @throws InvalidPositionException in case the position is invalid.
|
|
||||||
*/
|
|
||||||
public char getReferenceBase( GenomeLoc genomeLoc ) throws InvalidPositionException {
|
|
||||||
if( referenceSequence == null )
|
|
||||||
lazyInitializeLocusAccess();
|
|
||||||
|
|
||||||
validateLocation( genomeLoc );
|
|
||||||
int offset = (int)(genomeLoc.getStart() - referenceInterval.getStart());
|
|
||||||
return StringUtil.bytesToString( referenceSequence.getBases(), offset, 1 ).charAt(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the bases of the reference that are aligned to the given read.
|
|
||||||
* @param read the read for which to extract reference information.
|
|
||||||
* @return The bases corresponding to this read, or null if the read is unmapped.
|
|
||||||
* If the alignment goes off the end of the contig, return just the portion
|
|
||||||
* mapped to the reference.
|
|
||||||
*/
|
|
||||||
public char[] getReferenceBases( SAMRecord read ) {
|
|
||||||
if( read.getReadUnmappedFlag() )
|
|
||||||
return null;
|
|
||||||
|
|
||||||
String contig = read.getReferenceName();
|
|
||||||
int start = read.getAlignmentStart();
|
|
||||||
int stop = read.getAlignmentEnd();
|
|
||||||
|
|
||||||
SAMSequenceRecord sequenceRecord = sequenceFile.getSequenceDictionary().getSequence(contig);
|
|
||||||
if( stop > sequenceRecord.getSequenceLength() )
|
|
||||||
stop = sequenceRecord.getSequenceLength();
|
|
||||||
|
|
||||||
ReferenceSequence alignmentToReference = sequenceFile.getSubsequenceAt( contig, start, stop );
|
|
||||||
return StringUtil.bytesToString(alignmentToReference.getBases()).toCharArray();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Perform a lazy initialization of access to the locus. Sets up the reference sequence and
|
|
||||||
* limits the user to work only at that sequence.
|
|
||||||
*/
|
|
||||||
private void lazyInitializeLocusAccess() {
|
|
||||||
GenomeLoc position = shard.getGenomeLoc();
|
|
||||||
this.referenceSequence = sequenceFile.getSubsequenceAt( position.getContig(),
|
|
||||||
position.getStart(),
|
|
||||||
position.getStop() );
|
|
||||||
this.referenceInterval = position;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Validates that the genomeLoc is one base wide and is in the reference sequence.
|
|
||||||
* @param genomeLoc location to verify.
|
|
||||||
*/
|
|
||||||
private void validateLocation( GenomeLoc genomeLoc ) throws InvalidPositionException {
|
|
||||||
//
|
|
||||||
if( !referenceInterval.containsP(genomeLoc) )
|
|
||||||
throw new InvalidPositionException(
|
|
||||||
String.format("Requested position %s not within interval %s", genomeLoc, referenceInterval));
|
|
||||||
if( genomeLoc.getStart() != genomeLoc.getStop() )
|
|
||||||
throw new InvalidPositionException(
|
|
||||||
String.format("Requested position larger than one base; start = %d, stop = %d", genomeLoc.getStart(), genomeLoc.getStop()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,51 @@
|
||||||
|
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Collection;
|
||||||
|
/**
|
||||||
|
* User: hanna
|
||||||
|
* Date: May 22, 2009
|
||||||
|
* Time: 12:19:17 PM
|
||||||
|
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||||
|
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||||
|
* All rights are reserved.
|
||||||
|
*
|
||||||
|
* Users acknowledge that this software is supplied without any warranty or support.
|
||||||
|
* The Broad Institute is not responsible for its use, misuse, or
|
||||||
|
* functionality.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A view into the reference backing this shard.
|
||||||
|
*/
|
||||||
|
public class ReferenceView implements View {
|
||||||
|
/**
|
||||||
|
* The source of reference data. May be null if the provider has no reference
|
||||||
|
* (ShardDataProvider.hasReference() == false); cleared to null by close().
|
||||||
|
*/
|
||||||
|
protected IndexedFastaSequenceFile reference = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new ReferenceView.
|
||||||
|
* @param provider Shard data provider whose reference sequence file backs this view.
|
||||||
|
*/
|
||||||
|
public ReferenceView( ShardDataProvider provider ) {
|
||||||
|
this.reference = provider.getReference();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reference views don't conflict with anything else.
|
||||||
|
* @return Empty list.
|
||||||
|
*/
|
||||||
|
public Collection<Class<? extends View>> getConflictingViews() { return Collections.emptyList(); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Deinitialize pointers for fast fail. Someone else will handle file management;
|
||||||
|
* only the field is nulled here, the sequence file itself is not closed.
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
reference = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,124 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusContextIterator;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusContextIteratorByHanger;
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
|
||||||
import org.broadinstitute.sting.gatk.Reads;
|
|
||||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.apache.log4j.Logger;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
import edu.mit.broad.picard.filter.FilteringIterator;
|
|
||||||
/**
|
|
||||||
* User: hanna
|
|
||||||
* Date: May 12, 2009
|
|
||||||
* Time: 11:24:42 AM
|
|
||||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
|
||||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
|
||||||
* All rights are reserved.
|
|
||||||
*
|
|
||||||
* Users acknowledge that this software is supplied without any warranty or support.
|
|
||||||
* The Broad Institute is not responsible for its use, misuse, or
|
|
||||||
* functionality.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A queue of locus contexts. Provides unidirectional seek. Stripped down
|
|
||||||
* implementation of java.util.Queue interface.
|
|
||||||
*/
|
|
||||||
|
|
||||||
public class SeekableLocusContextQueue extends LocusContextQueue {
|
|
||||||
/**
|
|
||||||
* Gets the position to which the last seek was requested.
|
|
||||||
*/
|
|
||||||
private GenomeLoc seekPoint;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* What's the context for the last locus accessed?
|
|
||||||
* @param provider
|
|
||||||
*/
|
|
||||||
private LocusContext nextLocusContext = null;
|
|
||||||
|
|
||||||
private static Logger logger = Logger.getLogger(SeekableLocusContextQueue.class);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new queue of locus contexts.
|
|
||||||
* @param provider
|
|
||||||
*/
|
|
||||||
public SeekableLocusContextQueue(ShardDataProvider provider) {
|
|
||||||
super(provider);
|
|
||||||
|
|
||||||
// Seed the state tracking members with the first possible seek position and the first possible locus context.
|
|
||||||
seekPoint = new GenomeLoc(shard.getGenomeLoc().getContigIndex(),shard.getGenomeLoc().getStart());
|
|
||||||
|
|
||||||
if( hasNextLocusContext() )
|
|
||||||
nextLocusContext = getNextLocusContext();
|
|
||||||
else
|
|
||||||
nextLocusContext = this.createEmptyLocusContext(seekPoint);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the locus context at the given position.
|
|
||||||
* @return Locus context, or null if no locus context exists at this position.
|
|
||||||
*/
|
|
||||||
public LocusContext peek() {
|
|
||||||
// Haven't reached the next locus context in the list yet. Return null.
|
|
||||||
if( seekPoint.isBefore(nextLocusContext.getLocation()) )
|
|
||||||
return createEmptyLocusContext(seekPoint);
|
|
||||||
|
|
||||||
return nextLocusContext;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Seek to the given point the queue of locus contexts.
|
|
||||||
* @param target Target base pair to which to seek. Must be a single base pair.
|
|
||||||
* @return an instance of itself for parameter chaining.
|
|
||||||
*/
|
|
||||||
public LocusContextQueue seek(GenomeLoc target) {
|
|
||||||
if( !target.isSingleBP() )
|
|
||||||
throw new IllegalArgumentException("Seek point must be a single base pair.");
|
|
||||||
|
|
||||||
// If outside the range of the target, throw an illegal argument exception.
|
|
||||||
if( target.isBefore(shard.getGenomeLoc()) || target.isPast(shard.getGenomeLoc()))
|
|
||||||
throw new IllegalArgumentException(String.format("Target is out of range; target = %s, valid range = %s",target,shard.getGenomeLoc()));
|
|
||||||
|
|
||||||
seekPoint = (GenomeLoc)target.clone();
|
|
||||||
|
|
||||||
// Search for the next locus context following the target positions.
|
|
||||||
while (nextLocusContext.getLocation().isBefore(target) && hasNextLocusContext() ) {
|
|
||||||
logger.debug(String.format(" current locus is %s vs %s => %d", nextLocusContext.getLocation(),
|
|
||||||
target,
|
|
||||||
nextLocusContext.getLocation().compareTo(target)));
|
|
||||||
nextLocusContext = getNextLocusContext();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Couldn't find a next? Force the nextLocusContext to null.
|
|
||||||
if( nextLocusContext.getLocation().isBefore(target) && !hasNextLocusContext() )
|
|
||||||
nextLocusContext = createEmptyLocusContext( seekPoint );
|
|
||||||
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the point to which the queue has currently seeked.
|
|
||||||
* @return Single bp position where the queue has been positioned. A locus context may or may not
|
|
||||||
* exist at this point.
|
|
||||||
*/
|
|
||||||
public GenomeLoc getSeekPoint() {
|
|
||||||
return seekPoint;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates a blank locus context at the specified location.
|
|
||||||
* @param site Site at which to create the blank locus context.
|
|
||||||
* @return empty context.
|
|
||||||
*/
|
|
||||||
private LocusContext createEmptyLocusContext( GenomeLoc site ) {
|
|
||||||
return new LocusContext(site, new ArrayList<SAMRecord>(), new ArrayList<Integer>());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -3,16 +3,16 @@ package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
import org.broadinstitute.sting.gatk.iterators.NullSAMIterator;
|
import org.broadinstitute.sting.gatk.iterators.NullSAMIterator;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
|
|
||||||
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.ReferenceOrderedDataSource;
|
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.ReferenceOrderedDataSource;
|
||||||
|
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
|
||||||
import org.broadinstitute.sting.gatk.Reads;
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.io.File;
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
* Date: May 8, 2009
|
* Date: May 8, 2009
|
||||||
|
|
@ -49,7 +49,7 @@ public class ShardDataProvider {
|
||||||
/**
|
/**
|
||||||
* Provider of reference data for this particular shard.
|
* Provider of reference data for this particular shard.
|
||||||
*/
|
*/
|
||||||
private final ReferenceProvider referenceProvider;
|
private final IndexedFastaSequenceFile reference;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sources of reference-ordered data.
|
* Sources of reference-ordered data.
|
||||||
|
|
@ -77,18 +77,25 @@ public class ShardDataProvider {
|
||||||
* @return True if possible, false otherwise.
|
* @return True if possible, false otherwise.
|
||||||
*/
|
*/
|
||||||
public boolean hasReference() {
|
public boolean hasReference() {
|
||||||
return referenceProvider != null;
|
return reference != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets an iterator over all the reads bound by this shard.
|
* Gets an iterator over all the reads bound by this shard.
|
||||||
* WARNING: Right now, this cannot be concurrently accessed with getLocusContext().
|
|
||||||
* @return An iterator over all reads in this shard.
|
* @return An iterator over all reads in this shard.
|
||||||
*/
|
*/
|
||||||
public StingSAMIterator getReadIterator() {
|
StingSAMIterator getReadIterator() {
|
||||||
return reads;
|
return reads;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a pointer into the given indexed fasta sequence file.
|
||||||
|
* @return The indexed fasta sequence file.
|
||||||
|
*/
|
||||||
|
IndexedFastaSequenceFile getReference() {
|
||||||
|
return reference;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets a window into the reference-ordered data. Package protected so that only
|
* Gets a window into the reference-ordered data. Package protected so that only
|
||||||
* views can access it.
|
* views can access it.
|
||||||
|
|
@ -98,35 +105,17 @@ public class ShardDataProvider {
|
||||||
return referenceOrderedData;
|
return referenceOrderedData;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the reference base associated with this particular point on the genome.
|
|
||||||
* @param genomeLoc Region for which to retrieve the base. GenomeLoc must represent a 1-base region.
|
|
||||||
* @return The base at the position represented by this genomeLoc.
|
|
||||||
*/
|
|
||||||
public char getReferenceBase( GenomeLoc genomeLoc ) {
|
|
||||||
return referenceProvider.getReferenceBase(genomeLoc);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the reference sequence, as a char[], for the provided read.
|
|
||||||
* @param read the read to fetch the reference sequence for
|
|
||||||
* @return a char string of bases representing the reference sequence mapped to passed in read
|
|
||||||
*/
|
|
||||||
public char[] getReferenceForRead( SAMRecord read ) {
|
|
||||||
return referenceProvider.getReferenceBases(read);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a data provider for the shard given the reads and reference.
|
* Create a data provider for the shard given the reads and reference.
|
||||||
* @param shard The chunk of data over which traversals happen.
|
* @param shard The chunk of data over which traversals happen.
|
||||||
* @param reads A window into the reads for a given region.
|
* @param reads A window into the reads for a given region.
|
||||||
* @param reference A getter for a section of the reference.
|
* @param reference A getter for a section of the reference.
|
||||||
*/
|
*/
|
||||||
public ShardDataProvider( Shard shard, SAMDataSource reads, IndexedFastaSequenceFile reference, List<ReferenceOrderedDataSource> rods) {
|
public ShardDataProvider( Shard shard, SAMDataSource reads, IndexedFastaSequenceFile reference, List<ReferenceOrderedDataSource> rods) {
|
||||||
this.shard = shard;
|
this.shard = shard;
|
||||||
// Provide basic reads information.
|
// Provide basic reads information.
|
||||||
this.reads = (reads != null) ? reads.seek( shard ) : new NullSAMIterator(new Reads(new ArrayList<File>()));
|
this.reads = (reads != null) ? reads.seek( shard ) : new NullSAMIterator(new Reads(new ArrayList<File>()));
|
||||||
this.referenceProvider = (reference != null) ? new ReferenceProvider(reference,shard) : null;
|
this.reference = reference;
|
||||||
this.referenceOrderedData = rods;
|
this.referenceOrderedData = rods;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -138,11 +127,36 @@ public class ShardDataProvider {
|
||||||
ShardDataProvider( Shard shard, StingSAMIterator reads ) {
|
ShardDataProvider( Shard shard, StingSAMIterator reads ) {
|
||||||
this.shard = shard;
|
this.shard = shard;
|
||||||
this.reads = reads;
|
this.reads = reads;
|
||||||
this.referenceProvider = null;
|
this.reference = null;
|
||||||
this.referenceOrderedData = null;
|
this.referenceOrderedData = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Register this view with the shard provider, and make sure it has no conflicts with any other views.
|
||||||
|
* @param view The new view.
|
||||||
|
*/
|
||||||
void register( View view ) {
|
void register( View view ) {
|
||||||
|
// Check all registered classes to see whether a conflict exists.
|
||||||
|
for( View registeredView: registeredViews ) {
|
||||||
|
Collection<Class<? extends View>> conflicts = registeredView.getConflictingViews();
|
||||||
|
for( Class<? extends View> conflict: conflicts ) {
|
||||||
|
if( conflict.isInstance(view) )
|
||||||
|
throw new StingException(String.format("Tried to registered two conflicting views: %s and %s",
|
||||||
|
registeredView.getClass().getSimpleName(),
|
||||||
|
view.getClass().getSimpleName()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check whether this class has any objection to any other classes.
|
||||||
|
for( Class<? extends View> conflict: view.getConflictingViews() ) {
|
||||||
|
for( View registeredView: registeredViews ) {
|
||||||
|
if( conflict.isInstance(registeredView) )
|
||||||
|
throw new StingException(String.format("Tried to registered two conflicting views: %s and %s",
|
||||||
|
registeredView.getClass().getSimpleName(),
|
||||||
|
view.getClass().getSimpleName()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
this.registeredViews.add(view);
|
this.registeredViews.add(view);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Collection;
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
* Date: May 21, 2009
|
* Date: May 21, 2009
|
||||||
|
|
@ -16,6 +19,11 @@ package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||||
* Represents a view into given data.
|
* Represents a view into given data.
|
||||||
*/
|
*/
|
||||||
public interface View {
|
public interface View {
|
||||||
|
/**
|
||||||
|
* Gets a list of all types of views which can conflict with this view.
|
||||||
|
*/
|
||||||
|
public Collection<Class<? extends View>> getConflictingViews();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Inform this view that the data provided to it no longer exists.
|
* Inform this view that the data provided to it no longer exists.
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
package org.broadinstitute.sting.gatk.iterators;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
|
|
@ -21,7 +20,7 @@ import java.util.NoSuchElementException;
|
||||||
/**
|
/**
|
||||||
* Iterates through all of the loci provided in the reference.
|
* Iterates through all of the loci provided in the reference.
|
||||||
*/
|
*/
|
||||||
public class ReferenceLocusIterator implements LocusIterator {
|
public class GenomeLocusIterator implements LocusIterator {
|
||||||
/**
|
/**
|
||||||
* The entire region over which we're iterating.
|
* The entire region over which we're iterating.
|
||||||
*/
|
*/
|
||||||
|
|
@ -38,11 +37,9 @@ public class ReferenceLocusIterator implements LocusIterator {
|
||||||
* @param provider Data provider to use as a backing source.
|
* @param completeLocus The entire region over which to iterate.
|
||||||
* Provider must have a reference (hasReference() == true).
|
* Iteration begins at the start position of this region.
|
||||||
*/
|
*/
|
||||||
public ReferenceLocusIterator( ShardDataProvider provider ) {
|
public GenomeLocusIterator( GenomeLoc completeLocus ) {
|
||||||
if( !provider.hasReference() )
|
this.completeLocus = completeLocus;
|
||||||
throw new StingException("Trying to iterate through reference, but no reference has been provided.");
|
this.currentLocus = new GenomeLoc(completeLocus.getContig(),completeLocus.getStart());
|
||||||
completeLocus = provider.getShard().getGenomeLoc();
|
|
||||||
currentLocus = new GenomeLoc(completeLocus.getContig(),completeLocus.getStart());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -4,6 +4,7 @@ import net.sf.samtools.SAMRecord;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
|
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
|
||||||
|
import org.broadinstitute.sting.gatk.dataSources.providers.ReadView;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard;
|
import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
||||||
|
|
@ -311,7 +312,7 @@ public class TraverseDuplicates extends TraversalEngine {
|
||||||
// -> those with the same mate pair position, for paired reads
|
// -> those with the same mate pair position, for paired reads
|
||||||
// -> those flagged as unpaired and duplicated but having the same start and end and
|
// -> those flagged as unpaired and duplicated but having the same start and end and
|
||||||
|
|
||||||
FilteringIterator filterIter = new FilteringIterator(dataProvider.getReadIterator(), new duplicateStreamFilterFunc());
|
FilteringIterator filterIter = new FilteringIterator(new ReadView(dataProvider).iterator(), new duplicateStreamFilterFunc());
|
||||||
PushbackIterator<SAMRecord> iter = new PushbackIterator<SAMRecord>(filterIter);
|
PushbackIterator<SAMRecord> iter = new PushbackIterator<SAMRecord>(filterIter);
|
||||||
return actuallyTraverse(dupWalker, iter, sum);
|
return actuallyTraverse(dupWalker, iter, sum);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -6,16 +6,16 @@ import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceLocusIterator;
|
|
||||||
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
|
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.providers.SeekableLocusContextQueue;
|
import org.broadinstitute.sting.gatk.dataSources.providers.AllLocusView;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextQueue;
|
import org.broadinstitute.sting.gatk.dataSources.providers.CoveredLocusView;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.providers.IterableLocusContextQueue;
|
import org.broadinstitute.sting.gatk.dataSources.providers.LocusView;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceOrderedView;
|
import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceOrderedView;
|
||||||
|
import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceView;
|
||||||
|
import org.broadinstitute.sting.gatk.dataSources.providers.LocusReferenceView;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
@ -25,9 +25,7 @@ import java.util.ArrayList;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A simple, short-term solution to iterating over all reference positions over a series of
|
* A simple solution to iterating over all reference positions over a series of genomic locations.
|
||||||
* genomic locations. Simply overloads the superclass traverse function to go over the entire
|
|
||||||
* interval's reference positions.
|
|
||||||
*/
|
*/
|
||||||
public class TraverseLoci extends TraversalEngine {
|
public class TraverseLoci extends TraversalEngine {
|
||||||
|
|
||||||
|
|
@ -59,36 +57,20 @@ public class TraverseLoci extends TraversalEngine {
|
||||||
|
|
||||||
LocusWalker<M, T> locusWalker = (LocusWalker<M, T>)walker;
|
LocusWalker<M, T> locusWalker = (LocusWalker<M, T>)walker;
|
||||||
|
|
||||||
LocusIterator locusIterator = null;
|
LocusView locusView = getLocusView( walker, dataProvider );
|
||||||
LocusContextQueue locusContextQueue = null;
|
LocusReferenceView referenceView = new LocusReferenceView( dataProvider );
|
||||||
ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
|
ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
|
||||||
|
|
||||||
DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
|
|
||||||
switch( dataSource ) {
|
|
||||||
case REFERENCE:
|
|
||||||
locusIterator = new ReferenceLocusIterator( dataProvider );
|
|
||||||
locusContextQueue = new SeekableLocusContextQueue( dataProvider );
|
|
||||||
break;
|
|
||||||
case READS:
|
|
||||||
IterableLocusContextQueue iterableQueue = new IterableLocusContextQueue( dataProvider );
|
|
||||||
locusIterator = iterableQueue;
|
|
||||||
locusContextQueue = iterableQueue;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
|
|
||||||
}
|
|
||||||
|
|
||||||
// We keep processing while the next reference location is within the interval
|
// We keep processing while the next reference location is within the interval
|
||||||
while( locusIterator.hasNext() ) {
|
while( locusView.hasNext() ) {
|
||||||
GenomeLoc site = locusIterator.next();
|
LocusContext locus = locusView.next();
|
||||||
|
|
||||||
TraversalStatistics.nRecords++;
|
TraversalStatistics.nRecords++;
|
||||||
|
|
||||||
// Iterate forward to get all reference ordered data covering this locus
|
// Iterate forward to get all reference ordered data covering this locus
|
||||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(site);
|
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation());
|
||||||
|
|
||||||
LocusContext locus = locusContextQueue.seek( site ).peek();
|
char refBase = referenceView.getReferenceBase(locus.getLocation());
|
||||||
char refBase = dataProvider.getReferenceBase( site );
|
|
||||||
|
|
||||||
final boolean keepMeP = locusWalker.filter(tracker, refBase, locus);
|
final boolean keepMeP = locusWalker.filter(tracker, refBase, locus);
|
||||||
if (keepMeP) {
|
if (keepMeP) {
|
||||||
|
|
@ -116,4 +98,19 @@ public class TraverseLoci extends TraversalEngine {
|
||||||
public <T> void printOnTraversalDone( T sum ) {
|
public <T> void printOnTraversalDone( T sum ) {
|
||||||
printOnTraversalDone( "loci", sum );
|
printOnTraversalDone( "loci", sum );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the best view of loci for this walker given the available data.
|
||||||
|
* @param walker walker to interrogate.
|
||||||
|
* @param dataProvider Data with which to drive the locus view.
|
||||||
|
* @return A CoveredLocusView when the walker's data source is READS, or an AllLocusView when it is REFERENCE.
|
||||||
|
* @throws UnsupportedOperationException if the walker's data source is neither READS nor REFERENCE.
|
||||||
|
*/
|
||||||
|
private LocusView getLocusView( Walker walker, ShardDataProvider dataProvider ) {
|
||||||
|
DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
|
||||||
|
if( dataSource == DataSource.READS )
|
||||||
|
return new CoveredLocusView(dataProvider);
|
||||||
|
else if( dataSource == DataSource.REFERENCE )
|
||||||
|
return new AllLocusView(dataProvider);
|
||||||
|
else
|
||||||
|
throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,8 @@ import net.sf.samtools.SAMRecord;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
|
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
|
||||||
|
import org.broadinstitute.sting.gatk.dataSources.providers.ReadView;
|
||||||
|
import org.broadinstitute.sting.gatk.dataSources.providers.ReadReferenceView;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard;
|
import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||||
|
|
@ -87,8 +89,11 @@ public class TraverseReads extends TraversalEngine {
|
||||||
|
|
||||||
ReadWalker<M, T> readWalker = (ReadWalker<M, T>) walker;
|
ReadWalker<M, T> readWalker = (ReadWalker<M, T>) walker;
|
||||||
|
|
||||||
|
ReadView reads = new ReadView(dataProvider);
|
||||||
|
ReadReferenceView reference = new ReadReferenceView(dataProvider);
|
||||||
|
|
||||||
// while we still have more reads
|
// while we still have more reads
|
||||||
for (SAMRecord read : dataProvider.getReadIterator()) {
|
for (SAMRecord read : reads) {
|
||||||
|
|
||||||
// our locus context
|
// our locus context
|
||||||
LocusContext locus = null;
|
LocusContext locus = null;
|
||||||
|
|
@ -105,7 +110,7 @@ public class TraverseReads extends TraversalEngine {
|
||||||
|
|
||||||
// get the array of characters for the reference sequence, since we're a mapped read
|
// get the array of characters for the reference sequence, since we're a mapped read
|
||||||
if( dataProvider.hasReference() )
|
if( dataProvider.hasReference() )
|
||||||
refSeq = dataProvider.getReferenceForRead( read );
|
refSeq = reference.getReferenceBases( read );
|
||||||
}
|
}
|
||||||
|
|
||||||
// update the number of reads we've seen
|
// update the number of reads we've seen
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,59 @@
|
||||||
|
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||||
|
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
/**
|
||||||
|
* User: hanna
|
||||||
|
* Date: May 12, 2009
|
||||||
|
* Time: 2:34:46 PM
|
||||||
|
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||||
|
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||||
|
* All rights are reserved.
|
||||||
|
*
|
||||||
|
* Users acknowledge that this software is supplied without any warranty or support.
|
||||||
|
* The Broad Institute is not responsible for its use, misuse, or
|
||||||
|
* functionality.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test the view of all loci. The check below pulls one locus context from the view for every position in the shard bounds, whether or not any read contains that position.
|
||||||
|
*/
|
||||||
|
public class AllLocusViewTest extends LocusViewTemplate {
|
||||||
|
|
||||||
|
/** Creates the AllLocusView under test from the given provider. */
@Override
|
||||||
|
protected LocusView createView(ShardDataProvider provider) {
|
||||||
|
return new AllLocusView(provider);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test the reads according to an independently derived context: walk every position in bounds, take the next locus context from the view, and check its location, the presence of each expected read, and the total read count.
|
||||||
|
* @param view View under test; cast to AllLocusView.
|
||||||
|
* @param bounds Interval to walk, from getStart() through getStop() inclusive; each site is built on contig "chr1".
|
||||||
|
* @param reads Reads expected to appear in the context of every site that the read's GenomeLoc contains (per containsP).
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected void testReadsInContext( LocusView view, GenomeLoc bounds, List<SAMRecord> reads ) {
|
||||||
|
AllLocusView allLocusView = (AllLocusView)view;
|
||||||
|
|
||||||
|
for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) {
|
||||||
|
GenomeLoc site = new GenomeLoc("chr1",i);
|
||||||
|
LocusContext locusContext = allLocusView.next();
|
||||||
|
Assert.assertEquals("Locus context location is incorrect", site, locusContext.getLocation() );
|
||||||
|
int expectedReadsAtSite = 0;
|
||||||
|
|
||||||
|
for( SAMRecord read: reads ) {
|
||||||
|
if(new GenomeLoc(read).containsP(locusContext.getLocation())) {
|
||||||
|
Assert.assertTrue("Target locus context does not contain reads", locusContext.getReads().contains(read) );
|
||||||
|
expectedReadsAtSite++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Assert.assertEquals("Found wrong number of reads at site", expectedReadsAtSite, locusContext.getReads().size());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,31 +1,11 @@
|
||||||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||||
|
|
||||||
import org.junit.Test;
|
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.BeforeClass;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.LocusShard;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.Cigar;
|
|
||||||
import net.sf.samtools.CigarElement;
|
|
||||||
import net.sf.samtools.CigarOperator;
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
|
|
||||||
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
|
|
||||||
import edu.mit.broad.picard.reference.ReferenceSequence;
|
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
* Date: May 12, 2009
|
* Date: May 12, 2009
|
||||||
|
|
@ -40,24 +20,27 @@ import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test the locus context queue.
|
* Test the CoveredLocusView.
|
||||||
*/
|
*/
|
||||||
public class IterableLocusContextQueueTest extends LocusContextQueueTemplate {
|
public class CoveredLocusViewTest extends LocusViewTemplate {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieve a covered locus view.
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
protected LocusContextQueue createQueue(ShardDataProvider provider) {
|
protected LocusView createView(ShardDataProvider provider) {
|
||||||
return new IterableLocusContextQueue(provider);
|
return new CoveredLocusView(provider);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test the reads according to an independently derived context.
|
* Test the reads according to an independently derived context.
|
||||||
* @param queue
|
* @param view
|
||||||
* @param bounds
|
* @param bounds
|
||||||
* @param reads
|
* @param reads
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
protected void testReadsInContext( LocusContextQueue queue, GenomeLoc bounds, List<SAMRecord> reads ) {
|
protected void testReadsInContext( LocusView view, GenomeLoc bounds, List<SAMRecord> reads ) {
|
||||||
IterableLocusContextQueue iterableQueue = (IterableLocusContextQueue)queue;
|
CoveredLocusView coveredLocusView = (CoveredLocusView)view;
|
||||||
|
|
||||||
for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) {
|
for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) {
|
||||||
GenomeLoc site = new GenomeLoc("chr1",i);
|
GenomeLoc site = new GenomeLoc("chr1",i);
|
||||||
|
|
@ -71,12 +54,9 @@ public class IterableLocusContextQueueTest extends LocusContextQueueTemplate {
|
||||||
if( expectedReadsAtSite < 1 )
|
if( expectedReadsAtSite < 1 )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
Assert.assertTrue("Incorrect number of loci in queue",iterableQueue.hasNext());
|
Assert.assertTrue("Incorrect number of loci in view",coveredLocusView.hasNext());
|
||||||
|
|
||||||
GenomeLoc nextLocus = iterableQueue.next();
|
LocusContext locusContext = coveredLocusView.next();
|
||||||
Assert.assertEquals("Next locus context returned is incorrect", site, nextLocus );
|
|
||||||
|
|
||||||
LocusContext locusContext = iterableQueue.seek(site).peek();
|
|
||||||
Assert.assertEquals("Target locus context location is incorrect", site, locusContext.getLocation() );
|
Assert.assertEquals("Target locus context location is incorrect", site, locusContext.getLocation() );
|
||||||
Assert.assertEquals("Found wrong number of reads at site", expectedReadsAtSite, locusContext.getReads().size());
|
Assert.assertEquals("Found wrong number of reads at site", expectedReadsAtSite, locusContext.getReads().size());
|
||||||
|
|
||||||
|
|
@ -86,6 +66,6 @@ public class IterableLocusContextQueueTest extends LocusContextQueueTemplate {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert.assertFalse("Iterator is not bounded at boundaries of shard", iterableQueue.hasNext());
|
Assert.assertFalse("Iterator is not bounded at boundaries of shard", coveredLocusView.hasNext());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -40,10 +40,10 @@ import net.sf.samtools.CigarOperator;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Base support for testing variants of the LocusContextQueue family of classes.
|
* Base support for testing variants of the LocusView family of classes.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public abstract class LocusContextQueueTemplate extends BaseTest {
|
public abstract class LocusViewTemplate extends BaseTest {
|
||||||
protected static ReferenceSequenceFile sequenceSourceFile = null;
|
protected static ReferenceSequenceFile sequenceSourceFile = null;
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
|
|
@ -60,9 +60,9 @@ public abstract class LocusContextQueueTemplate extends BaseTest {
|
||||||
Shard shard = new LocusShard(shardBounds);
|
Shard shard = new LocusShard(shardBounds);
|
||||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||||
|
|
||||||
LocusContextQueue queue = createQueue( dataProvider );
|
LocusView view = createView( dataProvider );
|
||||||
|
|
||||||
testReadsInContext( queue, shard.getGenomeLoc(), Collections.<SAMRecord>emptyList() );
|
testReadsInContext( view, shard.getGenomeLoc(), Collections.<SAMRecord>emptyList() );
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -74,9 +74,9 @@ public abstract class LocusContextQueueTemplate extends BaseTest {
|
||||||
Shard shard = new LocusShard(shardBounds);
|
Shard shard = new LocusShard(shardBounds);
|
||||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||||
|
|
||||||
LocusContextQueue queue = createQueue( dataProvider );
|
LocusView view = createView( dataProvider );
|
||||||
|
|
||||||
testReadsInContext( queue, shard.getGenomeLoc(), Collections.singletonList(read) );
|
testReadsInContext( view, shard.getGenomeLoc(), Collections.singletonList(read) );
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -86,9 +86,9 @@ public abstract class LocusContextQueueTemplate extends BaseTest {
|
||||||
|
|
||||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||||
LocusContextQueue queue = createQueue( dataProvider );
|
LocusView view = createView( dataProvider );
|
||||||
|
|
||||||
testReadsInContext( queue, shard.getGenomeLoc(), Collections.singletonList(read) );
|
testReadsInContext( view, shard.getGenomeLoc(), Collections.singletonList(read) );
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -98,9 +98,9 @@ public abstract class LocusContextQueueTemplate extends BaseTest {
|
||||||
|
|
||||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||||
LocusContextQueue queue = createQueue( dataProvider );
|
LocusView view = createView( dataProvider );
|
||||||
|
|
||||||
testReadsInContext( queue, shard.getGenomeLoc(), Collections.singletonList(read) );
|
testReadsInContext( view, shard.getGenomeLoc(), Collections.singletonList(read) );
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -110,9 +110,9 @@ public abstract class LocusContextQueueTemplate extends BaseTest {
|
||||||
|
|
||||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||||
LocusContextQueue queue = createQueue( dataProvider );
|
LocusView view = createView( dataProvider );
|
||||||
|
|
||||||
testReadsInContext( queue, shard.getGenomeLoc(), Collections.singletonList(read) );
|
testReadsInContext( view, shard.getGenomeLoc(), Collections.singletonList(read) );
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -122,9 +122,9 @@ public abstract class LocusContextQueueTemplate extends BaseTest {
|
||||||
|
|
||||||
Shard shard = new LocusShard(new GenomeLoc("chr1",6,15));
|
Shard shard = new LocusShard(new GenomeLoc("chr1",6,15));
|
||||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||||
LocusContextQueue queue = createQueue( dataProvider );
|
LocusView view = createView( dataProvider );
|
||||||
|
|
||||||
testReadsInContext( queue, shard.getGenomeLoc(), Collections.singletonList(read) );
|
testReadsInContext( view, shard.getGenomeLoc(), Collections.singletonList(read) );
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -134,9 +134,9 @@ public abstract class LocusContextQueueTemplate extends BaseTest {
|
||||||
|
|
||||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||||
LocusContextQueue queue = createQueue( dataProvider );
|
LocusView view = createView( dataProvider );
|
||||||
|
|
||||||
testReadsInContext( queue, shard.getGenomeLoc(), Collections.singletonList(read) );
|
testReadsInContext( view, shard.getGenomeLoc(), Collections.singletonList(read) );
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -147,11 +147,11 @@ public abstract class LocusContextQueueTemplate extends BaseTest {
|
||||||
|
|
||||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||||
LocusContextQueue queue = createQueue( dataProvider );
|
LocusView view = createView( dataProvider );
|
||||||
|
|
||||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||||
Collections.addAll(expectedReads,read1,read2);
|
Collections.addAll(expectedReads,read1,read2);
|
||||||
testReadsInContext( queue, shard.getGenomeLoc(), expectedReads );
|
testReadsInContext( view, shard.getGenomeLoc(), expectedReads );
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -164,11 +164,11 @@ public abstract class LocusContextQueueTemplate extends BaseTest {
|
||||||
|
|
||||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||||
LocusContextQueue queue = createQueue( dataProvider );
|
LocusView view = createView( dataProvider );
|
||||||
|
|
||||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||||
Collections.addAll(expectedReads,read1,read2,read3,read4);
|
Collections.addAll(expectedReads,read1,read2,read3,read4);
|
||||||
testReadsInContext( queue, shard.getGenomeLoc(), expectedReads );
|
testReadsInContext( view, shard.getGenomeLoc(), expectedReads );
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -181,11 +181,11 @@ public abstract class LocusContextQueueTemplate extends BaseTest {
|
||||||
|
|
||||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||||
LocusContextQueue queue = createQueue( dataProvider );
|
LocusView view = createView( dataProvider );
|
||||||
|
|
||||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||||
Collections.addAll(expectedReads,read1,read2,read3,read4);
|
Collections.addAll(expectedReads,read1,read2,read3,read4);
|
||||||
testReadsInContext( queue, shard.getGenomeLoc(), expectedReads );
|
testReadsInContext( view, shard.getGenomeLoc(), expectedReads );
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -200,11 +200,11 @@ public abstract class LocusContextQueueTemplate extends BaseTest {
|
||||||
|
|
||||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||||
LocusContextQueue queue = createQueue( dataProvider );
|
LocusView view = createView( dataProvider );
|
||||||
|
|
||||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||||
Collections.addAll(expectedReads,read1,read2,read3,read4,read5,read6);
|
Collections.addAll(expectedReads,read1,read2,read3,read4,read5,read6);
|
||||||
testReadsInContext( queue, shard.getGenomeLoc(), expectedReads );
|
testReadsInContext( view, shard.getGenomeLoc(), expectedReads );
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -226,27 +226,27 @@ public abstract class LocusContextQueueTemplate extends BaseTest {
|
||||||
|
|
||||||
Shard shard = new LocusShard(new GenomeLoc("chr1",6,15));
|
Shard shard = new LocusShard(new GenomeLoc("chr1",6,15));
|
||||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||||
LocusContextQueue queue = createQueue( dataProvider );
|
LocusView view = createView( dataProvider );
|
||||||
|
|
||||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||||
Collections.addAll(expectedReads,read01,read02,read03,read04,read05,read06,
|
Collections.addAll(expectedReads,read01,read02,read03,read04,read05,read06,
|
||||||
read07,read08,read09,read10,read11,read12);
|
read07,read08,read09,read10,read11,read12);
|
||||||
testReadsInContext( queue, shard.getGenomeLoc(), expectedReads );
|
testReadsInContext( view, shard.getGenomeLoc(), expectedReads );
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a queue of the type required for testing.
|
* Creates a view of the type required for testing.
|
||||||
* @return The correct queue to test.
|
* @return The correct view to test.
|
||||||
*/
|
*/
|
||||||
protected abstract LocusContextQueue createQueue( ShardDataProvider provider );
|
protected abstract LocusView createView( ShardDataProvider provider );
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test the reads according to an independently derived context.
|
* Test the reads according to an independently derived context.
|
||||||
* @param queue
|
* @param view
|
||||||
* @param bounds
|
* @param bounds
|
||||||
* @param reads
|
* @param reads
|
||||||
*/
|
*/
|
||||||
protected abstract void testReadsInContext( LocusContextQueue queue, GenomeLoc bounds, List<SAMRecord> reads );
|
protected abstract void testReadsInContext( LocusView view, GenomeLoc bounds, List<SAMRecord> reads );
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Fake a reference sequence file. Essentially, seek a header with a bunch of dummy data.
|
* Fake a reference sequence file. Essentially, seek a header with a bunch of dummy data.
|
||||||
|
|
@ -1,90 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
|
||||||
|
|
||||||
import org.junit.Test;
|
|
||||||
import org.junit.Assert;
|
|
||||||
import org.junit.BeforeClass;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.LocusShard;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import net.sf.samtools.Cigar;
|
|
||||||
import net.sf.samtools.CigarElement;
|
|
||||||
import net.sf.samtools.CigarOperator;
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
|
|
||||||
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
|
|
||||||
import edu.mit.broad.picard.reference.ReferenceSequence;
|
|
||||||
/**
|
|
||||||
* User: hanna
|
|
||||||
* Date: May 12, 2009
|
|
||||||
* Time: 2:34:46 PM
|
|
||||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
|
||||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
|
||||||
* All rights are reserved.
|
|
||||||
*
|
|
||||||
* Users acknowledge that this software is supplied without any warranty or support.
|
|
||||||
* The Broad Institute is not responsible for its use, misuse, or
|
|
||||||
* functionality.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test the locus context queue.
|
|
||||||
*/
|
|
||||||
public class SeekableLocusContextQueueTest extends LocusContextQueueTemplate {
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Retrieve a seekable locus context queue.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
protected LocusContextQueue createQueue(ShardDataProvider provider) {
|
|
||||||
return new SeekableLocusContextQueue(provider);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test the reads according to an independently derived context.
|
|
||||||
* @param queue
|
|
||||||
* @param bounds
|
|
||||||
* @param reads
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
protected void testReadsInContext( LocusContextQueue queue, GenomeLoc bounds, List<SAMRecord> reads ) {
|
|
||||||
SeekableLocusContextQueue seekableQueue = (SeekableLocusContextQueue)queue;
|
|
||||||
|
|
||||||
Assert.assertEquals("Initial position of queue is incorrect", new GenomeLoc(bounds.getContig(),bounds.getStart()), seekableQueue.getSeekPoint() );
|
|
||||||
|
|
||||||
for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) {
|
|
||||||
GenomeLoc site = new GenomeLoc("chr1",i);
|
|
||||||
seekableQueue.seek(site);
|
|
||||||
Assert.assertEquals("Seeked queue is incorrect", site, seekableQueue.getSeekPoint() );
|
|
||||||
|
|
||||||
LocusContext locusContext = seekableQueue.peek();
|
|
||||||
Assert.assertEquals("Target locus context location is incorrect", site, locusContext.getLocation() );
|
|
||||||
int expectedReadsAtSite = 0;
|
|
||||||
|
|
||||||
for( SAMRecord read: reads ) {
|
|
||||||
if(new GenomeLoc(read).containsP(locusContext.getLocation())) {
|
|
||||||
Assert.assertTrue("Target locus context does not contain reads", locusContext.getReads().contains(read) );
|
|
||||||
expectedReadsAtSite++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Assert.assertEquals("Found wrong number of reads at site", expectedReadsAtSite, locusContext.getReads().size());
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
Loading…
Reference in New Issue