Added support for accessing the reference in read traversal
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@60 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
feb70ff627
commit
3b5003bd11
|
|
@ -42,6 +42,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
|||
addModule("SingleSampleGenotyper", new SingleSampleGenotyper());
|
||||
addModule("Null", new NullWalker());
|
||||
addModule("DepthOfCoverage", new DepthOfCoverageWalker());
|
||||
addModule("CountMismatches", new MismatchCounterWalker());
|
||||
}
|
||||
|
||||
private TraversalEngine engine = null;
|
||||
|
|
|
|||
|
|
@ -3,27 +3,99 @@ package org.broadinstitute.sting.gatk;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
import java.lang.ref.Reference;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||
|
||||
/**
|
||||
* Useful class for forwarding on locusContext data from this iterator
|
||||
*
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mdepristo
|
||||
* Date: Feb 22, 2009
|
||||
* Time: 3:01:34 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public interface LocusContext {
|
||||
// get all of the reads within this context
|
||||
public List<SAMRecord> getReads();
|
||||
public class LocusContext {
|
||||
private GenomeLoc loc = null;
|
||||
private List<SAMRecord> reads = null;
|
||||
private List<Integer> offsets = null;
|
||||
private ReferenceSequence refContig = null;
|
||||
|
||||
// get a list of the equivalent positions within in the reads at Pos
|
||||
public List<Integer> getOffsets();
|
||||
/**
|
||||
* Create a new LocusContext object
|
||||
*
|
||||
* @param loc
|
||||
* @param reads
|
||||
* @param offsets
|
||||
*/
|
||||
public LocusContext(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets) {
|
||||
this.loc = loc;
|
||||
this.reads = reads;
|
||||
this.offsets = offsets;
|
||||
}
|
||||
|
||||
/**
|
||||
* get all of the reads within this context
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public List<SAMRecord> getReads() { return reads; }
|
||||
|
||||
public String getContig();
|
||||
public long getPosition();
|
||||
public GenomeLoc getLocation();
|
||||
/**
|
||||
* Are there any reads associated with this locus?
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public boolean hasReads() {
|
||||
return reads != null;
|
||||
}
|
||||
|
||||
public int numReads();
|
||||
/**
|
||||
* How many reads cover this locus?
|
||||
* @return
|
||||
*/
|
||||
public int numReads() {
|
||||
assert( reads != null );
|
||||
return reads.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* get a list of the equivalent positions within in the reads at Pos
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public List<Integer> getOffsets() {
|
||||
return offsets;
|
||||
}
|
||||
|
||||
public String getContig() { return getLocation().getContig(); }
|
||||
public long getPosition() { return getLocation().getStart(); }
|
||||
public GenomeLoc getLocation() { return loc; }
|
||||
|
||||
/**
|
||||
* Returns the entire reference sequence contig associated with these reads
|
||||
*
|
||||
* @return ReferenceSequence object, or null if unavailable
|
||||
*/
|
||||
public ReferenceSequence getReferenceContig() {
|
||||
return refContig;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return True if reference sequence contig is available
|
||||
*/
|
||||
public boolean hasReferenceContig() {
|
||||
return refContig != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the reference sequence for this locus to contig
|
||||
*
|
||||
* @param contig
|
||||
*/
|
||||
public void setReferenceContig(final ReferenceSequence contig) {
|
||||
refContig = contig;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -451,6 +451,7 @@ public class TraversalEngine {
|
|||
// Jump forward in the reference to this locus location
|
||||
final ReferenceIterator refSite = refIter.seekForward(locus.getLocation());
|
||||
final char refBase = refSite.getBaseAsChar();
|
||||
locus.setReferenceContig(refSite.getCurrentContig());
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this locus
|
||||
final List<ReferenceOrderedDatum> rodData = getReferenceOrderedDataAtLocus(rodIters, locus.getLocation());
|
||||
|
|
@ -502,6 +503,7 @@ public class TraversalEngine {
|
|||
|
||||
// Initialize the sum
|
||||
R sum = walker.reduceInit();
|
||||
List<Integer> offsets = Arrays.asList(0); // Offset of a single read is always 0
|
||||
|
||||
boolean done = false;
|
||||
while ( samReadIter.hasNext() && ! done ) {
|
||||
|
|
@ -509,16 +511,23 @@ public class TraversalEngine {
|
|||
|
||||
// get the next read
|
||||
final SAMRecord read = samReadIter.next();
|
||||
GenomeLoc loc = new GenomeLoc(read.getReferenceName(), read.getAlignmentStart());
|
||||
final List<SAMRecord> reads = Arrays.asList(read);
|
||||
GenomeLoc loc = Utils.genomicLocationOf(read);
|
||||
|
||||
// Jump forward in the reference to this locus location
|
||||
final ReferenceIterator refSite = refIter.seekForward(loc);
|
||||
final char refBase = refSite.getBaseAsChar();
|
||||
LocusContext locus = new LocusContext(loc, reads, offsets);
|
||||
locus.setReferenceContig(refSite.getCurrentContig());
|
||||
|
||||
if ( inLocations(loc) ) {
|
||||
|
||||
//
|
||||
// execute the walker contact
|
||||
//
|
||||
final boolean keepMeP = walker.filter(null, read);
|
||||
final boolean keepMeP = walker.filter(locus, read);
|
||||
if ( keepMeP ) {
|
||||
M x = walker.map(null, read);
|
||||
M x = walker.map(locus, read);
|
||||
sum = walker.reduce(x, sum);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -30,29 +30,6 @@ public class LocusIteratorByHanger extends LocusIterator {
|
|||
final int INCREMENT_SIZE = 100;
|
||||
final boolean DEBUG = false;
|
||||
|
||||
/** sy
|
||||
* Useful class for forwarding on locusContext data from this iterator
|
||||
*/
|
||||
public class MyLocusContext implements LocusContext {
|
||||
GenomeLoc loc = null;
|
||||
private List<SAMRecord> reads = null;
|
||||
private List<Integer> offsets = null;
|
||||
|
||||
private MyLocusContext(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets) {
|
||||
this.loc = loc;
|
||||
this.reads = reads;
|
||||
this.offsets = offsets;
|
||||
}
|
||||
|
||||
public String getContig() { return getLocation().getContig(); }
|
||||
public long getPosition() { return getLocation().getStart(); }
|
||||
public GenomeLoc getLocation() { return loc; }
|
||||
|
||||
public List<SAMRecord> getReads() { return reads; }
|
||||
public List<Integer> getOffsets() { return offsets; }
|
||||
public int numReads() { return reads.size(); }
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// constructors and other basic operations
|
||||
|
|
@ -95,7 +72,7 @@ public class LocusIteratorByHanger extends LocusIterator {
|
|||
// next() routine and associated collection operations
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
public MyLocusContext next() {
|
||||
public LocusContext next() {
|
||||
if ( ! currentPositionIsFullyCovered() )
|
||||
expandWindow(INCREMENT_SIZE);
|
||||
|
||||
|
|
@ -107,7 +84,7 @@ public class LocusIteratorByHanger extends LocusIterator {
|
|||
RefHanger.Hanger rhanger = readHanger.popLeft();
|
||||
RefHanger.Hanger ohanger = offsetHanger.popLeft();
|
||||
|
||||
return new MyLocusContext(rhanger.loc, rhanger.data, ohanger.data);
|
||||
return new LocusContext(rhanger.loc, rhanger.data, ohanger.data);
|
||||
}
|
||||
|
||||
protected void hangRead(final SAMRecord read) {
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ import edu.mit.broad.picard.filter.FilteringIterator;
|
|||
/**
|
||||
* Iterator that traverses a SAM File, accumulating information on a per-locus basis
|
||||
*/
|
||||
public class SingleLocusIterator extends LocusIterator implements LocusContext {
|
||||
public class SingleLocusIterator extends LocusIterator {
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -32,14 +32,6 @@ public class SingleLocusIterator extends LocusIterator implements LocusContext {
|
|||
private List<SAMRecord> reads = new ArrayList<SAMRecord>(100);
|
||||
private List<Integer> offsets = new ArrayList<Integer>(100);
|
||||
|
||||
public String getContig() { return contig; }
|
||||
public long getPosition() { return position; }
|
||||
public GenomeLoc getLocation() { return new GenomeLoc(contig, position); }
|
||||
|
||||
public List<SAMRecord> getReads() { return reads; }
|
||||
public List<Integer> getOffsets() { return offsets; }
|
||||
public int numReads() { return reads.size(); }
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// constructors and other basic operations
|
||||
|
|
@ -97,7 +89,7 @@ public class SingleLocusIterator extends LocusIterator implements LocusContext {
|
|||
// at this point, window contains all reads covering the pos, we need to return them
|
||||
// and the offsets into each read for this loci
|
||||
calcOffsetsOfWindow(position);
|
||||
return this;
|
||||
return new LocusContext(new GenomeLoc(contig, position), reads, offsets);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -29,6 +29,28 @@ public class Utils {
|
|||
return filtered;
|
||||
}
|
||||
|
||||
public static ArrayList<Byte> subseq(byte[] fullArray) {
|
||||
return subseq(fullArray, 0, fullArray.length);
|
||||
}
|
||||
|
||||
public static ArrayList<Byte> subseq(byte[] fullArray, int start, int end) {
|
||||
ArrayList<Byte> dest = new ArrayList<Byte>(end-start+1);
|
||||
for ( int i = start; i < end; i++ ) {
|
||||
dest.add(fullArray[i]);
|
||||
}
|
||||
return dest;
|
||||
}
|
||||
|
||||
public static String baseList2string(List<Byte> bases) {
|
||||
byte[] basesAsbytes = new byte[bases.size()];
|
||||
int i = 0;
|
||||
for ( Byte b : bases ) {
|
||||
basesAsbytes[i] = b;
|
||||
i++;
|
||||
}
|
||||
return new String(basesAsbytes);
|
||||
}
|
||||
|
||||
public static GenomeLoc genomicLocationOf( final SAMRecord read ) {
|
||||
return new GenomeLoc( read.getReferenceName(), read.getAlignmentStart() );
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue