Added support for accessing the reference in read traversal

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@60 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-03-16 14:46:19 +00:00
parent feb70ff627
commit 3b5003bd11
6 changed files with 120 additions and 47 deletions

View File

@ -42,6 +42,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
addModule("SingleSampleGenotyper", new SingleSampleGenotyper());
addModule("Null", new NullWalker());
addModule("DepthOfCoverage", new DepthOfCoverageWalker());
addModule("CountMismatches", new MismatchCounterWalker());
}
private TraversalEngine engine = null;

View File

@ -3,27 +3,99 @@ package org.broadinstitute.sting.gatk;
import net.sf.samtools.SAMRecord;
import java.util.List;
import java.lang.ref.Reference;
import org.broadinstitute.sting.utils.GenomeLoc;
import edu.mit.broad.picard.reference.ReferenceSequence;
/**
* Useful class for forwarding on locusContext data from this iterator
*
* Created by IntelliJ IDEA.
* User: mdepristo
* Date: Feb 22, 2009
* Time: 3:01:34 PM
* To change this template use File | Settings | File Templates.
*/
public interface LocusContext {
// get all of the reads within this context
public List<SAMRecord> getReads();
public class LocusContext {
private GenomeLoc loc = null;
private List<SAMRecord> reads = null;
private List<Integer> offsets = null;
private ReferenceSequence refContig = null;
// get a list of the equivalent positions within in the reads at Pos
public List<Integer> getOffsets();
/**
* Create a new LocusContext object
*
* @param loc
* @param reads
* @param offsets
*/
public LocusContext(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets) {
this.loc = loc;
this.reads = reads;
this.offsets = offsets;
}
/**
* get all of the reads within this context
*
* @return
*/
public List<SAMRecord> getReads() { return reads; }
public String getContig();
public long getPosition();
public GenomeLoc getLocation();
/**
* Are there any reads associated with this locus?
*
* @return
*/
public boolean hasReads() {
return reads != null;
}
public int numReads();
/**
* How many reads cover this locus?
* @return
*/
public int numReads() {
assert( reads != null );
return reads.size();
}
/**
* get a list of the equivalent positions within in the reads at Pos
*
* @return
*/
public List<Integer> getOffsets() {
return offsets;
}
public String getContig() { return getLocation().getContig(); }
public long getPosition() { return getLocation().getStart(); }
public GenomeLoc getLocation() { return loc; }
/**
* Returns the entire reference sequence contig associated with these reads
*
* @return ReferenceSequence object, or null if unavailable
*/
public ReferenceSequence getReferenceContig() {
return refContig;
}
/**
* @return True if reference sequence contig is available
*/
public boolean hasReferenceContig() {
return refContig != null;
}
/**
* Sets the reference sequence for this locus to contig
*
* @param contig
*/
public void setReferenceContig(final ReferenceSequence contig) {
refContig = contig;
}
}

View File

@ -451,6 +451,7 @@ public class TraversalEngine {
// Jump forward in the reference to this locus location
final ReferenceIterator refSite = refIter.seekForward(locus.getLocation());
final char refBase = refSite.getBaseAsChar();
locus.setReferenceContig(refSite.getCurrentContig());
// Iterate forward to get all reference ordered data covering this locus
final List<ReferenceOrderedDatum> rodData = getReferenceOrderedDataAtLocus(rodIters, locus.getLocation());
@ -502,6 +503,7 @@ public class TraversalEngine {
// Initialize the sum
R sum = walker.reduceInit();
List<Integer> offsets = Arrays.asList(0); // Offset of a single read is always 0
boolean done = false;
while ( samReadIter.hasNext() && ! done ) {
@ -509,16 +511,23 @@ public class TraversalEngine {
// get the next read
final SAMRecord read = samReadIter.next();
GenomeLoc loc = new GenomeLoc(read.getReferenceName(), read.getAlignmentStart());
final List<SAMRecord> reads = Arrays.asList(read);
GenomeLoc loc = Utils.genomicLocationOf(read);
// Jump forward in the reference to this locus location
final ReferenceIterator refSite = refIter.seekForward(loc);
final char refBase = refSite.getBaseAsChar();
LocusContext locus = new LocusContext(loc, reads, offsets);
locus.setReferenceContig(refSite.getCurrentContig());
if ( inLocations(loc) ) {
//
// execute the walker contact
//
final boolean keepMeP = walker.filter(null, read);
final boolean keepMeP = walker.filter(locus, read);
if ( keepMeP ) {
M x = walker.map(null, read);
M x = walker.map(locus, read);
sum = walker.reduce(x, sum);
}

View File

@ -30,29 +30,6 @@ public class LocusIteratorByHanger extends LocusIterator {
final int INCREMENT_SIZE = 100;
final boolean DEBUG = false;
/** sy
* Useful class for forwarding on locusContext data from this iterator
*/
public class MyLocusContext implements LocusContext {
GenomeLoc loc = null;
private List<SAMRecord> reads = null;
private List<Integer> offsets = null;
private MyLocusContext(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets) {
this.loc = loc;
this.reads = reads;
this.offsets = offsets;
}
public String getContig() { return getLocation().getContig(); }
public long getPosition() { return getLocation().getStart(); }
public GenomeLoc getLocation() { return loc; }
public List<SAMRecord> getReads() { return reads; }
public List<Integer> getOffsets() { return offsets; }
public int numReads() { return reads.size(); }
}
// -----------------------------------------------------------------------------------------------------------------
//
// constructors and other basic operations
@ -95,7 +72,7 @@ public class LocusIteratorByHanger extends LocusIterator {
// next() routine and associated collection operations
//
// -----------------------------------------------------------------------------------------------------------------
public MyLocusContext next() {
public LocusContext next() {
if ( ! currentPositionIsFullyCovered() )
expandWindow(INCREMENT_SIZE);
@ -107,7 +84,7 @@ public class LocusIteratorByHanger extends LocusIterator {
RefHanger.Hanger rhanger = readHanger.popLeft();
RefHanger.Hanger ohanger = offsetHanger.popLeft();
return new MyLocusContext(rhanger.loc, rhanger.data, ohanger.data);
return new LocusContext(rhanger.loc, rhanger.data, ohanger.data);
}
protected void hangRead(final SAMRecord read) {

View File

@ -19,7 +19,7 @@ import edu.mit.broad.picard.filter.FilteringIterator;
/**
* Iterator that traverses a SAM File, accumulating information on a per-locus basis
*/
public class SingleLocusIterator extends LocusIterator implements LocusContext {
public class SingleLocusIterator extends LocusIterator {
// -----------------------------------------------------------------------------------------------------------------
//
@ -32,14 +32,6 @@ public class SingleLocusIterator extends LocusIterator implements LocusContext {
private List<SAMRecord> reads = new ArrayList<SAMRecord>(100);
private List<Integer> offsets = new ArrayList<Integer>(100);
public String getContig() { return contig; }
public long getPosition() { return position; }
public GenomeLoc getLocation() { return new GenomeLoc(contig, position); }
public List<SAMRecord> getReads() { return reads; }
public List<Integer> getOffsets() { return offsets; }
public int numReads() { return reads.size(); }
// -----------------------------------------------------------------------------------------------------------------
//
// constructors and other basic operations
@ -97,7 +89,7 @@ public class SingleLocusIterator extends LocusIterator implements LocusContext {
// at this point, window contains all reads covering the pos, we need to return them
// and the offsets into each read for this loci
calcOffsetsOfWindow(position);
return this;
return new LocusContext(new GenomeLoc(contig, position), reads, offsets);
}
}

View File

@ -29,6 +29,28 @@ public class Utils {
return filtered;
}
public static ArrayList<Byte> subseq(byte[] fullArray) {
return subseq(fullArray, 0, fullArray.length);
}
public static ArrayList<Byte> subseq(byte[] fullArray, int start, int end) {
ArrayList<Byte> dest = new ArrayList<Byte>(end-start+1);
for ( int i = start; i < end; i++ ) {
dest.add(fullArray[i]);
}
return dest;
}
public static String baseList2string(List<Byte> bases) {
byte[] basesAsbytes = new byte[bases.size()];
int i = 0;
for ( Byte b : bases ) {
basesAsbytes[i] = b;
i++;
}
return new String(basesAsbytes);
}
public static GenomeLoc genomicLocationOf( final SAMRecord read ) {
return new GenomeLoc( read.getReferenceName(), read.getAlignmentStart() );
}