diff --git a/playground/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisTK.java b/playground/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisTK.java index 0ff2c2b3e..134d39816 100644 --- a/playground/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisTK.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisTK.java @@ -42,6 +42,7 @@ public class GenomeAnalysisTK extends CommandLineProgram { addModule("SingleSampleGenotyper", new SingleSampleGenotyper()); addModule("Null", new NullWalker()); addModule("DepthOfCoverage", new DepthOfCoverageWalker()); + addModule("CountMismatches", new MismatchCounterWalker()); } private TraversalEngine engine = null; diff --git a/playground/java/src/org/broadinstitute/sting/gatk/LocusContext.java b/playground/java/src/org/broadinstitute/sting/gatk/LocusContext.java index bb84463a2..c17815ac2 100755 --- a/playground/java/src/org/broadinstitute/sting/gatk/LocusContext.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/LocusContext.java @@ -3,27 +3,99 @@ package org.broadinstitute.sting.gatk; import net.sf.samtools.SAMRecord; import java.util.List; +import java.lang.ref.Reference; import org.broadinstitute.sting.utils.GenomeLoc; +import edu.mit.broad.picard.reference.ReferenceSequence; /** + * Useful class for forwarding on locusContext data from this iterator + * * Created by IntelliJ IDEA. * User: mdepristo * Date: Feb 22, 2009 * Time: 3:01:34 PM * To change this template use File | Settings | File Templates. */ -public interface LocusContext { - // get all of the reads within this context - public List getReads(); +public class LocusContext { + private GenomeLoc loc = null; + private List reads = null; + private List offsets = null; + private ReferenceSequence refContig = null; - // get a list of the equivalent positions within in the reads at Pos - public List getOffsets(); + /** + * Create a new LocusContext object + * + * @param loc + * @param reads + * @param offsets + */ + public LocusContext(GenomeLoc loc, List reads, List offsets) { + this.loc = loc; + this.reads = reads; + this.offsets = offsets; + } + /** + * get all of the reads within this context + * + * @return + */ + public List getReads() { return reads; } - public String getContig(); - public long getPosition(); - public GenomeLoc getLocation(); + /** + * Are there any reads associated with this locus? + * + * @return + */ + public boolean hasReads() { + return reads != null; + } - public int numReads(); + /** + * How many reads cover this locus? + * @return + */ + public int numReads() { + assert( reads != null ); + return reads.size(); + } + + /** + * get a list of the equivalent positions within in the reads at Pos + * + * @return + */ + public List getOffsets() { + return offsets; + } + + public String getContig() { return getLocation().getContig(); } + public long getPosition() { return getLocation().getStart(); } + public GenomeLoc getLocation() { return loc; } + + /** + * Returns the entire reference sequence contig associated with these reads + * + * @return ReferenceSequence object, or null if unavailable + */ + public ReferenceSequence getReferenceContig() { + return refContig; + } + + /** + * @return True if reference sequence contig is available + */ + public boolean hasReferenceContig() { + return refContig != null; + } + + /** + * Sets the reference sequence for this locus to contig + * + * @param contig + */ + public void setReferenceContig(final ReferenceSequence contig) { + refContig = contig; + } } diff --git a/playground/java/src/org/broadinstitute/sting/gatk/TraversalEngine.java b/playground/java/src/org/broadinstitute/sting/gatk/TraversalEngine.java index ec6b12fb2..1c2a71580 100755 --- a/playground/java/src/org/broadinstitute/sting/gatk/TraversalEngine.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/TraversalEngine.java @@ -451,6 +451,7 @@ public class TraversalEngine { // Jump forward in the reference to this locus location final ReferenceIterator refSite = refIter.seekForward(locus.getLocation()); final char refBase = refSite.getBaseAsChar(); + locus.setReferenceContig(refSite.getCurrentContig()); // Iterate forward to get all reference ordered data covering this locus final List rodData = getReferenceOrderedDataAtLocus(rodIters, locus.getLocation()); @@ -502,6 +503,7 @@ public class TraversalEngine { // Initialize the sum R sum = walker.reduceInit(); + List offsets = Arrays.asList(0); // Offset of a single read is always 0 boolean done = false; while ( samReadIter.hasNext() && ! done ) { @@ -509,16 +511,23 @@ public class TraversalEngine { // get the next read final SAMRecord read = samReadIter.next(); - GenomeLoc loc = new GenomeLoc(read.getReferenceName(), read.getAlignmentStart()); + final List reads = Arrays.asList(read); + GenomeLoc loc = Utils.genomicLocationOf(read); + + // Jump forward in the reference to this locus location + final ReferenceIterator refSite = refIter.seekForward(loc); + final char refBase = refSite.getBaseAsChar(); + LocusContext locus = new LocusContext(loc, reads, offsets); + locus.setReferenceContig(refSite.getCurrentContig()); if ( inLocations(loc) ) { // // execute the walker contact // - final boolean keepMeP = walker.filter(null, read); + final boolean keepMeP = walker.filter(locus, read); if ( keepMeP ) { - M x = walker.map(null, read); + M x = walker.map(locus, read); sum = walker.reduce(x, sum); } diff --git a/playground/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByHanger.java b/playground/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByHanger.java index 67b27a201..edf8b5686 100755 --- a/playground/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByHanger.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByHanger.java @@ -30,29 +30,6 @@ public class LocusIteratorByHanger extends LocusIterator { final int INCREMENT_SIZE = 100; final boolean DEBUG = false; - /** sy - * Useful class for forwarding on locusContext data from this iterator - */ - public class MyLocusContext implements LocusContext { - GenomeLoc loc = null; - private List reads = null; - private List offsets = null; - - private MyLocusContext(GenomeLoc loc, List reads, List offsets) { - this.loc = loc; - this.reads = reads; - this.offsets = offsets; - } - - public String getContig() { return getLocation().getContig(); } - public long getPosition() { return getLocation().getStart(); } - public GenomeLoc getLocation() { return loc; } - - public List getReads() { return reads; } - public List getOffsets() { return offsets; } - public int numReads() { return reads.size(); } - } - // ----------------------------------------------------------------------------------------------------------------- // // constructors and other basic operations @@ -95,7 +72,7 @@ public class LocusIteratorByHanger extends LocusIterator { // next() routine and associated collection operations // // ----------------------------------------------------------------------------------------------------------------- - public MyLocusContext next() { + public LocusContext next() { if ( ! currentPositionIsFullyCovered() ) expandWindow(INCREMENT_SIZE); @@ -107,7 +84,7 @@ public class LocusIteratorByHanger extends LocusIterator { RefHanger.Hanger rhanger = readHanger.popLeft(); RefHanger.Hanger ohanger = offsetHanger.popLeft(); - return new MyLocusContext(rhanger.loc, rhanger.data, ohanger.data); + return new LocusContext(rhanger.loc, rhanger.data, ohanger.data); } protected void hangRead(final SAMRecord read) { diff --git a/playground/java/src/org/broadinstitute/sting/gatk/iterators/SingleLocusIterator.java b/playground/java/src/org/broadinstitute/sting/gatk/iterators/SingleLocusIterator.java index 749489d44..b6037d36f 100755 --- a/playground/java/src/org/broadinstitute/sting/gatk/iterators/SingleLocusIterator.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/iterators/SingleLocusIterator.java @@ -19,7 +19,7 @@ import edu.mit.broad.picard.filter.FilteringIterator; /** * Iterator that traverses a SAM File, accumulating information on a per-locus basis */ -public class SingleLocusIterator extends LocusIterator implements LocusContext { +public class SingleLocusIterator extends LocusIterator { // ----------------------------------------------------------------------------------------------------------------- // @@ -32,14 +32,6 @@ public class SingleLocusIterator extends LocusIterator implements LocusContext { private List reads = new ArrayList(100); private List offsets = new ArrayList(100); - public String getContig() { return contig; } - public long getPosition() { return position; } - public GenomeLoc getLocation() { return new GenomeLoc(contig, position); } - - public List getReads() { return reads; } - public List getOffsets() { return offsets; } - public int numReads() { return reads.size(); } - // ----------------------------------------------------------------------------------------------------------------- // // constructors and other basic operations @@ -97,7 +89,7 @@ public class SingleLocusIterator extends LocusIterator implements LocusContext { // at this point, window contains all reads covering the pos, we need to return them // and the offsets into each read for this loci calcOffsetsOfWindow(position); - return this; + return new LocusContext(new GenomeLoc(contig, position), reads, offsets); } } diff --git a/playground/java/src/org/broadinstitute/sting/utils/Utils.java b/playground/java/src/org/broadinstitute/sting/utils/Utils.java index 6f03a989e..4616d791e 100755 --- a/playground/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/playground/java/src/org/broadinstitute/sting/utils/Utils.java @@ -29,6 +29,28 @@ public class Utils { return filtered; } + public static ArrayList subseq(byte[] fullArray) { + return subseq(fullArray, 0, fullArray.length); + } + + public static ArrayList subseq(byte[] fullArray, int start, int end) { + ArrayList dest = new ArrayList(end-start+1); + for ( int i = start; i < end; i++ ) { + dest.add(fullArray[i]); + } + return dest; + } + + public static String baseList2string(List bases) { + byte[] basesAsbytes = new byte[bases.size()]; + int i = 0; + for ( Byte b : bases ) { + basesAsbytes[i] = b; + i++; + } + return new String(basesAsbytes); + } + public static GenomeLoc genomicLocationOf( final SAMRecord read ) { return new GenomeLoc( read.getReferenceName(), read.getAlignmentStart() ); }