diff --git a/playground/java/build.xml b/playground/java/build.xml index 939b0a989..fecfd682f 100644 --- a/playground/java/build.xml +++ b/playground/java/build.xml @@ -61,7 +61,7 @@ - + diff --git a/playground/java/src/org/broadinstitute/sting/atk/AnalysisTK.java b/playground/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisTK.java similarity index 92% rename from playground/java/src/org/broadinstitute/sting/atk/AnalysisTK.java rename to playground/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisTK.java index 86ac6ba49..0ff2c2b3e 100644 --- a/playground/java/src/org/broadinstitute/sting/atk/AnalysisTK.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisTK.java @@ -1,19 +1,19 @@ -package org.broadinstitute.sting.atk; +package org.broadinstitute.sting.gatk; import net.sf.samtools.SAMFileReader.ValidationStringency; import edu.mit.broad.picard.cmdline.CommandLineProgram; import edu.mit.broad.picard.cmdline.Usage; import edu.mit.broad.picard.cmdline.Option; -import org.broadinstitute.sting.atk.modules.*; -import org.broadinstitute.sting.utils.ReferenceOrderedData; -import org.broadinstitute.sting.utils.rodGFF; -import org.broadinstitute.sting.utils.rodDbSNP; +import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; +import org.broadinstitute.sting.gatk.refdata.rodDbSNP; +import org.broadinstitute.sting.gatk.refdata.rodGFF; import java.io.*; import java.util.HashMap; -public class AnalysisTK extends CommandLineProgram { +public class GenomeAnalysisTK extends CommandLineProgram { // Usage and parameters @Usage(programVersion="0.1") public String USAGE = "SAM Validator\n"; @Option(shortName="I", doc="SAM or BAM file for validation") public File INPUT_FILE; @@ -49,7 +49,7 @@ public class AnalysisTK extends CommandLineProgram { /** Required main method implementation. */ public static void main(String[] argv) { - System.exit(new AnalysisTK().instanceMain(argv)); + System.exit(new GenomeAnalysisTK().instanceMain(argv)); } protected int doWork() { @@ -75,7 +75,6 @@ public class AnalysisTK extends CommandLineProgram { } this.engine = new TraversalEngine(INPUT_FILE, REF_FILE_ARG, rods); - //engine.testReference(); ValidationStringency strictness; if ( STRICTNESS_ARG == null ) { @@ -96,12 +95,13 @@ public class AnalysisTK extends CommandLineProgram { engine.setDebugging(! ( DEBUGGING_STR == null || DEBUGGING_STR.toLowerCase().equals("true"))); engine.setMaxReads(Integer.parseInt(MAX_READS_ARG)); - engine.initialize(ENABLED_THREADED_IO.toLowerCase().equals("true")); - if ( REGION_STR != null ) { engine.setLocation(REGION_STR); } + engine.initialize(ENABLED_THREADED_IO.toLowerCase().equals("true")); + //engine.testReference(); + //LocusWalker walker = new PileupWalker(); // Try to get the module specified diff --git a/playground/java/src/org/broadinstitute/sting/atk/LocusContext.java b/playground/java/src/org/broadinstitute/sting/gatk/LocusContext.java similarity index 94% rename from playground/java/src/org/broadinstitute/sting/atk/LocusContext.java rename to playground/java/src/org/broadinstitute/sting/gatk/LocusContext.java index d08339cad..bb84463a2 100755 --- a/playground/java/src/org/broadinstitute/sting/atk/LocusContext.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/LocusContext.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.atk; +package org.broadinstitute.sting.gatk; import net.sf.samtools.SAMRecord; diff --git a/playground/java/src/org/broadinstitute/sting/atk/PrepareROD.java b/playground/java/src/org/broadinstitute/sting/gatk/PrepareROD.java similarity index 92% rename from playground/java/src/org/broadinstitute/sting/atk/PrepareROD.java rename to playground/java/src/org/broadinstitute/sting/gatk/PrepareROD.java index c393f2afc..860cd9b78 100644 --- a/playground/java/src/org/broadinstitute/sting/atk/PrepareROD.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/PrepareROD.java @@ -1,16 +1,17 @@ -package org.broadinstitute.sting.atk; +package org.broadinstitute.sting.gatk; -import net.sf.samtools.SAMFileReader.ValidationStringency; import net.sf.samtools.SAMSequenceRecord; import edu.mit.broad.picard.cmdline.CommandLineProgram; import edu.mit.broad.picard.cmdline.Usage; import edu.mit.broad.picard.cmdline.Option; import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory; -import edu.mit.broad.picard.reference.ReferenceSequence; import edu.mit.broad.picard.reference.ReferenceSequenceFile; -import org.broadinstitute.sting.atk.modules.*; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.rodDbSNP; +import org.broadinstitute.sting.gatk.refdata.rodGFF; import java.io.*; import java.util.HashMap; diff --git a/playground/java/src/org/broadinstitute/sting/atk/TraversalEngine.java b/playground/java/src/org/broadinstitute/sting/gatk/TraversalEngine.java similarity index 92% rename from playground/java/src/org/broadinstitute/sting/atk/TraversalEngine.java rename to playground/java/src/org/broadinstitute/sting/gatk/TraversalEngine.java index b277a0ae1..ec6b12fb2 100755 --- a/playground/java/src/org/broadinstitute/sting/atk/TraversalEngine.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/TraversalEngine.java @@ -1,8 +1,7 @@ -package org.broadinstitute.sting.atk; +package org.broadinstitute.sting.gatk; import net.sf.samtools.*; import net.sf.samtools.SAMFileReader.ValidationStringency; -import net.sf.samtools.util.CloseableIterator; import net.sf.samtools.util.RuntimeIOException; import edu.mit.broad.picard.filter.SamRecordFilter; import edu.mit.broad.picard.filter.FilteringIterator; @@ -10,6 +9,11 @@ import edu.mit.broad.picard.reference.ReferenceSequenceFile; import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory; import edu.mit.broad.picard.reference.ReferenceSequence; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.gatk.iterators.*; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import java.io.*; import java.util.*; @@ -22,8 +26,11 @@ import net.sf.functionalj.Functions; import net.sf.functionalj.util.Operators; public class TraversalEngine { - // Usage and parameters - private List rods = null; // list of reference ordered data objects + // list of reference ordered data objects + private List rods = null; + + // Iterator over rods + List rodIters; //private String regionStr = null; // String dec //private String traversalType = null; // String describing this traversal type @@ -262,9 +269,17 @@ public class TraversalEngine { */ public boolean initialize(final boolean THREADED_IO) { lastProgressPrintTime = startTime = System.currentTimeMillis(); - loadReference(); + initializeReference(); + initializeReads(THREADED_IO); + // Initial the reference ordered data iterators + initializeRODs(); + //testReference(); //loadReference(); + return true; + } + + private void initializeReads(final boolean THREADED_IO) { try { final FileInputStream samFileStream = new FileInputStream(readsFile); final InputStream bufferedStream= new BufferedInputStream(samFileStream); @@ -276,8 +291,8 @@ public class TraversalEngine { System.err.println("Sort order is: " + header.getSortOrder()); samReadingTracker = new FileProgressTracker( readsFile, samReader.iterator(), samFileStream.getChannel(), 1000 ); - samReadIter = samReadingTracker; - + samReadIter = new VerifyingSamIterator(samReadingTracker); + if ( THREADED_IO ) { System.out.printf("Enabling threaded I/O with buffer of %d reads%n", THREADED_IO_BUFFER_SIZE); samReadIter = new ThreadedIterator(samReadIter, THREADED_IO_BUFFER_SIZE); @@ -287,8 +302,6 @@ public class TraversalEngine { catch (IOException e) { throw new RuntimeIOException(e); } - - return true; } @@ -296,11 +309,14 @@ public class TraversalEngine { * Prepare the reference for stream processing * */ - protected void loadReference() { + protected void initializeReference() { if ( refFileName!= null ) { this.refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFileName); this.refIter = new ReferenceIterator(this.refFile); - Utils.setupRefContigOrdering(this.refFile); + if ( ! Utils.setupRefContigOrdering(this.refFile) ) { + // We couldn't process the reference contig ordering, fail since we need it + throw new RuntimeException("We couldn't load the contig dictionary associated with %s. At the current time we require this dictionary file to efficiently access the FASTA file. In the near future this program will automatically construct the dictionary for you and save it down."); + } } } @@ -311,7 +327,7 @@ public class TraversalEngine { */ protected List initializeRODs() { // set up reference ordered data - List rodIters = new ArrayList(); + rodIters = new ArrayList(); for ( ReferenceOrderedData data : rods ) { rodIters.add(data.iterator()); } @@ -413,14 +429,12 @@ public class TraversalEngine { //LocusIterator iter = new SingleLocusIterator(filterIter); LocusIterator iter = new LocusIteratorByHanger(filterIter); - // Initial the reference ordered data iterators - List rodIters = initializeRODs(); - // initialize the walker object walker.initialize(); // Initialize the T sum using the walker T sum = walker.reduceInit(); boolean done = false; + GenomeLoc prevLoc = null; while ( iter.hasNext() && ! done ) { this.nRecords++; @@ -429,7 +443,10 @@ public class TraversalEngine { final LocusContext locus = iter.next(); // Poor man's version of index LOL - if ( inLocations(locus.getLocation()) ) { + GenomeLoc curLoc = locus.getLocation(); + if ( inLocations(curLoc) ) { + if ( prevLoc != null && curLoc.compareContigs(prevLoc) != 0 ) + System.out.printf("Traversing to next chromosome...%n"); // Jump forward in the reference to this locus location final ReferenceIterator refSite = refIter.seekForward(locus.getLocation()); @@ -522,3 +539,4 @@ public class TraversalEngine { return 0; } } + diff --git a/playground/java/src/org/broadinstitute/sting/atk/LocusIterator.java b/playground/java/src/org/broadinstitute/sting/gatk/iterators/LocusIterator.java similarity index 74% rename from playground/java/src/org/broadinstitute/sting/atk/LocusIterator.java rename to playground/java/src/org/broadinstitute/sting/gatk/iterators/LocusIterator.java index 7e4a3b29f..34920bb3b 100755 --- a/playground/java/src/org/broadinstitute/sting/atk/LocusIterator.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/iterators/LocusIterator.java @@ -1,16 +1,11 @@ -package org.broadinstitute.sting.atk; +package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.util.CloseableIterator; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.PushbackIterator; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.Predicate; -import org.broadinstitute.sting.utils.GenomeLoc; -import java.util.List; -import java.util.ArrayList; import java.util.Iterator; +import org.broadinstitute.sting.gatk.LocusContext; + /** * Iterator that traverses a SAM File, accumulating information on a per-locus basis */ diff --git a/playground/java/src/org/broadinstitute/sting/atk/LocusIteratorByHanger.java b/playground/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByHanger.java similarity index 88% rename from playground/java/src/org/broadinstitute/sting/atk/LocusIteratorByHanger.java rename to playground/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByHanger.java index a6f35895b..67b27a201 100755 --- a/playground/java/src/org/broadinstitute/sting/atk/LocusIteratorByHanger.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByHanger.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.atk; +package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.util.CloseableIterator; import net.sf.samtools.SAMRecord; @@ -9,6 +9,9 @@ import java.util.List; import java.util.Iterator; import org.broadinstitute.sting.utils.RefHanger; +import org.broadinstitute.sting.gatk.iterators.PushbackIterator; +import org.broadinstitute.sting.gatk.iterators.LocusIterator; +import org.broadinstitute.sting.gatk.LocusContext; /** * Iterator that traverses a SAM File, accumulating information on a per-locus basis @@ -27,7 +30,7 @@ public class LocusIteratorByHanger extends LocusIterator { final int INCREMENT_SIZE = 100; final boolean DEBUG = false; - /** + /** sy * Useful class for forwarding on locusContext data from this iterator */ public class MyLocusContext implements LocusContext { @@ -145,15 +148,23 @@ public class LocusIteratorByHanger extends LocusIterator { } private final boolean currentPositionIsFullyCovered() { - final SAMRecord read = it.next(); - GenomeLoc readLoc = new GenomeLoc(read.getReferenceName(), read.getAlignmentStart()); - final boolean coveredP = currentPositionIsFullyCovered(readLoc); - if ( coveredP ) - it.pushback(read); - return coveredP; + if ( ! it.hasNext() ) // if there are no more reads, we are fully covered + return true; + else { + final SAMRecord read = it.peek(); + GenomeLoc readLoc = Utils.genomicLocationOf(read); + final boolean coveredP = currentPositionIsFullyCovered(readLoc); + //System.out.printf("CoverP = %s => %b%n", readLoc, coveredP); + return coveredP; + } } private final void expandWindow(final int incrementSize) { + if ( DEBUG ) { + System.out.printf("entering expandWindow..., hasNext=%b%n", it.hasNext()); + printState(); + } + while ( it.hasNext() ) { if ( DEBUG ) { System.out.printf("Expanding window%n"); @@ -162,7 +173,7 @@ public class LocusIteratorByHanger extends LocusIterator { SAMRecord read = it.next(); - GenomeLoc readLoc = new GenomeLoc(read.getReferenceName(), read.getAlignmentStart()); + GenomeLoc readLoc = Utils.genomicLocationOf(read); if ( DEBUG ) { System.out.printf(" Expanding window sizes %d with %d : left=%s, right=%s, readLoc = %s, cmp=%d%n", readHanger.size(), incrementSize, diff --git a/playground/java/src/org/broadinstitute/sting/utils/PushbackIterator.java b/playground/java/src/org/broadinstitute/sting/gatk/iterators/PushbackIterator.java similarity index 82% rename from playground/java/src/org/broadinstitute/sting/utils/PushbackIterator.java rename to playground/java/src/org/broadinstitute/sting/gatk/iterators/PushbackIterator.java index 1adf13052..6eb524544 100755 --- a/playground/java/src/org/broadinstitute/sting/utils/PushbackIterator.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/iterators/PushbackIterator.java @@ -7,7 +7,7 @@ * This software is supplied without any warranty or guaranteed support whatsoever. Neither * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. */ -package org.broadinstitute.sting.utils; +package org.broadinstitute.sting.gatk.iterators; import java.util.Iterator; @@ -23,16 +23,25 @@ public class PushbackIterator implements Iterator { return pushedElement != null || underlyingIterator.hasNext(); } + public T peek() { + T x = next(); + pushback(x); + return x; + } + public T next() { if (pushedElement != null) { final T ret = pushedElement; pushedElement = null; return ret; + } else { + return underlyingIterator.next(); } - return underlyingIterator.next(); } public void pushback(T elt) { + assert(pushedElement == null); + pushedElement = elt; } diff --git a/playground/java/src/org/broadinstitute/sting/utils/ReferenceIterator.java b/playground/java/src/org/broadinstitute/sting/gatk/iterators/ReferenceIterator.java similarity index 94% rename from playground/java/src/org/broadinstitute/sting/utils/ReferenceIterator.java rename to playground/java/src/org/broadinstitute/sting/gatk/iterators/ReferenceIterator.java index 83f268cf6..c69a5e926 100755 --- a/playground/java/src/org/broadinstitute/sting/utils/ReferenceIterator.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/iterators/ReferenceIterator.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils; +package org.broadinstitute.sting.gatk.iterators; import edu.mit.broad.picard.reference.ReferenceSequenceFile; import edu.mit.broad.picard.reference.ReferenceSequence; @@ -7,6 +7,8 @@ import net.sf.samtools.util.StringUtil; import java.util.Iterator; import java.util.NoSuchElementException; +import org.broadinstitute.sting.utils.GenomeLoc; + /** * Created by IntelliJ IDEA. * User: depristo @@ -119,6 +121,8 @@ public class ReferenceIterator implements Iterator { } else { while (true) { + //System.out.printf("Seeking to contig %s, cur=%s, next=%s%n", contigName, currentContig.getName(), + // nextContig != null ? nextContig.getName() : "not loaded yet"); // go searching through the reference if ( ! loadNextContig() ) { // never found anything diff --git a/playground/java/src/org/broadinstitute/sting/atk/SingleLocusIterator.java b/playground/java/src/org/broadinstitute/sting/gatk/iterators/SingleLocusIterator.java similarity index 96% rename from playground/java/src/org/broadinstitute/sting/atk/SingleLocusIterator.java rename to playground/java/src/org/broadinstitute/sting/gatk/iterators/SingleLocusIterator.java index 139f447dc..749489d44 100755 --- a/playground/java/src/org/broadinstitute/sting/atk/SingleLocusIterator.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/iterators/SingleLocusIterator.java @@ -1,8 +1,10 @@ -package org.broadinstitute.sting.atk; +package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.util.CloseableIterator; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.PushbackIterator; +import org.broadinstitute.sting.gatk.iterators.PushbackIterator; +import org.broadinstitute.sting.gatk.iterators.LocusIterator; +import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Predicate; import org.broadinstitute.sting.utils.GenomeLoc; diff --git a/playground/java/src/org/broadinstitute/sting/utils/ThreadedIterator.java b/playground/java/src/org/broadinstitute/sting/gatk/iterators/ThreadedIterator.java similarity index 97% rename from playground/java/src/org/broadinstitute/sting/utils/ThreadedIterator.java rename to playground/java/src/org/broadinstitute/sting/gatk/iterators/ThreadedIterator.java index e88393f47..2866cd041 100755 --- a/playground/java/src/org/broadinstitute/sting/utils/ThreadedIterator.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/iterators/ThreadedIterator.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils; +package org.broadinstitute.sting.gatk.iterators; import java.util.Iterator; import java.util.concurrent.BlockingQueue; diff --git a/playground/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java b/playground/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java new file mode 100644 index 000000000..81ab4d047 --- /dev/null +++ b/playground/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java @@ -0,0 +1,52 @@ +package org.broadinstitute.sting.gatk.iterators; + +import net.sf.samtools.SAMRecord; +import net.sf.samtools.util.RuntimeIOException; + +import java.util.Iterator; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; + +/** + * Created by IntelliJ IDEA. + * User: mdepristo + * Date: Mar 15, 2009 + * Time: 6:02:31 PM + * To change this template use File | Settings | File Templates. + */ +public class VerifyingSamIterator implements Iterator { + Iterator it; + SAMRecord last = null; + boolean checkOrderP = true; + + public VerifyingSamIterator(Iterator it) { + this.it = it; + } + + public boolean hasNext() { return this.it.hasNext(); } + public SAMRecord next() { + + SAMRecord cur = it.next(); + if ( last != null ) + verifyRecord(last, cur); + last = cur; + return cur; + } + + public void verifyRecord( final SAMRecord last, final SAMRecord cur ) { + if ( checkOrderP ) { + GenomeLoc lastLoc = Utils.genomicLocationOf( last ); + GenomeLoc curLoc = Utils.genomicLocationOf( cur ); + + //System.out.printf("VerifyingRecords %s %s%n", lastLoc, curLoc ); + + if ( curLoc.compareTo(lastLoc) == -1 ) + throw new RuntimeIOException(String.format("Reads are out of order:%nlast:%n%s%ncurrent%n%s%n", last.format(), cur.format()) ); + } + } + + public void remove() { + throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); + } +} diff --git a/playground/java/src/org/broadinstitute/sting/utils/ReferenceOrderedData.java b/playground/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java similarity index 97% rename from playground/java/src/org/broadinstitute/sting/utils/ReferenceOrderedData.java rename to playground/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java index c3bffe30a..2263ecfed 100644 --- a/playground/java/src/org/broadinstitute/sting/utils/ReferenceOrderedData.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java @@ -1,7 +1,6 @@ -package org.broadinstitute.sting.utils; +package org.broadinstitute.sting.gatk.refdata; import java.io.File; -import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.util.Iterator; @@ -9,6 +8,8 @@ import java.util.ArrayList; import java.util.Collections; import edu.mit.broad.picard.util.TabbedTextFileParser; +import org.broadinstitute.sting.gatk.iterators.PushbackIterator; +import org.broadinstitute.sting.utils.GenomeLoc; /** * Class for representing arbitrary reference ordered data sets diff --git a/playground/java/src/org/broadinstitute/sting/utils/ReferenceOrderedDatum.java b/playground/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDatum.java similarity index 89% rename from playground/java/src/org/broadinstitute/sting/utils/ReferenceOrderedDatum.java rename to playground/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDatum.java index 85b111e46..51b1379b5 100644 --- a/playground/java/src/org/broadinstitute/sting/utils/ReferenceOrderedDatum.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDatum.java @@ -1,4 +1,6 @@ -package org.broadinstitute.sting.utils; +package org.broadinstitute.sting.gatk.refdata; + +import org.broadinstitute.sting.utils.GenomeLoc; /** * Created by IntelliJ IDEA. diff --git a/playground/java/src/org/broadinstitute/sting/utils/rodDbSNP.java b/playground/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java similarity index 95% rename from playground/java/src/org/broadinstitute/sting/utils/rodDbSNP.java rename to playground/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java index 393a42d58..05c0244ba 100644 --- a/playground/java/src/org/broadinstitute/sting/utils/rodDbSNP.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java @@ -1,16 +1,13 @@ -package org.broadinstitute.sting.utils; +package org.broadinstitute.sting.gatk.refdata; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.util.CloseableIterator; -import edu.mit.broad.picard.util.TabbedTextFileParser; import edu.mit.broad.picard.util.SequenceUtil; -import java.io.File; -import java.io.InputStream; -import java.io.FileInputStream; -import java.io.BufferedInputStream; import java.util.*; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; + /** * Example format: * 585 chr1 433 433 rs56289060 0 + - - -/C genomic insertion unknown 0 0 unknown between 1 diff --git a/playground/java/src/org/broadinstitute/sting/utils/rodGFF.java b/playground/java/src/org/broadinstitute/sting/gatk/refdata/rodGFF.java similarity index 90% rename from playground/java/src/org/broadinstitute/sting/utils/rodGFF.java rename to playground/java/src/org/broadinstitute/sting/gatk/refdata/rodGFF.java index 7d2515ca5..a82911b57 100644 --- a/playground/java/src/org/broadinstitute/sting/utils/rodGFF.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/refdata/rodGFF.java @@ -1,16 +1,10 @@ -package org.broadinstitute.sting.utils; +package org.broadinstitute.sting.gatk.refdata; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.util.CloseableIterator; -import edu.mit.broad.picard.util.TabbedTextFileParser; - -import java.io.File; -import java.io.InputStream; -import java.io.FileInputStream; -import java.io.BufferedInputStream; -import java.util.Iterator; import java.util.HashMap; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.utils.GenomeLoc; + /** * Class for representing arbitrary reference ordered data sets * diff --git a/playground/java/src/org/broadinstitute/sting/atk/modules/AlleleFrequencyWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyWalker.java similarity index 97% rename from playground/java/src/org/broadinstitute/sting/atk/modules/AlleleFrequencyWalker.java rename to playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyWalker.java index 53247657a..792e2a61f 100755 --- a/playground/java/src/org/broadinstitute/sting/atk/modules/AlleleFrequencyWalker.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyWalker.java @@ -1,8 +1,7 @@ -package org.broadinstitute.sting.atk.modules; +package org.broadinstitute.sting.gatk.walkers; -import org.broadinstitute.sting.atk.LocusIterator; -import org.broadinstitute.sting.atk.LocusContext; -import org.broadinstitute.sting.utils.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import net.sf.samtools.SAMRecord; import java.util.List; diff --git a/playground/java/src/org/broadinstitute/sting/atk/modules/BaseQualityHistoWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/BaseQualityHistoWalker.java similarity index 87% rename from playground/java/src/org/broadinstitute/sting/atk/modules/BaseQualityHistoWalker.java rename to playground/java/src/org/broadinstitute/sting/gatk/walkers/BaseQualityHistoWalker.java index 5e8e86d3c..215af2871 100755 --- a/playground/java/src/org/broadinstitute/sting/atk/modules/BaseQualityHistoWalker.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/BaseQualityHistoWalker.java @@ -1,8 +1,8 @@ -package org.broadinstitute.sting.atk.modules; +package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.atk.ReadWalker; -import org.broadinstitute.sting.atk.LocusContext; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.LocusContext; /** * Created by IntelliJ IDEA. @@ -27,7 +27,7 @@ public class BaseQualityHistoWalker implements ReadWalker { return true; // We are keeping all the reads } - // Map over the org.broadinstitute.sting.atk.LocusContext + // Map over the org.broadinstitute.sting.gatk.LocusContext public Integer map(LocusContext context, SAMRecord read) { for ( byte qual : read.getBaseQualities() ) { //System.out.println(qual); diff --git a/playground/java/src/org/broadinstitute/sting/atk/modules/BasicLociWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/BasicLociWalker.java similarity index 73% rename from playground/java/src/org/broadinstitute/sting/atk/modules/BasicLociWalker.java rename to playground/java/src/org/broadinstitute/sting/gatk/walkers/BasicLociWalker.java index aff1efdae..3a42d18fe 100755 --- a/playground/java/src/org/broadinstitute/sting/atk/modules/BasicLociWalker.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/BasicLociWalker.java @@ -1,10 +1,8 @@ -package org.broadinstitute.sting.atk.modules; +package org.broadinstitute.sting.gatk.walkers; -import org.broadinstitute.sting.atk.LocusWalker; -import org.broadinstitute.sting.atk.LocusIterator; -import org.broadinstitute.sting.atk.LocusContext; -import org.broadinstitute.sting.utils.ReferenceOrderedDatum; -import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import java.util.List; @@ -27,7 +25,8 @@ public abstract class BasicLociWalker implements LocusWalke return true; // We are keeping all the reads } - public void onTraveralDone() { + public void onTraversalDone() { + ; } // These three capabilities must be overidden diff --git a/playground/java/src/org/broadinstitute/sting/atk/modules/BasicReadWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/BasicReadWalker.java similarity index 79% rename from playground/java/src/org/broadinstitute/sting/atk/modules/BasicReadWalker.java rename to playground/java/src/org/broadinstitute/sting/gatk/walkers/BasicReadWalker.java index 8692f49f5..209aa6773 100755 --- a/playground/java/src/org/broadinstitute/sting/atk/modules/BasicReadWalker.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/BasicReadWalker.java @@ -1,8 +1,8 @@ -package org.broadinstitute.sting.atk.modules; +package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.atk.LocusContext; -import org.broadinstitute.sting.atk.ReadWalker; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; /** * Created by IntelliJ IDEA. @@ -20,8 +20,8 @@ public abstract class BasicReadWalker implements ReadWalker return true; } - public void onTraveralDone() { - + public void onTraversalDone() { + ; } // Three basic abstract function that *must* be overridden diff --git a/playground/java/src/org/broadinstitute/sting/atk/modules/CountLociWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/CountLociWalker.java similarity index 75% rename from playground/java/src/org/broadinstitute/sting/atk/modules/CountLociWalker.java rename to playground/java/src/org/broadinstitute/sting/gatk/walkers/CountLociWalker.java index af3a9d733..bf4c758d1 100755 --- a/playground/java/src/org/broadinstitute/sting/atk/modules/CountLociWalker.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/CountLociWalker.java @@ -1,7 +1,7 @@ -package org.broadinstitute.sting.atk.modules; +package org.broadinstitute.sting.gatk.walkers; -import org.broadinstitute.sting.atk.LocusContext; -import org.broadinstitute.sting.utils.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import java.util.List; diff --git a/playground/java/src/org/broadinstitute/sting/atk/modules/CountReadsWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/CountReadsWalker.java similarity index 77% rename from playground/java/src/org/broadinstitute/sting/atk/modules/CountReadsWalker.java rename to playground/java/src/org/broadinstitute/sting/gatk/walkers/CountReadsWalker.java index 0f40c2c7b..595349688 100755 --- a/playground/java/src/org/broadinstitute/sting/atk/modules/CountReadsWalker.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/CountReadsWalker.java @@ -1,7 +1,7 @@ -package org.broadinstitute.sting.atk.modules; +package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.atk.LocusContext; +import org.broadinstitute.sting.gatk.LocusContext; public class CountReadsWalker extends BasicReadWalker { public Integer map(LocusContext context, SAMRecord read) { diff --git a/playground/java/src/org/broadinstitute/sting/atk/modules/DepthOfCoverageWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java similarity index 76% rename from playground/java/src/org/broadinstitute/sting/atk/modules/DepthOfCoverageWalker.java rename to playground/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java index 0e1f45903..1253f245c 100755 --- a/playground/java/src/org/broadinstitute/sting/atk/modules/DepthOfCoverageWalker.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java @@ -1,12 +1,10 @@ -package org.broadinstitute.sting.atk.modules; +package org.broadinstitute.sting.gatk.walkers; -import org.broadinstitute.sting.atk.LocusContext; -import org.broadinstitute.sting.utils.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import java.util.List; -import net.sf.samtools.SAMRecord; - /** * Created by IntelliJ IDEA. * User: mdepristo diff --git a/playground/java/src/org/broadinstitute/sting/atk/LocusWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java similarity index 74% rename from playground/java/src/org/broadinstitute/sting/atk/LocusWalker.java rename to playground/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java index 2888c98d6..2f563f752 100755 --- a/playground/java/src/org/broadinstitute/sting/atk/LocusWalker.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java @@ -1,7 +1,7 @@ -package org.broadinstitute.sting.atk; +package org.broadinstitute.sting.gatk.walkers; -import org.broadinstitute.sting.atk.LocusIterator; -import org.broadinstitute.sting.utils.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.LocusContext; import java.util.List; @@ -19,7 +19,7 @@ public interface LocusWalker { // Do we actually want to operate on the context? boolean filter(List rodData, char ref, LocusContext context); - // Map over the org.broadinstitute.sting.atk.LocusContext + // Map over the org.broadinstitute.sting.gatk.LocusContext MapType map(List rodData, char ref, LocusContext context); // Given result of map function diff --git a/playground/java/src/org/broadinstitute/sting/atk/modules/NullWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/NullWalker.java similarity index 62% rename from playground/java/src/org/broadinstitute/sting/atk/modules/NullWalker.java rename to playground/java/src/org/broadinstitute/sting/gatk/walkers/NullWalker.java index 16a810a95..5b0347f4b 100644 --- a/playground/java/src/org/broadinstitute/sting/atk/modules/NullWalker.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/NullWalker.java @@ -1,12 +1,8 @@ -package org.broadinstitute.sting.atk.modules; +package org.broadinstitute.sting.gatk.walkers; -import org.broadinstitute.sting.atk.LocusWalker; -import org.broadinstitute.sting.atk.LocusIterator; -import org.broadinstitute.sting.atk.LocusContext; -import org.broadinstitute.sting.utils.ReferenceOrderedDatum; -import org.broadinstitute.sting.utils.rodDbSNP; -import org.broadinstitute.sting.utils.Utils; -import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import java.util.List; @@ -24,7 +20,7 @@ public class NullWalker implements LocusWalker { return true; // We are keeping all the reads } - // Map over the org.broadinstitute.sting.atk.LocusContext + // Map over the org.broadinstitute.sting.gatk.LocusContext public Integer map(List rodData, char ref, LocusContext context) { return 1; @@ -40,6 +36,6 @@ public class NullWalker implements LocusWalker { return 0; } - public void onTraveralDone() { + public void onTraversalDone() { } } diff --git a/playground/java/src/org/broadinstitute/sting/atk/modules/PileupWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java similarity index 84% rename from playground/java/src/org/broadinstitute/sting/atk/modules/PileupWalker.java rename to playground/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java index d44b08c79..a895849c6 100644 --- a/playground/java/src/org/broadinstitute/sting/atk/modules/PileupWalker.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java @@ -1,11 +1,8 @@ -package org.broadinstitute.sting.atk.modules; +package org.broadinstitute.sting.gatk.walkers; -import org.broadinstitute.sting.atk.LocusWalker; -import org.broadinstitute.sting.atk.LocusIterator; -import org.broadinstitute.sting.atk.LocusContext; -import org.broadinstitute.sting.utils.ReferenceOrderedDatum; -import org.broadinstitute.sting.utils.rodDbSNP; -import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import net.sf.samtools.SAMRecord; import java.util.List; @@ -17,7 +14,7 @@ import java.util.List; * Time: 3:22:14 PM * To change this template use File | Settings | File Templates. */ -public class PileupWalker implements LocusWalker { +public class PileupWalker extends BasicLociWalker { public void initialize() { } @@ -28,7 +25,7 @@ public class PileupWalker implements LocusWalker { return true; // We are keeping all the reads } - // Map over the org.broadinstitute.sting.atk.LocusContext + // Map over the org.broadinstitute.sting.gatk.LocusContext public Integer map(List rodData, char ref, LocusContext context) { //System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size()); //for ( SAMRecord read : context.getReads() ) { @@ -84,7 +81,4 @@ public class PileupWalker implements LocusWalker { public Integer reduce(Integer value, Integer sum) { return value + sum; } - - public void onTraveralDone() { - } } diff --git a/playground/java/src/org/broadinstitute/sting/atk/modules/PrintReadsWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java similarity index 79% rename from playground/java/src/org/broadinstitute/sting/atk/modules/PrintReadsWalker.java rename to playground/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index b709d5563..4aff75943 100755 --- a/playground/java/src/org/broadinstitute/sting/atk/modules/PrintReadsWalker.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -1,7 +1,7 @@ -package org.broadinstitute.sting.atk.modules; +package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.atk.LocusContext; +import org.broadinstitute.sting.gatk.LocusContext; public class PrintReadsWalker extends BasicReadWalker { public Integer map(LocusContext context, SAMRecord read) { diff --git a/playground/java/src/org/broadinstitute/sting/atk/ReadWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java similarity index 79% rename from playground/java/src/org/broadinstitute/sting/atk/ReadWalker.java rename to playground/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java index bce2333c6..14cecfa96 100755 --- a/playground/java/src/org/broadinstitute/sting/atk/ReadWalker.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java @@ -1,7 +1,7 @@ -package org.broadinstitute.sting.atk; +package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.atk.LocusContext; +import org.broadinstitute.sting.gatk.LocusContext; /** * Created by IntelliJ IDEA. @@ -17,7 +17,7 @@ public interface ReadWalker { // Do we actually want to operate on the context? boolean filter(LocusContext context, SAMRecord read); - // Map over the org.broadinstitute.sting.atk.LocusContext + // Map over the org.broadinstitute.sting.gatk.LocusContext MapType map(LocusContext context, SAMRecord read); // Given result of map function diff --git a/playground/java/src/org/broadinstitute/sting/atk/modules/SingleSampleGenotyper.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/SingleSampleGenotyper.java similarity index 86% rename from playground/java/src/org/broadinstitute/sting/atk/modules/SingleSampleGenotyper.java rename to playground/java/src/org/broadinstitute/sting/gatk/walkers/SingleSampleGenotyper.java index f9c9f7a4b..3b483f6c1 100644 --- a/playground/java/src/org/broadinstitute/sting/atk/modules/SingleSampleGenotyper.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/SingleSampleGenotyper.java @@ -1,10 +1,8 @@ -package org.broadinstitute.sting.atk.modules; +package org.broadinstitute.sting.gatk.walkers; -import org.broadinstitute.sting.atk.LocusWalker; -import org.broadinstitute.sting.atk.LocusIterator; -import org.broadinstitute.sting.atk.LocusContext; -import org.broadinstitute.sting.utils.ReferenceOrderedDatum; -import org.broadinstitute.sting.utils.rodDbSNP; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.utils.Utils; import net.sf.samtools.SAMRecord; @@ -13,13 +11,7 @@ import java.util.List; // Draft single sample genotyper // j.maguire 3-7-2009 -public class SingleSampleGenotyper implements LocusWalker { - public void initialize() { - } - - public String walkerType() { return "ByLocus"; } - - // Do we actually want to operate on the context? +public class SingleSampleGenotyper extends BasicLociWalker { public boolean filter(List rodData, char ref, LocusContext context) { return true; // We are keeping all the reads } @@ -86,7 +78,7 @@ public class SingleSampleGenotyper implements LocusWalker { } - // Map over the org.broadinstitute.sting.atk.LocusContext + // Map over the org.broadinstitute.sting.gatk.LocusContext public Integer map(List rodData, char ref, LocusContext context) { //System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size()); //for ( SAMRecord read : context.getReads() ) { @@ -105,7 +97,7 @@ public class SingleSampleGenotyper implements LocusWalker { { if ( datum != null ) { - if ( datum instanceof rodDbSNP) + if ( datum instanceof rodDbSNP) { rodDbSNP dbsnp = (rodDbSNP)datum; rodString += dbsnp.toMediumString(); @@ -144,7 +136,4 @@ public class SingleSampleGenotyper implements LocusWalker { public Integer reduce(Integer value, Integer sum) { return value + sum; } - - public void onTraveralDone() { - } } diff --git a/playground/java/src/org/broadinstitute/sting/utils/Utils.java b/playground/java/src/org/broadinstitute/sting/utils/Utils.java index 6458459bc..6f03a989e 100755 --- a/playground/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/playground/java/src/org/broadinstitute/sting/utils/Utils.java @@ -29,6 +29,10 @@ public class Utils { return filtered; } + public static GenomeLoc genomicLocationOf( final SAMRecord read ) { + return new GenomeLoc( read.getReferenceName(), read.getAlignmentStart() ); + } + private static final Map readFlagNames = new HashMap(); @@ -100,7 +104,7 @@ public class Utils { return average(vals, vals.size()); } - public static void setupRefContigOrdering(final ReferenceSequenceFile refFile) { + public static boolean setupRefContigOrdering(final ReferenceSequenceFile refFile) { List refContigs = refFile.getSequenceDictionary(); HashMap refContigOrdering = new HashMap(); @@ -116,6 +120,7 @@ public class Utils { } GenomeLoc.setContigOrdering(refContigOrdering); + return refContigOrdering != null; } // Java Generics can't do primitive types, so I had to do this the simplistic way diff --git a/playground/shell/TraverseTest.sh b/playground/shell/TraverseTest.sh index cd4a42a54..031151ec4 100755 --- a/playground/shell/TraverseTest.sh +++ b/playground/shell/TraverseTest.sh @@ -1 +1 @@ -java -Xmx4096m -jar ../java/dist/AnalysisTK.jar $* +java -Xmx8192m -jar dist/GenomeAnalysisTK.jar $* diff --git a/playground/shell/TraverseTestProf.sh b/playground/shell/TraverseTestProf.sh index f6e2398ca..a0478611c 100755 --- a/playground/shell/TraverseTestProf.sh +++ b/playground/shell/TraverseTestProf.sh @@ -1 +1 @@ -java -Xmx4096m -agentlib:hprof=cpu=samples -jar ../java/dist/AnalysisTK.jar $* +java -Xmx4096m -agentlib:hprof=cpu=samples,depth=10 -jar dist/GenomeAnalysisTK.jar $*