diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialPatternedSAMIterator.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialPatternedSAMIterator.java
new file mode 100644
index 000000000..6f677d1cd
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialPatternedSAMIterator.java
@@ -0,0 +1,139 @@
+package org.broadinstitute.sting.utils.sam;
+
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMRecord;
+
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @author aaron
+ *
+ * Class ArtificialPatternedSAMIterator
+ *
+ * A descriptions should go here. Blame aaron if it's missing.
+ */
+public class ArtificialPatternedSAMIterator extends ArtificialSAMIterator {
+
+ /** the pattern we're implementing */
+ public enum PATTERN {
+ RANDOM_READS, IN_ORDER_READS;
+ }
+
+ // our pattern
+ private final PATTERN mPattern;
+
+ /**
+ * this is pretty heavy (and it could be extremely heavy, given the amount of reads they request, but it
+ * allows us to give them each read once, reguardless of the order specified
+ */
+ private final int[] reads;
+ private final int readCount;
+
+ /**
+ * create the fake iterator, given the mapping of chromosomes and read counts. If pattern
+ * is specified to be random, it will generate reads that are randomly placed on the current chromosome
+ *
+ * @param startingChr the starting chromosome
+ * @param endingChr the ending chromosome
+ * @param readCount the number of reads in each chromosome
+ * @param header the associated header
+ * @param pattern the pattern to implement
+ */
+ ArtificialPatternedSAMIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount, SAMFileHeader header, PATTERN pattern ) {
+ super(startingChr, endingChr, readCount, unmappedReadCount, header);
+ mPattern = pattern;
+ this.readCount = readCount;
+ reads = new int[readCount];
+
+ for (int x = 0; x < readCount; x++) {
+ reads[x] = x;
+ }
+ if (pattern == PATTERN.RANDOM_READS) {
+ // scramble a bunch of the reads
+ for (int y = 0; y < readCount; y++) {
+ int ranOne = (int) Math.round(Math.random() * ( readCount - 1 ));
+ int ranTwo = (int) Math.round(Math.random() * ( readCount - 1 ));
+ int temp = reads[ranOne];
+ reads[ranOne] = reads[ranTwo];
+ reads[ranTwo] = temp;
+ }
+ /**
+ * up to this point there's no garauntee that the random() has made the reads out of order (though it's
+ * extremely extremely unlikely it's failed). Let's make sure there at least out of order:
+ */
+ if (this.reads[0] < this.reads[reads.length -1]) {
+ int temp = reads[0];
+ reads[0] = reads[reads.length -1];
+ reads[reads.length -1] = temp;
+ }
+
+ }
+
+ }
+
+ /**
+ * override the default ArtificialSAMIterator createNextRead method, which creates the next read
+ * @return
+ */
+ protected boolean createNextRead() {
+ if (currentRead >= rCount) {
+ currentChromo++;
+ currentRead = 0;
+ }
+ // check for end condition, have we finished the chromosome listing, and have no unmapped reads
+ if (currentChromo >= eChromosomeCount) {
+ if (uMappedReadCount < 1) {
+ this.next = null;
+ return false;
+ } else {
+ ++totalReadCount;
+ this.next = ArtificialSamUtils.createArtificialRead(this.header, String.valueOf(totalReadCount), -1, -1, 50);
+ --uMappedReadCount;
+ return true;
+ }
+ }
+ ++totalReadCount;
+ this.next = getNextRecord(currentRead);
+
+ ++currentRead;
+ return true;
+ }
+
+
+ /**
+ * get the next read, given it's index in the chromosome
+ * @param read the read index in the chromosome
+ * @return a SAMRecord
+ */
+ private SAMRecord getNextRecord( int read ) {
+ if (read > this.readCount) {
+ return ArtificialSamUtils.createArtificialRead(this.header, String.valueOf(reads[readCount - 1]), currentChromo, reads[readCount - 1], 50);
+ }
+ return ArtificialSamUtils.createArtificialRead(this.header, String.valueOf(reads[read]), currentChromo, reads[read], 50);
+ }
+
+}
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java
new file mode 100644
index 000000000..9d56c3c26
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java
@@ -0,0 +1,144 @@
+package org.broadinstitute.sting.utils.sam;
+
+import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
+import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
+import org.broadinstitute.sting.gatk.walkers.Walker;
+import org.broadinstitute.sting.gatk.walkers.ReadWalker;
+import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
+import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
+import org.broadinstitute.sting.gatk.LocusContext;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.apache.log4j.Logger;
+
+import java.util.Arrays;
+
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.SAMFileHeader;
+
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @author aaron
+ *
+ * Class ArtificialReadsTraversal
+ *
+ * A descriptions should go here. Blame aaron if it's missing.
+ */
+public class ArtificialReadsTraversal extends TraversalEngine {
+
+ public int startingChr = 1;
+ public int endingChr = 5;
+ public int readsPerChr = 100;
+ public int unMappedReads = 1000;
+ private int DEFAULT_READ_LENGTH = ArtificialSamUtils.DEFAULT_READ_LENGTH;
+ private ArtificialPatternedSAMIterator iter;
+ /** our log, which we want to capture anything from this class */
+ protected static Logger logger = Logger.getLogger(ArtificialReadsTraversal.class);
+
+ /** Creates a new, uninitialized ArtificialReadsTraversal */
+ public ArtificialReadsTraversal() {
+ super(null, null, null);
+ }
+
+ // what read ordering are we using
+ private ArtificialPatternedSAMIterator.PATTERN readOrder = ArtificialPatternedSAMIterator.PATTERN.IN_ORDER_READS;
+
+
+ /**
+ * set the read ordering of the reads given to the walker
+ *
+ * @param readOrdering
+ */
+ public void setReadOrder( ArtificialPatternedSAMIterator.PATTERN readOrdering ) {
+ readOrder = readOrdering;
+ }
+
+ /**
+ * Traverse by reads, given the data and the walker
+ *
+ * @param walker the walker to traverse with
+ * @param shard the shard, specifying the range of data to iterate over
+ * @param dataProvider the provider of the reads data
+ * @param sum the value of type T, specified by the walker, to feed to the walkers reduce function
+ * @param the map type of the walker
+ * @param the reduce type of the walker
+ *
+ * @return the reduce variable of the read walker
+ */
+ public T traverse( Walker walker,
+ Shard shard,
+ ShardDataProvider dataProvider,
+ T sum ) {
+
+ if (!( walker instanceof ReadWalker ))
+ throw new IllegalArgumentException("Walker isn't a read walker!");
+
+ ReadWalker readWalker = (ReadWalker) walker;
+ SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readsPerChr + DEFAULT_READ_LENGTH);
+ iter = new ArtificialPatternedSAMIterator(this.startingChr,
+ this.endingChr,
+ this.readsPerChr,
+ this.unMappedReads,
+ header,
+ this.readOrder);
+
+ // while we still have more reads
+ for (SAMRecord read : iter) {
+
+ // our locus context
+ LocusContext locus = null;
+
+ // an array of characters that represent the reference
+ char[] refSeq = null;
+
+ // update the number of reads we've seen
+ TraversalStatistics.nRecords++;
+
+ final boolean keepMeP = readWalker.filter(refSeq, read);
+ if (keepMeP) {
+ M x = readWalker.map(refSeq, read);
+ sum = readWalker.reduce(x, sum);
+ }
+
+ if (locus != null) { printProgress("loci", locus.getLocation()); }
+ }
+ return sum;
+ }
+
+ /**
+ * Temporary override of printOnTraversalDone.
+ * TODO: Add some sort of TE.getName() function once all TraversalEngines are ported.
+ *
+ * @param sum Result of the computation.
+ * @param Type of the result.
+ */
+ public void printOnTraversalDone( T sum ) {
+ printOnTraversalDone("reads", sum);
+ }
+
+
+}
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java
index e371749f9..f727581c4 100644
--- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java
+++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java
@@ -109,7 +109,7 @@ public class ArtificialSAMIterator implements PeekingStingIterator {
return false;
}
- private boolean createNextRead() {
+ protected boolean createNextRead() {
if (currentRead >= rCount) {
currentChromo++;
currentRead = 0;
@@ -143,6 +143,10 @@ public class ArtificialSAMIterator implements PeekingStingIterator {
throw new UnsupportedOperationException("You've tried to remove on a StingSAMIterator (unsupported), not to mention that this is a fake iterator.");
}
+ /**
+ * return this iterator, for the iterable interface
+ * @return
+ */
public Iterator iterator() {
return this;
}
diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerTest.java
new file mode 100644
index 000000000..b8e39f5e5
--- /dev/null
+++ b/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerTest.java
@@ -0,0 +1,118 @@
+package org.broadinstitute.sting.gatk.walkers;
+
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.gatk.executive.Accumulator;
+import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
+import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
+import org.broadinstitute.sting.utils.sam.ArtificialReadsTraversal;
+import org.broadinstitute.sting.utils.sam.ArtificialSAMFileWriter;
+import org.broadinstitute.sting.utils.sam.ArtificialSamUtils;
+import org.junit.Before;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.SAMFileHeader;
+
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @author aaron
+ *
+ * Class PrintReadsWalkerTest
+ *
+ * This tests the print reads walker, using the artificial reads traversal
+ */
+public class PrintReadsWalkerTest extends BaseTest {
+
+ /**
+ * our private fake reads traversal. This traversal seeds the
+ * the walker with a specified number of fake reads, which are named sequentially
+ */
+ private ArtificialReadsTraversal trav;
+ private int readTotal = 0;
+ private char bases[] = {'a', 't'};
+
+ @Before
+ public void before() {
+ trav = new ArtificialReadsTraversal();
+ readTotal = ( ( trav.endingChr - trav.startingChr ) + 1 ) * trav.readsPerChr + trav.unMappedReads;
+ }
+
+ /** test that we get out the same number of reads we put in */
+ @Test
+ public void testReadCount() {
+ PrintReadsWalker walker = new PrintReadsWalker();
+ ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
+ trav.traverse(walker, (Shard) null, (ShardDataProvider) null, writer);
+ assertEquals(readTotal, writer.getRecords().size());
+ }
+
+ /** test that we're ok with a null read */
+ @Test
+ public void testNullRead() {
+ PrintReadsWalker walker = new PrintReadsWalker();
+
+ SAMRecord rec = walker.map(bases, null);
+ assertTrue(rec == null);
+ }
+
+ /** tes that we get the read we put into the map function */
+ @Test
+ public void testReturnRead() {
+ PrintReadsWalker walker = new PrintReadsWalker();
+ SAMFileHeader head = ArtificialSamUtils.createArtificialSamHeader(3,1,1000);
+ SAMRecord rec = ArtificialSamUtils.createArtificialRead(head, "FakeRead", 1, 1, 50);
+ SAMRecord ret = walker.map(bases, rec);
+ assertTrue(ret == rec);
+ assertTrue(ret.getReadName().equals(rec.getReadName()));
+ }
+
+ /** test that the read makes it to the output source */
+ @Test
+ public void testReducingRead() {
+ PrintReadsWalker walker = new PrintReadsWalker();
+ SAMFileHeader head = ArtificialSamUtils.createArtificialSamHeader(3,1,1000);
+ SAMRecord rec = ArtificialSamUtils.createArtificialRead(head, "FakeRead", 1, 1, 50);
+ ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
+ SAMRecord ret = walker.map(bases, null);
+ walker.reduce(ret,writer);
+
+ assertTrue(writer.getRecords().size() == 1);
+ }
+
+ /** test that we close the output source */
+ @Test
+ public void testClosingOnTraversalDone() {
+ ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
+ assertTrue(!writer.isClosed());
+ PrintReadsWalker walker = new PrintReadsWalker();
+ walker.onTraversalDone(writer);
+ assertTrue(writer.isClosed());
+
+ }
+}
diff --git a/java/test/org/broadinstitute/sting/utils/sam/ArtificialPatternedSAMIteratorTest.java b/java/test/org/broadinstitute/sting/utils/sam/ArtificialPatternedSAMIteratorTest.java
new file mode 100644
index 000000000..46ddb30bc
--- /dev/null
+++ b/java/test/org/broadinstitute/sting/utils/sam/ArtificialPatternedSAMIteratorTest.java
@@ -0,0 +1,97 @@
+package org.broadinstitute.sting.utils.sam;
+
+import org.broadinstitute.sting.BaseTest;
+import org.junit.Before;
+import org.junit.Test;
+import static org.junit.Assert.fail;
+import static org.junit.Assert.assertTrue;
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMRecord;
+
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ *
+ * @author aaron
+ *
+ * Class ArtificialPatternedSAMIteratorTest
+ *
+ * tests ArtificialPatternedSAMIterator, making sure that if you specify in order
+ * you get reads in order, and if you specify out of order you get them out of order.
+ */
+public class ArtificialPatternedSAMIteratorTest extends BaseTest {
+
+ // our artifical patterned iterator
+ ArtificialPatternedSAMIterator iter;
+
+ private int startingChr = 1;
+ private int endingChr = 2;
+ private int readCount = 100;
+ private int DEFAULT_READ_LENGTH = ArtificialSamUtils.DEFAULT_READ_LENGTH;
+ SAMFileHeader header;
+
+ @Before
+ public void before() {
+ header = ArtificialSamUtils.createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
+
+ }
+ @Test
+ public void testInOrder() {
+ iter = new ArtificialPatternedSAMIterator(startingChr,endingChr,readCount,0,header, ArtificialPatternedSAMIterator.PATTERN.IN_ORDER_READS);
+ if (!iter.hasNext()) {
+ fail("no reads in the ArtificialPatternedSAMIterator");
+ }
+ SAMRecord last = iter.next();
+ while (iter.hasNext()) {
+ SAMRecord rec = iter.next();
+ if (!(rec.getReferenceIndex() > last.getReferenceIndex()) && (rec.getAlignmentStart() <= last.getAlignmentStart())) {
+ fail("read " + rec.getReadName() + " out of order compared to last read, " + last.getReadName());
+ }
+ last = rec;
+ }
+
+ }
+ @Test
+ public void testOutOfOrder() {
+ int outOfOrderCount = 0;
+ iter = new ArtificialPatternedSAMIterator(startingChr,endingChr,readCount,0,header, ArtificialPatternedSAMIterator.PATTERN.RANDOM_READS);
+ if (!iter.hasNext()) {
+ fail("no reads in the ArtificialPatternedSAMIterator");
+ }
+ SAMRecord last = iter.next();
+ while (iter.hasNext()) {
+ SAMRecord rec = iter.next();
+ if (!(rec.getReferenceIndex() > last.getReferenceIndex()) && (rec.getAlignmentStart() <= last.getAlignmentStart())) {
+ ++outOfOrderCount;
+ }
+ last = rec;
+ }
+ assertTrue(outOfOrderCount > 0);
+ }
+
+
+}