diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialPatternedSAMIterator.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialPatternedSAMIterator.java new file mode 100644 index 000000000..6f677d1cd --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialPatternedSAMIterator.java @@ -0,0 +1,139 @@ +package org.broadinstitute.sting.utils.sam; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; + + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @author aaron + *

+ * Class ArtificialPatternedSAMIterator + *

+ * A descriptions should go here. Blame aaron if it's missing. + */ +public class ArtificialPatternedSAMIterator extends ArtificialSAMIterator { + + /** the pattern we're implementing */ + public enum PATTERN { + RANDOM_READS, IN_ORDER_READS; + } + + // our pattern + private final PATTERN mPattern; + + /** + * this is pretty heavy (and it could be extremely heavy, given the amount of reads they request, but it + * allows us to give them each read once, reguardless of the order specified + */ + private final int[] reads; + private final int readCount; + + /** + * create the fake iterator, given the mapping of chromosomes and read counts. If pattern + * is specified to be random, it will generate reads that are randomly placed on the current chromosome + * + * @param startingChr the starting chromosome + * @param endingChr the ending chromosome + * @param readCount the number of reads in each chromosome + * @param header the associated header + * @param pattern the pattern to implement + */ + ArtificialPatternedSAMIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount, SAMFileHeader header, PATTERN pattern ) { + super(startingChr, endingChr, readCount, unmappedReadCount, header); + mPattern = pattern; + this.readCount = readCount; + reads = new int[readCount]; + + for (int x = 0; x < readCount; x++) { + reads[x] = x; + } + if (pattern == PATTERN.RANDOM_READS) { + // scramble a bunch of the reads + for (int y = 0; y < readCount; y++) { + int ranOne = (int) Math.round(Math.random() * ( readCount - 1 )); + int ranTwo = (int) Math.round(Math.random() * ( readCount - 1 )); + int temp = reads[ranOne]; + reads[ranOne] = reads[ranTwo]; + reads[ranTwo] = temp; + } + /** + * up to this point there's no garauntee that the random() has made the reads out of order (though it's + * extremely extremely unlikely it's failed). Let's make sure there at least out of order: + */ + if (this.reads[0] < this.reads[reads.length -1]) { + int temp = reads[0]; + reads[0] = reads[reads.length -1]; + reads[reads.length -1] = temp; + } + + } + + } + + /** + * override the default ArtificialSAMIterator createNextRead method, which creates the next read + * @return + */ + protected boolean createNextRead() { + if (currentRead >= rCount) { + currentChromo++; + currentRead = 0; + } + // check for end condition, have we finished the chromosome listing, and have no unmapped reads + if (currentChromo >= eChromosomeCount) { + if (uMappedReadCount < 1) { + this.next = null; + return false; + } else { + ++totalReadCount; + this.next = ArtificialSamUtils.createArtificialRead(this.header, String.valueOf(totalReadCount), -1, -1, 50); + --uMappedReadCount; + return true; + } + } + ++totalReadCount; + this.next = getNextRecord(currentRead); + + ++currentRead; + return true; + } + + + /** + * get the next read, given it's index in the chromosome + * @param read the read index in the chromosome + * @return a SAMRecord + */ + private SAMRecord getNextRecord( int read ) { + if (read > this.readCount) { + return ArtificialSamUtils.createArtificialRead(this.header, String.valueOf(reads[readCount - 1]), currentChromo, reads[readCount - 1], 50); + } + return ArtificialSamUtils.createArtificialRead(this.header, String.valueOf(reads[read]), currentChromo, reads[read], 50); + } + +} diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java new file mode 100644 index 000000000..9d56c3c26 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java @@ -0,0 +1,144 @@ +package org.broadinstitute.sting.utils.sam; + +import org.broadinstitute.sting.gatk.traversals.TraversalEngine; +import org.broadinstitute.sting.gatk.traversals.TraversalStatistics; +import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.dataSources.shards.Shard; +import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.apache.log4j.Logger; + +import java.util.Arrays; + +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMFileHeader; + + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @author aaron + *

+ * Class ArtificialReadsTraversal + *

+ * A descriptions should go here. Blame aaron if it's missing. + */ +public class ArtificialReadsTraversal extends TraversalEngine { + + public int startingChr = 1; + public int endingChr = 5; + public int readsPerChr = 100; + public int unMappedReads = 1000; + private int DEFAULT_READ_LENGTH = ArtificialSamUtils.DEFAULT_READ_LENGTH; + private ArtificialPatternedSAMIterator iter; + /** our log, which we want to capture anything from this class */ + protected static Logger logger = Logger.getLogger(ArtificialReadsTraversal.class); + + /** Creates a new, uninitialized ArtificialReadsTraversal */ + public ArtificialReadsTraversal() { + super(null, null, null); + } + + // what read ordering are we using + private ArtificialPatternedSAMIterator.PATTERN readOrder = ArtificialPatternedSAMIterator.PATTERN.IN_ORDER_READS; + + + /** + * set the read ordering of the reads given to the walker + * + * @param readOrdering + */ + public void setReadOrder( ArtificialPatternedSAMIterator.PATTERN readOrdering ) { + readOrder = readOrdering; + } + + /** + * Traverse by reads, given the data and the walker + * + * @param walker the walker to traverse with + * @param shard the shard, specifying the range of data to iterate over + * @param dataProvider the provider of the reads data + * @param sum the value of type T, specified by the walker, to feed to the walkers reduce function + * @param the map type of the walker + * @param the reduce type of the walker + * + * @return the reduce variable of the read walker + */ + public T traverse( Walker walker, + Shard shard, + ShardDataProvider dataProvider, + T sum ) { + + if (!( walker instanceof ReadWalker )) + throw new IllegalArgumentException("Walker isn't a read walker!"); + + ReadWalker readWalker = (ReadWalker) walker; + SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readsPerChr + DEFAULT_READ_LENGTH); + iter = new ArtificialPatternedSAMIterator(this.startingChr, + this.endingChr, + this.readsPerChr, + this.unMappedReads, + header, + this.readOrder); + + // while we still have more reads + for (SAMRecord read : iter) { + + // our locus context + LocusContext locus = null; + + // an array of characters that represent the reference + char[] refSeq = null; + + // update the number of reads we've seen + TraversalStatistics.nRecords++; + + final boolean keepMeP = readWalker.filter(refSeq, read); + if (keepMeP) { + M x = readWalker.map(refSeq, read); + sum = readWalker.reduce(x, sum); + } + + if (locus != null) { printProgress("loci", locus.getLocation()); } + } + return sum; + } + + /** + * Temporary override of printOnTraversalDone. + * TODO: Add some sort of TE.getName() function once all TraversalEngines are ported. + * + * @param sum Result of the computation. + * @param Type of the result. + */ + public void printOnTraversalDone( T sum ) { + printOnTraversalDone("reads", sum); + } + + +} diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java index e371749f9..f727581c4 100644 --- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java @@ -109,7 +109,7 @@ public class ArtificialSAMIterator implements PeekingStingIterator { return false; } - private boolean createNextRead() { + protected boolean createNextRead() { if (currentRead >= rCount) { currentChromo++; currentRead = 0; @@ -143,6 +143,10 @@ public class ArtificialSAMIterator implements PeekingStingIterator { throw new UnsupportedOperationException("You've tried to remove on a StingSAMIterator (unsupported), not to mention that this is a fake iterator."); } + /** + * return this iterator, for the iterable interface + * @return + */ public Iterator iterator() { return this; } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerTest.java new file mode 100644 index 000000000..b8e39f5e5 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerTest.java @@ -0,0 +1,118 @@ +package org.broadinstitute.sting.gatk.walkers; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.executive.Accumulator; +import org.broadinstitute.sting.gatk.dataSources.shards.Shard; +import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider; +import org.broadinstitute.sting.utils.sam.ArtificialReadsTraversal; +import org.broadinstitute.sting.utils.sam.ArtificialSAMFileWriter; +import org.broadinstitute.sting.utils.sam.ArtificialSamUtils; +import org.junit.Before; +import org.junit.Test; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMFileHeader; + + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @author aaron + *

+ * Class PrintReadsWalkerTest + *

+ * This tests the print reads walker, using the artificial reads traversal + */ +public class PrintReadsWalkerTest extends BaseTest { + + /** + * our private fake reads traversal. This traversal seeds the + * the walker with a specified number of fake reads, which are named sequentially + */ + private ArtificialReadsTraversal trav; + private int readTotal = 0; + private char bases[] = {'a', 't'}; + + @Before + public void before() { + trav = new ArtificialReadsTraversal(); + readTotal = ( ( trav.endingChr - trav.startingChr ) + 1 ) * trav.readsPerChr + trav.unMappedReads; + } + + /** test that we get out the same number of reads we put in */ + @Test + public void testReadCount() { + PrintReadsWalker walker = new PrintReadsWalker(); + ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter(); + trav.traverse(walker, (Shard) null, (ShardDataProvider) null, writer); + assertEquals(readTotal, writer.getRecords().size()); + } + + /** test that we're ok with a null read */ + @Test + public void testNullRead() { + PrintReadsWalker walker = new PrintReadsWalker(); + + SAMRecord rec = walker.map(bases, null); + assertTrue(rec == null); + } + + /** tes that we get the read we put into the map function */ + @Test + public void testReturnRead() { + PrintReadsWalker walker = new PrintReadsWalker(); + SAMFileHeader head = ArtificialSamUtils.createArtificialSamHeader(3,1,1000); + SAMRecord rec = ArtificialSamUtils.createArtificialRead(head, "FakeRead", 1, 1, 50); + SAMRecord ret = walker.map(bases, rec); + assertTrue(ret == rec); + assertTrue(ret.getReadName().equals(rec.getReadName())); + } + + /** test that the read makes it to the output source */ + @Test + public void testReducingRead() { + PrintReadsWalker walker = new PrintReadsWalker(); + SAMFileHeader head = ArtificialSamUtils.createArtificialSamHeader(3,1,1000); + SAMRecord rec = ArtificialSamUtils.createArtificialRead(head, "FakeRead", 1, 1, 50); + ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter(); + SAMRecord ret = walker.map(bases, null); + walker.reduce(ret,writer); + + assertTrue(writer.getRecords().size() == 1); + } + + /** test that we close the output source */ + @Test + public void testClosingOnTraversalDone() { + ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter(); + assertTrue(!writer.isClosed()); + PrintReadsWalker walker = new PrintReadsWalker(); + walker.onTraversalDone(writer); + assertTrue(writer.isClosed()); + + } +} diff --git a/java/test/org/broadinstitute/sting/utils/sam/ArtificialPatternedSAMIteratorTest.java b/java/test/org/broadinstitute/sting/utils/sam/ArtificialPatternedSAMIteratorTest.java new file mode 100644 index 000000000..46ddb30bc --- /dev/null +++ b/java/test/org/broadinstitute/sting/utils/sam/ArtificialPatternedSAMIteratorTest.java @@ -0,0 +1,97 @@ +package org.broadinstitute.sting.utils.sam; + +import org.broadinstitute.sting.BaseTest; +import org.junit.Before; +import org.junit.Test; +import static org.junit.Assert.fail; +import static org.junit.Assert.assertTrue; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; + + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * + * @author aaron + * + * Class ArtificialPatternedSAMIteratorTest + * + * tests ArtificialPatternedSAMIterator, making sure that if you specify in order + * you get reads in order, and if you specify out of order you get them out of order. + */ +public class ArtificialPatternedSAMIteratorTest extends BaseTest { + + // our artifical patterned iterator + ArtificialPatternedSAMIterator iter; + + private int startingChr = 1; + private int endingChr = 2; + private int readCount = 100; + private int DEFAULT_READ_LENGTH = ArtificialSamUtils.DEFAULT_READ_LENGTH; + SAMFileHeader header; + + @Before + public void before() { + header = ArtificialSamUtils.createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); + + } + @Test + public void testInOrder() { + iter = new ArtificialPatternedSAMIterator(startingChr,endingChr,readCount,0,header, ArtificialPatternedSAMIterator.PATTERN.IN_ORDER_READS); + if (!iter.hasNext()) { + fail("no reads in the ArtificialPatternedSAMIterator"); + } + SAMRecord last = iter.next(); + while (iter.hasNext()) { + SAMRecord rec = iter.next(); + if (!(rec.getReferenceIndex() > last.getReferenceIndex()) && (rec.getAlignmentStart() <= last.getAlignmentStart())) { + fail("read " + rec.getReadName() + " out of order compared to last read, " + last.getReadName()); + } + last = rec; + } + + } + @Test + public void testOutOfOrder() { + int outOfOrderCount = 0; + iter = new ArtificialPatternedSAMIterator(startingChr,endingChr,readCount,0,header, ArtificialPatternedSAMIterator.PATTERN.RANDOM_READS); + if (!iter.hasNext()) { + fail("no reads in the ArtificialPatternedSAMIterator"); + } + SAMRecord last = iter.next(); + while (iter.hasNext()) { + SAMRecord rec = iter.next(); + if (!(rec.getReferenceIndex() > last.getReferenceIndex()) && (rec.getAlignmentStart() <= last.getAlignmentStart())) { + ++outOfOrderCount; + } + last = rec; + } + assertTrue(outOfOrderCount > 0); + } + + +}