diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjector.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjector.java
new file mode 100644
index 000000000..616619f53
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjector.java
@@ -0,0 +1,148 @@
+package org.broadinstitute.sting.playground.gatk.walkers.indels;
+
+import org.broadinstitute.sting.gatk.walkers.ReadWalker;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.utils.cmdLine.Argument;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMFileWriter;
+import net.sf.samtools.util.CloseableIterator;
+
+import java.io.File;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.Queue;
+import java.util.LinkedList;
+/**
+ * User: hanna
+ * Date: Jun 10, 2009
+ * Time: 2:40:19 PM
+ * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
+ * Software and documentation are copyright 2005 by the Broad Institute.
+ * All rights are reserved.
+ *
+ * Users acknowledge that this software is supplied without any warranty or support.
+ * The Broad Institute is not responsible for its use, misuse, or
+ * functionality.
+ */
+
+/**
+ * Copies reads from the input stream into the outputBAM, replacing those
+ * reads which have been cleaned with their new clean copies.
+ */
+public class CleanedReadInjector extends ReadWalker {
+
+ /**
+ * The source of all cleaned intervals.
+ */
+ @Argument(fullName="cleaned_intervals",shortName="ci",doc="Intervals which have been cleaned.",required=true)
+ String intervalsSource = null;
+
+ /**
+ * The source of all cleaned reads.
+ */
+ @Argument(fullName="cleaned_reads",shortName="cr",doc="BAM file of reads which have been cleaned.",required=true)
+ SAMFileReader cleanedReadsSource = null;
+
+ /**
+ * Target file for BAM output.
+ */
+ @Argument(fullName="output_bam",shortName="ob",doc="Output BAM file",required=true)
+ SAMFileWriter outputBAM = null;
+
+ /**
+ * A stream of processed intervals. The current head of the queue represents the next interval.
+ */
+ private Queue intervals;
+
+ /**
+ * The interval currently in process.
+ */
+ private GenomeLoc interval = null;
+
+ /**
+ * A structure for fast lookup of all reads that have been cleaned in the current interval.
+ */
+ private Map cleanedReads = new HashMap();
+
+ @Override
+ public void initialize() {
+ intervals = new LinkedList( GenomeAnalysisEngine.parseIntervalRegion(intervalsSource,false) );
+ interval = intervals.remove();
+ loadCleanedReadsOverlappingInterval( interval );
+ }
+
+ /**
+ * For every read, inspect the set of cleaned reads that could possibly replace the current one.
+ * If one exists, add the cleaned read to the output. Otherwise, add the current read.
+ * @param ref Portion of the reference sequence overlapping this read.
+ * @param read The read to inspect.
+ * @return Number of reads cleaned by this iteration of the map function.
+ */
+ @Override
+ public Integer map(char[] ref, SAMRecord read) {
+ GenomeLoc loc = new GenomeLoc(read);
+
+ while( loc.isPast(interval) && intervals.size() > 0 ) {
+ interval = intervals.remove();
+ loadCleanedReadsOverlappingInterval(interval);
+ }
+
+ String uniquifiedReadName = getUniquifiedReadName(read);
+ int numCleaned = 0;
+
+ if( loc.overlapsP(interval) && cleanedReads.containsKey(uniquifiedReadName) ) {
+ SAMRecord cleanedRead = cleanedReads.get(uniquifiedReadName);
+ cleanedRead.setReadName(read.getReadName());
+ outputBAM.addAlignment(cleanedRead);
+ numCleaned = 1;
+ }
+ else {
+ outputBAM.addAlignment(read);
+ numCleaned = 0;
+ }
+
+ return numCleaned;
+ }
+
+ /**
+ * Initialize traversal with number of reads which have been replaced with a clean version.
+ * @return 0 to initialize the traversal.
+ */
+ @Override
+ public Integer reduceInit() { return 0; }
+
+ /**
+ * Accumulates the number of reads which have been replaced by their clean equivalents.
+ * @param value Number of reads cleaned by this iteration.
+ * @param accum Prior number of reads cleaned.
+ * @return Total number of reads cleaned, including this iteration.
+ */
+ @Override
+ public Integer reduce( Integer value, Integer accum ) {
+ return accum + value;
+ }
+
+ /**
+ * Load a list of all the reads overlapping the given interval into memory.
+ * @param interval
+ */
+ private void loadCleanedReadsOverlappingInterval( GenomeLoc interval ) {
+ CloseableIterator overlappingReads = cleanedReadsSource.queryOverlapping( interval.getContig(), (int)interval.getStart(), (int)interval.getStop() );
+ // Load in all reads mapped to this region. The cleaner will augment the read name in a way that uniquifies it.
+ while( overlappingReads.hasNext() ) {
+ SAMRecord read = overlappingReads.next();
+ cleanedReads.put( read.getReadName(), read );
+ }
+ }
+
+ /**
+ * Gets the distinguishing elements of the read name from the given read.
+ * @param read read to uniquify.
+ * @return A (hopefully) completely unique name for the read.
+ */
+ static String getUniquifiedReadName( SAMRecord read ) {
+ return String.format("%s.%s.%s.%s",read.getAttribute("RG"),read.getReadName(),read.getFlags(),read.getReadString());
+ }
+}
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialPatternedSAMIterator.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialPatternedSAMIterator.java
index cbb8a1e02..c5f70d05a 100644
--- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialPatternedSAMIterator.java
+++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialPatternedSAMIterator.java
@@ -71,7 +71,7 @@ public class ArtificialPatternedSAMIterator extends ArtificialSAMIterator {
reads = new int[readCount];
for (int x = 0; x < readCount; x++) {
- reads[x] = x;
+ reads[x] = x+1;
}
if (pattern == PATTERN.RANDOM_READS) {
// scramble a bunch of the reads
@@ -102,9 +102,9 @@ public class ArtificialPatternedSAMIterator extends ArtificialSAMIterator {
* @return
*/
protected boolean createNextRead() {
- if (currentRead >= rCount) {
+ if (currentRead > rCount) {
currentChromo++;
- currentRead = 0;
+ currentRead = 1;
}
// check for end condition, have we finished the chromosome listing, and have no unmapped reads
if (currentChromo >= eChromosomeCount) {
@@ -137,7 +137,7 @@ public class ArtificialPatternedSAMIterator extends ArtificialSAMIterator {
if (read > this.readCount) {
return ArtificialSAMUtils.createArtificialRead(this.header, String.valueOf(reads[readCount - 1]), currentChromo, reads[readCount - 1], 50);
}
- return ArtificialSAMUtils.createArtificialRead(this.header, String.valueOf(reads[read]), currentChromo, reads[read], 50);
+ return ArtificialSAMUtils.createArtificialRead(this.header, String.valueOf(reads[read-1]), currentChromo, reads[read-1], 50);
}
}
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java
new file mode 100644
index 000000000..3b293a0fc
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java
@@ -0,0 +1,90 @@
+package org.broadinstitute.sting.utils.sam;
+
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.util.CloseableIterator;
+
+import java.io.InputStream;
+import java.io.ByteArrayInputStream;
+import java.io.UnsupportedEncodingException;
+import java.io.File;
+import java.util.Collections;
+import java.util.List;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.ArrayList;
+
+import org.broadinstitute.sting.utils.StingException;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.gatk.iterators.NullSAMIterator;
+import org.broadinstitute.sting.gatk.Reads;
+/**
+ * User: hanna
+ * Date: Jun 11, 2009
+ * Time: 9:35:31 AM
+ * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
+ * Software and documentation are copyright 2005 by the Broad Institute.
+ * All rights are reserved.
+ *
+ * Users acknowledge that this software is supplied without any warranty or support.
+ * The Broad Institute is not responsible for its use, misuse, or
+ * functionality.
+ */
+
+/**
+ * Pass specified reads into the given walker.
+ */
+
+public class ArtificialSAMFileReader extends SAMFileReader {
+ /**
+ * Backing data store of reads.
+ */
+ private List reads = null;
+
+ /**
+ * Construct an artificial SAM file reader.
+ */
+ public ArtificialSAMFileReader(SAMRecord... reads) {
+ super( createEmptyInputStream(),true );
+ this.reads = Arrays.asList(reads);
+ }
+
+ /**
+ * @{inheritDoc}
+ */
+ @Override
+ public CloseableIterator query(final String sequence, final int start, final int end, final boolean contained) {
+ GenomeLoc region = new GenomeLoc(sequence, start, end);
+ List coveredSubset = new ArrayList();
+
+ for( SAMRecord read: reads ) {
+ GenomeLoc readPosition = new GenomeLoc(read);
+ if( contained && region.containsP(readPosition) ) coveredSubset.add(read);
+ else if( !contained && readPosition.overlapsP(region) ) coveredSubset.add(read);
+ }
+
+ final Iterator iterator = coveredSubset.iterator();
+ return new CloseableIterator() {
+ public boolean hasNext() { return iterator.hasNext(); }
+ public SAMRecord next() { return iterator.next(); }
+ public void close() {}
+ public void remove() { iterator.remove(); }
+ };
+ }
+
+ /**
+ * Builds an empty input stream for faking out the sam file reader.
+ * Derive it from a string so that, in the future, it might be possible
+ * to fake the text of a sam file from samtools output, et.c
+ * @return Stream that returns no characters.
+ */
+ private static InputStream createEmptyInputStream() {
+ try {
+ byte[] byteArray = "".getBytes("ISO-8859-1");
+ return new ByteArrayInputStream(byteArray);
+ }
+ catch( UnsupportedEncodingException ex ) {
+ throw new StingException("Unable to build empty input stream",ex);
+ }
+ }
+}
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java
index 3254a1332..41d3cd62a 100644
--- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java
+++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java
@@ -38,7 +38,7 @@ public class ArtificialSAMIterator implements PeekingStingIterator {
protected int currentChromo = 0;
- protected int currentRead = 0;
+ protected int currentRead = 1;
protected int totalReadCount = 0;
protected boolean done = false;
// the next record
@@ -110,9 +110,9 @@ public class ArtificialSAMIterator implements PeekingStingIterator {
}
protected boolean createNextRead() {
- if (currentRead >= rCount) {
+ if (currentRead > rCount) {
currentChromo++;
- currentRead = 0;
+ currentRead = 1;
}
// check for end condition, have we finished the chromosome listing, and have no unmapped reads
if (currentChromo >= eChromosomeCount) {
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java
index 3eb88209b..fdf7771a3 100644
--- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java
+++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java
@@ -174,7 +174,7 @@ public class ArtificialSAMQueryIterator extends ArtificialSAMIterator {
/** make sure we haven't been used as an iterator yet; this is to miror the MergingSamIterator2 action. */
public void ensureUntouched() {
- if (this.currentChromo != 0 || this.currentRead > 0) {
+ if (this.currentChromo != 0 || this.currentRead > 1) {
throw new UnsupportedOperationException("We've already been used as an iterator; you can't query after that");
}
}
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
index 37320f54d..38704d138 100755
--- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
+++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
@@ -7,6 +7,7 @@ import java.util.*;
import org.broadinstitute.sting.gatk.iterators.PeekingStingIterator;
import org.broadinstitute.sting.gatk.Reads;
+import org.broadinstitute.sting.utils.StingException;
/**
*
@@ -114,10 +115,12 @@ public class ArtificialSAMUtils {
* @return the artificial read
*/
public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, int length ) {
+ if( alignmentStart == 0 )
+ throw new StingException("Invalid alignment start for artificial read");
SAMRecord record = new SAMRecord(header);
record.setReadName(name);
record.setReferenceIndex(refIndex);
- record.setAlignmentStart(alignmentStart + 1);
+ record.setAlignmentStart(alignmentStart);
List elements = new ArrayList();
elements.add(new CigarElement(length, CigarOperator.characterToEnum('M')));
record.setCigar(new Cigar(elements));
diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByReadsTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByReadsTest.java
index e73d70ec5..07d5b58d6 100755
--- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByReadsTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByReadsTest.java
@@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import static junit.framework.Assert.fail;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjectorTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjectorTest.java
new file mode 100644
index 000000000..faab0d1ef
--- /dev/null
+++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/indels/CleanedReadInjectorTest.java
@@ -0,0 +1,255 @@
+package org.broadinstitute.sting.playground.gatk.walkers.indels;
+
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.gatk.OutputTracker;
+import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.StingException;
+import org.broadinstitute.sting.utils.sam.ArtificialSAMFileReader;
+import org.broadinstitute.sting.utils.sam.ArtificialSAMFileWriter;
+import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.FileNotFoundException;
+import java.io.File;
+
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMRecord;
+import junit.framework.Assert;
+/**
+ * User: hanna
+ * Date: Jun 11, 2009
+ * Time: 9:54:05 AM
+ * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
+ * Software and documentation are copyright 2005 by the Broad Institute.
+ * All rights are reserved.
+ *
+ * Users acknowledge that this software is supplied without any warranty or support.
+ * The Broad Institute is not responsible for its use, misuse, or
+ * functionality.
+ */
+
+/**
+ * Test the walker that injects clean reads into the bam file.
+ */
+
+public class CleanedReadInjectorTest extends BaseTest {
+ /**
+ * The fasta, for comparison.
+ */
+ protected static IndexedFastaSequenceFile sequenceFile = null;
+
+ /**
+ * Initialize the fasta.
+ */
+ @BeforeClass
+ public static void initialize() throws FileNotFoundException {
+ sequenceFile = new IndexedFastaSequenceFile( new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta") );
+ GenomeLoc.setupRefContigOrdering(sequenceFile);
+ }
+
+ @Test
+ public void testNoReads() {
+ ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader();
+ ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
+ CleanedReadInjector walker = createWalker( "chr1:1-10", cleanedReads, output );
+
+ walker.initialize();
+ walker.onTraversalDone(0);
+
+ Assert.assertEquals("Too many records in output",0,output.getRecords().size());
+ }
+
+ @Test
+ public void testNoCleanedReads() {
+ SAMFileHeader header = getMockSAMFileHeader();
+ SAMRecord sourceRead = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,5);
+
+ ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader();
+ ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
+ CleanedReadInjector walker = createWalker( "chr1:1-10", cleanedReads, output );
+
+ int result = runWalkerOverReads(walker,sourceRead);
+
+ Assert.assertEquals("Result of traversal is incorrect",0,result);
+ Assert.assertEquals("Incorrect number of records in output",1,output.getRecords().size());
+ Assert.assertEquals("Output record is incorrect",sourceRead,output.getRecords().get(0));
+ }
+
+ @Test
+ public void testOneCleanedRead() {
+ SAMFileHeader header = getMockSAMFileHeader();
+ SAMRecord sourceRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
+
+ SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
+ cleanedRead.setReadName(CleanedReadInjector.getUniquifiedReadName(sourceRead));
+ cleanedRead.setReadString("AAAAA");
+ ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
+
+ ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
+ CleanedReadInjector walker = createWalker( "chr1:1-10", cleanedReads, output );
+
+ int result = runWalkerOverReads(walker,sourceRead);
+
+ Assert.assertEquals("Result of traversal is incorrect",1,result);
+ Assert.assertEquals("Incorrect number of records in output",1,output.getRecords().size());
+ Assert.assertEquals("Output record is incorrect",cleanedRead,output.getRecords().get(0));
+ }
+
+ @Test
+ public void testPartialIntervalOverlap() {
+ SAMFileHeader header = getMockSAMFileHeader();
+ SAMRecord sourceRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
+
+ SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
+ cleanedRead.setReadName(CleanedReadInjector.getUniquifiedReadName(sourceRead));
+ cleanedRead.setReadString("AAAAA");
+ ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
+
+ ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
+ CleanedReadInjector walker = createWalker( "chr1:4-12", cleanedReads, output );
+
+ int result = runWalkerOverReads(walker,sourceRead);
+
+ Assert.assertEquals("Result of traversal is incorrect",1,result);
+ Assert.assertEquals("Incorrect number of records in output",1,output.getRecords().size());
+ Assert.assertEquals("Output record is incorrect",cleanedRead,output.getRecords().get(0));
+ }
+
+ @Test
+ public void testMixedCleanedAndUncleanedNonOverlapping() {
+ SAMFileHeader header = getMockSAMFileHeader();
+ SAMRecord[] sourceReads = new SAMRecord[] { ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5),
+ ArtificialSAMUtils.createArtificialRead(header,"read2",1,2,5),
+ ArtificialSAMUtils.createArtificialRead(header,"read3",1,3,5),
+ ArtificialSAMUtils.createArtificialRead(header,"read4",1,4,5),
+ ArtificialSAMUtils.createArtificialRead(header,"read5",1,5,5) };
+
+ SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,1);
+ cleanedRead.setReadName(CleanedReadInjector.getUniquifiedReadName(sourceReads[0]));
+ cleanedRead.setReadString("AAAAA");
+ ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
+
+ ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
+ CleanedReadInjector walker = createWalker( "chr1:1-10", cleanedReads, output );
+
+ int result = runWalkerOverReads(walker,sourceReads);
+
+ Assert.assertEquals("Result of traversal is incorrect",1,result);
+ Assert.assertEquals("Incorrect number of records in output",5,output.getRecords().size());
+ for( int i = 0; i < sourceReads.length; i++ ) {
+ if( i == 0 )
+ Assert.assertEquals("Output record is incorrect",cleanedRead,output.getRecords().get(i));
+ else
+ Assert.assertEquals("Output record is incorrect",sourceReads[i],output.getRecords().get(i));
+ }
+ }
+
+ @Test
+ public void testAlternateReadOrder() {
+ SAMFileHeader header = getMockSAMFileHeader();
+ SAMRecord[] sourceReads = new SAMRecord[] { ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5),
+ ArtificialSAMUtils.createArtificialRead(header,"read2",1,2,5),
+ ArtificialSAMUtils.createArtificialRead(header,"read3",1,3,5) };
+
+ SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,3,1);
+ cleanedRead.setReadName(CleanedReadInjector.getUniquifiedReadName(sourceReads[0]));
+ cleanedRead.setReadString("AAAAA");
+ ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
+
+ ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
+ CleanedReadInjector walker = createWalker( "chr1:1-10", cleanedReads, output );
+
+ int result = runWalkerOverReads(walker,sourceReads);
+
+ Assert.assertEquals("Result of traversal is incorrect",1,result);
+ Assert.assertEquals("Incorrect number of records in output",3,output.getRecords().size());
+ Assert.assertEquals("Incorrect read at position 1",cleanedRead,output.getRecords().get(0));
+ Assert.assertEquals("Incorrect read at position 2",sourceReads[1],output.getRecords().get(1));
+ Assert.assertEquals("Incorrect read at position 3",sourceReads[2],output.getRecords().get(2));
+ }
+
+ @Test
+ public void testReadOutsideInterval() {
+ SAMFileHeader header = getMockSAMFileHeader();
+ SAMRecord sourceRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
+
+ SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
+ cleanedRead.setReadName(CleanedReadInjector.getUniquifiedReadName(sourceRead));
+ cleanedRead.setReadString("AAAAA");
+ ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
+
+ ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
+ CleanedReadInjector walker = createWalker( "chr1:20-50", cleanedReads, output );
+
+ int result = runWalkerOverReads( walker, sourceRead );
+
+ Assert.assertEquals("Result of traversal is incorrect",0,result);
+ Assert.assertEquals("Incorrect number of records in output",1,output.getRecords().size());
+ Assert.assertEquals("Output record is incorrect",sourceRead,output.getRecords().get(0));
+ }
+
+ @Test
+ public void testMultipleIntervals() {
+ SAMFileHeader header = getMockSAMFileHeader();
+ SAMRecord[] sourceReads = new SAMRecord[] { ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,10),
+ ArtificialSAMUtils.createArtificialRead(header,"read2",1,11,10),
+ ArtificialSAMUtils.createArtificialRead(header,"read3",1,21,10),
+ ArtificialSAMUtils.createArtificialRead(header,"read4",1,31,10),
+ ArtificialSAMUtils.createArtificialRead(header,"read5",1,41,10) };
+ SAMRecord[] cleanedReads = new SAMRecord[sourceReads.length];
+ for( int i = 0; i < sourceReads.length; i++ ) {
+ try {
+ cleanedReads[i] = (SAMRecord)sourceReads[i].clone();
+ cleanedReads[i].setReadName(CleanedReadInjector.getUniquifiedReadName(sourceReads[i]));
+ cleanedReads[i].setReadString("AAAAA");
+ }
+ catch( CloneNotSupportedException ex ) {
+ throw new StingException("Unable to clone samrecord", ex);
+ }
+ }
+
+ ArtificialSAMFileReader cleanedReader = new ArtificialSAMFileReader(cleanedReads);
+
+ ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
+ CleanedReadInjector walker = createWalker( "chr1:11-20;chr1:31-40", cleanedReader, output );
+ int result = runWalkerOverReads( walker, sourceReads );
+
+ Assert.assertEquals("Result of traversal is incorrect",2,result);
+ Assert.assertEquals("Incorrect number of records in output",5,output.getRecords().size());
+ Assert.assertEquals("Incorrect read at position 1",sourceReads[0],output.getRecords().get(0));
+ Assert.assertEquals("Incorrect read at position 2",cleanedReads[1],output.getRecords().get(1));
+ Assert.assertEquals("Incorrect read at position 3",sourceReads[2],output.getRecords().get(2));
+ Assert.assertEquals("Incorrect read at position 4",cleanedReads[3],output.getRecords().get(3));
+ Assert.assertEquals("Incorrect read at position 5",sourceReads[4],output.getRecords().get(4));
+ }
+
+ private CleanedReadInjector createWalker( String intervals, ArtificialSAMFileReader cleanedReads, ArtificialSAMFileWriter output ) {
+ CleanedReadInjector walker = new CleanedReadInjector();
+
+ walker.intervalsSource = intervals;
+ walker.cleanedReadsSource = cleanedReads;
+ walker.outputBAM = output;
+
+ walker.initializeOutputStreams( new OutputTracker(null,null) );
+
+ return walker;
+ }
+
+ private SAMFileHeader getMockSAMFileHeader() {
+ return ArtificialSAMUtils.createArtificialSamHeader(2,0,247249719);
+ }
+
+ private Integer runWalkerOverReads( CleanedReadInjector walker, SAMRecord... reads ) {
+ walker.initialize();
+ Integer accum = walker.reduceInit();
+ for( SAMRecord sourceRead: reads ) {
+ Integer value = walker.map( null, sourceRead );
+ accum = walker.reduce(value,accum);
+ }
+ walker.onTraversalDone(accum);
+ return accum;
+ }
+
+}
diff --git a/java/test/org/broadinstitute/sting/utils/sam/ArtificialSAMFileWriterTest.java b/java/test/org/broadinstitute/sting/utils/sam/ArtificialSAMFileWriterTest.java
index 006d4e12a..ef6a42472 100644
--- a/java/test/org/broadinstitute/sting/utils/sam/ArtificialSAMFileWriterTest.java
+++ b/java/test/org/broadinstitute/sting/utils/sam/ArtificialSAMFileWriterTest.java
@@ -61,7 +61,7 @@ public class ArtificialSAMFileWriterTest extends BaseTest {
@Test
public void testBasicCount() {
- for (int x = 0; x < 100; x++) {
+ for (int x = 1; x <= 100; x++) {
SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, String.valueOf(x), 1, x, ArtificialSAMUtils.DEFAULT_READ_LENGTH);
writer.addAlignment(rec);
}
@@ -73,7 +73,7 @@ public class ArtificialSAMFileWriterTest extends BaseTest {
public void testReadName() {
List names = new ArrayList();
- for (int x = 0; x < 100; x++) {
+ for (int x = 1; x <= 100; x++) {
names.add(String.valueOf(x));
SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, String.valueOf(x), 1, x, ArtificialSAMUtils.DEFAULT_READ_LENGTH);
writer.addAlignment(rec);