First pass of a walker for Eric that searches through an input BAM file for unclean reads, injecting the cleaned reads in their place and outputting the composite result.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@989 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
032d0436e6
commit
71e3825fa1
|
|
@ -0,0 +1,148 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.indels;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
import java.util.Queue;
|
||||
import java.util.LinkedList;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: Jun 10, 2009
|
||||
* Time: 2:40:19 PM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Copies reads from the input stream into the <code>outputBAM</code>, replacing those
|
||||
* reads which have been cleaned with their new clean copies.
|
||||
*/
|
||||
public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
|
||||
|
||||
/**
|
||||
* The source of all cleaned intervals.
|
||||
*/
|
||||
@Argument(fullName="cleaned_intervals",shortName="ci",doc="Intervals which have been cleaned.",required=true)
|
||||
String intervalsSource = null;
|
||||
|
||||
/**
|
||||
* The source of all cleaned reads.
|
||||
*/
|
||||
@Argument(fullName="cleaned_reads",shortName="cr",doc="BAM file of reads which have been cleaned.",required=true)
|
||||
SAMFileReader cleanedReadsSource = null;
|
||||
|
||||
/**
|
||||
* Target file for BAM output.
|
||||
*/
|
||||
@Argument(fullName="output_bam",shortName="ob",doc="Output BAM file",required=true)
|
||||
SAMFileWriter outputBAM = null;
|
||||
|
||||
/**
|
||||
* A stream of processed intervals. The current head of the queue represents the next interval.
|
||||
*/
|
||||
private Queue<GenomeLoc> intervals;
|
||||
|
||||
/**
|
||||
* The interval currently in process.
|
||||
*/
|
||||
private GenomeLoc interval = null;
|
||||
|
||||
/**
|
||||
* A structure for fast lookup of all reads that have been cleaned in the current interval.
|
||||
*/
|
||||
private Map<String,SAMRecord> cleanedReads = new HashMap<String,SAMRecord>();
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
intervals = new LinkedList<GenomeLoc>( GenomeAnalysisEngine.parseIntervalRegion(intervalsSource,false) );
|
||||
interval = intervals.remove();
|
||||
loadCleanedReadsOverlappingInterval( interval );
|
||||
}
|
||||
|
||||
/**
|
||||
* For every read, inspect the set of cleaned reads that could possibly replace the current one.
|
||||
* If one exists, add the cleaned read to the output. Otherwise, add the current read.
|
||||
* @param ref Portion of the reference sequence overlapping this read.
|
||||
* @param read The read to inspect.
|
||||
* @return Number of reads cleaned by this iteration of the map function.
|
||||
*/
|
||||
@Override
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
GenomeLoc loc = new GenomeLoc(read);
|
||||
|
||||
while( loc.isPast(interval) && intervals.size() > 0 ) {
|
||||
interval = intervals.remove();
|
||||
loadCleanedReadsOverlappingInterval(interval);
|
||||
}
|
||||
|
||||
String uniquifiedReadName = getUniquifiedReadName(read);
|
||||
int numCleaned = 0;
|
||||
|
||||
if( loc.overlapsP(interval) && cleanedReads.containsKey(uniquifiedReadName) ) {
|
||||
SAMRecord cleanedRead = cleanedReads.get(uniquifiedReadName);
|
||||
cleanedRead.setReadName(read.getReadName());
|
||||
outputBAM.addAlignment(cleanedRead);
|
||||
numCleaned = 1;
|
||||
}
|
||||
else {
|
||||
outputBAM.addAlignment(read);
|
||||
numCleaned = 0;
|
||||
}
|
||||
|
||||
return numCleaned;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize traversal with number of reads which have been replaced with a clean version.
|
||||
* @return 0 to initialize the traversal.
|
||||
*/
|
||||
@Override
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
/**
|
||||
* Accumulates the number of reads which have been replaced by their clean equivalents.
|
||||
* @param value Number of reads cleaned by this iteration.
|
||||
* @param accum Prior number of reads cleaned.
|
||||
* @return Total number of reads cleaned, including this iteration.
|
||||
*/
|
||||
@Override
|
||||
public Integer reduce( Integer value, Integer accum ) {
|
||||
return accum + value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Load a list of all the reads overlapping the given interval into memory.
|
||||
* @param interval
|
||||
*/
|
||||
private void loadCleanedReadsOverlappingInterval( GenomeLoc interval ) {
|
||||
CloseableIterator<SAMRecord> overlappingReads = cleanedReadsSource.queryOverlapping( interval.getContig(), (int)interval.getStart(), (int)interval.getStop() );
|
||||
// Load in all reads mapped to this region. The cleaner will augment the read name in a way that uniquifies it.
|
||||
while( overlappingReads.hasNext() ) {
|
||||
SAMRecord read = overlappingReads.next();
|
||||
cleanedReads.put( read.getReadName(), read );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the distinguishing elements of the read name from the given read.
|
||||
* @param read read to uniquify.
|
||||
* @return A (hopefully) completely unique name for the read.
|
||||
*/
|
||||
static String getUniquifiedReadName( SAMRecord read ) {
|
||||
return String.format("%s.%s.%s.%s",read.getAttribute("RG"),read.getReadName(),read.getFlags(),read.getReadString());
|
||||
}
|
||||
}
|
||||
|
|
@ -71,7 +71,7 @@ public class ArtificialPatternedSAMIterator extends ArtificialSAMIterator {
|
|||
reads = new int[readCount];
|
||||
|
||||
for (int x = 0; x < readCount; x++) {
|
||||
reads[x] = x;
|
||||
reads[x] = x+1;
|
||||
}
|
||||
if (pattern == PATTERN.RANDOM_READS) {
|
||||
// scramble a bunch of the reads
|
||||
|
|
@ -102,9 +102,9 @@ public class ArtificialPatternedSAMIterator extends ArtificialSAMIterator {
|
|||
* @return
|
||||
*/
|
||||
protected boolean createNextRead() {
|
||||
if (currentRead >= rCount) {
|
||||
if (currentRead > rCount) {
|
||||
currentChromo++;
|
||||
currentRead = 0;
|
||||
currentRead = 1;
|
||||
}
|
||||
// check for end condition, have we finished the chromosome listing, and have no unmapped reads
|
||||
if (currentChromo >= eChromosomeCount) {
|
||||
|
|
@ -137,7 +137,7 @@ public class ArtificialPatternedSAMIterator extends ArtificialSAMIterator {
|
|||
if (read > this.readCount) {
|
||||
return ArtificialSAMUtils.createArtificialRead(this.header, String.valueOf(reads[readCount - 1]), currentChromo, reads[readCount - 1], 50);
|
||||
}
|
||||
return ArtificialSAMUtils.createArtificialRead(this.header, String.valueOf(reads[read]), currentChromo, reads[read], 50);
|
||||
return ArtificialSAMUtils.createArtificialRead(this.header, String.valueOf(reads[read-1]), currentChromo, reads[read-1], 50);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,90 @@
|
|||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.io.File;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.iterators.NullSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: Jun 11, 2009
|
||||
* Time: 9:35:31 AM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Pass specified reads into the given walker.
|
||||
*/
|
||||
|
||||
public class ArtificialSAMFileReader extends SAMFileReader {
|
||||
/**
|
||||
* Backing data store of reads.
|
||||
*/
|
||||
private List<SAMRecord> reads = null;
|
||||
|
||||
/**
|
||||
* Construct an artificial SAM file reader.
|
||||
*/
|
||||
public ArtificialSAMFileReader(SAMRecord... reads) {
|
||||
super( createEmptyInputStream(),true );
|
||||
this.reads = Arrays.asList(reads);
|
||||
}
|
||||
|
||||
/**
|
||||
* @{inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public CloseableIterator<SAMRecord> query(final String sequence, final int start, final int end, final boolean contained) {
|
||||
GenomeLoc region = new GenomeLoc(sequence, start, end);
|
||||
List<SAMRecord> coveredSubset = new ArrayList<SAMRecord>();
|
||||
|
||||
for( SAMRecord read: reads ) {
|
||||
GenomeLoc readPosition = new GenomeLoc(read);
|
||||
if( contained && region.containsP(readPosition) ) coveredSubset.add(read);
|
||||
else if( !contained && readPosition.overlapsP(region) ) coveredSubset.add(read);
|
||||
}
|
||||
|
||||
final Iterator<SAMRecord> iterator = coveredSubset.iterator();
|
||||
return new CloseableIterator<SAMRecord>() {
|
||||
public boolean hasNext() { return iterator.hasNext(); }
|
||||
public SAMRecord next() { return iterator.next(); }
|
||||
public void close() {}
|
||||
public void remove() { iterator.remove(); }
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds an empty input stream for faking out the sam file reader.
|
||||
* Derive it from a string so that, in the future, it might be possible
|
||||
* to fake the text of a sam file from samtools output, et.c
|
||||
* @return Stream that returns no characters.
|
||||
*/
|
||||
private static InputStream createEmptyInputStream() {
|
||||
try {
|
||||
byte[] byteArray = "".getBytes("ISO-8859-1");
|
||||
return new ByteArrayInputStream(byteArray);
|
||||
}
|
||||
catch( UnsupportedEncodingException ex ) {
|
||||
throw new StingException("Unable to build empty input stream",ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -38,7 +38,7 @@ public class ArtificialSAMIterator implements PeekingStingIterator {
|
|||
|
||||
|
||||
protected int currentChromo = 0;
|
||||
protected int currentRead = 0;
|
||||
protected int currentRead = 1;
|
||||
protected int totalReadCount = 0;
|
||||
protected boolean done = false;
|
||||
// the next record
|
||||
|
|
@ -110,9 +110,9 @@ public class ArtificialSAMIterator implements PeekingStingIterator {
|
|||
}
|
||||
|
||||
protected boolean createNextRead() {
|
||||
if (currentRead >= rCount) {
|
||||
if (currentRead > rCount) {
|
||||
currentChromo++;
|
||||
currentRead = 0;
|
||||
currentRead = 1;
|
||||
}
|
||||
// check for end condition, have we finished the chromosome listing, and have no unmapped reads
|
||||
if (currentChromo >= eChromosomeCount) {
|
||||
|
|
|
|||
|
|
@ -174,7 +174,7 @@ public class ArtificialSAMQueryIterator extends ArtificialSAMIterator {
|
|||
|
||||
/** make sure we haven't been used as an iterator yet; this is to miror the MergingSamIterator2 action. */
|
||||
public void ensureUntouched() {
|
||||
if (this.currentChromo != 0 || this.currentRead > 0) {
|
||||
if (this.currentChromo != 0 || this.currentRead > 1) {
|
||||
throw new UnsupportedOperationException("We've already been used as an iterator; you can't query after that");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import java.util.*;
|
|||
|
||||
import org.broadinstitute.sting.gatk.iterators.PeekingStingIterator;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
/**
|
||||
*
|
||||
|
|
@ -114,10 +115,12 @@ public class ArtificialSAMUtils {
|
|||
* @return the artificial read
|
||||
*/
|
||||
public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, int length ) {
|
||||
if( alignmentStart == 0 )
|
||||
throw new StingException("Invalid alignment start for artificial read");
|
||||
SAMRecord record = new SAMRecord(header);
|
||||
record.setReadName(name);
|
||||
record.setReferenceIndex(refIndex);
|
||||
record.setAlignmentStart(alignmentStart + 1);
|
||||
record.setAlignmentStart(alignmentStart);
|
||||
List<CigarElement> elements = new ArrayList<CigarElement>();
|
||||
elements.add(new CigarElement(length, CigarOperator.characterToEnum('M')));
|
||||
record.setCigar(new Cigar(elements));
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
|||
import static junit.framework.Assert.fail;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,255 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.indels;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.OutputTracker;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMFileReader;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMFileWriter;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.File;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import junit.framework.Assert;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: Jun 11, 2009
|
||||
* Time: 9:54:05 AM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test the walker that injects clean reads into the bam file.
|
||||
*/
|
||||
|
||||
public class CleanedReadInjectorTest extends BaseTest {
|
||||
/**
|
||||
* The fasta, for comparison.
|
||||
*/
|
||||
protected static IndexedFastaSequenceFile sequenceFile = null;
|
||||
|
||||
/**
|
||||
* Initialize the fasta.
|
||||
*/
|
||||
@BeforeClass
|
||||
public static void initialize() throws FileNotFoundException {
|
||||
sequenceFile = new IndexedFastaSequenceFile( new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta") );
|
||||
GenomeLoc.setupRefContigOrdering(sequenceFile);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoReads() {
|
||||
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader();
|
||||
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
||||
CleanedReadInjector walker = createWalker( "chr1:1-10", cleanedReads, output );
|
||||
|
||||
walker.initialize();
|
||||
walker.onTraversalDone(0);
|
||||
|
||||
Assert.assertEquals("Too many records in output",0,output.getRecords().size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoCleanedReads() {
|
||||
SAMFileHeader header = getMockSAMFileHeader();
|
||||
SAMRecord sourceRead = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,5);
|
||||
|
||||
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader();
|
||||
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
||||
CleanedReadInjector walker = createWalker( "chr1:1-10", cleanedReads, output );
|
||||
|
||||
int result = runWalkerOverReads(walker,sourceRead);
|
||||
|
||||
Assert.assertEquals("Result of traversal is incorrect",0,result);
|
||||
Assert.assertEquals("Incorrect number of records in output",1,output.getRecords().size());
|
||||
Assert.assertEquals("Output record is incorrect",sourceRead,output.getRecords().get(0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOneCleanedRead() {
|
||||
SAMFileHeader header = getMockSAMFileHeader();
|
||||
SAMRecord sourceRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
||||
|
||||
SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
||||
cleanedRead.setReadName(CleanedReadInjector.getUniquifiedReadName(sourceRead));
|
||||
cleanedRead.setReadString("AAAAA");
|
||||
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
|
||||
|
||||
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
||||
CleanedReadInjector walker = createWalker( "chr1:1-10", cleanedReads, output );
|
||||
|
||||
int result = runWalkerOverReads(walker,sourceRead);
|
||||
|
||||
Assert.assertEquals("Result of traversal is incorrect",1,result);
|
||||
Assert.assertEquals("Incorrect number of records in output",1,output.getRecords().size());
|
||||
Assert.assertEquals("Output record is incorrect",cleanedRead,output.getRecords().get(0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPartialIntervalOverlap() {
|
||||
SAMFileHeader header = getMockSAMFileHeader();
|
||||
SAMRecord sourceRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
||||
|
||||
SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
||||
cleanedRead.setReadName(CleanedReadInjector.getUniquifiedReadName(sourceRead));
|
||||
cleanedRead.setReadString("AAAAA");
|
||||
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
|
||||
|
||||
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
||||
CleanedReadInjector walker = createWalker( "chr1:4-12", cleanedReads, output );
|
||||
|
||||
int result = runWalkerOverReads(walker,sourceRead);
|
||||
|
||||
Assert.assertEquals("Result of traversal is incorrect",1,result);
|
||||
Assert.assertEquals("Incorrect number of records in output",1,output.getRecords().size());
|
||||
Assert.assertEquals("Output record is incorrect",cleanedRead,output.getRecords().get(0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMixedCleanedAndUncleanedNonOverlapping() {
|
||||
SAMFileHeader header = getMockSAMFileHeader();
|
||||
SAMRecord[] sourceReads = new SAMRecord[] { ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5),
|
||||
ArtificialSAMUtils.createArtificialRead(header,"read2",1,2,5),
|
||||
ArtificialSAMUtils.createArtificialRead(header,"read3",1,3,5),
|
||||
ArtificialSAMUtils.createArtificialRead(header,"read4",1,4,5),
|
||||
ArtificialSAMUtils.createArtificialRead(header,"read5",1,5,5) };
|
||||
|
||||
SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,1);
|
||||
cleanedRead.setReadName(CleanedReadInjector.getUniquifiedReadName(sourceReads[0]));
|
||||
cleanedRead.setReadString("AAAAA");
|
||||
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
|
||||
|
||||
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
||||
CleanedReadInjector walker = createWalker( "chr1:1-10", cleanedReads, output );
|
||||
|
||||
int result = runWalkerOverReads(walker,sourceReads);
|
||||
|
||||
Assert.assertEquals("Result of traversal is incorrect",1,result);
|
||||
Assert.assertEquals("Incorrect number of records in output",5,output.getRecords().size());
|
||||
for( int i = 0; i < sourceReads.length; i++ ) {
|
||||
if( i == 0 )
|
||||
Assert.assertEquals("Output record is incorrect",cleanedRead,output.getRecords().get(i));
|
||||
else
|
||||
Assert.assertEquals("Output record is incorrect",sourceReads[i],output.getRecords().get(i));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAlternateReadOrder() {
|
||||
SAMFileHeader header = getMockSAMFileHeader();
|
||||
SAMRecord[] sourceReads = new SAMRecord[] { ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5),
|
||||
ArtificialSAMUtils.createArtificialRead(header,"read2",1,2,5),
|
||||
ArtificialSAMUtils.createArtificialRead(header,"read3",1,3,5) };
|
||||
|
||||
SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,3,1);
|
||||
cleanedRead.setReadName(CleanedReadInjector.getUniquifiedReadName(sourceReads[0]));
|
||||
cleanedRead.setReadString("AAAAA");
|
||||
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
|
||||
|
||||
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
||||
CleanedReadInjector walker = createWalker( "chr1:1-10", cleanedReads, output );
|
||||
|
||||
int result = runWalkerOverReads(walker,sourceReads);
|
||||
|
||||
Assert.assertEquals("Result of traversal is incorrect",1,result);
|
||||
Assert.assertEquals("Incorrect number of records in output",3,output.getRecords().size());
|
||||
Assert.assertEquals("Incorrect read at position 1",cleanedRead,output.getRecords().get(0));
|
||||
Assert.assertEquals("Incorrect read at position 2",sourceReads[1],output.getRecords().get(1));
|
||||
Assert.assertEquals("Incorrect read at position 3",sourceReads[2],output.getRecords().get(2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReadOutsideInterval() {
|
||||
SAMFileHeader header = getMockSAMFileHeader();
|
||||
SAMRecord sourceRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
||||
|
||||
SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
||||
cleanedRead.setReadName(CleanedReadInjector.getUniquifiedReadName(sourceRead));
|
||||
cleanedRead.setReadString("AAAAA");
|
||||
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
|
||||
|
||||
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
||||
CleanedReadInjector walker = createWalker( "chr1:20-50", cleanedReads, output );
|
||||
|
||||
int result = runWalkerOverReads( walker, sourceRead );
|
||||
|
||||
Assert.assertEquals("Result of traversal is incorrect",0,result);
|
||||
Assert.assertEquals("Incorrect number of records in output",1,output.getRecords().size());
|
||||
Assert.assertEquals("Output record is incorrect",sourceRead,output.getRecords().get(0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleIntervals() {
|
||||
SAMFileHeader header = getMockSAMFileHeader();
|
||||
SAMRecord[] sourceReads = new SAMRecord[] { ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,10),
|
||||
ArtificialSAMUtils.createArtificialRead(header,"read2",1,11,10),
|
||||
ArtificialSAMUtils.createArtificialRead(header,"read3",1,21,10),
|
||||
ArtificialSAMUtils.createArtificialRead(header,"read4",1,31,10),
|
||||
ArtificialSAMUtils.createArtificialRead(header,"read5",1,41,10) };
|
||||
SAMRecord[] cleanedReads = new SAMRecord[sourceReads.length];
|
||||
for( int i = 0; i < sourceReads.length; i++ ) {
|
||||
try {
|
||||
cleanedReads[i] = (SAMRecord)sourceReads[i].clone();
|
||||
cleanedReads[i].setReadName(CleanedReadInjector.getUniquifiedReadName(sourceReads[i]));
|
||||
cleanedReads[i].setReadString("AAAAA");
|
||||
}
|
||||
catch( CloneNotSupportedException ex ) {
|
||||
throw new StingException("Unable to clone samrecord", ex);
|
||||
}
|
||||
}
|
||||
|
||||
ArtificialSAMFileReader cleanedReader = new ArtificialSAMFileReader(cleanedReads);
|
||||
|
||||
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
||||
CleanedReadInjector walker = createWalker( "chr1:11-20;chr1:31-40", cleanedReader, output );
|
||||
int result = runWalkerOverReads( walker, sourceReads );
|
||||
|
||||
Assert.assertEquals("Result of traversal is incorrect",2,result);
|
||||
Assert.assertEquals("Incorrect number of records in output",5,output.getRecords().size());
|
||||
Assert.assertEquals("Incorrect read at position 1",sourceReads[0],output.getRecords().get(0));
|
||||
Assert.assertEquals("Incorrect read at position 2",cleanedReads[1],output.getRecords().get(1));
|
||||
Assert.assertEquals("Incorrect read at position 3",sourceReads[2],output.getRecords().get(2));
|
||||
Assert.assertEquals("Incorrect read at position 4",cleanedReads[3],output.getRecords().get(3));
|
||||
Assert.assertEquals("Incorrect read at position 5",sourceReads[4],output.getRecords().get(4));
|
||||
}
|
||||
|
||||
private CleanedReadInjector createWalker( String intervals, ArtificialSAMFileReader cleanedReads, ArtificialSAMFileWriter output ) {
|
||||
CleanedReadInjector walker = new CleanedReadInjector();
|
||||
|
||||
walker.intervalsSource = intervals;
|
||||
walker.cleanedReadsSource = cleanedReads;
|
||||
walker.outputBAM = output;
|
||||
|
||||
walker.initializeOutputStreams( new OutputTracker(null,null) );
|
||||
|
||||
return walker;
|
||||
}
|
||||
|
||||
private SAMFileHeader getMockSAMFileHeader() {
|
||||
return ArtificialSAMUtils.createArtificialSamHeader(2,0,247249719);
|
||||
}
|
||||
|
||||
private Integer runWalkerOverReads( CleanedReadInjector walker, SAMRecord... reads ) {
|
||||
walker.initialize();
|
||||
Integer accum = walker.reduceInit();
|
||||
for( SAMRecord sourceRead: reads ) {
|
||||
Integer value = walker.map( null, sourceRead );
|
||||
accum = walker.reduce(value,accum);
|
||||
}
|
||||
walker.onTraversalDone(accum);
|
||||
return accum;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -61,7 +61,7 @@ public class ArtificialSAMFileWriterTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testBasicCount() {
|
||||
for (int x = 0; x < 100; x++) {
|
||||
for (int x = 1; x <= 100; x++) {
|
||||
SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, String.valueOf(x), 1, x, ArtificialSAMUtils.DEFAULT_READ_LENGTH);
|
||||
writer.addAlignment(rec);
|
||||
}
|
||||
|
|
@ -73,7 +73,7 @@ public class ArtificialSAMFileWriterTest extends BaseTest {
|
|||
public void testReadName() {
|
||||
List<String> names = new ArrayList<String>();
|
||||
|
||||
for (int x = 0; x < 100; x++) {
|
||||
for (int x = 1; x <= 100; x++) {
|
||||
names.add(String.valueOf(x));
|
||||
SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, String.valueOf(x), 1, x, ArtificialSAMUtils.DEFAULT_READ_LENGTH);
|
||||
writer.addAlignment(rec);
|
||||
|
|
|
|||
Loading…
Reference in New Issue