N-Way-Out is back. Now uses SAMReaderID to identify each read's source bam, so it should be reliable. The interface is sort of ugly for now: to generate output file names, the .bam (or .sam) extension is stripped from the input file names, then the value of the -nWayOut argument is pasted on (and all the output files are written into the current dir).
Unrelated change: in the sorted-target mode (when we read sorted target intervals one by one from a file), one can now specify multiple semicolon-separated interval files (all must be sorted). Not hugely useful probably, but makes --targetIntervals always process its values in exactly the same way, so we are consistent (it has already been taking ;-separated args in unsorted mode). NwayIntervalMergingIterator: reads in multiple sorted GenomeLoc input streams (iterators) and presents them as a single sorted and merged stream. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4602 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
23cb399a88
commit
aadd230636
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.commandline.*;
|
|||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
|
@ -42,8 +43,10 @@ import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
|||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator;
|
||||
import org.broadinstitute.sting.utils.interval.NwayIntervalMergingIterator;
|
||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
@ -100,21 +103,42 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
"if this value is exceeded, realignment is not attempted and the reads are passed to the output file(s) as-is", required=false)
|
||||
protected int MAX_READS = 20000;
|
||||
|
||||
@Argument(fullName="sortInCoordinateOrderEvenThoughItIsHighlyUnsafe", required=false, doc="Should we sort the final bam in coordinate order even though it will be malformed because mate pairs of realigned reads will contain inaccurate information?")
|
||||
@Argument(fullName="sortInCoordinateOrderEvenThoughItIsHighlyUnsafe", required=false,
|
||||
doc="Should we sort the final bam in coordinate order even though it will be malformed because "+
|
||||
"mate pairs of realigned reads will contain inaccurate information?")
|
||||
protected boolean SORT_IN_COORDINATE_ORDER = false;
|
||||
|
||||
@Argument(fullName="realignReadsWithBadMates", required=false, doc="Should we try to realign paired-end reads whose mates map to other chromosomes?")
|
||||
@Argument(fullName="realignReadsWithBadMates", required=false,
|
||||
doc="Should we try to realign paired-end reads whose mates map to other chromosomes?")
|
||||
protected boolean REALIGN_BADLY_MATED_READS = false;
|
||||
|
||||
@Argument(fullName="noPGTag", shortName="noPG", required=false, doc="Don't output the usual PG tag in the realigned bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
|
||||
@Argument(fullName="noPGTag", shortName="noPG", required=false,
|
||||
doc="Don't output the usual PG tag in the realigned bam file header. FOR DEBUGGING PURPOSES ONLY. "+
|
||||
"This option is required in order to pass integration tests.")
|
||||
protected boolean NO_PG_TAG = false;
|
||||
|
||||
@Argument(fullName="noOriginalAlignmentTags", shortName="noTags", required=false, doc="Don't output the original cigar or alignment start tags for each realigned read in the output bam.")
|
||||
@Argument(fullName="noOriginalAlignmentTags", shortName="noTags", required=false,
|
||||
doc="Don't output the original cigar or alignment start tags for each realigned read in the output bam.")
|
||||
protected boolean NO_ORIGINAL_ALIGNMENT_TAGS = false;
|
||||
|
||||
@Argument(fullName="targetIntervalsAreNotSorted", shortName="targetNotSorted", required=false, doc="This tool assumes that the target interval list is sorted; if the list turns out to be unsorted, it will throw an exception. Use this argument when your interval list is not sorted to instruct the Realigner to first sort it in memory.")
|
||||
@Argument(fullName="targetIntervalsAreNotSorted", shortName="targetNotSorted", required=false,
|
||||
doc="This tool assumes that the target interval list is sorted; if the list turns out to be unsorted, "+
|
||||
"it will throw an exception. Use this argument when your interval list is not sorted to instruct "+"" +
|
||||
"the Realigner to first sort it in memory.")
|
||||
protected boolean TARGET_NOT_SORTED = false;
|
||||
|
||||
//NWay output: testing, not ready for the prime time, hence hidden:
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="nWayOut", shortName="nWayOut", required=false,
|
||||
doc="In this mode, there will be one output file for each input (-I) bam file. Reads from all input files "+
|
||||
"will be realigned together, but then each read wiil be saved in the output file corresponding to "+
|
||||
"the input file the read came from. The names of the output bam files will be constructed by "+
|
||||
"stripping extensions (\".bam\" or \".sam\") from the input file names and pasting the value "+
|
||||
"of -nWayOut argument to the result.")
|
||||
protected String N_WAY_OUT = null;
|
||||
|
||||
|
||||
// DEBUGGING OPTIONS FOLLOW
|
||||
|
||||
@Hidden
|
||||
|
|
@ -166,6 +190,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
private FileWriter statsOutput = null;
|
||||
private FileWriter snpsOutput = null;
|
||||
|
||||
protected Map<SAMReaderID,SAMFileWriter> nwayWriters = null;
|
||||
|
||||
public void initialize() {
|
||||
|
||||
if ( LOD_THRESHOLD < 0.0 )
|
||||
|
|
@ -175,9 +201,22 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
|
||||
referenceReader = new IndexedFastaSequenceFile(getToolkit().getArguments().referenceFile);
|
||||
|
||||
if ( !TARGET_NOT_SORTED && IntervalUtils.isIntervalFile(intervalsFile)) {
|
||||
// prepare to read intervals one-by-one, as needed (assuming they are sorted).
|
||||
intervals = new IntervalFileMergingIterator( new java.io.File(intervalsFile), IntervalMergingRule.OVERLAPPING_ONLY );
|
||||
if ( !TARGET_NOT_SORTED ) {
|
||||
|
||||
NwayIntervalMergingIterator merger = new NwayIntervalMergingIterator(IntervalMergingRule.OVERLAPPING_ONLY);
|
||||
List<GenomeLoc> rawIntervals = new ArrayList<GenomeLoc>();
|
||||
// separate argument on semicolon first
|
||||
for (String fileOrInterval : intervalsFile.split(";")) {
|
||||
// if it's a file, add items to raw interval list
|
||||
if (IntervalUtils.isIntervalFile(fileOrInterval)) {
|
||||
merger.add(new IntervalFileMergingIterator( new java.io.File(fileOrInterval), IntervalMergingRule.OVERLAPPING_ONLY ) );
|
||||
} else {
|
||||
rawIntervals.add(GenomeLocParser.parseGenomeInterval(fileOrInterval));
|
||||
}
|
||||
}
|
||||
if ( ! rawIntervals.isEmpty() ) merger.add(rawIntervals.iterator());
|
||||
// prepare to read intervals one-by-one, as needed (assuming they are sorted).
|
||||
intervals = merger;
|
||||
} else {
|
||||
// read in the whole list of intervals for cleaning
|
||||
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(IntervalUtils.parseIntervalArguments(Arrays.asList(intervalsFile),this.getToolkit().getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST), IntervalMergingRule.OVERLAPPING_ONLY);
|
||||
|
|
@ -185,6 +224,36 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
currentInterval = intervals.hasNext() ? intervals.next() : null;
|
||||
|
||||
if ( N_WAY_OUT != null ) {
|
||||
|
||||
if ( writer != null ) throw new UserException.BadInput("-nWayOut and -o arguments are mutually exclusive");
|
||||
|
||||
nwayWriters = new HashMap<SAMReaderID,SAMFileWriter>();
|
||||
|
||||
for ( SAMReaderID rid : getToolkit().getDataSource().getReaderIDs() ) {
|
||||
|
||||
String fName = getToolkit().getDataSource().getSAMFile(rid).getName();
|
||||
|
||||
int pos ;
|
||||
if ( fName.toUpperCase().endsWith(".BAM") ) pos = fName.toUpperCase().lastIndexOf(".BAM");
|
||||
else {
|
||||
if ( fName.toUpperCase().endsWith(".SAM") ) pos = fName.toUpperCase().lastIndexOf(".SAM");
|
||||
else throw new UserException.BadInput("Input file name "+fName+" does not end with .sam or .bam");
|
||||
}
|
||||
String prefix = fName.substring(0,pos);
|
||||
|
||||
if ( nwayWriters.containsKey( rid ) )
|
||||
throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered");
|
||||
|
||||
SAMFileWriterImpl.setDefaultMaxRecordsInRam(MAX_RECORDS_IN_RAM);
|
||||
SAMFileWriter sw = new SAMFileWriterFactory().makeSAMOrBAMWriter(setupHeader(getToolkit().getSAMFileHeader(rid)),
|
||||
false,new File(prefix+N_WAY_OUT));
|
||||
nwayWriters.put(rid,sw);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// set up the output writer(s)
|
||||
if ( writer != null )
|
||||
setupWriter(getToolkit().getSAMFileHeader());
|
||||
|
|
@ -218,6 +287,14 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
private SAMFileHeader setupHeader(SAMFileHeader header) {
|
||||
if ( SORT_IN_COORDINATE_ORDER )
|
||||
header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
|
||||
else
|
||||
header.setSortOrder(SAMFileHeader.SortOrder.queryname);
|
||||
return header;
|
||||
}
|
||||
|
||||
private void setupWriter(SAMFileHeader header) {
|
||||
if ( SORT_IN_COORDINATE_ORDER )
|
||||
header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
|
||||
|
|
@ -248,6 +325,11 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
private void emit(final SAMRecord read) {
|
||||
if ( writer != null )
|
||||
writer.addAlignment(read);
|
||||
if ( N_WAY_OUT != null ) {
|
||||
SAMReaderID rid = getToolkit().getReaderIDForRead(read);
|
||||
SAMFileWriter w = nwayWriters.get(rid);
|
||||
w.addAlignment(read);
|
||||
}
|
||||
}
|
||||
|
||||
private void emit(final List<SAMRecord> reads) {
|
||||
|
|
@ -255,6 +337,15 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
for ( SAMRecord read : reads )
|
||||
writer.addAlignment(read);
|
||||
}
|
||||
if ( N_WAY_OUT != null ) {
|
||||
for ( SAMRecord read : reads ) {
|
||||
// in initialize() we ensured that every reader has exactly one tag, so the following line is safe:
|
||||
SAMReaderID rid = getToolkit().getReaderIDForRead(read);
|
||||
SAMFileWriter w = nwayWriters.get(rid);
|
||||
w.addAlignment(read);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
|
|
@ -310,6 +401,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
readsToClean.clear();
|
||||
readsNotToClean.clear();
|
||||
currentInterval = intervals.hasNext() ? intervals.next() : null;
|
||||
|
||||
}
|
||||
|
||||
private boolean doNotTryToClean(SAMRecord read) {
|
||||
|
|
@ -334,6 +426,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
try {
|
||||
do {
|
||||
currentInterval = intervals.hasNext() ? intervals.next() : null;
|
||||
|
||||
} while ( currentInterval != null && (readLoc == null || currentInterval.isBefore(readLoc)) );
|
||||
} catch (ReviewedStingException e) {
|
||||
throw new UserException.MissortedFile(new File(intervalsFile), " *** Are you sure that your interval file is sorted? If not, you must use the --targetIntervalsAreNotSorted argument. ***", e);
|
||||
|
|
@ -383,6 +476,10 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
logger.error("Failed to close "+OUT_SNPS+" gracefully. Data may be corrupt.");
|
||||
}
|
||||
}
|
||||
|
||||
if ( N_WAY_OUT != null ) {
|
||||
for ( SAMFileWriter w : nwayWriters.values() ) w.close();
|
||||
}
|
||||
}
|
||||
|
||||
private void populateKnownIndels(ReadMetaDataTracker metaDataTracker, ReferenceContext ref) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,202 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.interval;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Oct 28, 2010
|
||||
* Time: 12:06:23 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
/**
|
||||
* An adapter over a collection of underlying Iterator<GenomeLoc> objects (a single underlying iterator is allowed). Each
|
||||
* individual underlying iterator must serve its intervals in coordinate-sorted order or an exception will be thrown.
|
||||
* Intervals from individual underlying streams (iterators) are 1) merged into a single ordered stream; 2) each group of
|
||||
* overlapping intervals from that merged stream are merged into a single interval; each call to next() returns such
|
||||
* merged interval guaranteed to have no overlaps with the previous or next interval.
|
||||
*
|
||||
*/
|
||||
public class NwayIntervalMergingIterator implements Iterator<GenomeLoc>, Iterable<GenomeLoc> {
|
||||
|
||||
private PriorityQueue<Element> queue = null;
|
||||
private IntervalMergingRule myRule;
|
||||
|
||||
public NwayIntervalMergingIterator(IntervalMergingRule rule) {
|
||||
myRule = rule;
|
||||
queue = new PriorityQueue<Element>();
|
||||
}
|
||||
|
||||
public void add(Iterator<GenomeLoc> it) {
|
||||
Element e = new Element(it);
|
||||
if ( ! e.isEmpty() ) queue.add(e);
|
||||
}
|
||||
|
||||
public Iterator<GenomeLoc> iterator() {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns <tt>true</tt> if the iteration has more elements. (In other
|
||||
* words, returns <tt>true</tt> if <tt>next</tt> would return an element
|
||||
* rather than throwing an exception.)
|
||||
*
|
||||
* @return <tt>true</tt> if the iterator has more elements.
|
||||
*/
|
||||
public boolean hasNext() {
|
||||
return ! queue.isEmpty(); //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next element in the iteration.
|
||||
*
|
||||
* @return the next element in the iteration.
|
||||
* @throws java.util.NoSuchElementException
|
||||
* iteration has no more elements.
|
||||
*/
|
||||
public GenomeLoc next() {
|
||||
Element e = queue.poll();
|
||||
GenomeLoc result = e.current;
|
||||
|
||||
// advance element (i.e. its underlying iterator) and reinsert into the queue
|
||||
e.advance();
|
||||
if ( ! e.isEmpty() ) queue.add(e);
|
||||
|
||||
while ( ! queue.isEmpty () ) {
|
||||
e = queue.peek();
|
||||
|
||||
if (result.overlapsP(e.current) || myRule == IntervalMergingRule.ALL && result.contiguousP(e.current)) {
|
||||
// we need to merge:
|
||||
result = result.merge(e.current);
|
||||
|
||||
// remove current head of the queue that we just merged into the result:
|
||||
e = queue.poll();
|
||||
// advance element we just merged into the result and reinsert it into the queue (if it has any data left):
|
||||
e.advance();
|
||||
if ( ! e.isEmpty() ) queue.add(e);
|
||||
|
||||
} else {
|
||||
// next element does not overlap with current result; we are done: return the result and that
|
||||
// next element will wait for next call to next()
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
return result; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes from the underlying collection the last element returned by the
|
||||
* iterator (optional operation). This method can be called only once per
|
||||
* call to <tt>next</tt>. The behavior of an iterator is unspecified if
|
||||
* the underlying collection is modified while the iteration is in
|
||||
* progress in any way other than by calling this method.
|
||||
*
|
||||
* @throws UnsupportedOperationException if the <tt>remove</tt>
|
||||
* operation is not supported by this Iterator.
|
||||
* @throws IllegalStateException if the <tt>next</tt> method has not
|
||||
* yet been called, or the <tt>remove</tt> method has already
|
||||
* been called after the last call to the <tt>next</tt>
|
||||
* method.
|
||||
*/
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("remove() method not supported by this iterator");
|
||||
}
|
||||
|
||||
private class Element implements Comparable<Element> {
|
||||
private Iterator<GenomeLoc> it;
|
||||
private GenomeLoc current = null;
|
||||
|
||||
private void advance() {
|
||||
if ( it.hasNext() ) {
|
||||
GenomeLoc next = it.next();
|
||||
if ( next.isBefore(current) ) {
|
||||
throw new UserException("Interval list provided by underlying iterator "+it.getClass().getName() +" is out of order");
|
||||
}
|
||||
current = next;
|
||||
}
|
||||
else current = null;
|
||||
}
|
||||
|
||||
public boolean isEmpty() { return current == null; }
|
||||
|
||||
public Element(Iterator<GenomeLoc> it) {
|
||||
this.it = it;
|
||||
if ( this.it.hasNext() ) current = this.it.next();
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares this object with the specified object for order. Returns a
|
||||
* negative integer, zero, or a positive integer as this object is less
|
||||
* than, equal to, or greater than the specified object.
|
||||
* <p/>
|
||||
* <p>The implementor must ensure <tt>sgn(x.compareTo(y)) ==
|
||||
* -sgn(y.compareTo(x))</tt> for all <tt>x</tt> and <tt>y</tt>. (This
|
||||
* implies that <tt>x.compareTo(y)</tt> must throw an exception iff
|
||||
* <tt>y.compareTo(x)</tt> throws an exception.)
|
||||
* <p/>
|
||||
* <p>The implementor must also ensure that the relation is transitive:
|
||||
* <tt>(x.compareTo(y)>0 && y.compareTo(z)>0)</tt> implies
|
||||
* <tt>x.compareTo(z)>0</tt>.
|
||||
* <p/>
|
||||
* <p>Finally, the implementor must ensure that <tt>x.compareTo(y)==0</tt>
|
||||
* implies that <tt>sgn(x.compareTo(z)) == sgn(y.compareTo(z))</tt>, for
|
||||
* all <tt>z</tt>.
|
||||
* <p/>
|
||||
* <p>It is strongly recommended, but <i>not</i> strictly required that
|
||||
* <tt>(x.compareTo(y)==0) == (x.equals(y))</tt>. Generally speaking, any
|
||||
* class that implements the <tt>Comparable</tt> interface and violates
|
||||
* this condition should clearly indicate this fact. The recommended
|
||||
* language is "Note: this class has a natural ordering that is
|
||||
* inconsistent with equals."
|
||||
* <p/>
|
||||
* <p>In the foregoing description, the notation
|
||||
* <tt>sgn(</tt><i>expression</i><tt>)</tt> designates the mathematical
|
||||
* <i>signum</i> function, which is defined to return one of <tt>-1</tt>,
|
||||
* <tt>0</tt>, or <tt>1</tt> according to whether the value of
|
||||
* <i>expression</i> is negative, zero or positive.
|
||||
*
|
||||
* @param o the object to be compared.
|
||||
* @return a negative integer, zero, or a positive integer as this object
|
||||
* is less than, equal to, or greater than the specified object.
|
||||
* @throws ClassCastException if the specified object's type prevents it
|
||||
* from being compared to this object.
|
||||
*/
|
||||
public int compareTo(Element o) {
|
||||
if ( current == null ) return 1;
|
||||
if ( o.current == null ) return -1;
|
||||
return current.compareTo(o.current); //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,112 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.interval;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
import org.junit.Assert;
|
||||
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Iterator;
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Oct 28, 2010
|
||||
* Time: 2:46:03 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class NwayIntervalMergingIteratorUnitTest extends BaseTest {
|
||||
|
||||
private static File refFile = new File(validationDataLocation + "Homo_sapiens_assembly17.fasta");
|
||||
|
||||
private static List<GenomeLoc> stream1 = null;
|
||||
private static List<GenomeLoc> stream2 = null;
|
||||
private static List<GenomeLoc> expected = null;
|
||||
|
||||
@BeforeClass
|
||||
public static void init() {
|
||||
GenomeLocParser.setupRefContigOrdering(ReferenceSequenceFileFactory.getReferenceSequenceFile(refFile));
|
||||
|
||||
stream1 = new ArrayList<GenomeLoc>();
|
||||
stream2 = new ArrayList<GenomeLoc>();
|
||||
expected = new ArrayList<GenomeLoc>();
|
||||
|
||||
stream1.add(GenomeLocParser.createGenomeLoc("chr1",1554,1560)); // 1
|
||||
stream1.add(GenomeLocParser.createGenomeLoc("chr1",2538,2568)); // 3
|
||||
stream1.add(GenomeLocParser.createGenomeLoc("chr1",2600,2610)); // 4
|
||||
stream1.add(GenomeLocParser.createGenomeLoc("chr1",2609,2625)); // 4
|
||||
stream1.add(GenomeLocParser.createGenomeLoc("chr1",18932,19000)); // 6
|
||||
stream1.add(GenomeLocParser.createGenomeLoc("chr1",19001,25000)); //6
|
||||
|
||||
stream2.add(GenomeLocParser.createGenomeLoc("chr1",1565,1570)); //2
|
||||
stream2.add(GenomeLocParser.createGenomeLoc("chr1",2598,2604)); // 4
|
||||
stream2.add(GenomeLocParser.createGenomeLoc("chr1",7415,7600)); // 5
|
||||
stream2.add(GenomeLocParser.createGenomeLoc("chr1",18932,25000)); // 6
|
||||
stream2.add(GenomeLocParser.createGenomeLoc("chr1",30000,35000)); // 7
|
||||
|
||||
expected.add(GenomeLocParser.createGenomeLoc("chr1",1554,1560)); // 1
|
||||
expected.add(GenomeLocParser.createGenomeLoc("chr1",1565,1570)); //2
|
||||
expected.add(GenomeLocParser.createGenomeLoc("chr1",2538,2568)); // 3
|
||||
expected.add(GenomeLocParser.createGenomeLoc("chr1",2598,2625)); // 4
|
||||
expected.add(GenomeLocParser.createGenomeLoc("chr1",7415,7600)); // 5
|
||||
expected.add(GenomeLocParser.createGenomeLoc("chr1",18932,25000)); // 6
|
||||
expected.add(GenomeLocParser.createGenomeLoc("chr1",30000,35000)); // 7
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNwayIntervalMergingIterator() {
|
||||
logger.warn("testNwayIntervalMergingIterator");
|
||||
|
||||
Iterator<GenomeLoc> it1 = stream1.iterator();
|
||||
Iterator<GenomeLoc> it2 = stream2.iterator();
|
||||
|
||||
Iterator<GenomeLoc> e_it = expected.iterator();
|
||||
|
||||
|
||||
|
||||
NwayIntervalMergingIterator it = new NwayIntervalMergingIterator(IntervalMergingRule.OVERLAPPING_ONLY);
|
||||
it.add(it1);
|
||||
it.add(it2);
|
||||
|
||||
while(it.hasNext()) {
|
||||
GenomeLoc l = it.next();
|
||||
GenomeLoc l_expected = e_it.next();
|
||||
//System.out.println("int: "+l+" expected: "+l_expected) ;
|
||||
Assert.assertEquals("Unexpected location returned by the iterator: "+l,l,l_expected);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue