Added support for HangingLocusIterator
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@42 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8a63606e11
commit
04befb942e
|
|
@ -40,6 +40,7 @@ public class AnalysisTK extends CommandLineProgram {
|
||||||
addModule("Genotype", new GenotypeWalker());
|
addModule("Genotype", new GenotypeWalker());
|
||||||
addModule("SingleSampleGenotyper", new SingleSampleGenotyper());
|
addModule("SingleSampleGenotyper", new SingleSampleGenotyper());
|
||||||
addModule("Null", new NullWalker());
|
addModule("Null", new NullWalker());
|
||||||
|
addModule("DepthOfCoverage", new DepthOfCoverageWalker());
|
||||||
}
|
}
|
||||||
|
|
||||||
private TraversalEngine engine = null;
|
private TraversalEngine engine = null;
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,8 @@ import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: mdepristo
|
* User: mdepristo
|
||||||
|
|
@ -11,18 +13,17 @@ import java.util.List;
|
||||||
* Time: 3:01:34 PM
|
* Time: 3:01:34 PM
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
public class LocusContext {
|
public interface LocusContext {
|
||||||
public LocusContext() { };
|
|
||||||
|
|
||||||
// How big is the current context?
|
|
||||||
public int getLength() { return 1; }
|
|
||||||
|
|
||||||
// get the reference base at the current (relative) position
|
|
||||||
public byte getReferenceBase() { return 0; }
|
|
||||||
|
|
||||||
// get all of the reads within this context
|
// get all of the reads within this context
|
||||||
public List<SAMRecord> getReads() { return null; }
|
public List<SAMRecord> getReads();
|
||||||
|
|
||||||
// get a list of the equivalent positions within in the reads at Pos
|
// get a list of the equivalent positions within in the reads at Pos
|
||||||
public List<Integer> getOffsets() { return null; }
|
public List<Integer> getOffsets();
|
||||||
|
|
||||||
|
|
||||||
|
public String getContig();
|
||||||
|
public long getPosition();
|
||||||
|
public GenomeLoc getLocation();
|
||||||
|
|
||||||
|
public int numReads();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -14,37 +14,13 @@ import java.util.Iterator;
|
||||||
/**
|
/**
|
||||||
* Iterator that traverses a SAM File, accumulating information on a per-locus basis
|
* Iterator that traverses a SAM File, accumulating information on a per-locus basis
|
||||||
*/
|
*/
|
||||||
public class LocusIterator implements Iterable<LocusIterator>, CloseableIterator<LocusIterator> {
|
public abstract class LocusIterator implements Iterable<LocusContext>, CloseableIterator<LocusContext> {
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// member fields
|
|
||||||
//
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
|
||||||
private final PushbackIterator<SAMRecord> it;
|
|
||||||
private String contig = null;
|
|
||||||
private int position = -1;
|
|
||||||
private List<SAMRecord> reads = new ArrayList<SAMRecord>(100);
|
|
||||||
private List<Integer> offsets = new ArrayList<Integer>(100);
|
|
||||||
|
|
||||||
protected String getContig() { return contig; }
|
|
||||||
protected long getPosition() { return position; }
|
|
||||||
public GenomeLoc getLocation() { return new GenomeLoc(contig, position); }
|
|
||||||
|
|
||||||
public List<SAMRecord> getReads() { return reads; }
|
|
||||||
public List<Integer> getOffsets() { return offsets; }
|
|
||||||
public int numReads() { return reads.size(); }
|
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// constructors and other basic operations
|
// constructors and other basic operations
|
||||||
//
|
//
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
public LocusIterator(final CloseableIterator<SAMRecord> samIterator) {
|
public Iterator<LocusContext> iterator() {
|
||||||
this.it = new PushbackIterator<SAMRecord>(samIterator);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Iterator<LocusIterator> iterator() {
|
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -52,95 +28,8 @@ public class LocusIterator implements Iterable<LocusIterator>, CloseableIterator
|
||||||
//this.it.close();
|
//this.it.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasNext() {
|
public abstract boolean hasNext();
|
||||||
return it.hasNext();
|
public abstract LocusContext next();
|
||||||
}
|
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// next() routine and associated collection operations
|
|
||||||
//
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
|
||||||
public LocusIterator next() {
|
|
||||||
position += 1;
|
|
||||||
|
|
||||||
if ( position != -1 ) {
|
|
||||||
cleanReads();
|
|
||||||
expandReads();
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( reads.isEmpty() ) {
|
|
||||||
// the window is empty, we need to jump to the first pos of the first read in the stream
|
|
||||||
SAMRecord read = it.next();
|
|
||||||
pushRead(read);
|
|
||||||
contig = read.getReferenceName();
|
|
||||||
position = read.getAlignmentStart() - 1;
|
|
||||||
return next();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// at this point, window contains all reads covering the pos, we need to return them
|
|
||||||
// and the offsets into each read for this loci
|
|
||||||
calcOffsetsOfWindow(position);
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void pushRead(SAMRecord read) {
|
|
||||||
//System.out.printf(" -> Adding read %s %d-%d flags %s%n", read.getReadName(), read.getAlignmentStart(), read.getAlignmentEnd(), Utils.readFlagsAsString(read));
|
|
||||||
reads.add(read);
|
|
||||||
}
|
|
||||||
|
|
||||||
class KeepReadPFunc implements Predicate<SAMRecord> {
|
|
||||||
public boolean apply(SAMRecord read) {
|
|
||||||
return position >= read.getAlignmentStart() &&
|
|
||||||
position < read.getAlignmentEnd() &&
|
|
||||||
read.getReferenceName().equals(contig); // should be index for efficiency
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Predicate KeepReadP = new LocusIterator.KeepReadPFunc();
|
|
||||||
|
|
||||||
private void calcOffsetsOfWindow(final int position) {
|
|
||||||
offsets.clear();
|
|
||||||
for ( SAMRecord read : reads ) {
|
|
||||||
// def calcOffset( read ):
|
|
||||||
// offset = self.pos - read.start
|
|
||||||
// return offset
|
|
||||||
//
|
|
||||||
// offsets = map(calcOffset, self.window)
|
|
||||||
final int offset = position - read.getAlignmentStart();
|
|
||||||
assert(offset < read.getReadLength() );
|
|
||||||
offsets.add(offset);
|
|
||||||
//System.out.printf("offsets [%d] %s%n", read.getAlignmentStart(), offsets);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void cleanReads() {
|
|
||||||
// def keepReadP( read ):
|
|
||||||
// return read.chr == chr and pos >= read.start and pos <= read.end
|
|
||||||
// self.window = filter( keepReadP, self.window )
|
|
||||||
reads = Utils.filter(KeepReadP, reads);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void expandReads() {
|
|
||||||
// for read in self.rs:
|
|
||||||
// #print 'read', read, pos
|
|
||||||
// if read.chr == chr and read.start <= pos and read.end >= pos:
|
|
||||||
// self.pushRead(read)
|
|
||||||
// else:
|
|
||||||
// self.rs.unget( read )
|
|
||||||
// #self.rs = chain( [read], self.rs )
|
|
||||||
// break
|
|
||||||
while ( it.hasNext() ) {
|
|
||||||
SAMRecord read = it.next();
|
|
||||||
if ( KeepReadP.apply( read ) ) {
|
|
||||||
pushRead(read);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
it.pushback(read);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void remove() {
|
public void remove() {
|
||||||
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
||||||
|
|
|
||||||
|
|
@ -17,10 +17,10 @@ public interface LocusWalker<MapType, ReduceType> {
|
||||||
public String walkerType();
|
public String walkerType();
|
||||||
|
|
||||||
// Do we actually want to operate on the context?
|
// Do we actually want to operate on the context?
|
||||||
boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context);
|
boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context);
|
||||||
|
|
||||||
// Map over the org.broadinstitute.sting.atk.LocusContext
|
// Map over the org.broadinstitute.sting.atk.LocusContext
|
||||||
MapType map(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context);
|
MapType map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context);
|
||||||
|
|
||||||
// Given result of map function
|
// Given result of map function
|
||||||
ReduceType reduceInit();
|
ReduceType reduceInit();
|
||||||
|
|
|
||||||
|
|
@ -383,12 +383,6 @@ public class TraversalEngine {
|
||||||
result = true;
|
result = true;
|
||||||
why = "No alignment start";
|
why = "No alignment start";
|
||||||
}
|
}
|
||||||
else if ( rec.getCigar().numCigarElements() > 1 ) {
|
|
||||||
// FIXME -- deal with indels correctly!
|
|
||||||
nSkippedIndels++;
|
|
||||||
result = true;
|
|
||||||
why = "Skipping indel: " + rec.getCigarString();
|
|
||||||
}
|
|
||||||
else {
|
else {
|
||||||
result = false;
|
result = false;
|
||||||
}
|
}
|
||||||
|
|
@ -417,7 +411,8 @@ public class TraversalEngine {
|
||||||
protected <M,T> int traverseByLoci(LocusWalker<M,T> walker) {
|
protected <M,T> int traverseByLoci(LocusWalker<M,T> walker) {
|
||||||
// prepare the read filtering read iterator and provide it to a new locus iterator
|
// prepare the read filtering read iterator and provide it to a new locus iterator
|
||||||
FilteringIterator filterIter = new FilteringIterator(samReadIter, new locusStreamFilterFunc());
|
FilteringIterator filterIter = new FilteringIterator(samReadIter, new locusStreamFilterFunc());
|
||||||
CloseableIterator<LocusIterator> iter = new LocusIterator(filterIter);
|
//LocusIterator iter = new SingleLocusIterator(filterIter);
|
||||||
|
LocusIterator iter = new LocusIteratorByHanger(filterIter);
|
||||||
|
|
||||||
// Initial the reference ordered data iterators
|
// Initial the reference ordered data iterators
|
||||||
List<ReferenceOrderedData.RODIterator> rodIters = initializeRODs();
|
List<ReferenceOrderedData.RODIterator> rodIters = initializeRODs();
|
||||||
|
|
@ -432,7 +427,7 @@ public class TraversalEngine {
|
||||||
this.nRecords++;
|
this.nRecords++;
|
||||||
|
|
||||||
// actually get the read and hand it to the walker
|
// actually get the read and hand it to the walker
|
||||||
final LocusIterator locus = iter.next();
|
final LocusContext locus = iter.next();
|
||||||
|
|
||||||
// Poor man's version of index LOL
|
// Poor man's version of index LOL
|
||||||
if ( inLocations(locus.getLocation()) ) {
|
if ( inLocations(locus.getLocation()) ) {
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import org.broadinstitute.sting.atk.LocusWalker;
|
import org.broadinstitute.sting.atk.LocusWalker;
|
||||||
import org.broadinstitute.sting.atk.LocusIterator;
|
import org.broadinstitute.sting.atk.LocusIterator;
|
||||||
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
|
@ -22,7 +23,7 @@ public abstract class BasicLociWalker<MapType, ReduceType> implements LocusWalke
|
||||||
public String walkerType() { return "ByLocus"; }
|
public String walkerType() { return "ByLocus"; }
|
||||||
|
|
||||||
// Do we actually want to operate on the context?
|
// Do we actually want to operate on the context?
|
||||||
public boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
public boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
||||||
return true; // We are keeping all the reads
|
return true; // We are keeping all the reads
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -30,7 +31,7 @@ public abstract class BasicLociWalker<MapType, ReduceType> implements LocusWalke
|
||||||
}
|
}
|
||||||
|
|
||||||
// These three capabilities must be overidden
|
// These three capabilities must be overidden
|
||||||
public abstract MapType map(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context);
|
public abstract MapType map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context);
|
||||||
public abstract ReduceType reduceInit();
|
public abstract ReduceType reduceInit();
|
||||||
public abstract ReduceType reduce(MapType value, ReduceType sum);
|
public abstract ReduceType reduce(MapType value, ReduceType sum);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
package org.broadinstitute.sting.atk.modules;
|
package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import org.broadinstitute.sting.atk.LocusIterator;
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -13,7 +13,7 @@ import java.util.List;
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
public class CountLociWalker extends BasicLociWalker<Integer, Integer> {
|
public class CountLociWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import org.broadinstitute.sting.atk.LocusIterator;
|
import org.broadinstitute.sting.atk.LocusIterator;
|
||||||
import org.broadinstitute.sting.atk.GenotypeEvidence;
|
import org.broadinstitute.sting.atk.GenotypeEvidence;
|
||||||
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
|
@ -10,7 +11,7 @@ import java.util.List;
|
||||||
import static java.lang.System.currentTimeMillis;
|
import static java.lang.System.currentTimeMillis;
|
||||||
|
|
||||||
public class GenotypeWalker extends BasicLociWalker<Integer, Integer> {
|
public class GenotypeWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
||||||
//char[] = new char(26);
|
//char[] = new char(26);
|
||||||
long start_tm = currentTimeMillis();
|
long start_tm = currentTimeMillis();
|
||||||
List<SAMRecord> reads = context.getReads();
|
List<SAMRecord> reads = context.getReads();
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import org.broadinstitute.sting.atk.LocusWalker;
|
import org.broadinstitute.sting.atk.LocusWalker;
|
||||||
import org.broadinstitute.sting.atk.LocusIterator;
|
import org.broadinstitute.sting.atk.LocusIterator;
|
||||||
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.utils.rodDbSNP;
|
import org.broadinstitute.sting.utils.rodDbSNP;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
@ -19,12 +20,12 @@ public class NullWalker implements LocusWalker<Integer, Integer> {
|
||||||
public String walkerType() { return "ByLocus"; }
|
public String walkerType() { return "ByLocus"; }
|
||||||
|
|
||||||
// Do we actually want to operate on the context?
|
// Do we actually want to operate on the context?
|
||||||
public boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
public boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
||||||
return true; // We are keeping all the reads
|
return true; // We are keeping all the reads
|
||||||
}
|
}
|
||||||
|
|
||||||
// Map over the org.broadinstitute.sting.atk.LocusContext
|
// Map over the org.broadinstitute.sting.atk.LocusContext
|
||||||
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context)
|
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context)
|
||||||
{
|
{
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import org.broadinstitute.sting.atk.LocusWalker;
|
import org.broadinstitute.sting.atk.LocusWalker;
|
||||||
import org.broadinstitute.sting.atk.LocusIterator;
|
import org.broadinstitute.sting.atk.LocusIterator;
|
||||||
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.utils.rodDbSNP;
|
import org.broadinstitute.sting.utils.rodDbSNP;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
@ -23,12 +24,12 @@ public class PileupWalker implements LocusWalker<Integer, Integer> {
|
||||||
public String walkerType() { return "ByLocus"; }
|
public String walkerType() { return "ByLocus"; }
|
||||||
|
|
||||||
// Do we actually want to operate on the context?
|
// Do we actually want to operate on the context?
|
||||||
public boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
public boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
||||||
return true; // We are keeping all the reads
|
return true; // We are keeping all the reads
|
||||||
}
|
}
|
||||||
|
|
||||||
// Map over the org.broadinstitute.sting.atk.LocusContext
|
// Map over the org.broadinstitute.sting.atk.LocusContext
|
||||||
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
||||||
//System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size());
|
//System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size());
|
||||||
//for ( SAMRecord read : context.getReads() ) {
|
//for ( SAMRecord read : context.getReads() ) {
|
||||||
// System.out.println(" -> " + read.getReadName());
|
// System.out.println(" -> " + read.getReadName());
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import org.broadinstitute.sting.atk.LocusWalker;
|
import org.broadinstitute.sting.atk.LocusWalker;
|
||||||
import org.broadinstitute.sting.atk.LocusIterator;
|
import org.broadinstitute.sting.atk.LocusIterator;
|
||||||
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.utils.rodDbSNP;
|
import org.broadinstitute.sting.utils.rodDbSNP;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
@ -19,7 +20,7 @@ public class SingleSampleGenotyper implements LocusWalker<Integer, Integer> {
|
||||||
public String walkerType() { return "ByLocus"; }
|
public String walkerType() { return "ByLocus"; }
|
||||||
|
|
||||||
// Do we actually want to operate on the context?
|
// Do we actually want to operate on the context?
|
||||||
public boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
public boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
||||||
return true; // We are keeping all the reads
|
return true; // We are keeping all the reads
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -86,7 +87,7 @@ public class SingleSampleGenotyper implements LocusWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Map over the org.broadinstitute.sting.atk.LocusContext
|
// Map over the org.broadinstitute.sting.atk.LocusContext
|
||||||
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
||||||
//System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size());
|
//System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size());
|
||||||
//for ( SAMRecord read : context.getReads() ) {
|
//for ( SAMRecord read : context.getReads() ) {
|
||||||
// System.out.println(" -> " + read.getReadName());
|
// System.out.println(" -> " + read.getReadName());
|
||||||
|
|
|
||||||
|
|
@ -23,11 +23,19 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
||||||
// Ugly global variable defining the optional ordering of contig elements
|
// Ugly global variable defining the optional ordering of contig elements
|
||||||
//
|
//
|
||||||
public static HashMap<String, Integer> refContigOrdering = null;
|
public static HashMap<String, Integer> refContigOrdering = null;
|
||||||
|
public static HashMap<String, String> interns = null;
|
||||||
|
|
||||||
public static void setContigOrdering(HashMap<String, Integer> rco) {
|
public static void setContigOrdering(HashMap<String, Integer> rco) {
|
||||||
refContigOrdering = rco;
|
refContigOrdering = rco;
|
||||||
|
interns = new HashMap<String, String>();
|
||||||
|
for ( String contig : rco.keySet() )
|
||||||
|
interns.put( contig, contig );
|
||||||
}
|
}
|
||||||
|
|
||||||
public GenomeLoc( final String contig, final long start, final long stop ) {
|
public GenomeLoc( String contig, final long start, final long stop ) {
|
||||||
|
if ( interns != null )
|
||||||
|
contig = interns.get(contig);
|
||||||
|
|
||||||
this.contig = contig;
|
this.contig = contig;
|
||||||
this.start = start;
|
this.start = start;
|
||||||
this.stop = stop;
|
this.stop = stop;
|
||||||
|
|
@ -37,12 +45,16 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
||||||
this( contig, pos, pos );
|
this( contig, pos, pos );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Copy constructor: builds a new location with the same contig, start and stop as toCopy.
 *
 * The contig reference is passed along as-is rather than wrapped in new String(...).
 * Strings are immutable, so a defensive copy buys nothing, and sharing the reference
 * lets identity-based fast paths (such as the == check in compareContigs) succeed
 * even when no intern table has been installed.
 *
 * @param toCopy the location to duplicate; must not be null
 */
public GenomeLoc( final GenomeLoc toCopy ) {
    this( toCopy.getContig(), toCopy.getStart(), toCopy.getStop() );
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// Parsing string representations
|
// Parsing string representations
|
||||||
//
|
//
|
||||||
private static long parsePosition( final String pos ) {
|
private static long parsePosition( final String pos ) {
|
||||||
String x = pos.replaceAll(",", "");
|
String x = pos.replaceAll(",", "");
|
||||||
return Long.parseLong(x);
|
return Long.parseLong(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static GenomeLoc parseGenomeLoc( final String str ) {
|
public static GenomeLoc parseGenomeLoc( final String str ) {
|
||||||
|
|
@ -57,7 +69,7 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
||||||
long start = 1;
|
long start = 1;
|
||||||
long stop = Integer.MAX_VALUE;
|
long stop = Integer.MAX_VALUE;
|
||||||
boolean bad = false;
|
boolean bad = false;
|
||||||
|
|
||||||
Matcher match1 = regex1.matcher(str);
|
Matcher match1 = regex1.matcher(str);
|
||||||
Matcher match2 = regex2.matcher(str);
|
Matcher match2 = regex2.matcher(str);
|
||||||
Matcher match3 = regex3.matcher(str);
|
Matcher match3 = regex3.matcher(str);
|
||||||
|
|
@ -133,7 +145,7 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
||||||
if ( that.start > this.stop ) return true; // that guy is past our start
|
if ( that.start > this.stop ) return true; // that guy is past our start
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public final boolean overlapsP(GenomeLoc that) {
|
public final boolean overlapsP(GenomeLoc that) {
|
||||||
return ! disjointP( that );
|
return ! disjointP( that );
|
||||||
}
|
}
|
||||||
|
|
@ -142,11 +154,41 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
||||||
return this.contig.equals(that.contig);
|
return this.contig.equals(that.contig);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public final int minus( final GenomeLoc that ) {
|
||||||
|
if ( this.getContig().equals(that.getContig()) )
|
||||||
|
return (int) (this.getStart() - that.getStart());
|
||||||
|
else
|
||||||
|
return Integer.MAX_VALUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
public final int distance( final GenomeLoc that ) {
|
||||||
|
return Math.abs(minus(that));
|
||||||
|
}
|
||||||
|
|
||||||
|
public final boolean isBetween( final GenomeLoc left, final GenomeLoc right ) {
|
||||||
|
return this.compareTo(left) > -1 && this.compareTo(right) < 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public final void incPos() {
|
||||||
|
incPos(1);
|
||||||
|
}
|
||||||
|
public final void incPos(long by) {
|
||||||
|
this.start += by;
|
||||||
|
this.stop += by;
|
||||||
|
}
|
||||||
|
|
||||||
|
public final GenomeLoc nextLoc() {
|
||||||
|
GenomeLoc n = new GenomeLoc(this);
|
||||||
|
n.incPos();
|
||||||
|
return n;
|
||||||
|
}
|
||||||
//
|
//
|
||||||
// Comparison operations
|
// Comparison operations
|
||||||
//
|
//
|
||||||
public static int compareContigs( final String thisContig, final String thatContig ) {
|
public static int compareContigs( final String thisContig, final String thatContig ) {
|
||||||
|
if ( thisContig == thatContig )
|
||||||
|
return 0;
|
||||||
|
|
||||||
if ( refContigOrdering != null ) {
|
if ( refContigOrdering != null ) {
|
||||||
if ( ! refContigOrdering.containsKey(thisContig) ) {
|
if ( ! refContigOrdering.containsKey(thisContig) ) {
|
||||||
if ( ! refContigOrdering.containsKey(thatContig) ) {
|
if ( ! refContigOrdering.containsKey(thatContig) ) {
|
||||||
|
|
@ -192,4 +234,4 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
||||||
if ( this.getStop() > that.getStop() ) return 1;
|
if ( this.getStop() > that.getStop() ) return 1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -68,8 +68,15 @@ public class Utils {
|
||||||
return ret.toString();
|
return ret.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String join(String separator, Collection<String> strings) {
|
//public static String join(String separator, Collection<String> strings) {
|
||||||
return join( separator, strings.toArray(new String[0]) );
|
// return join( separator, strings.toArray(new String[0]) );
|
||||||
|
//}
|
||||||
|
|
||||||
|
public static <T> String join(String separator, Collection<T> objects) {
|
||||||
|
ArrayList<String> strs = new ArrayList<String>();
|
||||||
|
for ( Object x : objects )
|
||||||
|
strs.add(x.toString());
|
||||||
|
return join( separator, strs.toArray(new String[0]) );
|
||||||
}
|
}
|
||||||
|
|
||||||
public static double average(List<Long> vals, int maxI) {
|
public static double average(List<Long> vals, int maxI) {
|
||||||
|
|
@ -97,14 +104,16 @@ public class Utils {
|
||||||
List<SAMSequenceRecord> refContigs = refFile.getSequenceDictionary();
|
List<SAMSequenceRecord> refContigs = refFile.getSequenceDictionary();
|
||||||
HashMap<String, Integer> refContigOrdering = new HashMap<String, Integer>();
|
HashMap<String, Integer> refContigOrdering = new HashMap<String, Integer>();
|
||||||
|
|
||||||
int i = 0;
|
if ( refContigs != null ) {
|
||||||
System.out.printf("Prepared reference sequence contig dictionary%n order ->");
|
int i = 0;
|
||||||
for ( SAMSequenceRecord contig : refContigs ) {
|
System.out.printf("Prepared reference sequence contig dictionary%n order ->");
|
||||||
System.out.printf(" %s", contig.getSequenceName());
|
for ( SAMSequenceRecord contig : refContigs ) {
|
||||||
refContigOrdering.put(contig.getSequenceName(), i);
|
System.out.printf(" %s", contig.getSequenceName());
|
||||||
i++;
|
refContigOrdering.put(contig.getSequenceName(), i);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
System.out.printf("%n Total elements -> %d%n", refContigOrdering.size());
|
||||||
}
|
}
|
||||||
System.out.printf("%n Total elements -> %d%n", refContigOrdering.size());
|
|
||||||
|
|
||||||
GenomeLoc.setContigOrdering(refContigOrdering);
|
GenomeLoc.setContigOrdering(refContigOrdering);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue