Added basic GenomeLoc class
Support for operating on any location in genome (albeit slowly) Improvements to dbSNP support git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@19 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
63f8d82e36
commit
49a84c965e
Binary file not shown.
|
|
@ -41,6 +41,20 @@ public class SequenceUtil {
|
||||||
return new String(complement);
|
return new String(complement);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static char complementBase(char base) {
|
||||||
|
switch ( base ) {
|
||||||
|
case 'a' : return 't';
|
||||||
|
case 'A' : return 'T';
|
||||||
|
case 'c' : return 'g';
|
||||||
|
case 'C' : return 'G';
|
||||||
|
case 'g' : return 'c';
|
||||||
|
case 'G' : return 'C';
|
||||||
|
case 't' : return 'a';
|
||||||
|
case 'T' : return 'A';
|
||||||
|
default : return base;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/** Attempts to efficiently compare two bases stored as bytes for equality. */
|
/** Attempts to efficiently compare two bases stored as bytes for equality. */
|
||||||
public static boolean basesEqual(byte lhs, byte rhs) {
|
public static boolean basesEqual(byte lhs, byte rhs) {
|
||||||
if (lhs == rhs) return true;
|
if (lhs == rhs) return true;
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,7 @@ public class AnalysisTK extends CommandLineProgram {
|
||||||
addModule("CountLoci", new CountLociWalker());
|
addModule("CountLoci", new CountLociWalker());
|
||||||
addModule("Pileup", new PileupWalker());
|
addModule("Pileup", new PileupWalker());
|
||||||
addModule("CountReads", new CountReadsWalker());
|
addModule("CountReads", new CountReadsWalker());
|
||||||
|
addModule("PrintReads", new PrintReadsWalker());
|
||||||
addModule("Base_Quality_Histogram", new BaseQualityHistoWalker());
|
addModule("Base_Quality_Histogram", new BaseQualityHistoWalker());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -69,6 +70,7 @@ public class AnalysisTK extends CommandLineProgram {
|
||||||
}
|
}
|
||||||
|
|
||||||
this.engine = new TraversalEngine(INPUT_FILE, REF_FILE_ARG, rods);
|
this.engine = new TraversalEngine(INPUT_FILE, REF_FILE_ARG, rods);
|
||||||
|
engine.initialize();
|
||||||
|
|
||||||
ValidationStringency strictness;
|
ValidationStringency strictness;
|
||||||
if ( STRICTNESS_ARG == null ) {
|
if ( STRICTNESS_ARG == null ) {
|
||||||
|
|
@ -89,8 +91,11 @@ public class AnalysisTK extends CommandLineProgram {
|
||||||
engine.setDebugging(! ( DEBUGGING_STR == null || DEBUGGING_STR.toLowerCase().equals("true")));
|
engine.setDebugging(! ( DEBUGGING_STR == null || DEBUGGING_STR.toLowerCase().equals("true")));
|
||||||
engine.setMaxReads(Integer.parseInt(MAX_READS_ARG));
|
engine.setMaxReads(Integer.parseInt(MAX_READS_ARG));
|
||||||
|
|
||||||
|
if ( REGION_STR != null ) {
|
||||||
|
engine.setLocation(REGION_STR);
|
||||||
|
}
|
||||||
|
|
||||||
//LocusWalker<Integer,Integer> walker = new PileupWalker();
|
//LocusWalker<Integer,Integer> walker = new PileupWalker();
|
||||||
engine.initialize();
|
|
||||||
try {
|
try {
|
||||||
LocusWalker<?, ?> walker = (LocusWalker<?, ?>)MODULES.get(Analysis_Name);
|
LocusWalker<?, ?> walker = (LocusWalker<?, ?>)MODULES.get(Analysis_Name);
|
||||||
engine.traverseByLoci(walker);
|
engine.traverseByLoci(walker);
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import edu.mit.broad.sam.SAMRecord;
|
||||||
import edu.mit.broad.sting.utils.PushbackIterator;
|
import edu.mit.broad.sting.utils.PushbackIterator;
|
||||||
import edu.mit.broad.sting.utils.Utils;
|
import edu.mit.broad.sting.utils.Utils;
|
||||||
import edu.mit.broad.sting.utils.Predicate;
|
import edu.mit.broad.sting.utils.Predicate;
|
||||||
|
import edu.mit.broad.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
@ -26,8 +27,10 @@ public class LocusIterator implements Iterable<LocusIterator>, CloseableIterator
|
||||||
private List<SAMRecord> reads = new ArrayList<SAMRecord>(100);
|
private List<SAMRecord> reads = new ArrayList<SAMRecord>(100);
|
||||||
private List<Integer> offsets = new ArrayList<Integer>(100);
|
private List<Integer> offsets = new ArrayList<Integer>(100);
|
||||||
|
|
||||||
public String getContig() { return contig; }
|
protected String getContig() { return contig; }
|
||||||
public int getPosition() { return position; }
|
protected long getPosition() { return position; }
|
||||||
|
public GenomeLoc getLocation() { return new GenomeLoc(contig, position); }
|
||||||
|
|
||||||
public List<SAMRecord> getReads() { return reads; }
|
public List<SAMRecord> getReads() { return reads; }
|
||||||
public List<Integer> getOffsets() { return offsets; }
|
public List<Integer> getOffsets() { return offsets; }
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,7 @@ public class PrepareROD extends CommandLineProgram {
|
||||||
refContigOrdering.put(contig.getSequenceName(), i);
|
refContigOrdering.put(contig.getSequenceName(), i);
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
ReferenceOrderedDatum.setContigOrdering(refContigOrdering);
|
GenomeLoc.setContigOrdering(refContigOrdering);
|
||||||
|
|
||||||
Class rodClass = Types.get(ROD_TYPE.toLowerCase());
|
Class rodClass = Types.get(ROD_TYPE.toLowerCase());
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,16 +8,17 @@ import edu.mit.broad.picard.filter.SamRecordFilter;
|
||||||
import edu.mit.broad.picard.filter.FilteringIterator;
|
import edu.mit.broad.picard.filter.FilteringIterator;
|
||||||
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
|
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
|
||||||
import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory;
|
import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory;
|
||||||
import edu.mit.broad.sting.utils.ReferenceIterator;
|
import edu.mit.broad.sting.utils.*;
|
||||||
import edu.mit.broad.sting.utils.ReferenceOrderedData;
|
|
||||||
import edu.mit.broad.sting.utils.ReferenceOrderedDatum;
|
|
||||||
import edu.mit.broad.sting.utils.Utils;
|
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.List;
|
import java.util.*;
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.ArrayList;
|
import net.sf.functionalj.reflect.StdReflect;
|
||||||
import java.util.Arrays;
|
import net.sf.functionalj.reflect.JdkStdReflect;
|
||||||
|
import net.sf.functionalj.FunctionN;
|
||||||
|
import net.sf.functionalj.Function1;
|
||||||
|
import net.sf.functionalj.Functions;
|
||||||
|
import net.sf.functionalj.util.Operators;
|
||||||
|
|
||||||
public class TraversalEngine {
|
public class TraversalEngine {
|
||||||
// Usage and parameters
|
// Usage and parameters
|
||||||
|
|
@ -46,6 +47,8 @@ public class TraversalEngine {
|
||||||
|
|
||||||
public boolean DEBUGGING = false;
|
public boolean DEBUGGING = false;
|
||||||
|
|
||||||
|
private GenomeLoc[] locs = null;
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// Setting up the engine
|
// Setting up the engine
|
||||||
|
|
@ -63,6 +66,54 @@ public class TraversalEngine {
|
||||||
public void setMaxReads( final int maxReads ) { this.maxReads = maxReads; }
|
public void setMaxReads( final int maxReads ) { this.maxReads = maxReads; }
|
||||||
public void setDebugging( final boolean d ) { DEBUGGING = d; }
|
public void setDebugging( final boolean d ) { DEBUGGING = d; }
|
||||||
|
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// functions for dealing locations (areas of the genome we're traversing over)
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
public void setLocation( final String locStr ) {
|
||||||
|
this.locs = parseGenomeLocs(locStr);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static GenomeLoc[] parseGenomeLocs( final String str ) {
|
||||||
|
// Of the form: loc1;loc2;...
|
||||||
|
// Where each locN can be:
|
||||||
|
// Ôchr2Õ, Ôchr2:1000000Õ or Ôchr2:1,000,000-2,000,000Õ
|
||||||
|
StdReflect reflect = new JdkStdReflect();
|
||||||
|
FunctionN<GenomeLoc> parseOne = reflect.staticFunction(GenomeLoc.class, "parseGenomeLoc", String.class);
|
||||||
|
Function1<GenomeLoc, String> f1 = parseOne.f1();
|
||||||
|
Collection<GenomeLoc> result = Functions.map(f1, Arrays.asList(str.split(";")));
|
||||||
|
GenomeLoc[] locs = (GenomeLoc[])result.toArray(new GenomeLoc[0]);
|
||||||
|
|
||||||
|
Arrays.sort(locs);
|
||||||
|
for ( GenomeLoc l : locs )
|
||||||
|
System.out.printf(" -> %s%n", l);
|
||||||
|
|
||||||
|
System.out.printf(" Locations are: %s%n", Utils.join(" ", Functions.map( Operators.toString, Arrays.asList(locs) ) ) );
|
||||||
|
|
||||||
|
return locs;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean inLocations( GenomeLoc curr ) {
|
||||||
|
if ( this.locs == null )
|
||||||
|
return true;
|
||||||
|
else {
|
||||||
|
for ( GenomeLoc loc : this.locs ) {
|
||||||
|
//System.out.printf(" Overlap %s vs. %s => %b%n", loc, curr, loc.overlapsP(curr));
|
||||||
|
if ( loc.overlapsP(curr) )
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean pastFinalLocation( GenomeLoc curr ) {
|
||||||
|
boolean r = locs != null && locs[locs.length-1].compareTo( curr ) == -1 && ! locs[locs.length-1].overlapsP(curr);
|
||||||
|
//System.out.printf(" pastFinalLocation %s vs. %s => %d => %b%n", locs[locs.length-1], curr, locs[locs.length-1].compareTo( curr ), r);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// functions for dealing with the reference sequence
|
// functions for dealing with the reference sequence
|
||||||
|
|
@ -127,17 +178,18 @@ public class TraversalEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<ReferenceOrderedDatum> getReferenceOrderedDataAtLocus(List<ReferenceOrderedData.RODIterator> rodIters,
|
protected List<ReferenceOrderedDatum> getReferenceOrderedDataAtLocus(List<ReferenceOrderedData.RODIterator> rodIters,
|
||||||
final String contig, final int pos) {
|
final GenomeLoc loc) {
|
||||||
List<ReferenceOrderedDatum> data = new ArrayList<ReferenceOrderedDatum>();
|
List<ReferenceOrderedDatum> data = new ArrayList<ReferenceOrderedDatum>();
|
||||||
for ( ReferenceOrderedData.RODIterator iter : rodIters ) {
|
for ( ReferenceOrderedData.RODIterator iter : rodIters ) {
|
||||||
data.add(iter.seekForward(contig, pos));
|
data.add(iter.seekForward(loc));
|
||||||
}
|
}
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// traversal functions
|
// traversal by loci functions
|
||||||
//
|
//
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
protected int initialize() {
|
protected int initialize() {
|
||||||
|
|
@ -197,30 +249,38 @@ public class TraversalEngine {
|
||||||
List<ReferenceOrderedData.RODIterator> rodIters = initializeRODs();
|
List<ReferenceOrderedData.RODIterator> rodIters = initializeRODs();
|
||||||
|
|
||||||
T sum = walker.reduceInit();
|
T sum = walker.reduceInit();
|
||||||
while ( iter.hasNext() ) {
|
boolean done = false;
|
||||||
|
while ( iter.hasNext() && ! done ) {
|
||||||
this.nRecords++;
|
this.nRecords++;
|
||||||
|
|
||||||
// actually get the read and hand it to the walker
|
// actually get the read and hand it to the walker
|
||||||
final LocusIterator locus = iter.next();
|
final LocusIterator locus = iter.next();
|
||||||
final ReferenceIterator refSite = refIter.seekForward(locus.getContig(), locus.getPosition());
|
|
||||||
final char refBase = refSite.getBaseAsChar();
|
|
||||||
final List<ReferenceOrderedDatum> rodData = getReferenceOrderedDataAtLocus(rodIters, locus.getContig(), locus.getPosition());
|
|
||||||
|
|
||||||
if ( DEBUGGING )
|
// Poor man's version of index LOL
|
||||||
System.out.printf(" Reference: %s:%d %c%n", refSite.getCurrentContig().getName(), refSite.getPosition(), refBase);
|
if ( inLocations(locus.getLocation()) ) {
|
||||||
|
final ReferenceIterator refSite = refIter.seekForward(locus.getContig(), locus.getPosition());
|
||||||
|
final char refBase = refSite.getBaseAsChar();
|
||||||
|
final List<ReferenceOrderedDatum> rodData = getReferenceOrderedDataAtLocus(rodIters, locus.getLocation());
|
||||||
|
|
||||||
final boolean keepMeP = walker.filter(rodData, refBase, locus);
|
if ( DEBUGGING )
|
||||||
if ( keepMeP ) {
|
System.out.printf(" Reference: %s:%d %c%n", refSite.getCurrentContig().getName(), refSite.getPosition(), refBase);
|
||||||
M x = walker.map(rodData, refBase, locus);
|
|
||||||
sum = walker.reduce(x, sum);
|
final boolean keepMeP = walker.filter(rodData, refBase, locus);
|
||||||
|
if ( keepMeP ) {
|
||||||
|
M x = walker.map(rodData, refBase, locus);
|
||||||
|
sum = walker.reduce(x, sum);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( this.maxReads > 0 && this.nRecords > this.maxReads ) {
|
||||||
|
System.out.println("Maximum number of reads encountered, terminating traversal " + this.nRecords);
|
||||||
|
done = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
printProgress("loci");
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( this.maxReads > 0 && this.nRecords > this.maxReads ) {
|
if ( pastFinalLocation(locus.getLocation()) )
|
||||||
System.out.println("Maximum number of reads encountered, terminating traversal " + this.nRecords);
|
done = true;
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
printProgress("loci");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
printProgress( true, "loci" );
|
printProgress( true, "loci" );
|
||||||
|
|
@ -234,28 +294,43 @@ public class TraversalEngine {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// traversal by read functions
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
protected <M,R> int traverseByRead(ReadWalker<M,R> walker) {
|
protected <M,R> int traverseByRead(ReadWalker<M,R> walker) {
|
||||||
walker.initialize();
|
walker.initialize();
|
||||||
CloseableIterator<SAMRecord> iter = readStream.iterator();
|
CloseableIterator<SAMRecord> iter = readStream.iterator();
|
||||||
R sum = walker.reduceInit();
|
R sum = walker.reduceInit();
|
||||||
while ( iter.hasNext() ) {
|
boolean done = false;
|
||||||
|
while ( iter.hasNext() && ! done ) {
|
||||||
this.nRecords++;
|
this.nRecords++;
|
||||||
|
|
||||||
// actually get the read and hand it to the walker
|
// actually get the read and hand it to the walker
|
||||||
final SAMRecord read = iter.next();
|
final SAMRecord read = iter.next();
|
||||||
final boolean keepMeP = walker.filter(null, read);
|
GenomeLoc loc = new GenomeLoc(read.getReferenceName(), read.getAlignmentStart());
|
||||||
if ( keepMeP ) {
|
|
||||||
M x = walker.map(null, read);
|
if ( inLocations(loc) ) {
|
||||||
sum = walker.reduce(x, sum);
|
final boolean keepMeP = walker.filter(null, read);
|
||||||
|
|
||||||
|
if ( keepMeP ) {
|
||||||
|
M x = walker.map(null, read);
|
||||||
|
sum = walker.reduce(x, sum);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( this.maxReads > 0 && this.nRecords > this.maxReads ) {
|
||||||
|
System.out.println("Maximum number of reads encountered, terminating traversal " + this.nRecords);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
printProgress("reads");
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( this.maxReads > 0 && this.nRecords > this.maxReads ) {
|
if ( pastFinalLocation(loc) )
|
||||||
System.out.println("Maximum number of reads encountered, terminating traversal " + this.nRecords);
|
done = true;
|
||||||
break;
|
//System.out.printf("Done? %b%n", done);
|
||||||
}
|
}
|
||||||
|
|
||||||
printProgress("reads");
|
|
||||||
}
|
|
||||||
|
|
||||||
printProgress( true, "reads" );
|
printProgress( true, "reads" );
|
||||||
System.out.println("Traversal reduce result is " + sum);
|
System.out.println("Traversal reduce result is " + sum);
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,37 @@
|
||||||
|
package edu.mit.broad.sting.atk.modules;
|
||||||
|
|
||||||
|
import edu.mit.broad.sting.atk.LocusWalker;
|
||||||
|
import edu.mit.broad.sting.atk.LocusIterator;
|
||||||
|
import edu.mit.broad.sting.utils.ReferenceOrderedDatum;
|
||||||
|
import edu.mit.broad.sam.SAMRecord;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: mdepristo
|
||||||
|
* Date: Feb 22, 2009
|
||||||
|
* Time: 3:22:14 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public abstract class BasicLociWalker<MapType, ReduceType> implements LocusWalker<MapType, ReduceType> {
|
||||||
|
public void initialize() {
|
||||||
|
;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String walkerType() { return "ByLocus"; }
|
||||||
|
|
||||||
|
// Do we actually want to operate on the context?
|
||||||
|
public boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
||||||
|
return true; // We are keeping all the reads
|
||||||
|
}
|
||||||
|
|
||||||
|
public void onTraveralDone() {
|
||||||
|
}
|
||||||
|
|
||||||
|
// These three capabilities must be overidden
|
||||||
|
public abstract MapType map(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context);
|
||||||
|
public abstract ReduceType reduceInit();
|
||||||
|
public abstract ReduceType reduce(MapType value, ReduceType sum);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,31 @@
|
||||||
|
package edu.mit.broad.sting.atk.modules;
|
||||||
|
|
||||||
|
import edu.mit.broad.sam.SAMRecord;
|
||||||
|
import edu.mit.broad.sting.atk.LocusContext;
|
||||||
|
import edu.mit.broad.sting.atk.ReadWalker;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: mdepristo
|
||||||
|
* Date: Feb 22, 2009
|
||||||
|
* Time: 2:52:28 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public abstract class BasicReadWalker<MapType, ReduceType> implements ReadWalker<MapType, ReduceType> {
|
||||||
|
public void initialize() { }
|
||||||
|
public String walkerType() { return "ByRead"; }
|
||||||
|
|
||||||
|
public boolean filter(LocusContext context, SAMRecord read) {
|
||||||
|
// We are keeping all the reads
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void onTraveralDone() {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Three basic abstract function that *must* be overridden
|
||||||
|
public abstract MapType map(LocusContext context, SAMRecord read);
|
||||||
|
public abstract ReduceType reduceInit();
|
||||||
|
public abstract ReduceType reduce(MapType value, ReduceType sum);
|
||||||
|
}
|
||||||
|
|
@ -1,9 +1,7 @@
|
||||||
package edu.mit.broad.sting.atk.modules;
|
package edu.mit.broad.sting.atk.modules;
|
||||||
|
|
||||||
import edu.mit.broad.sting.atk.LocusWalker;
|
|
||||||
import edu.mit.broad.sting.atk.LocusIterator;
|
import edu.mit.broad.sting.atk.LocusIterator;
|
||||||
import edu.mit.broad.sting.utils.ReferenceOrderedDatum;
|
import edu.mit.broad.sting.utils.ReferenceOrderedDatum;
|
||||||
import edu.mit.broad.sam.SAMRecord;
|
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
@ -14,28 +12,14 @@ import java.util.List;
|
||||||
* Time: 3:22:14 PM
|
* Time: 3:22:14 PM
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
public class CountLociWalker implements LocusWalker<Integer, Integer> {
|
public class CountLociWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
public void initialize() {
|
|
||||||
}
|
|
||||||
|
|
||||||
public String walkerType() { return "ByLocus"; }
|
|
||||||
|
|
||||||
// Do we actually want to operate on the context?
|
|
||||||
public boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
|
||||||
return true; // We are keeping all the reads
|
|
||||||
}
|
|
||||||
|
|
||||||
// Map over the edu.mit.broad.sting.atk.LocusContext
|
|
||||||
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Given result of map function
|
|
||||||
public Integer reduceInit() { return 0; }
|
public Integer reduceInit() { return 0; }
|
||||||
|
|
||||||
public Integer reduce(Integer value, Integer sum) {
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
return value + sum;
|
return value + sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void onTraveralDone() {
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
@ -1,38 +1,16 @@
|
||||||
package edu.mit.broad.sting.atk.modules;
|
package edu.mit.broad.sting.atk.modules;
|
||||||
|
|
||||||
import edu.mit.broad.sam.SAMRecord;
|
import edu.mit.broad.sam.SAMRecord;
|
||||||
import edu.mit.broad.sting.atk.ReadWalker;
|
|
||||||
import edu.mit.broad.sting.atk.LocusContext;
|
import edu.mit.broad.sting.atk.LocusContext;
|
||||||
|
|
||||||
/**
|
public class CountReadsWalker extends BasicReadWalker<Integer, Integer> {
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: mdepristo
|
|
||||||
* Date: Feb 22, 2009
|
|
||||||
* Time: 3:22:14 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class CountReadsWalker implements ReadWalker<Integer, Integer> {
|
|
||||||
|
|
||||||
public void initialize() { }
|
|
||||||
|
|
||||||
public String walkerType() { return "ByRead"; }
|
|
||||||
|
|
||||||
// Do we actually want to operate on the context?
|
|
||||||
public boolean filter(LocusContext context, SAMRecord read) {
|
|
||||||
return true; // We are keeping all the reads
|
|
||||||
}
|
|
||||||
|
|
||||||
// Map over the edu.mit.broad.sting.atk.LocusContext
|
|
||||||
public Integer map(LocusContext context, SAMRecord read) {
|
public Integer map(LocusContext context, SAMRecord read) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Given result of map function
|
|
||||||
public Integer reduceInit() { return 0; }
|
public Integer reduceInit() { return 0; }
|
||||||
|
|
||||||
public Integer reduce(Integer value, Integer sum) {
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
return value + sum;
|
return value + sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void onTraveralDone() {
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,8 @@ package edu.mit.broad.sting.atk.modules;
|
||||||
import edu.mit.broad.sting.atk.LocusWalker;
|
import edu.mit.broad.sting.atk.LocusWalker;
|
||||||
import edu.mit.broad.sting.atk.LocusIterator;
|
import edu.mit.broad.sting.atk.LocusIterator;
|
||||||
import edu.mit.broad.sting.utils.ReferenceOrderedDatum;
|
import edu.mit.broad.sting.utils.ReferenceOrderedDatum;
|
||||||
|
import edu.mit.broad.sting.utils.rodDbSNP;
|
||||||
|
import edu.mit.broad.sting.utils.Utils;
|
||||||
import edu.mit.broad.sam.SAMRecord;
|
import edu.mit.broad.sam.SAMRecord;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -53,14 +55,21 @@ public class PileupWalker implements LocusWalker<Integer, Integer> {
|
||||||
String rodString = "";
|
String rodString = "";
|
||||||
for ( ReferenceOrderedDatum datum : rodData ) {
|
for ( ReferenceOrderedDatum datum : rodData ) {
|
||||||
if ( datum != null ) {
|
if ( datum != null ) {
|
||||||
rodString += datum.toSimpleString();
|
if ( datum instanceof rodDbSNP) {
|
||||||
|
rodDbSNP dbsnp = (rodDbSNP)datum;
|
||||||
|
//System.out.printf(" DBSNP %s on %s => %s%n", dbsnp.toSimpleString(), dbsnp.strand, Utils.join("/", dbsnp.getAllelesFWD()));
|
||||||
|
rodString += dbsnp.toMediumString();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
rodString += datum.toSimpleString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ( rodString != "" )
|
if ( rodString != "" )
|
||||||
rodString = "[ROD: " + rodString + "]";
|
rodString = "[ROD: " + rodString + "]";
|
||||||
|
|
||||||
if ( context.getPosition() % 1 == 0 ) {
|
if ( context.getLocation().getStart() % 1 == 0 ) {
|
||||||
System.out.printf("%s:%d: %s %s %s %s%n", context.getContig(), context.getPosition(), ref, bases, quals, rodString);
|
System.out.printf("%s: %s %s %s %s%n", context.getLocation(), ref, bases, quals, rodString);
|
||||||
}
|
}
|
||||||
|
|
||||||
//for ( int offset : context.getOffsets() ) {
|
//for ( int offset : context.getOffsets() ) {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,17 @@
|
||||||
|
package edu.mit.broad.sting.atk.modules;
|
||||||
|
|
||||||
|
import edu.mit.broad.sam.SAMRecord;
|
||||||
|
import edu.mit.broad.sting.atk.LocusContext;
|
||||||
|
|
||||||
|
public class PrintReadsWalker extends BasicReadWalker<Integer, Integer> {
|
||||||
|
public Integer map(LocusContext context, SAMRecord read) {
|
||||||
|
System.out.println(read.format());
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduceInit() { return 0; }
|
||||||
|
|
||||||
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
|
return value + sum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,195 @@
|
||||||
|
package edu.mit.broad.sting.utils;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: mdepristo
|
||||||
|
* Date: Mar 2, 2009
|
||||||
|
* Time: 8:50:11 AM
|
||||||
|
*
|
||||||
|
* Genome location representation. It is *** 1 *** based
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class GenomeLoc implements Comparable<GenomeLoc> {
|
||||||
|
private String contig;
|
||||||
|
private long start;
|
||||||
|
private long stop;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Ugly global variable defining the optional ordering of contig elements
|
||||||
|
//
|
||||||
|
public static HashMap<String, Integer> refContigOrdering = null;
|
||||||
|
public static void setContigOrdering(HashMap<String, Integer> rco) {
|
||||||
|
refContigOrdering = rco;
|
||||||
|
}
|
||||||
|
|
||||||
|
public GenomeLoc( final String contig, final long start, final long stop ) {
|
||||||
|
this.contig = contig;
|
||||||
|
this.start = start;
|
||||||
|
this.stop = stop;
|
||||||
|
}
|
||||||
|
|
||||||
|
public GenomeLoc( final String contig, final long pos ) {
|
||||||
|
this( contig, pos, pos );
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Parsing string representations
|
||||||
|
//
|
||||||
|
private static long parsePosition( final String pos ) {
|
||||||
|
String x = pos.replaceAll(",", "");
|
||||||
|
return Long.parseLong(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static GenomeLoc parseGenomeLoc( final String str ) {
|
||||||
|
// Ôchr2Õ, Ôchr2:1000000Õ or Ôchr2:1,000,000-2,000,000Õ
|
||||||
|
System.out.printf("Parsing location '%s'%n", str);
|
||||||
|
|
||||||
|
final Pattern regex1 = Pattern.compile("([\\w&&[^:]]+)$"); // matches case 1
|
||||||
|
final Pattern regex2 = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)$"); // matches case 2
|
||||||
|
final Pattern regex3 = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)-([\\d,]+)$");// matches case 3
|
||||||
|
|
||||||
|
String contig = null;
|
||||||
|
long start = 1;
|
||||||
|
long stop = Integer.MAX_VALUE;
|
||||||
|
boolean bad = false;
|
||||||
|
|
||||||
|
Matcher match1 = regex1.matcher(str);
|
||||||
|
Matcher match2 = regex2.matcher(str);
|
||||||
|
Matcher match3 = regex3.matcher(str);
|
||||||
|
|
||||||
|
try {
|
||||||
|
if ( match1.matches() ) {
|
||||||
|
contig = match1.group(1);
|
||||||
|
}
|
||||||
|
else if ( match2.matches() ) {
|
||||||
|
contig = match2.group(1);
|
||||||
|
start = parsePosition(match2.group(2));
|
||||||
|
}
|
||||||
|
else if ( match3.matches() ) {
|
||||||
|
contig = match3.group(1);
|
||||||
|
start = parsePosition(match3.group(2));
|
||||||
|
stop = parsePosition(match3.group(3));
|
||||||
|
|
||||||
|
if ( start > stop )
|
||||||
|
bad = true;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
bad = true;
|
||||||
|
}
|
||||||
|
} catch ( Exception e ) {
|
||||||
|
bad = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( bad ) {
|
||||||
|
throw new RuntimeException("Invalid Genome Location string: " + str);
|
||||||
|
}
|
||||||
|
|
||||||
|
GenomeLoc loc = new GenomeLoc(contig, start, stop);
|
||||||
|
System.out.printf(" => Parsed location '%s' into %s%n", str, loc);
|
||||||
|
|
||||||
|
return loc;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Accessors and setters
|
||||||
|
//
|
||||||
|
public final String getContig() { return this.contig; }
|
||||||
|
public final long getStart() { return this.start; }
|
||||||
|
public final long getStop() { return this.stop; }
|
||||||
|
public final String toString() {
|
||||||
|
if ( throughEndOfContigP() && atBeginningOfContigP() )
|
||||||
|
return getContig();
|
||||||
|
else if ( throughEndOfContigP() || getStart() == getStop() )
|
||||||
|
return String.format("%s:%d", getContig(), getStart());
|
||||||
|
else
|
||||||
|
return String.format("%s:%d-%d", getContig(), getStart(), getStop());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public final boolean throughEndOfContigP() { return this.stop == Integer.MAX_VALUE; }
|
||||||
|
public final boolean atBeginningOfContigP() { return this.start == 1; }
|
||||||
|
|
||||||
|
public void setContig(String contig) {
|
||||||
|
this.contig = contig;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setStart(long start) {
|
||||||
|
this.start = start;
|
||||||
|
}
|
||||||
|
public void setStop(long stop) {
|
||||||
|
this.stop = stop;
|
||||||
|
}
|
||||||
|
|
||||||
|
public final boolean isSingleBP() { return stop == start; }
|
||||||
|
|
||||||
|
public final boolean disjointP(GenomeLoc that) {
|
||||||
|
if ( compareContigs(this.contig, that.contig) != 0 ) return true; // different chromosomes
|
||||||
|
if ( this.start > that.stop ) return true; // this guy is past that
|
||||||
|
if ( that.start > this.stop ) return true; // that guy is past our start
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public final boolean overlapsP(GenomeLoc that) {
|
||||||
|
return ! disjointP( that );
|
||||||
|
}
|
||||||
|
|
||||||
|
public final boolean onSameContig(GenomeLoc that) {
|
||||||
|
return this.contig.equals(that.contig);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// Comparison operations
|
||||||
|
//
|
||||||
|
public static int compareContigs( final String thisContig, final String thatContig ) {
|
||||||
|
if ( refContigOrdering != null ) {
|
||||||
|
if ( ! refContigOrdering.containsKey(thisContig) ) {
|
||||||
|
if ( ! refContigOrdering.containsKey(thatContig) ) {
|
||||||
|
// Use regular sorted order
|
||||||
|
return thisContig.compareTo(thatContig);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// this is always bigger if that is in the key set
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if ( ! refContigOrdering.containsKey(thatContig) )
|
||||||
|
return -1;
|
||||||
|
else {
|
||||||
|
assert refContigOrdering.containsKey(thisContig);// : this;
|
||||||
|
assert refContigOrdering.containsKey(thatContig);// : that;
|
||||||
|
|
||||||
|
final int thisO = refContigOrdering.get(thisContig);
|
||||||
|
final int thatO = refContigOrdering.get(thatContig);
|
||||||
|
if ( thisO < thatO ) return -1;
|
||||||
|
if ( thisO > thatO ) return 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return thisContig.compareTo(thatContig);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int compareContigs( GenomeLoc that ) {
|
||||||
|
return compareContigs( this.contig, that.contig );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public int compareTo( GenomeLoc that ) {
|
||||||
|
if ( this == that ) return 0;
|
||||||
|
|
||||||
|
final int cmpContig = compareContigs( this.getContig(), that.getContig() );
|
||||||
|
if ( cmpContig != 0 ) return cmpContig;
|
||||||
|
if ( this.getStart() < that.getStart() ) return -1;
|
||||||
|
if ( this.getStart() > that.getStart() ) return 1;
|
||||||
|
if ( this.getStop() < that.getStop() ) return -1;
|
||||||
|
if ( this.getStop() > that.getStop() ) return 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -21,7 +21,7 @@ public class ReferenceIterator implements Iterator<ReferenceIterator> {
|
||||||
|
|
||||||
private ReferenceSequence currentContig = null;
|
private ReferenceSequence currentContig = null;
|
||||||
private ReferenceSequence nextContig = null;
|
private ReferenceSequence nextContig = null;
|
||||||
private int offset = -1;
|
private long offset = -1;
|
||||||
|
|
||||||
public ReferenceIterator( ReferenceSequenceFile refFile ) {
|
public ReferenceIterator( ReferenceSequenceFile refFile ) {
|
||||||
this.refFile = refFile;
|
this.refFile = refFile;
|
||||||
|
|
@ -32,11 +32,11 @@ public class ReferenceIterator implements Iterator<ReferenceIterator> {
|
||||||
// Accessing data
|
// Accessing data
|
||||||
//
|
//
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
public byte getBaseAsByte() { return currentContig.getBases()[offset]; }
|
public byte getBaseAsByte() { return currentContig.getBases()[(int)offset]; }
|
||||||
public String getBaseAsString() { return StringUtil.bytesToString(currentContig.getBases(), offset, 1); }
|
public String getBaseAsString() { return StringUtil.bytesToString(currentContig.getBases(), (int)offset, 1); }
|
||||||
public char getBaseAsChar() { return getBaseAsString().charAt(0); }
|
public char getBaseAsChar() { return getBaseAsString().charAt(0); }
|
||||||
public ReferenceSequence getCurrentContig() { return currentContig; }
|
public ReferenceSequence getCurrentContig() { return currentContig; }
|
||||||
public int getPosition() { return offset + 1; }
|
public long getPosition() { return offset + 1; }
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
@ -91,11 +91,15 @@ public class ReferenceIterator implements Iterator<ReferenceIterator> {
|
||||||
// Jumping forward
|
// Jumping forward
|
||||||
//
|
//
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
public ReferenceIterator seekForward(final String contigName, final int pos) {
|
public ReferenceIterator seekForward(final GenomeLoc loc) {
|
||||||
|
return seekForwardOffset(loc.getContig(), loc.getStart() - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
public ReferenceIterator seekForward(final String contigName, final long pos) {
|
||||||
return seekForwardOffset(contigName, pos - 1);
|
return seekForwardOffset(contigName, pos - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
private ReferenceIterator seekForwardOffset(final String contigName, final int seekOffset) {
|
private ReferenceIterator seekForwardOffset(final String contigName, final long seekOffset) {
|
||||||
// jumps us forward in the sequence to the contig / pos
|
// jumps us forward in the sequence to the contig / pos
|
||||||
if ( currentContig == null )
|
if ( currentContig == null )
|
||||||
next();
|
next();
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
||||||
public void testMe() {
|
public void testMe() {
|
||||||
ReferenceOrderedDatum last = null;
|
ReferenceOrderedDatum last = null;
|
||||||
for ( ReferenceOrderedDatum rec : this ) {
|
for ( ReferenceOrderedDatum rec : this ) {
|
||||||
if ( last == null || ! last.getContig().equals(rec.getContig()) ) {
|
if ( last == null || ! last.getLocation().onSameContig(rec.getLocation()) ) {
|
||||||
System.out.println(rec.toString());
|
System.out.println(rec.toString());
|
||||||
}
|
}
|
||||||
last = rec;
|
last = rec;
|
||||||
|
|
@ -137,7 +137,11 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
||||||
// If we don't find anything and cross beyond contig / pos, we return null
|
// If we don't find anything and cross beyond contig / pos, we return null
|
||||||
// Otherwise we return the first object who's start is at pos
|
// Otherwise we return the first object who's start is at pos
|
||||||
//
|
//
|
||||||
public ROD seekForward(final String contigName, final int pos) {
|
public ROD seekForward(final GenomeLoc loc) {
|
||||||
|
return seekForward(loc.getContig(), loc.getStart());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected ROD seekForward(final String contigName, final long pos) {
|
||||||
final boolean DEBUG = false;
|
final boolean DEBUG = false;
|
||||||
|
|
||||||
ROD result = null;
|
ROD result = null;
|
||||||
|
|
@ -146,7 +150,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
||||||
while ( hasNext() ) {
|
while ( hasNext() ) {
|
||||||
ROD current = next();
|
ROD current = next();
|
||||||
//System.out.printf(" -> Seeking to %s %d AT %s %d%n", contigName, pos, current.getContig(), current.getStart());
|
//System.out.printf(" -> Seeking to %s %d AT %s %d%n", contigName, pos, current.getContig(), current.getStart());
|
||||||
int strCmp = ReferenceOrderedDatum.compareContigs( contigName, prev.getContig() );// contigName.compareTo( prev.getContig() );
|
int strCmp = GenomeLoc.compareContigs( contigName, prev.getContig() );// contigName.compareTo( prev.getContig() );
|
||||||
if ( strCmp == 0 ) {
|
if ( strCmp == 0 ) {
|
||||||
// The contigs are equal
|
// The contigs are equal
|
||||||
if ( current.getStart() > pos ) {
|
if ( current.getStart() > pos ) {
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,5 @@
|
||||||
package edu.mit.broad.sting.utils;
|
package edu.mit.broad.sting.utils;
|
||||||
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.HashMap;
|
|
||||||
|
|
||||||
//
|
|
||||||
// Ugly global variable defining the optional ordering of contig elements
|
|
||||||
//
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: mdepristo
|
* User: mdepristo
|
||||||
|
|
@ -13,11 +7,7 @@ import java.util.HashMap;
|
||||||
* Time: 10:49:47 AM
|
* Time: 10:49:47 AM
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
public abstract class ReferenceOrderedDatum implements Comparable {
|
public abstract class ReferenceOrderedDatum implements Comparable<ReferenceOrderedDatum> {
|
||||||
public static HashMap<String, Integer> refContigOrdering = null;
|
|
||||||
public static void setContigOrdering(HashMap<String, Integer> rco) {
|
|
||||||
refContigOrdering = rco;
|
|
||||||
}
|
|
||||||
|
|
||||||
public ReferenceOrderedDatum() { }
|
public ReferenceOrderedDatum() { }
|
||||||
|
|
||||||
|
|
@ -27,51 +17,12 @@ public abstract class ReferenceOrderedDatum implements Comparable {
|
||||||
public abstract String toSimpleString();
|
public abstract String toSimpleString();
|
||||||
public abstract String repl();
|
public abstract String repl();
|
||||||
|
|
||||||
public abstract String getContig();
|
public abstract GenomeLoc getLocation();
|
||||||
public abstract long getStart();
|
public int compareTo( ReferenceOrderedDatum that ) {
|
||||||
public abstract long getStop();
|
return getLocation().compareTo(that.getLocation());
|
||||||
|
|
||||||
public static int compareContigs( final String thisContig, final String thatContig ) {
|
|
||||||
if ( refContigOrdering != null ) {
|
|
||||||
if ( ! refContigOrdering.containsKey(thisContig) ) {
|
|
||||||
if ( ! refContigOrdering.containsKey(thatContig) ) {
|
|
||||||
// Use regular sorted order
|
|
||||||
return thisContig.compareTo(thatContig);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// this is always bigger if that is in the key set
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if ( ! refContigOrdering.containsKey(thatContig) )
|
|
||||||
return -1;
|
|
||||||
else {
|
|
||||||
assert refContigOrdering.containsKey(thisContig);// : this;
|
|
||||||
assert refContigOrdering.containsKey(thatContig);// : that;
|
|
||||||
|
|
||||||
final int thisO = refContigOrdering.get(thisContig);
|
|
||||||
final int thatO = refContigOrdering.get(thatContig);
|
|
||||||
if ( thisO < thatO ) return -1;
|
|
||||||
if ( thisO > thatO ) return 1;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return thisContig.compareTo(thatContig);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int compareTo( Object x ) {
|
public final String getContig() { return getLocation().getContig(); }
|
||||||
if ( this == x ) return 0;
|
public final long getStart() { return getLocation().getStart(); }
|
||||||
|
public final long getStop() { return getLocation().getStop(); }
|
||||||
ReferenceOrderedDatum that = (ReferenceOrderedDatum)x;
|
|
||||||
|
|
||||||
final int cmpContig = compareContigs( this.getContig(), that.getContig() );
|
|
||||||
if ( cmpContig != 0 ) return cmpContig;
|
|
||||||
if ( this.getStart() < that.getStart() ) return -1;
|
|
||||||
if ( this.getStart() > that.getStart() ) return 1;
|
|
||||||
if ( this.getStop() < that.getStop() ) return -1;
|
|
||||||
if ( this.getStop() > that.getStop() ) return 1;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -68,6 +68,10 @@ public class Utils {
|
||||||
return ret.toString();
|
return ret.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String join(String separator, Collection<String> strings) {
|
||||||
|
return join( separator, strings.toArray(new String[0]) );
|
||||||
|
}
|
||||||
|
|
||||||
public static void setupRefContigOrdering(final ReferenceSequenceFile refFile) {
|
public static void setupRefContigOrdering(final ReferenceSequenceFile refFile) {
|
||||||
List<SAMSequenceRecord> refContigs = refFile.getSequenceDictionary();
|
List<SAMSequenceRecord> refContigs = refFile.getSequenceDictionary();
|
||||||
HashMap<String, Integer> refContigOrdering = new HashMap<String, Integer>();
|
HashMap<String, Integer> refContigOrdering = new HashMap<String, Integer>();
|
||||||
|
|
@ -81,6 +85,6 @@ public class Utils {
|
||||||
}
|
}
|
||||||
System.out.printf("%n Total elements -> %d%n", refContigOrdering.size());
|
System.out.printf("%n Total elements -> %d%n", refContigOrdering.size());
|
||||||
|
|
||||||
ReferenceOrderedDatum.setContigOrdering(refContigOrdering);
|
GenomeLoc.setContigOrdering(refContigOrdering);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,13 +3,13 @@ package edu.mit.broad.sting.utils;
|
||||||
import edu.mit.broad.sam.SAMRecord;
|
import edu.mit.broad.sam.SAMRecord;
|
||||||
import edu.mit.broad.sam.util.CloseableIterator;
|
import edu.mit.broad.sam.util.CloseableIterator;
|
||||||
import edu.mit.broad.picard.util.TabbedTextFileParser;
|
import edu.mit.broad.picard.util.TabbedTextFileParser;
|
||||||
|
import edu.mit.broad.picard.util.SequenceUtil;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.BufferedInputStream;
|
import java.io.BufferedInputStream;
|
||||||
import java.util.Iterator;
|
import java.util.*;
|
||||||
import java.util.HashMap;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Example format:
|
* Example format:
|
||||||
|
|
@ -22,15 +22,18 @@ import java.util.HashMap;
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
public class rodDbSNP extends ReferenceOrderedDatum {
|
public class rodDbSNP extends ReferenceOrderedDatum {
|
||||||
public String contig; // Reference sequence chromosome or scaffold
|
public GenomeLoc loc; // genome location of SNP
|
||||||
public long start, stop; // Start and stop positions in chrom
|
// Reference sequence chromosome or scaffold
|
||||||
|
// Start and stop positions in chrom
|
||||||
|
|
||||||
public String name; // Reference SNP identifier or Affy SNP name
|
public String name; // Reference SNP identifier or Affy SNP name
|
||||||
public String strand; // Which DNA strand contains the observed alleles
|
public String strand; // Which DNA strand contains the observed alleles
|
||||||
public String observed; // The sequences of the observed alleles from rs-fasta files
|
|
||||||
public char[] observedBases; // The sequences of the observed alleles from rs-fasta files
|
public String refBases; // the reference base according to NCBI, in the dbSNP file
|
||||||
public String molType; // Sample type from exemplar ss
|
public String observed; // The sequences of the observed alleles from rs-fasta files
|
||||||
public String varType; // The class of variant (simple, insertion, deletion, range, etc.)
|
|
||||||
|
public String molType; // Sample type from exemplar ss
|
||||||
|
public String varType; // The class of variant (simple, insertion, deletion, range, etc.)
|
||||||
// Can be 'unknown','single','in-del','het','microsatellite','named','mixed','mnp','insertion','deletion'
|
// Can be 'unknown','single','in-del','het','microsatellite','named','mixed','mnp','insertion','deletion'
|
||||||
public String validationStatus; // The validation status of the SNP
|
public String validationStatus; // The validation status of the SNP
|
||||||
// one of set('unknown','by-cluster','by-frequency','by-submitter','by-2hit-2allele','by-hapmap')
|
// one of set('unknown','by-cluster','by-frequency','by-submitter','by-2hit-2allele','by-hapmap')
|
||||||
|
|
@ -53,9 +56,52 @@ public class rodDbSNP extends ReferenceOrderedDatum {
|
||||||
// ----------------------------------------------------------------------
|
// ----------------------------------------------------------------------
|
||||||
public rodDbSNP() {}
|
public rodDbSNP() {}
|
||||||
|
|
||||||
public String getContig() { return this.contig; }
|
// ----------------------------------------------------------------------
|
||||||
public long getStart() { return start; }
|
//
|
||||||
public long getStop() { return stop; }
|
// manipulating the SNP information
|
||||||
|
//
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
public GenomeLoc getLocation() { return loc; }
|
||||||
|
|
||||||
|
public boolean onFwdStrand() {
|
||||||
|
return strand.equals("+");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the reference bases on the forward strand
|
||||||
|
public String getRefBasesFWD() {
|
||||||
|
if ( onFwdStrand() )
|
||||||
|
return refBases;
|
||||||
|
else
|
||||||
|
return SequenceUtil.reverseComplement(refBases);
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getAllelesFWD() {
|
||||||
|
List<String> alleles = null;
|
||||||
|
if ( onFwdStrand() )
|
||||||
|
alleles = Arrays.asList(observed.split("/"));
|
||||||
|
else
|
||||||
|
alleles = Arrays.asList(SequenceUtil.reverseComplement(observed).split("/"));
|
||||||
|
|
||||||
|
//System.out.printf("getAlleles %s on %s %b => %s %n", observed, strand, onFwdStrand(), Utils.join("/", alleles));
|
||||||
|
return alleles;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getAllelesFWDString() {
|
||||||
|
return Utils.join("/", getAllelesFWD());
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// What kind of variant are we?
|
||||||
|
//
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
public boolean isSNP() { return varType.contains("single"); }
|
||||||
|
public boolean isInsertion() { return varType.contains("insertion"); }
|
||||||
|
public boolean isDeletion() { return varType.contains("deletion"); }
|
||||||
|
public boolean isIndel() { return varType.contains("in-del"); }
|
||||||
|
|
||||||
|
public boolean isHapmap() { return validationStatus.contains("by-hapmap"); }
|
||||||
|
public boolean is2Hit2Allele() { return validationStatus.contains("by-2hit-2allele"); }
|
||||||
|
|
||||||
// ----------------------------------------------------------------------
|
// ----------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
@ -63,28 +109,39 @@ public class rodDbSNP extends ReferenceOrderedDatum {
|
||||||
//
|
//
|
||||||
// ----------------------------------------------------------------------
|
// ----------------------------------------------------------------------
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.format("%s\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%f\t%f\t%s\t%s\t%d",
|
return String.format("%s\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%f\t%f\t%s\t%s\t%d",
|
||||||
contig, start, stop, name, strand, observed, molType,
|
getContig(), getStart(), getStop(), name, strand, refBases, observed, molType,
|
||||||
varType, validationStatus, avHet, avHetSE, func, locType, weight );
|
varType, validationStatus, avHet, avHetSE, func, locType, weight );
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toSimpleString() {
|
public String toSimpleString() {
|
||||||
return String.format("%s:%s", name, observed);
|
return String.format("%s:%s:%s", name, observed, strand);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toMediumString() {
|
||||||
|
String s = String.format("%s:%s:%s", getLocation().toString(), name, getAllelesFWDString());
|
||||||
|
if ( isSNP() ) s += ":SNP";
|
||||||
|
if ( isIndel() ) s += ":Indel";
|
||||||
|
if ( isHapmap() ) s += ":Hapmap";
|
||||||
|
if ( is2Hit2Allele() ) s += ":2Hit";
|
||||||
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String repl() {
|
public String repl() {
|
||||||
return String.format("%d\t%s\t%d\t%d\t%s\t0\t%s\tX\tX\t%s\t%s\t%s\t%s\t%f\t%f\t%s\t%s\t%d",
|
return String.format("%d\t%s\t%d\t%d\t%s\t0\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%f\t%f\t%s\t%s\t%d",
|
||||||
585, contig, start-1, stop-1, name, strand, observed, molType,
|
585, getContig(), getStart()-1, getStop()-1, name, strand, refBases, refBases, observed, molType,
|
||||||
varType, validationStatus, avHet, avHetSE, func, locType, weight );
|
varType, validationStatus, avHet, avHetSE, func, locType, weight );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void parseLine(final String[] parts) {
|
public void parseLine(final String[] parts) {
|
||||||
try {
|
try {
|
||||||
contig = parts[1];
|
String contig = parts[1];
|
||||||
start = Long.parseLong(parts[2]) + 1; // The final is 0 based
|
long start = Long.parseLong(parts[2]) + 1; // The final is 0 based
|
||||||
|
long stop = Long.parseLong(parts[3]) + 1; // The final is 0 based
|
||||||
|
loc = new GenomeLoc(contig, start, stop);
|
||||||
|
|
||||||
stop = Long.parseLong(parts[3]) + 1; // The final is 0 based
|
|
||||||
name = parts[4];
|
name = parts[4];
|
||||||
|
refBases = parts[5];
|
||||||
strand = parts[6];
|
strand = parts[6];
|
||||||
observed = parts[9];
|
observed = parts[9];
|
||||||
molType = parts[10];
|
molType = parts[10];
|
||||||
|
|
@ -95,15 +152,6 @@ public class rodDbSNP extends ReferenceOrderedDatum {
|
||||||
func = parts[15];
|
func = parts[15];
|
||||||
locType = parts[16];
|
locType = parts[16];
|
||||||
weight = Integer.parseInt(parts[17]);
|
weight = Integer.parseInt(parts[17]);
|
||||||
|
|
||||||
// Cut up the observed bases string into an array of individual bases
|
|
||||||
String[] bases = observed.split("/");
|
|
||||||
observedBases = new char[bases.length];
|
|
||||||
for ( String elt : bases ) {
|
|
||||||
observedBases[0] = (char)elt.getBytes()[0];
|
|
||||||
//System.out.printf(" Bases %s %d %c%n", elt, elt.getBytes()[0], (char)elt.getBytes()[0]);
|
|
||||||
}
|
|
||||||
//System.out.printf(" => Observed bases are %s%n", Utils.join(" B ", bases));
|
|
||||||
} catch ( RuntimeException e ) {
|
} catch ( RuntimeException e ) {
|
||||||
System.out.printf(" Exception caught during parsing GFFLine %s%n", Utils.join(" <=> ", parts));
|
System.out.printf(" Exception caught during parsing GFFLine %s%n", Utils.join(" <=> ", parts));
|
||||||
throw e;
|
throw e;
|
||||||
|
|
|
||||||
|
|
@ -53,10 +53,6 @@ public class rodGFF extends ReferenceOrderedDatum {
|
||||||
// Accessors
|
// Accessors
|
||||||
//
|
//
|
||||||
// ----------------------------------------------------------------------
|
// ----------------------------------------------------------------------
|
||||||
public String getContig() {
|
|
||||||
return this.contig;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getSource() {
|
public String getSource() {
|
||||||
return source;
|
return source;
|
||||||
}
|
}
|
||||||
|
|
@ -73,18 +69,14 @@ public class rodGFF extends ReferenceOrderedDatum {
|
||||||
return frame;
|
return frame;
|
||||||
}
|
}
|
||||||
|
|
||||||
public long getStart() {
|
|
||||||
return start;
|
|
||||||
}
|
|
||||||
|
|
||||||
public long getStop() {
|
|
||||||
return stop;
|
|
||||||
}
|
|
||||||
|
|
||||||
public double getScore() {
|
public double getScore() {
|
||||||
return score;
|
return score;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public GenomeLoc getLocation() {
|
||||||
|
return new GenomeLoc(contig, start, stop);
|
||||||
|
}
|
||||||
|
|
||||||
public String getAttribute(final String key) {
|
public String getAttribute(final String key) {
|
||||||
return attributes.get(key);
|
return attributes.get(key);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1 +1 @@
|
||||||
java -Xmx1024m -cp out/production/AnalysisTK:../../jars/broad.jar edu.mit.broad.sting.atk.AnalysisTK $*
|
java -Xmx1024m -cp out/production/AnalysisTK:trunk/java/jars/functionalj.jar edu.mit.broad.sting.atk.AnalysisTK $*
|
||||||
|
|
|
||||||
|
|
@ -1 +1 @@
|
||||||
java -Xmx1024m -agentlib:hprof=cpu=samples -cp out/production/AnalysisTK:../../jars/broad.jar edu.mit.broad.sting.atk.AnalysisTK $*
|
java -Xmx1024m -agentlib:hprof=cpu=samples -cp out/production/AnalysisTK:trunk/java/jars/functionalj.jar edu.mit.broad.sting.atk.AnalysisTK $*
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue