Renaming of ATK to GATK, the Genome Analysis Toolkit.
Also added several more layers of error checking.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@54 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c9cb7a3596
commit
24ae381c97
|
|
@ -1,7 +1,8 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.atk.LocusIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import org.broadinstitute.sting.atk.ReadWalker;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -27,7 +27,7 @@ public class BaseQualityHistoWalker implements ReadWalker<Integer, Integer> {
|
||||||
return true; // We are keeping all the reads
|
return true; // We are keeping all the reads
|
||||||
}
|
}
|
||||||
|
|
||||||
// Map over the org.broadinstitute.sting.gatk.LocusContext
|
// Map over the org.broadinstitute.sting.atk.LocusContext
|
||||||
public Integer map(LocusContext context, SAMRecord read) {
|
public Integer map(LocusContext context, SAMRecord read) {
|
||||||
for ( byte qual : read.getBaseQualities() ) {
|
for ( byte qual : read.getBaseQualities() ) {
|
||||||
//System.out.println(qual);
|
//System.out.println(qual);
|
||||||
|
|
@ -1,8 +1,10 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.atk.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.atk.LocusIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
@ -25,8 +27,7 @@ public abstract class BasicLociWalker<MapType, ReduceType> implements LocusWalke
|
||||||
return true; // We are keeping all the reads
|
return true; // We are keeping all the reads
|
||||||
}
|
}
|
||||||
|
|
||||||
public void onTraversalDone() {
|
public void onTraveralDone() {
|
||||||
;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// These three capabilities must be overidden
|
// These three capabilities must be overidden
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import org.broadinstitute.sting.atk.ReadWalker;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -20,8 +20,8 @@ public abstract class BasicReadWalker<MapType, ReduceType> implements ReadWalker
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void onTraversalDone() {
|
public void onTraveralDone() {
|
||||||
;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Three basic abstract function that *must* be overridden
|
// Three basic abstract function that *must* be overridden
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
|
|
||||||
public class CountReadsWalker extends BasicReadWalker<Integer, Integer> {
|
public class CountReadsWalker extends BasicReadWalker<Integer, Integer> {
|
||||||
public Integer map(LocusContext context, SAMRecord read) {
|
public Integer map(LocusContext context, SAMRecord read) {
|
||||||
|
|
@ -1,10 +1,12 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: mdepristo
|
* User: mdepristo
|
||||||
|
|
@ -1,8 +1,12 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.atk.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.atk.LocusIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
||||||
|
import org.broadinstitute.sting.utils.rodDbSNP;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
@ -20,7 +24,7 @@ public class NullWalker implements LocusWalker<Integer, Integer> {
|
||||||
return true; // We are keeping all the reads
|
return true; // We are keeping all the reads
|
||||||
}
|
}
|
||||||
|
|
||||||
// Map over the org.broadinstitute.sting.gatk.LocusContext
|
// Map over the org.broadinstitute.sting.atk.LocusContext
|
||||||
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context)
|
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context)
|
||||||
{
|
{
|
||||||
return 1;
|
return 1;
|
||||||
|
|
@ -36,6 +40,6 @@ public class NullWalker implements LocusWalker<Integer, Integer> {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void onTraversalDone() {
|
public void onTraveralDone() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1,8 +1,11 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.atk.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.atk.LocusIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
||||||
|
import org.broadinstitute.sting.utils.rodDbSNP;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -14,7 +17,7 @@ import java.util.List;
|
||||||
* Time: 3:22:14 PM
|
* Time: 3:22:14 PM
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
public class PileupWalker extends BasicLociWalker<Integer, Integer> {
|
public class PileupWalker implements LocusWalker<Integer, Integer> {
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -25,7 +28,7 @@ public class PileupWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
return true; // We are keeping all the reads
|
return true; // We are keeping all the reads
|
||||||
}
|
}
|
||||||
|
|
||||||
// Map over the org.broadinstitute.sting.gatk.LocusContext
|
// Map over the org.broadinstitute.sting.atk.LocusContext
|
||||||
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
||||||
//System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size());
|
//System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size());
|
||||||
//for ( SAMRecord read : context.getReads() ) {
|
//for ( SAMRecord read : context.getReads() ) {
|
||||||
|
|
@ -81,4 +84,7 @@ public class PileupWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
public Integer reduce(Integer value, Integer sum) {
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
return value + sum;
|
return value + sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void onTraveralDone() {
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
|
|
||||||
public class PrintReadsWalker extends BasicReadWalker<Integer, Integer> {
|
public class PrintReadsWalker extends BasicReadWalker<Integer, Integer> {
|
||||||
public Integer map(LocusContext context, SAMRecord read) {
|
public Integer map(LocusContext context, SAMRecord read) {
|
||||||
|
|
@ -1,8 +1,10 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.atk.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.atk.LocusIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
||||||
|
import org.broadinstitute.sting.utils.rodDbSNP;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
|
@ -11,7 +13,13 @@ import java.util.List;
|
||||||
// Draft single sample genotyper
|
// Draft single sample genotyper
|
||||||
// j.maguire 3-7-2009
|
// j.maguire 3-7-2009
|
||||||
|
|
||||||
public class SingleSampleGenotyper extends BasicLociWalker<Integer, Integer> {
|
public class SingleSampleGenotyper implements LocusWalker<Integer, Integer> {
|
||||||
|
public void initialize() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public String walkerType() { return "ByLocus"; }
|
||||||
|
|
||||||
|
// Do we actually want to operate on the context?
|
||||||
public boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
public boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
||||||
return true; // We are keeping all the reads
|
return true; // We are keeping all the reads
|
||||||
}
|
}
|
||||||
|
|
@ -78,7 +86,7 @@ public class SingleSampleGenotyper extends BasicLociWalker<Integer, Integer> {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Map over the org.broadinstitute.sting.gatk.LocusContext
|
// Map over the org.broadinstitute.sting.atk.LocusContext
|
||||||
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
||||||
//System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size());
|
//System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size());
|
||||||
//for ( SAMRecord read : context.getReads() ) {
|
//for ( SAMRecord read : context.getReads() ) {
|
||||||
|
|
@ -97,7 +105,7 @@ public class SingleSampleGenotyper extends BasicLociWalker<Integer, Integer> {
|
||||||
{
|
{
|
||||||
if ( datum != null )
|
if ( datum != null )
|
||||||
{
|
{
|
||||||
if ( datum instanceof rodDbSNP)
|
if ( datum instanceof rodDbSNP)
|
||||||
{
|
{
|
||||||
rodDbSNP dbsnp = (rodDbSNP)datum;
|
rodDbSNP dbsnp = (rodDbSNP)datum;
|
||||||
rodString += dbsnp.toMediumString();
|
rodString += dbsnp.toMediumString();
|
||||||
|
|
@ -136,4 +144,7 @@ public class SingleSampleGenotyper extends BasicLociWalker<Integer, Integer> {
|
||||||
public Integer reduce(Integer value, Integer sum) {
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
return value + sum;
|
return value + sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void onTraveralDone() {
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1,213 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileWriter;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collections;
|
|
||||||
|
|
||||||
import edu.mit.broad.picard.util.TabbedTextFileParser;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Class for representing arbitrary reference ordered data sets
|
|
||||||
*
|
|
||||||
* User: mdepristo
|
|
||||||
* Date: Feb 27, 2009
|
|
||||||
* Time: 10:47:14 AM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements Iterable<ROD> {
|
|
||||||
private File file = null;
|
|
||||||
private Class<ROD> type = null; // runtime type information for object construction
|
|
||||||
|
|
||||||
public ReferenceOrderedData(File file, Class<ROD> type ) {
|
|
||||||
this.file = file;
|
|
||||||
this.type = type;
|
|
||||||
}
|
|
||||||
|
|
||||||
public RODIterator iterator() {
|
|
||||||
return new RODIterator(new SimpleRODIterator());
|
|
||||||
}
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Testing
|
|
||||||
//
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
public void testMe() {
|
|
||||||
ReferenceOrderedDatum last = null;
|
|
||||||
for ( ReferenceOrderedDatum rec : this ) {
|
|
||||||
if ( last == null || ! last.getLocation().onSameContig(rec.getLocation()) ) {
|
|
||||||
System.out.println(rec.toString());
|
|
||||||
}
|
|
||||||
last = rec;
|
|
||||||
}
|
|
||||||
System.exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Manipulations of all of the data
|
|
||||||
//
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
public ArrayList<ReferenceOrderedDatum> readAll() {
|
|
||||||
ArrayList<ReferenceOrderedDatum> elts = new ArrayList<ReferenceOrderedDatum>();
|
|
||||||
for ( ReferenceOrderedDatum rec : this ) {
|
|
||||||
elts.add(rec);
|
|
||||||
}
|
|
||||||
elts.trimToSize();
|
|
||||||
return elts;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void sortRODDataInMemory(ArrayList<ReferenceOrderedDatum> data) {
|
|
||||||
Collections.sort(data);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void write(ArrayList<ReferenceOrderedDatum> data, File output) throws IOException {
|
|
||||||
final FileWriter out = new FileWriter(output);
|
|
||||||
|
|
||||||
for ( ReferenceOrderedDatum rec : data ) {
|
|
||||||
out.write(rec.repl() + "\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
out.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean validateFile() throws Exception {
|
|
||||||
ReferenceOrderedDatum last = null;
|
|
||||||
for ( ReferenceOrderedDatum rec : this ) {
|
|
||||||
if ( last != null && last.compareTo(rec) == 1 ) {
|
|
||||||
// It's out of order
|
|
||||||
throw new Exception("Out of order elements at \n" + last.toString() + "\n" + rec.toString());
|
|
||||||
}
|
|
||||||
last = rec;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void indexFile() {
|
|
||||||
// Fixme -- get access to the linear index system from Jim
|
|
||||||
}
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Iteration
|
|
||||||
//
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
private class SimpleRODIterator implements Iterator<ROD> {
|
|
||||||
//private WhitespaceTextFileParser parser = null;
|
|
||||||
private TabbedTextFileParser parser = null;
|
|
||||||
|
|
||||||
public SimpleRODIterator() {
|
|
||||||
parser = new TabbedTextFileParser(true, file);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean hasNext() {
|
|
||||||
return parser.hasNext();
|
|
||||||
}
|
|
||||||
|
|
||||||
public ROD next() {
|
|
||||||
String parts[] = parser.next();
|
|
||||||
return parseLine(parts);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void remove() {
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public class RODIterator implements Iterator<ROD> {
|
|
||||||
private PushbackIterator<ROD> it;
|
|
||||||
private ROD prev = null;
|
|
||||||
|
|
||||||
public RODIterator(SimpleRODIterator it) {
|
|
||||||
this.it = new PushbackIterator<ROD>(it);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean hasNext() { return it.hasNext(); }
|
|
||||||
public ROD next() {
|
|
||||||
prev = it.next();
|
|
||||||
return prev;
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// Seeks forward in the file until we reach (or cross) a record at contig / pos
|
|
||||||
// If we don't find anything and cross beyond contig / pos, we return null
|
|
||||||
// Otherwise we return the first object who's start is at pos
|
|
||||||
//
|
|
||||||
public ROD seekForward(final GenomeLoc loc) {
|
|
||||||
return seekForward(loc.getContig(), loc.getStart());
|
|
||||||
}
|
|
||||||
|
|
||||||
protected ROD seekForward(final String contigName, final long pos) {
|
|
||||||
final boolean DEBUG = false;
|
|
||||||
|
|
||||||
ROD result = null;
|
|
||||||
|
|
||||||
if ( DEBUG ) System.out.printf(" *** starting seek to %s %d %s%n", contigName, pos, prev);
|
|
||||||
while ( hasNext() ) {
|
|
||||||
ROD current = next();
|
|
||||||
//System.out.printf(" -> Seeking to %s %d AT %s %d%n", contigName, pos, current.getContig(), current.getStart());
|
|
||||||
int strCmp = GenomeLoc.compareContigs( contigName, prev.getContig() );// contigName.compareTo( prev.getContig() );
|
|
||||||
if ( strCmp == 0 ) {
|
|
||||||
// The contigs are equal
|
|
||||||
if ( current.getStart() > pos ) {
|
|
||||||
// There was nothing to find, push back next and return null
|
|
||||||
it.pushback(current);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
else if ( pos == current.getStart() ) {
|
|
||||||
// We found a record at contig / pos, return it
|
|
||||||
result = current;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if ( strCmp < 0 ) {
|
|
||||||
if ( DEBUG ) System.out.printf(" -> Jumped to contig %s%n", contigName);
|
|
||||||
// We've gone past the desired contig, break
|
|
||||||
it.pushback(current);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( DEBUG ) {
|
|
||||||
if ( result == null )
|
|
||||||
;
|
|
||||||
//System.out.printf(" --- seek result to %s %d is NULL%n", contigName, pos);
|
|
||||||
else
|
|
||||||
System.out.printf(" ### Found %s %d%n", result.getContig(), result.getStart());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// we ran out of elements or found something
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void remove() {
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Parsing
|
|
||||||
//
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
ROD parseLine(final String[] parts) {
|
|
||||||
//System.out.printf("Parsing GFFLine %s%n", Utils.join(" ", parts));
|
|
||||||
try {
|
|
||||||
ROD obj = type.newInstance();
|
|
||||||
obj.parseLine(parts);
|
|
||||||
return obj;
|
|
||||||
} catch ( java.lang.InstantiationException e ) {
|
|
||||||
System.out.println(e);
|
|
||||||
return null; // wow, unsafe!
|
|
||||||
} catch ( java.lang.IllegalAccessException e ) {
|
|
||||||
System.out.println(e);
|
|
||||||
return null; // wow, unsafe!
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,30 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: mdepristo
|
|
||||||
* Date: Feb 27, 2009
|
|
||||||
* Time: 10:49:47 AM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public abstract class ReferenceOrderedDatum implements Comparable<ReferenceOrderedDatum> {
|
|
||||||
|
|
||||||
public ReferenceOrderedDatum() { }
|
|
||||||
|
|
||||||
public abstract void parseLine(final String[] parts);
|
|
||||||
|
|
||||||
public abstract String toString();
|
|
||||||
public abstract String toSimpleString();
|
|
||||||
public abstract String repl();
|
|
||||||
|
|
||||||
public abstract GenomeLoc getLocation();
|
|
||||||
public int compareTo( ReferenceOrderedDatum that ) {
|
|
||||||
return getLocation().compareTo(that.getLocation());
|
|
||||||
}
|
|
||||||
|
|
||||||
public final String getContig() { return getLocation().getContig(); }
|
|
||||||
public final long getStart() { return getLocation().getStart(); }
|
|
||||||
public final long getStop() { return getLocation().getStop(); }
|
|
||||||
}
|
|
||||||
|
|
@ -1,157 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata;
|
|
||||||
|
|
||||||
import edu.mit.broad.picard.util.SequenceUtil;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Example format:
|
|
||||||
* 585 chr1 433 433 rs56289060 0 + - - -/C genomic insertion unknown 0 0 unknown between 1
|
|
||||||
* 585 chr1 491 492 rs55998931 0 + C C C/T genomic single unknown 0 0 unknown exact 1
|
|
||||||
*
|
|
||||||
* User: mdepristo
|
|
||||||
* Date: Feb 27, 2009
|
|
||||||
* Time: 10:47:14 AM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class rodDbSNP extends ReferenceOrderedDatum {
|
|
||||||
public GenomeLoc loc; // genome location of SNP
|
|
||||||
// Reference sequence chromosome or scaffold
|
|
||||||
// Start and stop positions in chrom
|
|
||||||
|
|
||||||
public String name; // Reference SNP identifier or Affy SNP name
|
|
||||||
public String strand; // Which DNA strand contains the observed alleles
|
|
||||||
|
|
||||||
public String refBases; // the reference base according to NCBI, in the dbSNP file
|
|
||||||
public String observed; // The sequences of the observed alleles from rs-fasta files
|
|
||||||
|
|
||||||
public String molType; // Sample type from exemplar ss
|
|
||||||
public String varType; // The class of variant (simple, insertion, deletion, range, etc.)
|
|
||||||
// Can be 'unknown','single','in-del','het','microsatellite','named','mixed','mnp','insertion','deletion'
|
|
||||||
public String validationStatus; // The validation status of the SNP
|
|
||||||
// one of set('unknown','by-cluster','by-frequency','by-submitter','by-2hit-2allele','by-hapmap')
|
|
||||||
|
|
||||||
public double avHet; // The average heterozygosity from all observations
|
|
||||||
public double avHetSE; // The Standard Error for the average heterozygosity
|
|
||||||
|
|
||||||
public String func; // The functional category of the SNP (coding-synon, coding-nonsynon, intron, etc.)
|
|
||||||
// set('unknown','coding-synon','intron','cds-reference','near-gene-3','near-gene-5',
|
|
||||||
// 'nonsense','missense','frameshift','untranslated-3','untranslated-5','splice-3','splice-5')
|
|
||||||
public String locType; // How the variant affects the reference sequence
|
|
||||||
// enum('range','exact','between','rangeInsertion','rangeSubstitution','rangeDeletion')
|
|
||||||
|
|
||||||
public int weight; // The quality of the alignment
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Constructors
|
|
||||||
//
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
public rodDbSNP() {}
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// manipulating the SNP information
|
|
||||||
//
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
public GenomeLoc getLocation() { return loc; }
|
|
||||||
|
|
||||||
public boolean onFwdStrand() {
|
|
||||||
return strand.equals("+");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the reference bases on the forward strand
|
|
||||||
public String getRefBasesFWD() {
|
|
||||||
if ( onFwdStrand() )
|
|
||||||
return refBases;
|
|
||||||
else
|
|
||||||
return SequenceUtil.reverseComplement(refBases);
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<String> getAllelesFWD() {
|
|
||||||
List<String> alleles = null;
|
|
||||||
if ( onFwdStrand() )
|
|
||||||
alleles = Arrays.asList(observed.split("/"));
|
|
||||||
else
|
|
||||||
alleles = Arrays.asList(SequenceUtil.reverseComplement(observed).split("/"));
|
|
||||||
|
|
||||||
//System.out.printf("getAlleles %s on %s %b => %s %n", observed, strand, onFwdStrand(), Utils.join("/", alleles));
|
|
||||||
return alleles;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getAllelesFWDString() {
|
|
||||||
return Utils.join("/", getAllelesFWD());
|
|
||||||
}
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// What kind of variant are we?
|
|
||||||
//
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
public boolean isSNP() { return varType.contains("single"); }
|
|
||||||
public boolean isInsertion() { return varType.contains("insertion"); }
|
|
||||||
public boolean isDeletion() { return varType.contains("deletion"); }
|
|
||||||
public boolean isIndel() { return varType.contains("in-del"); }
|
|
||||||
|
|
||||||
public boolean isHapmap() { return validationStatus.contains("by-hapmap"); }
|
|
||||||
public boolean is2Hit2Allele() { return validationStatus.contains("by-2hit-2allele"); }
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// formatting
|
|
||||||
//
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
public String toString() {
|
|
||||||
return String.format("%s\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%f\t%f\t%s\t%s\t%d",
|
|
||||||
getContig(), getStart(), getStop(), name, strand, refBases, observed, molType,
|
|
||||||
varType, validationStatus, avHet, avHetSE, func, locType, weight );
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toSimpleString() {
|
|
||||||
return String.format("%s:%s:%s", name, observed, strand);
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toMediumString() {
|
|
||||||
String s = String.format("%s:%s:%s", getLocation().toString(), name, getAllelesFWDString());
|
|
||||||
if ( isSNP() ) s += ":SNP";
|
|
||||||
if ( isIndel() ) s += ":Indel";
|
|
||||||
if ( isHapmap() ) s += ":Hapmap";
|
|
||||||
if ( is2Hit2Allele() ) s += ":2Hit";
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String repl() {
|
|
||||||
return String.format("%d\t%s\t%d\t%d\t%s\t0\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%f\t%f\t%s\t%s\t%d",
|
|
||||||
585, getContig(), getStart()-1, getStop()-1, name, strand, refBases, refBases, observed, molType,
|
|
||||||
varType, validationStatus, avHet, avHetSE, func, locType, weight );
|
|
||||||
}
|
|
||||||
|
|
||||||
public void parseLine(final String[] parts) {
|
|
||||||
try {
|
|
||||||
String contig = parts[1];
|
|
||||||
long start = Long.parseLong(parts[2]) + 1; // The final is 0 based
|
|
||||||
long stop = Long.parseLong(parts[3]) + 1; // The final is 0 based
|
|
||||||
loc = new GenomeLoc(contig, start, stop);
|
|
||||||
|
|
||||||
name = parts[4];
|
|
||||||
refBases = parts[5];
|
|
||||||
strand = parts[6];
|
|
||||||
observed = parts[9];
|
|
||||||
molType = parts[10];
|
|
||||||
varType = parts[11];
|
|
||||||
validationStatus = parts[12];
|
|
||||||
avHet = Double.parseDouble(parts[13]);
|
|
||||||
avHetSE = Double.parseDouble(parts[14]);
|
|
||||||
func = parts[15];
|
|
||||||
locType = parts[16];
|
|
||||||
weight = Integer.parseInt(parts[17]);
|
|
||||||
} catch ( RuntimeException e ) {
|
|
||||||
System.out.printf(" Exception caught during parsing GFFLine %s%n", Utils.join(" <=> ", parts));
|
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,113 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata;
|
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Class for representing arbitrary reference ordered data sets
|
|
||||||
*
|
|
||||||
* User: mdepristo
|
|
||||||
* Date: Feb 27, 2009
|
|
||||||
* Time: 10:47:14 AM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class rodGFF extends ReferenceOrderedDatum {
|
|
||||||
private String contig, source, feature, strand, frame;
|
|
||||||
private long start, stop;
|
|
||||||
private double score;
|
|
||||||
private HashMap<String, String> attributes;
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Constructors
|
|
||||||
//
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
public rodGFF() {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setValues(final String contig, final String source, final String feature,
|
|
||||||
final long start, final long stop, final double score,
|
|
||||||
final String strand, final String frame, HashMap<String, String> attributes) {
|
|
||||||
this.contig = contig;
|
|
||||||
this.source = source;
|
|
||||||
this.feature = feature;
|
|
||||||
this.start = start;
|
|
||||||
this.stop= stop;
|
|
||||||
this.score = score;
|
|
||||||
this.strand = strand;
|
|
||||||
this.frame = frame;
|
|
||||||
this.attributes = attributes;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Accessors
|
|
||||||
//
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
public String getSource() {
|
|
||||||
return source;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getFeature() {
|
|
||||||
return feature;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStrand() {
|
|
||||||
return strand;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getFrame() {
|
|
||||||
return frame;
|
|
||||||
}
|
|
||||||
|
|
||||||
public double getScore() {
|
|
||||||
return score;
|
|
||||||
}
|
|
||||||
|
|
||||||
public GenomeLoc getLocation() {
|
|
||||||
return new GenomeLoc(contig, start, stop);
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getAttribute(final String key) {
|
|
||||||
return attributes.get(key);
|
|
||||||
}
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// formatting
|
|
||||||
//
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
public String toString() {
|
|
||||||
return String.format("%s\t%s\t%s\t%d\t%d\t%f\t%s\t%s", contig, source, feature, start, stop, score, strand, frame);
|
|
||||||
}
|
|
||||||
|
|
||||||
public String repl() {
|
|
||||||
return this.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toSimpleString() {
|
|
||||||
return String.format("%s", feature);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void parseLine(final String[] parts) {
|
|
||||||
//System.out.printf("Parsing GFFLine %s%n", Utils.join(" ", parts));
|
|
||||||
|
|
||||||
final String contig = parts[0];
|
|
||||||
final String source = parts[1];
|
|
||||||
final String feature = parts[2];
|
|
||||||
final long start = Long.parseLong(parts[3]);
|
|
||||||
final long stop = Long.parseLong(parts[4]);
|
|
||||||
|
|
||||||
double score = Double.NaN;
|
|
||||||
if ( ! parts[5].equals(".") )
|
|
||||||
score = Double.parseDouble(parts[5]);
|
|
||||||
|
|
||||||
final String strand = parts[6];
|
|
||||||
final String frame = parts[7];
|
|
||||||
HashMap<String, String> attributes = null;
|
|
||||||
setValues(contig, source, feature, start, stop, score, strand, frame, attributes);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,30 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: mdepristo
|
|
||||||
* Date: Feb 22, 2009
|
|
||||||
* Time: 2:52:28 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public interface LocusWalker<MapType, ReduceType> {
|
|
||||||
void initialize();
|
|
||||||
public String walkerType();
|
|
||||||
|
|
||||||
// Do we actually want to operate on the context?
|
|
||||||
boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context);
|
|
||||||
|
|
||||||
// Map over the org.broadinstitute.sting.gatk.LocusContext
|
|
||||||
MapType map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context);
|
|
||||||
|
|
||||||
// Given result of map function
|
|
||||||
ReduceType reduceInit();
|
|
||||||
ReduceType reduce(MapType value, ReduceType sum);
|
|
||||||
|
|
||||||
void onTraversalDone();
|
|
||||||
}
|
|
||||||
|
|
@ -1,28 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: mdepristo
|
|
||||||
* Date: Feb 22, 2009
|
|
||||||
* Time: 2:52:28 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public interface ReadWalker<MapType, ReduceType> {
|
|
||||||
void initialize();
|
|
||||||
public String walkerType();
|
|
||||||
|
|
||||||
// Do we actually want to operate on the context?
|
|
||||||
boolean filter(LocusContext context, SAMRecord read);
|
|
||||||
|
|
||||||
// Map over the org.broadinstitute.sting.gatk.LocusContext
|
|
||||||
MapType map(LocusContext context, SAMRecord read);
|
|
||||||
|
|
||||||
// Given result of map function
|
|
||||||
ReduceType reduceInit();
|
|
||||||
ReduceType reduce(MapType value, ReduceType sum);
|
|
||||||
|
|
||||||
void onTraversalDone();
|
|
||||||
}
|
|
||||||
Loading…
Reference in New Issue