Move to new directory organization.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@35 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
25ddc0f5ea
commit
5031875507
|
|
@ -1,78 +0,0 @@
|
||||||
package edu.mit.broad.sting.atk;
|
|
||||||
|
|
||||||
/**
 * Tallies the bases observed at one locus and summarizes how many of them
 * disagree with the reference base.
 *
 * <p>Per-nucleotide counts live in {@link #nucs} in the fixed order A, C, T, G.
 * Characters other than A/C/T/G (upper or lower case) are not added to any
 * per-nucleotide count, but they are still included in {@link #allbases} and
 * therefore count as non-reference in {@link #q}.
 *
 * <p>Created by IntelliJ IDEA. User: andrewk. Date: Mar 9, 2009, 3:34:08 PM.
 */
public class GenotypeEvidence {

    /**
     * ASCII lookup table from nucleotide character to its slot in {@link #nucs}.
     * Only the entries for A/C/T/G (either case) are meaningful; every other
     * entry is left at 0, so counting goes through {@link #baseIndex(char)}
     * rather than this table.
     */
    int[] nuc2num = new int[128];
    /** Observed base counts, indexed A=0, C=1, T=2, G=3. */
    int[] nucs = new int[4];
    /** Number of A bases seen (same value as {@code nucs[0]} after construction). */
    int a;
    /** Number of C bases seen (same value as {@code nucs[1]} after construction). */
    int c;
    /** Number of T bases seen (same value as {@code nucs[2]} after construction). */
    int t;
    /** Number of G bases seen (same value as {@code nucs[3]} after construction). */
    int g;
    /** Allocated but never filled in by this class. */
    float[] nuc_pcnt = new float[4];
    /** Reference base supplied to the constructor. */
    char ref;
    /** Fraction of observed bases that are not the reference base; 0 when no bases were observed. */
    public float q;
    /** Number of observed bases equal to the reference base. */
    public int refbases;
    /** Total number of observed bases, i.e. the length of the input string. */
    public int allbases;

    /**
     * Counts the bases in {@code bases} and derives the non-reference fraction.
     *
     * @param bases one character per observed base at this locus
     * @param ref   the reference base at this locus
     */
    public GenotypeEvidence(String bases, char ref) {
        this.ref = ref;
        nuc2num['A'] = 0;
        nuc2num['C'] = 1;
        nuc2num['T'] = 2;
        nuc2num['G'] = 3;
        nuc2num['a'] = 0;
        nuc2num['c'] = 1;
        nuc2num['t'] = 2;
        nuc2num['g'] = 3;

        for (int i = 0; i < bases.length(); i++) {
            int slot = baseIndex(bases.charAt(i));
            // Skip anything that is not A/C/T/G instead of silently counting it as 'A'.
            if (slot >= 0) {
                nucs[slot] += 1;
            }
        }

        // Publish the per-base counts only after counting has finished.
        a = nucs[0];
        c = nucs[1];
        t = nucs[2];
        g = nucs[3];

        // q = 1 - (reference bases / all bases), i.e. the non-reference fraction.
        int refSlot = baseIndex(ref);
        refbases = (refSlot >= 0) ? nucs[refSlot] : 0;
        allbases = bases.length();
        // Guard the empty pileup: 0/0 would otherwise make q NaN.
        q = (allbases == 0) ? 0f : 1 - ((float) refbases / allbases);
    }

    /**
     * Maps a nucleotide character to its slot in {@link #nucs}.
     *
     * @return 0..3 for A/C/T/G (either case), or -1 for any other character
     */
    private static int baseIndex(char base) {
        switch (base) {
            case 'A': case 'a': return 0;
            case 'C': case 'c': return 1;
            case 'T': case 't': return 2;
            case 'G': case 'g': return 3;
            default:            return -1;
        }
    }

    /**
     * Placeholder: currently returns {@code true} regardless of the evidence
     * or of {@code cutoff_fraction}.
     *
     * @param cutoff_fraction currently ignored
     * @return always {@code true}
     */
    public boolean SigNonref(float cutoff_fraction) {
        return true;
    }

    /** Writes the four base counts and the reference base to standard output on one line (no newline). */
    public void print() {
        System.out.format("A %2d | ", nucs[0]);
        System.out.format("C %2d | ", nucs[1]);
        System.out.format("T %2d | ", nucs[2]);
        System.out.format("G %2d | ", nucs[3]);
        System.out.format("Ref %s | ", ref);
    }
}
|
|
||||||
|
|
@ -1,88 +0,0 @@
|
||||||
package edu.mit.broad.sting.atk.modules;
|
|
||||||
|
|
||||||
import edu.mit.broad.sting.atk.LocusIterator;
|
|
||||||
import edu.mit.broad.sting.atk.GenotypeEvidence;
|
|
||||||
import edu.mit.broad.sting.utils.ReferenceOrderedDatum;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import static java.lang.System.currentTimeMillis;
|
|
||||||
|
|
||||||
public class GenotypeWalker extends BasicLociWalker<Integer, Integer> {
|
|
||||||
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
|
||||||
//char[] = new char(26);
|
|
||||||
long start_tm = currentTimeMillis();
|
|
||||||
List<SAMRecord> reads = context.getReads();
|
|
||||||
List<Integer> offsets = context.getOffsets();
|
|
||||||
String bases = "";
|
|
||||||
String quals = "";
|
|
||||||
//String offsetString = "";
|
|
||||||
for ( int i = 0; i < reads.size(); i++ ) {
|
|
||||||
SAMRecord read = reads.get(i);
|
|
||||||
int offset = offsets.get(i);
|
|
||||||
|
|
||||||
//if ( offset >= read.getReadString().length() )
|
|
||||||
// System.out.printf(" [%2d] [%s] %s%n", offset, read.format(), read.getReadString());
|
|
||||||
|
|
||||||
bases += read.getReadString().charAt(offset);
|
|
||||||
//quals += read.getBaseQualityString().charAt(offset);
|
|
||||||
//offsetString += i;
|
|
||||||
//System.out.printf(" [%2d] [%s] %s%n", offset, read.getReadString().charAt(offset), read.getReadString());
|
|
||||||
}
|
|
||||||
|
|
||||||
GenotypeEvidence all = new GenotypeEvidence(bases, ref);
|
|
||||||
|
|
||||||
// P(q|G) - prob of nonref mixture given the genotype
|
|
||||||
float qobs = all.q; // observed percent of non-ref bases
|
|
||||||
double G; // % non-ref bases in observed
|
|
||||||
if (qobs >= 0.1) {
|
|
||||||
all.print();
|
|
||||||
System.out.format("q %.2f | ", all.q);
|
|
||||||
System.out.format("%s | ", context.getLocation());
|
|
||||||
System.out.format("Total %4d | ", context.numReads());
|
|
||||||
System.out.println();
|
|
||||||
for (int q = 0; q < all.allbases; q ++) {
|
|
||||||
for (G = 0.01; G <= 1.0; G += 0.49) { // iterate over: ref (0%), het (50%) and hom (100%) nonref bases observed
|
|
||||||
//double pqG = binomialProb(all.allbases - all.refbases, all.allbases, G);
|
|
||||||
double pqG = binomialProb(q, all.allbases, G);
|
|
||||||
//all.print();
|
|
||||||
System.out.format("P(q|G) %.3f | ", pqG);
|
|
||||||
}
|
|
||||||
System.out.println();
|
|
||||||
}
|
|
||||||
long stop_tm = currentTimeMillis();
|
|
||||||
System.out.format("%.3fs\n", (float)(stop_tm - start_tm) / 1000);
|
|
||||||
}
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static double binomialProb(int k, int n, double p) {
|
|
||||||
// k - numebr of successes
|
|
||||||
// n - number of Bernoulli trials
|
|
||||||
// p - probability of success
|
|
||||||
|
|
||||||
return (double)nchoosek(n, k) * Math.pow(p, k) * Math.pow(1-p, n-k);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int nchoosek(int n, int k) {
|
|
||||||
int t = 1;
|
|
||||||
|
|
||||||
int m = n - k;
|
|
||||||
if (k < m) {
|
|
||||||
k = m;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = n, j = 1; i > k; i--, j++) {
|
|
||||||
t = t * i / j;
|
|
||||||
}
|
|
||||||
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Integer reduceInit() { return 0; }
|
|
||||||
|
|
||||||
public Integer reduce(Integer value, Integer sum) {
|
|
||||||
return value + sum;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,44 +0,0 @@
|
||||||
package edu.mit.broad.sting.atk.modules;
|
|
||||||
|
|
||||||
import edu.mit.broad.sting.atk.LocusWalker;
|
|
||||||
import edu.mit.broad.sting.atk.LocusIterator;
|
|
||||||
import edu.mit.broad.sting.utils.ReferenceOrderedDatum;
|
|
||||||
import edu.mit.broad.sting.utils.rodDbSNP;
|
|
||||||
import edu.mit.broad.sting.utils.Utils;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
// Null traversal. For ATK performance measuring.
|
|
||||||
// j.maguire 3-7-2009
|
|
||||||
|
|
||||||
public class NullWalker implements LocusWalker<Integer, Integer> {
|
|
||||||
public void initialize() {
|
|
||||||
}
|
|
||||||
|
|
||||||
public String walkerType() { return "ByLocus"; }
|
|
||||||
|
|
||||||
// Do we actually want to operate on the context?
|
|
||||||
public boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
|
||||||
return true; // We are keeping all the reads
|
|
||||||
}
|
|
||||||
|
|
||||||
// Map over the edu.mit.broad.sting.atk.LocusContext
|
|
||||||
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context)
|
|
||||||
{
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Given result of map function
|
|
||||||
public Integer reduceInit()
|
|
||||||
{
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
public Integer reduce(Integer value, Integer sum)
|
|
||||||
{
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void onTraveralDone() {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,149 +0,0 @@
|
||||||
package edu.mit.broad.sting.atk.modules;
|
|
||||||
|
|
||||||
import edu.mit.broad.sting.atk.LocusWalker;
|
|
||||||
import edu.mit.broad.sting.atk.LocusIterator;
|
|
||||||
import edu.mit.broad.sting.utils.ReferenceOrderedDatum;
|
|
||||||
import edu.mit.broad.sting.utils.rodDbSNP;
|
|
||||||
import edu.mit.broad.sting.utils.Utils;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
// Draft single sample genotyper
|
|
||||||
// j.maguire 3-7-2009
|
|
||||||
|
|
||||||
public class SingleSampleGenotyper implements LocusWalker<Integer, Integer> {
|
|
||||||
public void initialize() {
|
|
||||||
}
|
|
||||||
|
|
||||||
public String walkerType() { return "ByLocus"; }
|
|
||||||
|
|
||||||
// Do we actually want to operate on the context?
|
|
||||||
public boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
|
||||||
return true; // We are keeping all the reads
|
|
||||||
}
|
|
||||||
|
|
||||||
protected class GenotypeLikelihoods
|
|
||||||
{
|
|
||||||
public double[] likelihoods;
|
|
||||||
public String[] genotypes;
|
|
||||||
|
|
||||||
GenotypeLikelihoods()
|
|
||||||
{
|
|
||||||
likelihoods = new double[10];
|
|
||||||
genotypes = new String[10];
|
|
||||||
|
|
||||||
genotypes[0] = "AA";
|
|
||||||
genotypes[1] = "AC";
|
|
||||||
genotypes[2] = "AG";
|
|
||||||
genotypes[3] = "AT";
|
|
||||||
genotypes[4] = "CC";
|
|
||||||
genotypes[5] = "CG";
|
|
||||||
genotypes[6] = "CT";
|
|
||||||
genotypes[7] = "GG";
|
|
||||||
genotypes[8] = "GT";
|
|
||||||
genotypes[9] = "TT";
|
|
||||||
}
|
|
||||||
|
|
||||||
void add(char ref, char read, byte qual)
|
|
||||||
{
|
|
||||||
double p_error = Math.pow(10.0, (double)qual / -10);
|
|
||||||
for (int i = 0; i < genotypes.length; i++)
|
|
||||||
{
|
|
||||||
likelihoods[i] += AlleleLikelihood(ref, read, genotypes[i], p_error);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
double AlleleLikelihood(char ref, char read, String genotype, double p_error)
|
|
||||||
{
|
|
||||||
char h1 = genotype.charAt(0);
|
|
||||||
char h2 = genotype.charAt(1);
|
|
||||||
|
|
||||||
double p_base;
|
|
||||||
|
|
||||||
if ((h1 == h2) && (h1 == read)) { p_base = Math.log10(1-p_error); }
|
|
||||||
else if ((h1 != h2) && (h1 == read) || (h2 == read)) { p_base = Math.log10(0.5 - (p_error/2.0)); }
|
|
||||||
else { p_base = Math.log10(p_error); }
|
|
||||||
|
|
||||||
return p_base;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString()
|
|
||||||
{
|
|
||||||
Integer[] permutation = Utils.SortPermutation(likelihoods);
|
|
||||||
String[] sorted_genotypes = Utils.PermuteArray(genotypes, permutation);
|
|
||||||
double[] sorted_likelihoods = Utils.PermuteArray(likelihoods, permutation);
|
|
||||||
|
|
||||||
String s = "";
|
|
||||||
for (int i = sorted_genotypes.length-1; i >= 0; i--)
|
|
||||||
{
|
|
||||||
if (i != sorted_genotypes.length-1) { s = s + " "; }
|
|
||||||
s = s + sorted_genotypes[i] + ":" + sorted_likelihoods[i];
|
|
||||||
}
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// Map over the edu.mit.broad.sting.atk.LocusContext
|
|
||||||
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
|
||||||
//System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size());
|
|
||||||
//for ( SAMRecord read : context.getReads() ) {
|
|
||||||
// System.out.println(" -> " + read.getReadName());
|
|
||||||
//}
|
|
||||||
|
|
||||||
List<SAMRecord> reads = context.getReads();
|
|
||||||
List<Integer> offsets = context.getOffsets();
|
|
||||||
String bases = "";
|
|
||||||
String quals = "";
|
|
||||||
//String offsetString = "";
|
|
||||||
|
|
||||||
// Look up hapmap and dbsnp priors
|
|
||||||
String rodString = "";
|
|
||||||
for ( ReferenceOrderedDatum datum : rodData )
|
|
||||||
{
|
|
||||||
if ( datum != null )
|
|
||||||
{
|
|
||||||
if ( datum instanceof rodDbSNP)
|
|
||||||
{
|
|
||||||
rodDbSNP dbsnp = (rodDbSNP)datum;
|
|
||||||
rodString += dbsnp.toMediumString();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
rodString += datum.toSimpleString();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ( rodString != "" )
|
|
||||||
rodString = "[ROD: " + rodString + "]";
|
|
||||||
|
|
||||||
// Accumulate genotype likelihoods
|
|
||||||
GenotypeLikelihoods G = new GenotypeLikelihoods();
|
|
||||||
for ( int i = 0; i < reads.size(); i++ )
|
|
||||||
{
|
|
||||||
SAMRecord read = reads.get(i);
|
|
||||||
int offset = offsets.get(i);
|
|
||||||
bases += read.getReadString().charAt(offset);
|
|
||||||
quals += read.getBaseQualityString().charAt(offset);
|
|
||||||
|
|
||||||
G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( context.getLocation().getStart() % 1 == 0 ) {
|
|
||||||
//System.out.printf("%s: %s %s %s %s%n", context.getLocation(), ref, bases, quals, rodString);
|
|
||||||
System.out.printf("%s %s %s %s\n", ref, bases, G.toString(), rodString);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Given result of map function
|
|
||||||
public Integer reduceInit() { return 0; }
|
|
||||||
public Integer reduce(Integer value, Integer sum) {
|
|
||||||
return value + sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void onTraveralDone() {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -71,7 +71,7 @@
|
||||||
<jar jarfile="${dist}/AnalysisTK.jar" basedir="${build}">
|
<jar jarfile="${dist}/AnalysisTK.jar" basedir="${build}">
|
||||||
<manifest>
|
<manifest>
|
||||||
<attribute name="Class-Path" value="${jar.classpath}" />
|
<attribute name="Class-Path" value="${jar.classpath}" />
|
||||||
<attribute name="Main-Class" value="edu.mit.broad.sting.atk.AnalysisTK" />
|
<attribute name="Main-Class" value="org.broadinstitute.sting.atk.AnalysisTK" />
|
||||||
</manifest>
|
</manifest>
|
||||||
</jar>
|
</jar>
|
||||||
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
<ivy-module version="1.0">
|
<ivy-module version="1.0">
|
||||||
<info organisation="edu.mit.broad" module="Sting"/>
|
<info organisation="org.broadinstitute" module="Sting"/>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency org="net.sf" name="functionalj" rev="latest.integration" />
|
<dependency org="net.sf" name="functionalj" rev="latest.integration" />
|
||||||
<dependency org="net.sf" name="sam" rev="latest.integration" />
|
<dependency org="net.sf" name="sam" rev="latest.integration" />
|
||||||
<dependency name="picard" rev="latest.integration" />
|
<dependency org="edu.mit.broad" name="picard" rev="latest.integration" />
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</ivy-module>
|
</ivy-module>
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package edu.mit.broad.sting;
|
package org.broadinstitute.sting;
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
||||||
|
|
@ -76,7 +76,7 @@ public class ValidateSAM extends CommandLineProgram {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void usage() {
|
private static void usage() {
|
||||||
System.err.println("USAGE: edu.mit.broad.sting.ValidateSAM <SAMFile|BAMFile>");
|
System.err.println("USAGE: org.broadinstitute.sting.ValidateSAM <SAMFile|BAMFile>");
|
||||||
}
|
}
|
||||||
|
|
||||||
private SAMFileReader getSamReader(final File samFile) {
|
private SAMFileReader getSamReader(final File samFile) {
|
||||||
|
|
@ -1,14 +1,14 @@
|
||||||
package edu.mit.broad.sting.atk;
|
package org.broadinstitute.sting.atk;
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
||||||
import edu.mit.broad.picard.cmdline.CommandLineProgram;
|
import edu.mit.broad.picard.cmdline.CommandLineProgram;
|
||||||
import edu.mit.broad.picard.cmdline.Usage;
|
import edu.mit.broad.picard.cmdline.Usage;
|
||||||
import edu.mit.broad.picard.cmdline.Option;
|
import edu.mit.broad.picard.cmdline.Option;
|
||||||
|
|
||||||
import edu.mit.broad.sting.atk.modules.*;
|
import org.broadinstitute.sting.atk.modules.*;
|
||||||
import edu.mit.broad.sting.utils.ReferenceOrderedData;
|
import org.broadinstitute.sting.utils.ReferenceOrderedData;
|
||||||
import edu.mit.broad.sting.utils.rodGFF;
|
import org.broadinstitute.sting.utils.rodGFF;
|
||||||
import edu.mit.broad.sting.utils.rodDbSNP;
|
import org.broadinstitute.sting.utils.rodDbSNP;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
|
@ -37,9 +37,6 @@ public class AnalysisTK extends CommandLineProgram {
|
||||||
addModule("CountReads", new CountReadsWalker());
|
addModule("CountReads", new CountReadsWalker());
|
||||||
addModule("PrintReads", new PrintReadsWalker());
|
addModule("PrintReads", new PrintReadsWalker());
|
||||||
addModule("Base_Quality_Histogram", new BaseQualityHistoWalker());
|
addModule("Base_Quality_Histogram", new BaseQualityHistoWalker());
|
||||||
addModule("Genotype", new GenotypeWalker());
|
|
||||||
addModule("SingleSampleGenotyper", new SingleSampleGenotyper());
|
|
||||||
addModule("Null", new NullWalker());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private TraversalEngine engine = null;
|
private TraversalEngine engine = null;
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package edu.mit.broad.sting.atk;
|
package org.broadinstitute.sting.atk;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
|
@ -1,11 +1,11 @@
|
||||||
package edu.mit.broad.sting.atk;
|
package org.broadinstitute.sting.atk;
|
||||||
|
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import edu.mit.broad.sting.utils.PushbackIterator;
|
import org.broadinstitute.sting.utils.PushbackIterator;
|
||||||
import edu.mit.broad.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import edu.mit.broad.sting.utils.Predicate;
|
import org.broadinstitute.sting.utils.Predicate;
|
||||||
import edu.mit.broad.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
@ -33,7 +33,6 @@ public class LocusIterator implements Iterable<LocusIterator>, CloseableIterator
|
||||||
|
|
||||||
public List<SAMRecord> getReads() { return reads; }
|
public List<SAMRecord> getReads() { return reads; }
|
||||||
public List<Integer> getOffsets() { return offsets; }
|
public List<Integer> getOffsets() { return offsets; }
|
||||||
public int numReads() { return reads.size(); }
|
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
package edu.mit.broad.sting.atk;
|
package org.broadinstitute.sting.atk;
|
||||||
|
|
||||||
import edu.mit.broad.sting.atk.LocusIterator;
|
import org.broadinstitute.sting.atk.LocusIterator;
|
||||||
import edu.mit.broad.sting.utils.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
@ -19,7 +19,7 @@ public interface LocusWalker<MapType, ReduceType> {
|
||||||
// Do we actually want to operate on the context?
|
// Do we actually want to operate on the context?
|
||||||
boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context);
|
boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context);
|
||||||
|
|
||||||
// Map over the edu.mit.broad.sting.atk.LocusContext
|
// Map over the org.broadinstitute.sting.atk.LocusContext
|
||||||
MapType map(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context);
|
MapType map(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context);
|
||||||
|
|
||||||
// Given result of map function
|
// Given result of map function
|
||||||
|
|
@ -27,4 +27,4 @@ public interface LocusWalker<MapType, ReduceType> {
|
||||||
ReduceType reduce(MapType value, ReduceType sum);
|
ReduceType reduce(MapType value, ReduceType sum);
|
||||||
|
|
||||||
void onTraveralDone();
|
void onTraveralDone();
|
||||||
}
|
}
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package edu.mit.broad.sting.atk;
|
package org.broadinstitute.sting.atk;
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
|
@ -9,8 +9,8 @@ import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory;
|
||||||
import edu.mit.broad.picard.reference.ReferenceSequence;
|
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||||
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
|
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
|
||||||
|
|
||||||
import edu.mit.broad.sting.atk.modules.*;
|
import org.broadinstitute.sting.atk.modules.*;
|
||||||
import edu.mit.broad.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
package edu.mit.broad.sting.atk;
|
package org.broadinstitute.sting.atk;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import edu.mit.broad.sting.atk.LocusContext;
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -17,7 +17,7 @@ public interface ReadWalker<MapType, ReduceType> {
|
||||||
// Do we actually want to operate on the context?
|
// Do we actually want to operate on the context?
|
||||||
boolean filter(LocusContext context, SAMRecord read);
|
boolean filter(LocusContext context, SAMRecord read);
|
||||||
|
|
||||||
// Map over the edu.mit.broad.sting.atk.LocusContext
|
// Map over the org.broadinstitute.sting.atk.LocusContext
|
||||||
MapType map(LocusContext context, SAMRecord read);
|
MapType map(LocusContext context, SAMRecord read);
|
||||||
|
|
||||||
// Given result of map function
|
// Given result of map function
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package edu.mit.broad.sting.atk;
|
package org.broadinstitute.sting.atk;
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
||||||
|
|
@ -8,7 +8,7 @@ import edu.mit.broad.picard.filter.SamRecordFilter;
|
||||||
import edu.mit.broad.picard.filter.FilteringIterator;
|
import edu.mit.broad.picard.filter.FilteringIterator;
|
||||||
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
|
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
|
||||||
import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory;
|
import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory;
|
||||||
import edu.mit.broad.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -22,51 +22,35 @@ import net.sf.functionalj.util.Operators;
|
||||||
|
|
||||||
public class TraversalEngine {
|
public class TraversalEngine {
|
||||||
// Usage and parameters
|
// Usage and parameters
|
||||||
private List<ReferenceOrderedData> rods = null; // list of reference ordered data objects
|
private File readsFile = null;
|
||||||
|
private File refFileName = null;
|
||||||
|
private List<ReferenceOrderedData> rods = null;
|
||||||
|
|
||||||
//private String regionStr = null; // String dec
|
private String regionStr = null;
|
||||||
//private String traversalType = null; // String describing this traversal type
|
private String traversalType = null;
|
||||||
|
|
||||||
// How strict should we be with SAM/BAM parsing?
|
|
||||||
private ValidationStringency strictness = ValidationStringency.STRICT;
|
private ValidationStringency strictness = ValidationStringency.STRICT;
|
||||||
|
|
||||||
// Time in milliseconds since we initialized this engine
|
|
||||||
private long startTime = -1;
|
private long startTime = -1;
|
||||||
private long lastProgressPrintTime = -1; // When was the last time we printed our progress?
|
private long lastProgressPrintTime = -1;
|
||||||
|
private long MAX_PROGRESS_PRINT_TIME = 5 * 1000; // 5 seconds in millisecs
|
||||||
// How long can we go without printing some progress info?
|
|
||||||
private long MAX_PROGRESS_PRINT_TIME = 10 * 1000; // 10 seconds in millisecs
|
|
||||||
|
|
||||||
// Maximum number of reads to process before finishing
|
|
||||||
private long maxReads = -1;
|
private long maxReads = -1;
|
||||||
|
private long nRecords = 0;
|
||||||
// Name of the reads file, in BAM/SAM format
|
private SAMFileReader samReader = null;
|
||||||
private File readsFile = null; // the name of the reads file
|
|
||||||
// iterator over the sam records in the readsFile
|
|
||||||
private Iterator<SAMRecord> samReadIter = null;
|
|
||||||
|
|
||||||
// The reference data -- filename, refSeqFile, and iterator
|
|
||||||
private File refFileName = null; // the name of the reference file
|
|
||||||
private ReferenceSequenceFile refFile = null;
|
private ReferenceSequenceFile refFile = null;
|
||||||
private ReferenceIterator refIter = null;
|
private ReferenceIterator refIter = null;
|
||||||
|
private SAMFileReader readStream;
|
||||||
|
private Iterator<SAMRecord> samReadIter = null;
|
||||||
|
|
||||||
// Number of records (loci, reads) we've processed
|
|
||||||
private long nRecords = 0;
|
|
||||||
// How many reads have we processed, along with those skipped for various reasons
|
|
||||||
private int nReads = 0;
|
private int nReads = 0;
|
||||||
private int nSkippedReads = 0;
|
private int nSkippedReads = 0;
|
||||||
private int nUnmappedReads = 0;
|
private int nUnmappedReads = 0;
|
||||||
private int nNotPrimary = 0;
|
private int nNotPrimary = 0;
|
||||||
private int nBadAlignments = 0;
|
private int nBadAlignments = 0;
|
||||||
private int nSkippedIndels = 0;
|
private int nSkippedIndels = 0;
|
||||||
|
|
||||||
// Progress tracker for the sam file
|
|
||||||
private FileProgressTracker samReadingTracker = null;
|
private FileProgressTracker samReadingTracker = null;
|
||||||
|
|
||||||
public boolean DEBUGGING = false;
|
public boolean DEBUGGING = false;
|
||||||
public long N_RECORDS_TO_PRINT = 100000;
|
|
||||||
|
|
||||||
// Locations we are going to process during the traversal
|
|
||||||
private GenomeLoc[] locs = null;
|
private GenomeLoc[] locs = null;
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
|
@ -74,191 +58,13 @@ public class TraversalEngine {
|
||||||
// Setting up the engine
|
// Setting up the engine
|
||||||
//
|
//
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates a new, uninitialized TraversalEngine
|
|
||||||
*
|
|
||||||
* @param reads SAM/BAM file of reads
|
|
||||||
* @param ref Reference file in FASTA format, assumes a .dict file is also available
|
|
||||||
* @param rods Array of reference ordered data sets
|
|
||||||
*/
|
|
||||||
public TraversalEngine(File reads, File ref, ReferenceOrderedData[] rods ) {
|
public TraversalEngine(File reads, File ref, ReferenceOrderedData[] rods ) {
|
||||||
readsFile = reads;
|
readsFile = reads;
|
||||||
refFileName = ref;
|
refFileName = ref;
|
||||||
this.rods = Arrays.asList(rods);
|
this.rods = Arrays.asList(rods);
|
||||||
}
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
protected int initialize() {
|
||||||
//
|
|
||||||
// Manipulating the underlying engine parameters
|
|
||||||
//
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
|
||||||
//public void setRegion(final String reg) { regionStr = regionStr; }
|
|
||||||
//public void setTraversalType(final String type) { traversalType = type; }
|
|
||||||
/** Sets the stringency stored in the {@code strictness} field. */
public void setStrictness( final ValidationStringency s ) {
    this.strictness = s;
}
|
|
||||||
/** Stores {@code maxReads} in the field of the same name. */
public void setMaxReads( final int maxReads ) {
    this.maxReads = maxReads;
}
|
|
||||||
/** Sets the {@code DEBUGGING} flag. */
public void setDebugging( final boolean d ) {
    this.DEBUGGING = d;
}
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// functions for dealing locations (areas of the genome we're traversing over)
|
|
||||||
//
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Parses the location string locStr and sets the traversal engine to only process
|
|
||||||
* regions specified by the location string. The string is of the form:
|
|
||||||
* Of the form: loc1;loc2;...
|
|
||||||
* Where each locN can be:
|
|
||||||
* Ôchr2Õ, Ôchr2:1000000Õ or Ôchr2:1,000,000-2,000,000Õ
|
|
||||||
*
|
|
||||||
* @param locStr
|
|
||||||
*/
|
|
||||||
public void setLocation( final String locStr ) {
|
|
||||||
this.locs = parseGenomeLocs(locStr);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Useful utility function that parses a location string into a coordinate-order sorted
|
|
||||||
* array of GenomeLoc objects
|
|
||||||
*
|
|
||||||
* @param str
|
|
||||||
* @return Array of GenomeLoc objects corresponding to the locations in the string, sorted by coordinate order
|
|
||||||
*/
|
|
||||||
public static GenomeLoc[] parseGenomeLocs( final String str ) {
|
|
||||||
// Of the form: loc1;loc2;...
|
|
||||||
// Where each locN can be:
|
|
||||||
// Ôchr2Õ, Ôchr2:1000000Õ or Ôchr2:1,000,000-2,000,000Õ
|
|
||||||
StdReflect reflect = new JdkStdReflect();
|
|
||||||
FunctionN<GenomeLoc> parseOne = reflect.staticFunction(GenomeLoc.class, "parseGenomeLoc", String.class);
|
|
||||||
Function1<GenomeLoc, String> f1 = parseOne.f1();
|
|
||||||
Collection<GenomeLoc> result = Functions.map(f1, Arrays.asList(str.split(";")));
|
|
||||||
GenomeLoc[] locs = (GenomeLoc[])result.toArray(new GenomeLoc[0]);
|
|
||||||
|
|
||||||
Arrays.sort(locs);
|
|
||||||
for ( GenomeLoc l : locs )
|
|
||||||
System.out.printf(" -> %s%n", l);
|
|
||||||
|
|
||||||
System.out.printf(" Locations are: %s%n", Utils.join(" ", Functions.map( Operators.toString, Arrays.asList(locs) ) ) );
|
|
||||||
|
|
||||||
return locs;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A key function that returns true if the proposed GenomeLoc curr is within the list of
|
|
||||||
* locations we are processing in this TraversalEngine
|
|
||||||
*
|
|
||||||
* @param curr
|
|
||||||
* @return true if we should process GenomeLoc curr, otherwise false
|
|
||||||
*/
|
|
||||||
public boolean inLocations( GenomeLoc curr ) {
|
|
||||||
if ( this.locs == null )
|
|
||||||
return true;
|
|
||||||
else {
|
|
||||||
for ( GenomeLoc loc : this.locs ) {
|
|
||||||
//System.out.printf(" Overlap %s vs. %s => %b%n", loc, curr, loc.overlapsP(curr));
|
|
||||||
if ( loc.overlapsP(curr) )
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns true iff we have a specified series of locations to process AND we are past the last
|
|
||||||
* location in the list. It means that, in a serial processing of the genome, that we are done.
|
|
||||||
*
|
|
||||||
* @param curr Current genome Location
|
|
||||||
* @return true if we are past the last location to process
|
|
||||||
*/
|
|
||||||
private boolean pastFinalLocation( GenomeLoc curr ) {
|
|
||||||
boolean r = locs != null && locs[locs.length-1].compareTo( curr ) == -1 && ! locs[locs.length-1].overlapsP(curr);
|
|
||||||
//System.out.printf(" pastFinalLocation %s vs. %s => %d => %b%n", locs[locs.length-1], curr, locs[locs.length-1].compareTo( curr ), r);
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// printing
|
|
||||||
//
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @param curTime (current runtime, in millisecs)
|
|
||||||
* @return true if the maximum interval (in millisecs) has passed since the last printing
|
|
||||||
*/
|
|
||||||
private boolean maxElapsedIntervalForPrinting(final long curTime) {
|
|
||||||
return (curTime - this.lastProgressPrintTime) > MAX_PROGRESS_PRINT_TIME;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Forward request to printProgress
|
|
||||||
*
|
|
||||||
* @param type
|
|
||||||
* @param loc
|
|
||||||
*/
|
|
||||||
public void printProgress(final String type, GenomeLoc loc) {
|
|
||||||
printProgress( false, type, loc );
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Utility routine that prints out process information (including timing) every N records or
|
|
||||||
* every M seconds, for N and M set in global variables.
|
|
||||||
*
|
|
||||||
* @param mustPrint If true, will print out info, regardless of nRecords or time interval
|
|
||||||
* @param type String to print out describing our atomic traversal type ("read", "locus", etc)
|
|
||||||
* @param loc Current location
|
|
||||||
*/
|
|
||||||
public void printProgress( boolean mustPrint, final String type, GenomeLoc loc ) {
|
|
||||||
final long nRecords = this.nRecords;
|
|
||||||
final long curTime = System.currentTimeMillis();
|
|
||||||
final double elapsed = (curTime - startTime) / 1000.0;
|
|
||||||
//System.out.printf("Cur = %d, last print = %d%n", curTime, lastProgressPrintTime);
|
|
||||||
|
|
||||||
if ( mustPrint || nRecords % N_RECORDS_TO_PRINT == 0 || maxElapsedIntervalForPrinting(curTime)) {
|
|
||||||
this.lastProgressPrintTime = curTime;
|
|
||||||
final double secsPer1MReads = (elapsed * 1000000.0) / nRecords;
|
|
||||||
if ( loc != null )
|
|
||||||
System.out.printf("[PROGRESS] Traversed to %s, processing %,d %s in %.2f secs (%.2f secs per 1M %s)%n", loc, nRecords, type, elapsed, secsPer1MReads, type);
|
|
||||||
else
|
|
||||||
System.out.printf("[PROGRESS] Traversed %,d %s in %.2f secs (%.2f secs per 1M %s)%n", nRecords, type, elapsed, secsPer1MReads, type);
|
|
||||||
|
|
||||||
// Currently samReadingTracker will print misleading info if we're not processing the whole file
|
|
||||||
if ( this.locs == null )
|
|
||||||
System.out.printf("[PROGRESS] -> %s%n", samReadingTracker.progressMeter());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Called after a traversal to print out information about the traversal process
|
|
||||||
*
|
|
||||||
* @param type String describing this type of traversal ("loci", "read")
|
|
||||||
* @param sum The reduce result of the traversal
|
|
||||||
* @param <T> ReduceType of the traversal
|
|
||||||
*/
|
|
||||||
protected <T> void printOnTraversalDone( final String type, T sum ) {
|
|
||||||
printProgress( true, type, null );
|
|
||||||
System.out.println("Traversal reduce result is " + sum);
|
|
||||||
System.out.printf("Traversal skipped %d reads out of %d total (%.2f%%)%n", nSkippedReads, nReads, (nSkippedReads * 100.0) / nReads);
|
|
||||||
System.out.printf(" -> %d unmapped reads%n", nUnmappedReads );
|
|
||||||
System.out.printf(" -> %d non-primary reads%n", nNotPrimary );
|
|
||||||
System.out.printf(" -> %d reads with bad alignments%n", nBadAlignments );
|
|
||||||
System.out.printf(" -> %d reads with indels%n", nSkippedIndels );
|
|
||||||
}
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Initialization
|
|
||||||
//
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize the traversal engine. After this point traversals can be run over the data
|
|
||||||
*
|
|
||||||
* @return true on success
|
|
||||||
*/
|
|
||||||
public boolean initialize() {
|
|
||||||
lastProgressPrintTime = startTime = System.currentTimeMillis();
|
lastProgressPrintTime = startTime = System.currentTimeMillis();
|
||||||
loadReference();
|
loadReference();
|
||||||
//testReference();
|
//testReference();
|
||||||
|
|
@ -280,14 +86,105 @@ public class TraversalEngine {
|
||||||
throw new RuntimeIOException(e);
|
throw new RuntimeIOException(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Stores the region string.
 *
 * @param reg the new value for regionStr
 */
public void setRegion(final String reg) {
    // Previously assigned regionStr to itself, silently ignoring the argument
    regionStr = reg;
}
|
||||||
|
/**
 * Stores the traversal type.
 *
 * @param type the new value for traversalType
 */
public void setTraversalType(final String type) {
    traversalType = type;
}
|
||||||
|
/**
 * Stores the validation stringency.
 *
 * @param s the new value for strictness
 */
public void setStrictness( final ValidationStringency s ) {
    strictness = s;
}
|
||||||
|
/**
 * Stores the maximum number of reads.
 *
 * @param maxReads the new value for the maxReads field
 */
public void setMaxReads( final int maxReads ) {
    this.maxReads = maxReads;
}
|
||||||
|
/**
 * Stores the debugging flag.
 *
 * @param d the new value for DEBUGGING
 */
public void setDebugging( final boolean d ) {
    DEBUGGING = d;
}
|
||||||
|
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// functions for dealing locations (areas of the genome we're traversing over)
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
public void setLocation( final String locStr ) {
|
||||||
|
this.locs = parseGenomeLocs(locStr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static GenomeLoc[] parseGenomeLocs( final String str ) {
|
||||||
|
// Of the form: loc1;loc2;...
|
||||||
|
// Where each locN can be:
|
||||||
|
// Ôchr2Õ, Ôchr2:1000000Õ or Ôchr2:1,000,000-2,000,000Õ
|
||||||
|
StdReflect reflect = new JdkStdReflect();
|
||||||
|
FunctionN<GenomeLoc> parseOne = reflect.staticFunction(GenomeLoc.class, "parseGenomeLoc", String.class);
|
||||||
|
Function1<GenomeLoc, String> f1 = parseOne.f1();
|
||||||
|
Collection<GenomeLoc> result = Functions.map(f1, Arrays.asList(str.split(";")));
|
||||||
|
GenomeLoc[] locs = (GenomeLoc[])result.toArray(new GenomeLoc[0]);
|
||||||
|
|
||||||
|
Arrays.sort(locs);
|
||||||
|
for ( GenomeLoc l : locs )
|
||||||
|
System.out.printf(" -> %s%n", l);
|
||||||
|
|
||||||
|
System.out.printf(" Locations are: %s%n", Utils.join(" ", Functions.map( Operators.toString, Arrays.asList(locs) ) ) );
|
||||||
|
|
||||||
|
return locs;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean inLocations( GenomeLoc curr ) {
|
||||||
|
if ( this.locs == null )
|
||||||
|
return true;
|
||||||
|
else {
|
||||||
|
for ( GenomeLoc loc : this.locs ) {
|
||||||
|
//System.out.printf(" Overlap %s vs. %s => %b%n", loc, curr, loc.overlapsP(curr));
|
||||||
|
if ( loc.overlapsP(curr) )
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean pastFinalLocation( GenomeLoc curr ) {
|
||||||
|
boolean r = locs != null && locs[locs.length-1].compareTo( curr ) == -1 && ! locs[locs.length-1].overlapsP(curr);
|
||||||
|
//System.out.printf(" pastFinalLocation %s vs. %s => %d => %b%n", locs[locs.length-1], curr, locs[locs.length-1].compareTo( curr ), r);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// functions for dealing with the reference sequence
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
/**
|
/**
|
||||||
* Prepare the reference for stream processing
|
|
||||||
*
|
*
|
||||||
|
* @param curTime (current runtime, in millisecs)
|
||||||
|
* @return true if the maximum interval (in millisecs) has passed since the last printing
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
private boolean maxElapsedIntervalForPrinting(final long curTime) {
|
||||||
|
return (curTime - this.lastProgressPrintTime) > MAX_PROGRESS_PRINT_TIME;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Convenience overload that reports progress without forcing output: it delegates to
 * the three-argument printProgress with mustPrint set to false.
 *
 * @param type string describing the traversal type
 * @param loc  current location
 */
public void printProgress(final String type, GenomeLoc loc) {
    final boolean mustPrint = false;
    this.printProgress( mustPrint, type, loc );
}
|
||||||
|
|
||||||
|
public void printProgress( boolean mustPrint, final String type, GenomeLoc loc ) {
|
||||||
|
final long nRecords = this.nRecords;
|
||||||
|
final long curTime = System.currentTimeMillis();
|
||||||
|
final double elapsed = (curTime - startTime) / 1000.0;
|
||||||
|
//System.out.printf("Cur = %d, last print = %d%n", curTime, lastProgressPrintTime);
|
||||||
|
|
||||||
|
if ( mustPrint || nRecords % 100000 == 0 || maxElapsedIntervalForPrinting(curTime)) {
|
||||||
|
this.lastProgressPrintTime = curTime;
|
||||||
|
final double secsPer1MReads = (elapsed * 1000000.0) / nRecords;
|
||||||
|
if ( loc != null )
|
||||||
|
System.out.printf("[PROGRESS] Traversed to %s, processing %d %s %.2f secs (%.2f secs per 1M %s)%n", loc, nRecords, type, elapsed, secsPer1MReads, type);
|
||||||
|
else
|
||||||
|
System.out.printf("[PROGRESS] Traversed %d %s %.2f secs (%.2f secs per 1M %s)%n", nRecords, type, elapsed, secsPer1MReads, type);
|
||||||
|
|
||||||
|
if ( this.locs == null )
|
||||||
|
System.out.printf("[PROGRESS] -> %s%n", samReadingTracker.progressMeter());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// functions for dealing with the reference sequence
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
protected void loadReference() {
|
protected void loadReference() {
|
||||||
if ( refFileName!= null ) {
|
if ( refFileName!= null ) {
|
||||||
this.refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFileName);
|
this.refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFileName);
|
||||||
|
|
@ -296,11 +193,29 @@ public class TraversalEngine {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
protected void testReference() {
|
||||||
* Prepare the list of reference ordered data iterators for each of the rods
|
String line = "";
|
||||||
*
|
refIter.seekForward("chr20", 79);
|
||||||
* @return A list of ROD iterators for getting data from each ROD
|
for ( int i = 0; i < this.maxReads && refIter.hasNext(); i++ ) {
|
||||||
*/
|
final ReferenceIterator refSite = refIter.next();
|
||||||
|
final char refBase = refSite.getBaseAsChar();
|
||||||
|
line += refBase;
|
||||||
|
if ( (i + 1) % 80 == 0 ) {
|
||||||
|
System.out.println(line);
|
||||||
|
line = "";
|
||||||
|
}
|
||||||
|
//System.out.printf(" Reference: %s:%d %c%n", refSite.getCurrentContig().getName(), refSite.getPosition(), refBase);
|
||||||
|
}
|
||||||
|
System.out.println(line);
|
||||||
|
System.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// dealing with reference ordered data
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
protected List<ReferenceOrderedData.RODIterator> initializeRODs() {
|
protected List<ReferenceOrderedData.RODIterator> initializeRODs() {
|
||||||
// set up reference ordered data
|
// set up reference ordered data
|
||||||
List<ReferenceOrderedData.RODIterator> rodIters = new ArrayList<ReferenceOrderedData.RODIterator>();
|
List<ReferenceOrderedData.RODIterator> rodIters = new ArrayList<ReferenceOrderedData.RODIterator>();
|
||||||
|
|
@ -310,38 +225,6 @@ public class TraversalEngine {
|
||||||
return rodIters;
|
return rodIters;
|
||||||
}
|
}
|
||||||
|
|
||||||
// protected void testReference() {
|
|
||||||
// String line = "";
|
|
||||||
// refIter.seekForward("chr20", 79);
|
|
||||||
// for ( int i = 0; i < this.maxReads && refIter.hasNext(); i++ ) {
|
|
||||||
// final ReferenceIterator refSite = refIter.next();
|
|
||||||
// final char refBase = refSite.getBaseAsChar();
|
|
||||||
// line += refBase;
|
|
||||||
// if ( (i + 1) % 80 == 0 ) {
|
|
||||||
// System.out.println(line);
|
|
||||||
// line = "";
|
|
||||||
// }
|
|
||||||
// //System.out.printf(" Reference: %s:%d %c%n", refSite.getCurrentContig().getName(), refSite.getPosition(), refBase);
|
|
||||||
// }
|
|
||||||
// System.out.println(line);
|
|
||||||
// System.exit(1);
|
|
||||||
// }
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// dealing with reference ordered data
|
|
||||||
//
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Builds a list of the reference ordered datum at loc from each of the iterators. This function
|
|
||||||
* assumes you are accessing the data in order. You can't use this function for random access. Each
|
|
||||||
* successive call moves you along the file, consuming all data before loc.
|
|
||||||
*
|
|
||||||
* @param rodIters Iterators to access the RODs
|
|
||||||
* @param loc The location to get the rods at
|
|
||||||
* @return A list of ReferenceOrderDatum at loc. ROD without a datum at loc will be null in the list
|
|
||||||
*/
|
|
||||||
protected List<ReferenceOrderedDatum> getReferenceOrderedDataAtLocus(List<ReferenceOrderedData.RODIterator> rodIters,
|
protected List<ReferenceOrderedDatum> getReferenceOrderedDataAtLocus(List<ReferenceOrderedData.RODIterator> rodIters,
|
||||||
final GenomeLoc loc) {
|
final GenomeLoc loc) {
|
||||||
List<ReferenceOrderedDatum> data = new ArrayList<ReferenceOrderedDatum>();
|
List<ReferenceOrderedDatum> data = new ArrayList<ReferenceOrderedDatum>();
|
||||||
|
|
@ -357,13 +240,6 @@ public class TraversalEngine {
|
||||||
// traversal by loci functions
|
// traversal by loci functions
|
||||||
//
|
//
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
/**
|
|
||||||
* Class to filter out un-handle-able reads from the stream. We currently are skipping
|
|
||||||
* unmapped reads, non-primary reads, unaligned reads, and those with indels. We should
|
|
||||||
* really change this to handle indel containing reads.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
class locusStreamFilterFunc implements SamRecordFilter {
|
class locusStreamFilterFunc implements SamRecordFilter {
|
||||||
public boolean filterOut(SAMRecord rec) {
|
public boolean filterOut(SAMRecord rec) {
|
||||||
boolean result = false;
|
boolean result = false;
|
||||||
|
|
@ -404,30 +280,15 @@ public class TraversalEngine {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Traverse by loci -- the key driver of linearly ordered traversal of loci. Provides reads, RODs, and
|
|
||||||
* the reference base for each locus in the reference to the LocusWalker walker. Supports all of the
|
|
||||||
* interaction contract implied by the locus walker
|
|
||||||
*
|
|
||||||
* @param walker A locus walker object
|
|
||||||
* @param <M> MapType -- the result of calling map() on walker
|
|
||||||
* @param <T> ReduceType -- the result of calling reduce() on the walker
|
|
||||||
* @return 0 on success
|
|
||||||
*/
|
|
||||||
protected <M,T> int traverseByLoci(LocusWalker<M,T> walker) {
|
protected <M,T> int traverseByLoci(LocusWalker<M,T> walker) {
|
||||||
// prepare the read filtering read iterator and provide it to a new locus iterator
|
walker.initialize();
|
||||||
FilteringIterator filterIter = new FilteringIterator(samReadIter, new locusStreamFilterFunc());
|
FilteringIterator filterIter = new FilteringIterator(samReadIter, new locusStreamFilterFunc());
|
||||||
CloseableIterator<LocusIterator> iter = new LocusIterator(filterIter);
|
CloseableIterator<LocusIterator> iter = new LocusIterator(filterIter);
|
||||||
|
|
||||||
// Initial the reference ordered data iterators
|
|
||||||
List<ReferenceOrderedData.RODIterator> rodIters = initializeRODs();
|
List<ReferenceOrderedData.RODIterator> rodIters = initializeRODs();
|
||||||
|
|
||||||
// initialize the walker object
|
|
||||||
walker.initialize();
|
|
||||||
// Initialize the T sum using the walker
|
|
||||||
T sum = walker.reduceInit();
|
T sum = walker.reduceInit();
|
||||||
boolean done = false;
|
boolean done = false;
|
||||||
|
|
||||||
while ( iter.hasNext() && ! done ) {
|
while ( iter.hasNext() && ! done ) {
|
||||||
this.nRecords++;
|
this.nRecords++;
|
||||||
|
|
||||||
|
|
@ -436,20 +297,13 @@ public class TraversalEngine {
|
||||||
|
|
||||||
// Poor man's version of index LOL
|
// Poor man's version of index LOL
|
||||||
if ( inLocations(locus.getLocation()) ) {
|
if ( inLocations(locus.getLocation()) ) {
|
||||||
|
final ReferenceIterator refSite = refIter.seekForward(locus.getContig(), locus.getPosition());
|
||||||
// Jump forward in the reference to this locus location
|
|
||||||
final ReferenceIterator refSite = refIter.seekForward(locus.getLocation());
|
|
||||||
final char refBase = refSite.getBaseAsChar();
|
final char refBase = refSite.getBaseAsChar();
|
||||||
|
|
||||||
// Iterate forward to get all reference ordered data covering this locus
|
|
||||||
final List<ReferenceOrderedDatum> rodData = getReferenceOrderedDataAtLocus(rodIters, locus.getLocation());
|
final List<ReferenceOrderedDatum> rodData = getReferenceOrderedDataAtLocus(rodIters, locus.getLocation());
|
||||||
|
|
||||||
if ( DEBUGGING )
|
if ( DEBUGGING )
|
||||||
System.out.printf(" Reference: %s:%d %c%n", refSite.getCurrentContig().getName(), refSite.getPosition(), refBase);
|
System.out.printf(" Reference: %s:%d %c%n", refSite.getCurrentContig().getName(), refSite.getPosition(), refBase);
|
||||||
|
|
||||||
//
|
|
||||||
// Execute our contract with the walker. Call filter, map, and reduce
|
|
||||||
//
|
|
||||||
final boolean keepMeP = walker.filter(rodData, refBase, locus);
|
final boolean keepMeP = walker.filter(rodData, refBase, locus);
|
||||||
if ( keepMeP ) {
|
if ( keepMeP ) {
|
||||||
M x = walker.map(rodData, refBase, locus);
|
M x = walker.map(rodData, refBase, locus);
|
||||||
|
|
@ -469,43 +323,37 @@ public class TraversalEngine {
|
||||||
done = true;
|
done = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
printOnTraversalDone("loci", sum);
|
printProgress( true, "loci", null );
|
||||||
|
System.out.println("Traversal reduce result is " + sum);
|
||||||
|
System.out.printf("Traversal skipped %d reads out of %d total (%.2f%%)%n", nSkippedReads, nReads, (nSkippedReads * 100.0) / nReads);
|
||||||
|
System.out.printf(" -> %d unmapped reads%n", nUnmappedReads );
|
||||||
|
System.out.printf(" -> %d non-primary reads%n", nNotPrimary );
|
||||||
|
System.out.printf(" -> %d reads with bad alignments%n", nBadAlignments );
|
||||||
|
System.out.printf(" -> %d reads with indels%n", nSkippedIndels );
|
||||||
walker.onTraveralDone();
|
walker.onTraveralDone();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
* Traverse by read -- the key driver of linearly ordered traversal of reads. Provides a single read to
|
//
|
||||||
* the walker object, in coordinate order. Supports all of the
|
// traversal by read functions
|
||||||
* interaction contract implied by the read walker
|
//
|
||||||
*
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
* @param walker A read walker object
|
|
||||||
* @param <M> MapType -- the result of calling map() on walker
|
|
||||||
* @param <T> ReduceType -- the result of calling reduce() on the walker
|
|
||||||
* @return 0 on success
|
|
||||||
*/
|
|
||||||
protected <M,R> int traverseByRead(ReadWalker<M,R> walker) {
|
protected <M,R> int traverseByRead(ReadWalker<M,R> walker) {
|
||||||
|
|
||||||
// Initialize the walker
|
|
||||||
walker.initialize();
|
walker.initialize();
|
||||||
|
|
||||||
// Initialize the sum
|
|
||||||
R sum = walker.reduceInit();
|
R sum = walker.reduceInit();
|
||||||
|
|
||||||
boolean done = false;
|
boolean done = false;
|
||||||
while ( samReadIter.hasNext() && ! done ) {
|
while ( samReadIter.hasNext() && ! done ) {
|
||||||
this.nRecords++;
|
this.nRecords++;
|
||||||
|
|
||||||
// get the next read
|
// actually get the read and hand it to the walker
|
||||||
final SAMRecord read = samReadIter.next();
|
final SAMRecord read = samReadIter.next();
|
||||||
GenomeLoc loc = new GenomeLoc(read.getReferenceName(), read.getAlignmentStart());
|
GenomeLoc loc = new GenomeLoc(read.getReferenceName(), read.getAlignmentStart());
|
||||||
|
|
||||||
if ( inLocations(loc) ) {
|
if ( inLocations(loc) ) {
|
||||||
|
|
||||||
//
|
|
||||||
// execute the walker contact
|
|
||||||
//
|
|
||||||
final boolean keepMeP = walker.filter(null, read);
|
final boolean keepMeP = walker.filter(null, read);
|
||||||
|
|
||||||
if ( keepMeP ) {
|
if ( keepMeP ) {
|
||||||
M x = walker.map(null, read);
|
M x = walker.map(null, read);
|
||||||
sum = walker.reduce(x, sum);
|
sum = walker.reduce(x, sum);
|
||||||
|
|
@ -513,17 +361,19 @@ public class TraversalEngine {
|
||||||
|
|
||||||
if ( this.maxReads > 0 && this.nRecords > this.maxReads ) {
|
if ( this.maxReads > 0 && this.nRecords > this.maxReads ) {
|
||||||
System.out.println("Maximum number of reads encountered, terminating traversal " + this.nRecords);
|
System.out.println("Maximum number of reads encountered, terminating traversal " + this.nRecords);
|
||||||
done = true;
|
break;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
printProgress("reads", loc);
|
|
||||||
|
|
||||||
|
printProgress("reads", loc);
|
||||||
|
}
|
||||||
|
|
||||||
if ( pastFinalLocation(loc) )
|
if ( pastFinalLocation(loc) )
|
||||||
done = true;
|
done = true;
|
||||||
//System.out.printf("Done? %b%n", done);
|
//System.out.printf("Done? %b%n", done);
|
||||||
}
|
}
|
||||||
|
|
||||||
printOnTraversalDone("reads", sum);
|
printProgress( true, "reads", null );
|
||||||
|
System.out.println("Traversal reduce result is " + sum);
|
||||||
walker.onTraveralDone();
|
walker.onTraveralDone();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
package edu.mit.broad.sting.atk.modules;
|
package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import edu.mit.broad.sting.atk.ReadWalker;
|
import org.broadinstitute.sting.atk.ReadWalker;
|
||||||
import edu.mit.broad.sting.atk.LocusContext;
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -27,7 +27,7 @@ public class BaseQualityHistoWalker implements ReadWalker<Integer, Integer> {
|
||||||
return true; // We are keeping all the reads
|
return true; // We are keeping all the reads
|
||||||
}
|
}
|
||||||
|
|
||||||
// Map over the edu.mit.broad.sting.atk.LocusContext
|
// Map over the org.broadinstitute.sting.atk.LocusContext
|
||||||
public Integer map(LocusContext context, SAMRecord read) {
|
public Integer map(LocusContext context, SAMRecord read) {
|
||||||
for ( byte qual : read.getBaseQualities() ) {
|
for ( byte qual : read.getBaseQualities() ) {
|
||||||
//System.out.println(qual);
|
//System.out.println(qual);
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
package edu.mit.broad.sting.atk.modules;
|
package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import edu.mit.broad.sting.atk.LocusWalker;
|
import org.broadinstitute.sting.atk.LocusWalker;
|
||||||
import edu.mit.broad.sting.atk.LocusIterator;
|
import org.broadinstitute.sting.atk.LocusIterator;
|
||||||
import edu.mit.broad.sting.utils.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
package edu.mit.broad.sting.atk.modules;
|
package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import edu.mit.broad.sting.atk.LocusContext;
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
import edu.mit.broad.sting.atk.ReadWalker;
|
import org.broadinstitute.sting.atk.ReadWalker;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
package edu.mit.broad.sting.atk.modules;
|
package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import edu.mit.broad.sting.atk.LocusIterator;
|
import org.broadinstitute.sting.atk.LocusIterator;
|
||||||
import edu.mit.broad.sting.utils.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
@ -22,4 +22,4 @@ public class CountLociWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
public Integer reduce(Integer value, Integer sum) {
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
return value + sum;
|
return value + sum;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
package edu.mit.broad.sting.atk.modules;
|
package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import edu.mit.broad.sting.atk.LocusContext;
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
|
|
||||||
public class CountReadsWalker extends BasicReadWalker<Integer, Integer> {
|
public class CountReadsWalker extends BasicReadWalker<Integer, Integer> {
|
||||||
public Integer map(LocusContext context, SAMRecord read) {
|
public Integer map(LocusContext context, SAMRecord read) {
|
||||||
|
|
@ -1,10 +1,10 @@
|
||||||
package edu.mit.broad.sting.atk.modules;
|
package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import edu.mit.broad.sting.atk.LocusWalker;
|
import org.broadinstitute.sting.atk.LocusWalker;
|
||||||
import edu.mit.broad.sting.atk.LocusIterator;
|
import org.broadinstitute.sting.atk.LocusIterator;
|
||||||
import edu.mit.broad.sting.utils.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.utils.ReferenceOrderedDatum;
|
||||||
import edu.mit.broad.sting.utils.rodDbSNP;
|
import org.broadinstitute.sting.utils.rodDbSNP;
|
||||||
import edu.mit.broad.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -27,7 +27,7 @@ public class PileupWalker implements LocusWalker<Integer, Integer> {
|
||||||
return true; // We are keeping all the reads
|
return true; // We are keeping all the reads
|
||||||
}
|
}
|
||||||
|
|
||||||
// Map over the edu.mit.broad.sting.atk.LocusContext
|
// Map over the org.broadinstitute.sting.atk.LocusContext
|
||||||
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
||||||
//System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size());
|
//System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size());
|
||||||
//for ( SAMRecord read : context.getReads() ) {
|
//for ( SAMRecord read : context.getReads() ) {
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
package edu.mit.broad.sting.atk.modules;
|
package org.broadinstitute.sting.atk.modules;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import edu.mit.broad.sting.atk.LocusContext;
|
import org.broadinstitute.sting.atk.LocusContext;
|
||||||
|
|
||||||
public class PrintReadsWalker extends BasicReadWalker<Integer, Integer> {
|
public class PrintReadsWalker extends BasicReadWalker<Integer, Integer> {
|
||||||
public Integer map(LocusContext context, SAMRecord read) {
|
public Integer map(LocusContext context, SAMRecord read) {
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package edu.mit.broad.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package edu.mit.broad.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package edu.mit.broad.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package edu.mit.broad.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -7,7 +7,7 @@
|
||||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||||
*/
|
*/
|
||||||
package edu.mit.broad.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package edu.mit.broad.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
|
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
|
||||||
import edu.mit.broad.picard.reference.ReferenceSequence;
|
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package edu.mit.broad.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package edu.mit.broad.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package edu.mit.broad.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
|
@ -108,93 +108,4 @@ public class Utils {
|
||||||
|
|
||||||
GenomeLoc.setContigOrdering(refContigOrdering);
|
GenomeLoc.setContigOrdering(refContigOrdering);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Java Generics can't do primitive types, so I had to do this the simplistic way
|
|
||||||
|
|
||||||
public static Integer[] SortPermutation(final int[] A)
|
|
||||||
{
|
|
||||||
class comparator implements Comparator
|
|
||||||
{
|
|
||||||
public int compare(Object a, Object b)
|
|
||||||
{
|
|
||||||
if (A[(Integer)a] < A[(Integer)b]) { return -1; }
|
|
||||||
if (A[(Integer)a] == A[(Integer)b]) { return 0; }
|
|
||||||
if (A[(Integer)a] > A[(Integer)b]) { return 1; }
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Integer[] permutation = new Integer[A.length];
|
|
||||||
for (int i = 0; i < A.length; i++)
|
|
||||||
{
|
|
||||||
permutation[i] = i;
|
|
||||||
}
|
|
||||||
Arrays.sort(permutation, new comparator());
|
|
||||||
return permutation;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Integer[] SortPermutation(final double[] A)
|
|
||||||
{
|
|
||||||
class comparator implements Comparator
|
|
||||||
{
|
|
||||||
public int compare(Object a, Object b)
|
|
||||||
{
|
|
||||||
if (A[(Integer)a] < A[(Integer)b]) { return -1; }
|
|
||||||
if (A[(Integer)a] == A[(Integer)b]) { return 0; }
|
|
||||||
if (A[(Integer)a] > A[(Integer)b]) { return 1; }
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Integer[] permutation = new Integer[A.length];
|
|
||||||
for (int i = 0; i < A.length; i++)
|
|
||||||
{
|
|
||||||
permutation[i] = i;
|
|
||||||
}
|
|
||||||
Arrays.sort(permutation, new comparator());
|
|
||||||
return permutation;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static int[] PermuteArray(int[] array, Integer[] permutation)
|
|
||||||
{
|
|
||||||
int[] output = new int[array.length];
|
|
||||||
for (int i = 0; i < output.length; i++)
|
|
||||||
{
|
|
||||||
output[i] = array[permutation[i]];
|
|
||||||
}
|
|
||||||
return output;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static double[] PermuteArray(double[] array, Integer[] permutation)
|
|
||||||
{
|
|
||||||
double[] output = new double[array.length];
|
|
||||||
for (int i = 0; i < output.length; i++)
|
|
||||||
{
|
|
||||||
output[i] = array[permutation[i]];
|
|
||||||
}
|
|
||||||
return output;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Object[] PermuteArray(Object[] array, Integer[] permutation)
|
|
||||||
{
|
|
||||||
Object[] output = new Object[array.length];
|
|
||||||
for (int i = 0; i < output.length; i++)
|
|
||||||
{
|
|
||||||
output[i] = array[permutation[i]];
|
|
||||||
}
|
|
||||||
return output;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String[] PermuteArray(String[] array, Integer[] permutation)
|
|
||||||
{
|
|
||||||
String[] output = new String[array.length];
|
|
||||||
for (int i = 0; i < output.length; i++)
|
|
||||||
{
|
|
||||||
output[i] = array[permutation[i]];
|
|
||||||
}
|
|
||||||
return output;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -7,7 +7,7 @@
|
||||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||||
*/
|
*/
|
||||||
package edu.mit.broad.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import edu.mit.broad.picard.util.BasicTextFileParser;
|
import edu.mit.broad.picard.util.BasicTextFileParser;
|
||||||
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package edu.mit.broad.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package edu.mit.broad.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
|
|
@ -0,0 +1,2 @@
|
||||||
|
java -Xmx40000m -cp ../java/dist/AnalysisTK.jar org.broadinstitute.sting.atk.PrepareROD REF_FILE_ARG=/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta ROD_FILE=/seq/references/dbsnp/downloads/snp129_hg18.txt OUT=`echo $1/snp129_hg18.txt.rod` ROD_TYPE=dbSNP ROD_NAME=dbSNP
|
||||||
|
|
||||||
|
|
@ -1,2 +0,0 @@
|
||||||
java -Xmx40000m -cp out/production/AnalysisTK:trunk/java/jars/functionalj.jar edu.mit.broad.sting.atk.PrepareROD REF_FILE_ARG=/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta ROD_FILE=/seq/references/dbsnp/downloads/snp129_hg18.txt OUT=`echo $1/snp129_hg18.txt.rod` ROD_TYPE=dbSNP ROD_NAME=dbSNP
|
|
||||||
|
|
||||||
Loading…
Reference in New Issue