Merge remote-tracking branch 'unstable/master'
This commit is contained in:
commit
d7b3f40259
34
build.xml
34
build.xml
|
|
@ -70,8 +70,6 @@
|
||||||
|
|
||||||
<property environment="env"/>
|
<property environment="env"/>
|
||||||
|
|
||||||
<property name="drmaa.jar" value="${env.SGE_ROOT}/${lib.dir}/drmaa.jar" />
|
|
||||||
|
|
||||||
<patternset id="java.source.pattern">
|
<patternset id="java.source.pattern">
|
||||||
<include name="${java.public.source.dir}/**/*.java" />
|
<include name="${java.public.source.dir}/**/*.java" />
|
||||||
<include name="${java.private.source.dir}/**/*.java" if="include.private" />
|
<include name="${java.private.source.dir}/**/*.java" if="include.private" />
|
||||||
|
|
@ -147,11 +145,7 @@
|
||||||
<property name="init.resolve.done" value="true"/>
|
<property name="init.resolve.done" value="true"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="init.gridengine" depends="init" if="include.gridengine">
|
<target name="resolve" depends="init.resolve,init"
|
||||||
<copy todir="${lib.dir}" file="${drmaa.jar}"/>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<target name="resolve" depends="init.resolve,init,init.gridengine"
|
|
||||||
description="locate and download library dependencies">
|
description="locate and download library dependencies">
|
||||||
<property name="ivy.conf" value="default"/>
|
<property name="ivy.conf" value="default"/>
|
||||||
<ivy:retrieve file="ivy.xml" conf="${ivy.conf}" />
|
<ivy:retrieve file="ivy.xml" conf="${ivy.conf}" />
|
||||||
|
|
@ -179,13 +173,23 @@
|
||||||
<property name="build.version" value="${git.describe.output}" />
|
<property name="build.version" value="${git.describe.output}" />
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="untagged.build.version" depends="git.describe" unless="git.describe.succeeded">
|
<target name="git.rev-parse" depends="git.describe" unless="git.describe.succeeded">
|
||||||
<exec executable="git" outputproperty="build.version" failonerror="true">
|
<exec executable="git" outputproperty="git.rev-parse.output" resultproperty="git.rev-parse.exit.value" failonerror="false">
|
||||||
<arg line="rev-parse HEAD" />
|
<arg line="rev-parse HEAD" />
|
||||||
</exec>
|
</exec>
|
||||||
|
<condition property="git.rev-parse.succeeded">
|
||||||
|
<equals arg1="${git.rev-parse.exit.value}" arg2="0" />
|
||||||
|
</condition>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="generate.build.version" depends="tagged.build.version, untagged.build.version" />
|
<target name="untagged.build.version" depends="git.rev-parse" if="git.rev-parse.succeeded">
|
||||||
|
<property name="build.version" value="${git.rev-parse.output}" />
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="generate.build.version" depends="tagged.build.version, untagged.build.version">
|
||||||
|
<!-- Set build.version to exported if no other value has been set -->
|
||||||
|
<property name="build.version" value="exported" />
|
||||||
|
</target>
|
||||||
|
|
||||||
<!-- define some key locations that might change based on how the build is run -->
|
<!-- define some key locations that might change based on how the build is run -->
|
||||||
<target name="init" depends="generate.build.version">
|
<target name="init" depends="generate.build.version">
|
||||||
|
|
@ -215,12 +219,6 @@
|
||||||
</or>
|
</or>
|
||||||
</condition>
|
</condition>
|
||||||
|
|
||||||
<!-- Include Grid Engine in the compile if SGE_ROOT is available. -->
|
|
||||||
<!-- Based off of http://wikis.sun.com/display/GridEngine/Automating+Grid+Engine+Functions+Through+DRMAA -->
|
|
||||||
<condition property="include.gridengine">
|
|
||||||
<available file="${drmaa.jar}"/>
|
|
||||||
</condition>
|
|
||||||
|
|
||||||
<echo message="GATK build : ${gatk.target}"/>
|
<echo message="GATK build : ${gatk.target}"/>
|
||||||
<echo message="Scala build : ${scala.target}"/>
|
<echo message="Scala build : ${scala.target}"/>
|
||||||
<echo message="source revision : ${build.version}"/>
|
<echo message="source revision : ${build.version}"/>
|
||||||
|
|
@ -371,7 +369,6 @@
|
||||||
<src path="${scala.public.source.dir}" />
|
<src path="${scala.public.source.dir}" />
|
||||||
<src path="${queue-extensions.source.dir}" />
|
<src path="${queue-extensions.source.dir}" />
|
||||||
<include name="**/*.scala"/>
|
<include name="**/*.scala"/>
|
||||||
<exclude name="**/gridengine/**" unless="include.gridengine" />
|
|
||||||
</scalac>
|
</scalac>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
|
|
@ -388,7 +385,6 @@
|
||||||
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.classes}" classpathref="scala.dependencies" deprecation="yes" unchecked="yes">
|
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.classes}" classpathref="scala.dependencies" deprecation="yes" unchecked="yes">
|
||||||
<src path="${scala.private.source.dir}" />
|
<src path="${scala.private.source.dir}" />
|
||||||
<include name="**/*.scala"/>
|
<include name="**/*.scala"/>
|
||||||
<exclude name="**/gridengine/**" unless="include.gridengine" />
|
|
||||||
</scalac>
|
</scalac>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
|
|
@ -696,7 +692,6 @@
|
||||||
<src path="${scala.public.test.sources}" />
|
<src path="${scala.public.test.sources}" />
|
||||||
<src path="${scala.private.test.sources}" />
|
<src path="${scala.private.test.sources}" />
|
||||||
<include name="**/*.scala"/>
|
<include name="**/*.scala"/>
|
||||||
<exclude name="**/gridengine/**" unless="include.gridengine" />
|
|
||||||
<classpath>
|
<classpath>
|
||||||
<path refid="scala.dependencies"/>
|
<path refid="scala.dependencies"/>
|
||||||
<pathelement location="${scala.test.classes}"/>
|
<pathelement location="${scala.test.classes}"/>
|
||||||
|
|
@ -1046,6 +1041,7 @@
|
||||||
<delete dir="out"/>
|
<delete dir="out"/>
|
||||||
<delete dir="${build.dir}"/>
|
<delete dir="${build.dir}"/>
|
||||||
<delete dir="${lib.dir}"/>
|
<delete dir="${lib.dir}"/>
|
||||||
|
<delete dir="dump"/>
|
||||||
<delete dir="staging"/>
|
<delete dir="staging"/>
|
||||||
<delete dir="${dist.dir}"/>
|
<delete dir="${dist.dir}"/>
|
||||||
<delete dir="pipelinetests"/>
|
<delete dir="pipelinetests"/>
|
||||||
|
|
|
||||||
3
ivy.xml
3
ivy.xml
|
|
@ -48,6 +48,9 @@
|
||||||
<!-- Dependencies for amazon.com S3 support -->
|
<!-- Dependencies for amazon.com S3 support -->
|
||||||
<dependency org="net.java.dev.jets3t" name="jets3t" rev="0.8.0"/>
|
<dependency org="net.java.dev.jets3t" name="jets3t" rev="0.8.0"/>
|
||||||
|
|
||||||
|
<!-- Dependencies for GridEngine -->
|
||||||
|
<dependency org="net.sf.gridscheduler" name="drmaa" rev="latest.integration"/>
|
||||||
|
|
||||||
<!-- Scala dependancies -->
|
<!-- Scala dependancies -->
|
||||||
<dependency org="org.scala-lang" name="scala-compiler" rev="2.8.1"/>
|
<dependency org="org.scala-lang" name="scala-compiler" rev="2.8.1"/>
|
||||||
<dependency org="org.scala-lang" name="scala-library" rev="2.8.1"/>
|
<dependency org="org.scala-lang" name="scala-library" rev="2.8.1"/>
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,70 @@
|
||||||
|
#include "MainTools.h"
|
||||||
|
#include "Basevector.h"
|
||||||
|
#include "lookup/LookAlign.h"
|
||||||
|
#include "lookup/SerialQltout.h"
|
||||||
|
|
||||||
|
unsigned int MatchingEnd(look_align &la, vecbasevector &candidates, vecbasevector &ref) {
|
||||||
|
//la.PrintParseable(cout);
|
||||||
|
|
||||||
|
for (int i = 0; i < candidates.size(); i++) {
|
||||||
|
look_align newla = la;
|
||||||
|
|
||||||
|
if (newla.rc1) { candidates[i].ReverseComplement(); }
|
||||||
|
newla.ResetFromAlign(newla.a, candidates[i], ref[la.target_id]);
|
||||||
|
|
||||||
|
//newla.PrintParseable(cout, &candidates[i], &ref[newla.target_id]);
|
||||||
|
//cout << newla.Errors() << " " << la.Errors() << endl;
|
||||||
|
|
||||||
|
if (newla.Errors() == la.Errors()) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//FatalErr("Query id " + ToString(la.query_id) + " had no matches.");
|
||||||
|
|
||||||
|
return candidates.size() + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
RunTime();
|
||||||
|
|
||||||
|
BeginCommandArguments;
|
||||||
|
CommandArgument_String(ALIGNS);
|
||||||
|
CommandArgument_String(FASTB_END_1);
|
||||||
|
CommandArgument_String(FASTB_END_2);
|
||||||
|
CommandArgument_String(REFERENCE);
|
||||||
|
|
||||||
|
CommandArgument_String(ALIGNS_END_1_OUT);
|
||||||
|
CommandArgument_String(ALIGNS_END_2_OUT);
|
||||||
|
EndCommandArguments;
|
||||||
|
|
||||||
|
vecbasevector ref(REFERENCE);
|
||||||
|
vecbasevector reads1(FASTB_END_1);
|
||||||
|
vecbasevector reads2(FASTB_END_2);
|
||||||
|
|
||||||
|
ofstream aligns1stream(ALIGNS_END_1_OUT.c_str());
|
||||||
|
ofstream aligns2stream(ALIGNS_END_2_OUT.c_str());
|
||||||
|
|
||||||
|
basevector bv;
|
||||||
|
|
||||||
|
SerialQltout sqltout(ALIGNS);
|
||||||
|
look_align la;
|
||||||
|
while (sqltout.Next(la)) {
|
||||||
|
vecbasevector candidates(2);
|
||||||
|
candidates[0] = reads1[la.query_id];
|
||||||
|
candidates[1] = reads2[la.query_id];
|
||||||
|
|
||||||
|
unsigned int matchingend = MatchingEnd(la, candidates, ref);
|
||||||
|
if (matchingend < 2) {
|
||||||
|
bv = (matchingend == 0) ? reads1[la.query_id] : reads2[la.query_id];
|
||||||
|
|
||||||
|
//la.PrintParseable(cout, &bv, &ref[la.target_id]);
|
||||||
|
la.PrintParseable(((matchingend == 0) ? aligns1stream : aligns2stream), &bv, &ref[la.target_id]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
aligns1stream.close();
|
||||||
|
aligns2stream.close();
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
CXX=g++
|
||||||
|
CXXFLAGS=-g -Wall -O2 -m64 -fPIC
|
||||||
|
|
||||||
|
.cpp.o:
|
||||||
|
$(CXX) -c $(CXXFLAGS) -I$(BWA_HOME) -I$(JAVA_INCLUDE) $< -o $@
|
||||||
|
|
||||||
|
all: init lib
|
||||||
|
|
||||||
|
init:
|
||||||
|
@echo Please make sure the following platforms are set correctly on your machine.
|
||||||
|
@echo BWA_HOME=$(BWA_HOME)
|
||||||
|
@echo JAVA_INCLUDE=$(JAVA_INCLUDE)
|
||||||
|
@echo TARGET_LIB=$(TARGET_LIB)
|
||||||
|
@echo EXTRA_LIBS=$(EXTRA_LIBS)
|
||||||
|
@echo LIBTOOL_COMMAND=$(LIBTOOL_COMMAND)
|
||||||
|
|
||||||
|
lib: org_broadinstitute_sting_alignment_bwa_c_BWACAligner.o bwa_gateway.o
|
||||||
|
$(LIBTOOL_COMMAND) $? -o $(TARGET_LIB) -L$(BWA_HOME) -lbwacore $(EXTRA_LIBS)
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm *.o libbwa.*
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
#!/bin/sh
|
||||||
|
export BWA_HOME="/humgen/gsa-scr1/hanna/src/bwa-trunk/bwa"
|
||||||
|
export JAVA_INCLUDE="/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include -I/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include/linux"
|
||||||
|
export TARGET_LIB="libbwa.so"
|
||||||
|
export EXTRA_LIBS="-lc -lz -lstdc++ -lpthread"
|
||||||
|
export LIBTOOL_COMMAND="g++ -shared -Wl,-soname,libbwa.so"
|
||||||
|
make
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
#!/bin/sh
|
||||||
|
export BWA_HOME="/Users/mhanna/src/bwa"
|
||||||
|
export JAVA_INCLUDE="/System/Library/Frameworks/JavaVM.framework/Headers"
|
||||||
|
export TARGET_LIB="libbwa.dylib"
|
||||||
|
export EXTRA_LIBS="-lc -lz -lsupc++"
|
||||||
|
export LIBTOOL_COMMAND="libtool -dynamic"
|
||||||
|
make
|
||||||
|
|
@ -0,0 +1,277 @@
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cstdlib>
|
||||||
|
|
||||||
|
#include "bwase.h"
|
||||||
|
#include "bwa_gateway.h"
|
||||||
|
|
||||||
|
BWA::BWA(const char* ann_filename,
|
||||||
|
const char* amb_filename,
|
||||||
|
const char* pac_filename,
|
||||||
|
const char* forward_bwt_filename,
|
||||||
|
const char* forward_sa_filename,
|
||||||
|
const char* reverse_bwt_filename,
|
||||||
|
const char* reverse_sa_filename)
|
||||||
|
{
|
||||||
|
// Load the bns (?) and reference
|
||||||
|
bns = bns_restore_core(ann_filename,amb_filename,pac_filename);
|
||||||
|
reference = new ubyte_t[bns->l_pac/4+1];
|
||||||
|
rewind(bns->fp_pac);
|
||||||
|
fread(reference, 1, bns->l_pac/4+1, bns->fp_pac);
|
||||||
|
fclose(bns->fp_pac);
|
||||||
|
bns->fp_pac = NULL;
|
||||||
|
|
||||||
|
// Load the BWTs (both directions) and suffix arrays (both directions)
|
||||||
|
bwts[0] = bwt_restore_bwt(forward_bwt_filename);
|
||||||
|
bwt_restore_sa(forward_sa_filename, bwts[0]);
|
||||||
|
bwts[1] = bwt_restore_bwt(reverse_bwt_filename);
|
||||||
|
bwt_restore_sa(reverse_sa_filename, bwts[1]);
|
||||||
|
load_default_options();
|
||||||
|
|
||||||
|
// Always reinitialize the random seed whenever a new set of files are loaded.
|
||||||
|
initialize_random_seed();
|
||||||
|
|
||||||
|
// initialize the bwase subsystem
|
||||||
|
bwase_initialize();
|
||||||
|
}
|
||||||
|
|
||||||
|
BWA::~BWA() {
|
||||||
|
delete[] reference;
|
||||||
|
bns_destroy(bns);
|
||||||
|
bwt_destroy(bwts[0]);
|
||||||
|
bwt_destroy(bwts[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
void BWA::find_paths(const char* bases, const unsigned read_length, bwt_aln1_t*& paths, unsigned& num_paths, unsigned& best_path_count, unsigned& second_best_path_count)
|
||||||
|
{
|
||||||
|
bwa_seq_t* sequence = create_sequence(bases, read_length);
|
||||||
|
|
||||||
|
// Calculate the suffix array interval for each sequence, storing the result in sequence->aln (and sequence->n_aln).
|
||||||
|
// This method will destroy the contents of seq and rseq.
|
||||||
|
bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options);
|
||||||
|
|
||||||
|
paths = new bwt_aln1_t[sequence->n_aln];
|
||||||
|
memcpy(paths,sequence->aln,sequence->n_aln*sizeof(bwt_aln1_t));
|
||||||
|
num_paths = sequence->n_aln;
|
||||||
|
|
||||||
|
// Call aln2seq to initialize the type of match present.
|
||||||
|
bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);
|
||||||
|
best_path_count = sequence->c1;
|
||||||
|
second_best_path_count = sequence->c2;
|
||||||
|
|
||||||
|
bwa_free_read_seq(1,sequence);
|
||||||
|
}
|
||||||
|
|
||||||
|
Alignment* BWA::generate_single_alignment(const char* bases, const unsigned read_length) {
|
||||||
|
bwa_seq_t* sequence = create_sequence(bases,read_length);
|
||||||
|
|
||||||
|
// Calculate paths.
|
||||||
|
bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options);
|
||||||
|
|
||||||
|
// Check for no alignments found and return null.
|
||||||
|
if(sequence->n_aln == 0) {
|
||||||
|
bwa_free_read_seq(1,sequence);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// bwa_cal_sa_reg_gap destroys the bases / read length. Copy them back in.
|
||||||
|
copy_bases_into_sequence(sequence,bases,read_length);
|
||||||
|
|
||||||
|
// Pick best alignment and propagate its information into the sequence.
|
||||||
|
bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);
|
||||||
|
|
||||||
|
// Generate the best alignment from the sequence.
|
||||||
|
Alignment* alignment = new Alignment;
|
||||||
|
*alignment = generate_final_alignment_from_sequence(sequence);
|
||||||
|
|
||||||
|
bwa_free_read_seq(1,sequence);
|
||||||
|
|
||||||
|
return alignment;
|
||||||
|
}
|
||||||
|
|
||||||
|
void BWA::generate_alignments_from_paths(const char* bases,
|
||||||
|
const unsigned read_length,
|
||||||
|
bwt_aln1_t* paths,
|
||||||
|
const unsigned num_paths,
|
||||||
|
const unsigned best_count,
|
||||||
|
const unsigned second_best_count,
|
||||||
|
Alignment*& alignments,
|
||||||
|
unsigned& num_alignments)
|
||||||
|
{
|
||||||
|
bwa_seq_t* sequence = create_sequence(bases,read_length);
|
||||||
|
|
||||||
|
sequence->aln = paths;
|
||||||
|
sequence->n_aln = num_paths;
|
||||||
|
|
||||||
|
// (Ab)use bwa_aln2seq to propagate values stored in the path out into the sequence itself.
|
||||||
|
bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);
|
||||||
|
|
||||||
|
// But overwrite key parts of the sequence in case the user passed back only a smaller subset
|
||||||
|
// of the paths.
|
||||||
|
sequence->c1 = best_count;
|
||||||
|
sequence->c2 = second_best_count;
|
||||||
|
sequence->type = sequence->c1 > 1 ? BWA_TYPE_REPEAT : BWA_TYPE_UNIQUE;
|
||||||
|
|
||||||
|
num_alignments = 0;
|
||||||
|
for(unsigned i = 0; i < (unsigned)sequence->n_aln; i++)
|
||||||
|
num_alignments += (sequence->aln + i)->l - (sequence->aln + i)->k + 1;
|
||||||
|
|
||||||
|
alignments = new Alignment[num_alignments];
|
||||||
|
unsigned alignment_idx = 0;
|
||||||
|
|
||||||
|
for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) {
|
||||||
|
// Stub in a 'working' path, so that only the desired alignment is local-aligned.
|
||||||
|
const bwt_aln1_t* path = paths + path_idx;
|
||||||
|
bwt_aln1_t working_path = *path;
|
||||||
|
|
||||||
|
// Loop through all alignments, aligning each one individually.
|
||||||
|
for(unsigned sa_idx = path->k; sa_idx <= path->l; sa_idx++) {
|
||||||
|
working_path.k = working_path.l = sa_idx;
|
||||||
|
sequence->aln = &working_path;
|
||||||
|
sequence->n_aln = 1;
|
||||||
|
|
||||||
|
sequence->sa = sa_idx;
|
||||||
|
sequence->strand = path->a;
|
||||||
|
sequence->score = path->score;
|
||||||
|
|
||||||
|
// Each time through bwa_refine_gapped, seq gets reversed. Revert the reverse.
|
||||||
|
// TODO: Fix the interface to bwa_refine_gapped so its easier to work with.
|
||||||
|
if(alignment_idx > 0)
|
||||||
|
seq_reverse(sequence->len, sequence->seq, 0);
|
||||||
|
|
||||||
|
// Copy the local alignment data into the alignment object.
|
||||||
|
*(alignments + alignment_idx) = generate_final_alignment_from_sequence(sequence);
|
||||||
|
|
||||||
|
alignment_idx++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sequence->aln = NULL;
|
||||||
|
sequence->n_aln = 0;
|
||||||
|
|
||||||
|
bwa_free_read_seq(1,sequence);
|
||||||
|
}
|
||||||
|
|
||||||
|
Alignment BWA::generate_final_alignment_from_sequence(bwa_seq_t* sequence) {
|
||||||
|
// Calculate the local coordinate and local alignment.
|
||||||
|
bwa_cal_pac_pos_core(bwts[0],bwts[1],sequence,options.max_diff,options.fnr);
|
||||||
|
bwa_refine_gapped(bns, 1, sequence, reference, NULL);
|
||||||
|
|
||||||
|
// Copy the local alignment data into the alignment object.
|
||||||
|
Alignment alignment;
|
||||||
|
|
||||||
|
// Populate basic path info
|
||||||
|
alignment.edit_distance = sequence->nm;
|
||||||
|
alignment.num_mismatches = sequence->n_mm;
|
||||||
|
alignment.num_gap_opens = sequence->n_gapo;
|
||||||
|
alignment.num_gap_extensions = sequence->n_gape;
|
||||||
|
alignment.num_best = sequence->c1;
|
||||||
|
alignment.num_second_best = sequence->c2;
|
||||||
|
|
||||||
|
// Final alignment position.
|
||||||
|
alignment.type = sequence->type;
|
||||||
|
bns_coor_pac2real(bns, sequence->pos, pos_end(sequence) - sequence->pos, &alignment.contig);
|
||||||
|
alignment.pos = sequence->pos - bns->anns[alignment.contig].offset + 1;
|
||||||
|
alignment.negative_strand = sequence->strand;
|
||||||
|
alignment.mapping_quality = sequence->mapQ;
|
||||||
|
|
||||||
|
// Cigar step.
|
||||||
|
alignment.cigar = NULL;
|
||||||
|
if(sequence->cigar) {
|
||||||
|
alignment.cigar = new uint16_t[sequence->n_cigar];
|
||||||
|
memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t));
|
||||||
|
}
|
||||||
|
alignment.n_cigar = sequence->n_cigar;
|
||||||
|
|
||||||
|
// MD tag with a better breakdown of differences in the cigar
|
||||||
|
alignment.md = strdup(sequence->md);
|
||||||
|
delete[] sequence->md;
|
||||||
|
sequence->md = NULL;
|
||||||
|
|
||||||
|
return alignment;
|
||||||
|
}
|
||||||
|
|
||||||
|
void BWA::load_default_options()
|
||||||
|
{
|
||||||
|
options.s_mm = 3;
|
||||||
|
options.s_gapo = 11;
|
||||||
|
options.s_gape = 4;
|
||||||
|
options.mode = 3;
|
||||||
|
options.indel_end_skip = 5;
|
||||||
|
options.max_del_occ = 10;
|
||||||
|
options.max_entries = 2000000;
|
||||||
|
options.fnr = 0.04;
|
||||||
|
options.max_diff = -1;
|
||||||
|
options.max_gapo = 1;
|
||||||
|
options.max_gape = 6;
|
||||||
|
options.max_seed_diff = 2;
|
||||||
|
options.seed_len = 2147483647;
|
||||||
|
options.n_threads = 1;
|
||||||
|
options.max_top2 = 30;
|
||||||
|
options.trim_qual = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void BWA::initialize_random_seed()
|
||||||
|
{
|
||||||
|
srand48(bns->seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
void BWA::set_max_edit_distance(float edit_distance) {
|
||||||
|
if(edit_distance > 0 && edit_distance < 1) {
|
||||||
|
options.fnr = edit_distance;
|
||||||
|
options.max_diff = -1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
options.fnr = -1.0;
|
||||||
|
options.max_diff = (int)edit_distance;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void BWA::set_max_gap_opens(int max_gap_opens) { options.max_gapo = max_gap_opens; }
|
||||||
|
void BWA::set_max_gap_extensions(int max_gap_extensions) { options.max_gape = max_gap_extensions; }
|
||||||
|
void BWA::set_disallow_indel_within_range(int indel_range) { options.indel_end_skip = indel_range; }
|
||||||
|
void BWA::set_mismatch_penalty(int penalty) { options.s_mm = penalty; }
|
||||||
|
void BWA::set_gap_open_penalty(int penalty) { options.s_gapo = penalty; }
|
||||||
|
void BWA::set_gap_extension_penalty(int penalty) { options.s_gape = penalty; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a sequence with a set of reasonable initial defaults.
|
||||||
|
* Will leave seq and rseq empty.
|
||||||
|
*/
|
||||||
|
bwa_seq_t* BWA::create_sequence(const char* bases, const unsigned read_length)
|
||||||
|
{
|
||||||
|
bwa_seq_t* sequence = new bwa_seq_t;
|
||||||
|
|
||||||
|
sequence->tid = -1;
|
||||||
|
|
||||||
|
sequence->name = 0;
|
||||||
|
|
||||||
|
copy_bases_into_sequence(sequence, bases, read_length);
|
||||||
|
|
||||||
|
sequence->qual = 0;
|
||||||
|
sequence->aln = 0;
|
||||||
|
sequence->md = 0;
|
||||||
|
|
||||||
|
sequence->cigar = NULL;
|
||||||
|
sequence->n_cigar = 0;
|
||||||
|
|
||||||
|
sequence->multi = NULL;
|
||||||
|
sequence->n_multi = 0;
|
||||||
|
|
||||||
|
return sequence;
|
||||||
|
}
|
||||||
|
|
||||||
|
void BWA::copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length)
|
||||||
|
{
|
||||||
|
// seq, rseq will ultimately be freed by bwa_cal_sa_reg_gap
|
||||||
|
sequence->seq = new ubyte_t[read_length];
|
||||||
|
sequence->rseq = new ubyte_t[read_length];
|
||||||
|
for(unsigned i = 0; i < read_length; i++) sequence->seq[i] = nst_nt4_table[(unsigned)bases[i]];
|
||||||
|
memcpy(sequence->rseq,sequence->seq,read_length);
|
||||||
|
|
||||||
|
// BWA expects the read bases to arrive reversed.
|
||||||
|
seq_reverse(read_length,sequence->seq,0);
|
||||||
|
seq_reverse(read_length,sequence->rseq,1);
|
||||||
|
|
||||||
|
sequence->full_len = sequence->len = read_length;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,83 @@
|
||||||
|
#ifndef BWA_GATEWAY
|
||||||
|
#define BWA_GATEWAY
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
#include "bntseq.h"
|
||||||
|
#include "bwt.h"
|
||||||
|
#include "bwtaln.h"
|
||||||
|
|
||||||
|
class Alignment {
|
||||||
|
public:
|
||||||
|
uint32_t type;
|
||||||
|
int contig;
|
||||||
|
bwtint_t pos;
|
||||||
|
bool negative_strand;
|
||||||
|
uint32_t mapping_quality;
|
||||||
|
|
||||||
|
uint16_t *cigar;
|
||||||
|
int n_cigar;
|
||||||
|
|
||||||
|
uint8_t num_mismatches;
|
||||||
|
uint8_t num_gap_opens;
|
||||||
|
uint8_t num_gap_extensions;
|
||||||
|
uint16_t edit_distance;
|
||||||
|
|
||||||
|
uint32_t num_best;
|
||||||
|
uint32_t num_second_best;
|
||||||
|
|
||||||
|
char* md;
|
||||||
|
};
|
||||||
|
|
||||||
|
class BWA {
|
||||||
|
private:
|
||||||
|
bntseq_t *bns;
|
||||||
|
ubyte_t* reference;
|
||||||
|
bwt_t* bwts[2];
|
||||||
|
gap_opt_t options;
|
||||||
|
|
||||||
|
void load_default_options();
|
||||||
|
void initialize_random_seed();
|
||||||
|
bwa_seq_t* create_sequence(const char* bases, const unsigned read_length);
|
||||||
|
void copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length);
|
||||||
|
Alignment generate_final_alignment_from_sequence(bwa_seq_t* sequence);
|
||||||
|
|
||||||
|
public:
|
||||||
|
BWA(const char* ann_filename,
|
||||||
|
const char* amb_filename,
|
||||||
|
const char* pac_filename,
|
||||||
|
const char* forward_bwt_filename,
|
||||||
|
const char* forward_sa_filename,
|
||||||
|
const char* reverse_bwt_filename,
|
||||||
|
const char* reverse_sa_filename);
|
||||||
|
~BWA();
|
||||||
|
|
||||||
|
// Parameterize the aligner.
|
||||||
|
void set_max_edit_distance(float edit_distance);
|
||||||
|
void set_max_gap_opens(int max_gap_opens);
|
||||||
|
void set_max_gap_extensions(int max_gap_extensions);
|
||||||
|
void set_disallow_indel_within_range(int indel_range);
|
||||||
|
void set_mismatch_penalty(int penalty);
|
||||||
|
void set_gap_open_penalty(int penalty);
|
||||||
|
void set_gap_extension_penalty(int penalty);
|
||||||
|
|
||||||
|
// Perform the alignment
|
||||||
|
Alignment* generate_single_alignment(const char* bases,
|
||||||
|
const unsigned read_length);
|
||||||
|
void find_paths(const char* bases,
|
||||||
|
const unsigned read_length,
|
||||||
|
bwt_aln1_t*& paths,
|
||||||
|
unsigned& num_paths,
|
||||||
|
unsigned& best_path_count,
|
||||||
|
unsigned& second_best_path_count);
|
||||||
|
void generate_alignments_from_paths(const char* bases,
|
||||||
|
const unsigned read_length,
|
||||||
|
bwt_aln1_t* paths,
|
||||||
|
const unsigned num_paths,
|
||||||
|
const unsigned best_count,
|
||||||
|
const unsigned second_best_count,
|
||||||
|
Alignment*& alignments,
|
||||||
|
unsigned& num_alignments);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // BWA_GATEWAY
|
||||||
|
|
@ -0,0 +1,437 @@
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
#include "bntseq.h"
|
||||||
|
#include "bwt.h"
|
||||||
|
#include "bwtaln.h"
|
||||||
|
#include "bwa_gateway.h"
|
||||||
|
#include "org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h"
|
||||||
|
|
||||||
|
typedef void (BWA::*int_setter)(int value);
|
||||||
|
typedef void (BWA::*float_setter)(float value);
|
||||||
|
|
||||||
|
static jobject convert_to_java_alignment(JNIEnv* env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment);
|
||||||
|
static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name);
|
||||||
|
static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter);
|
||||||
|
static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter);
|
||||||
|
static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message);
|
||||||
|
|
||||||
|
JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create(JNIEnv* env, jobject instance, jobject bwtFiles, jobject configuration)
|
||||||
|
{
|
||||||
|
jstring java_ann = get_configuration_file(env,bwtFiles,"annFile");
|
||||||
|
if(java_ann == NULL) return 0L;
|
||||||
|
jstring java_amb = get_configuration_file(env,bwtFiles,"ambFile");
|
||||||
|
if(java_amb == NULL) return 0L;
|
||||||
|
jstring java_pac = get_configuration_file(env,bwtFiles,"pacFile");
|
||||||
|
if(java_pac == NULL) return 0L;
|
||||||
|
jstring java_forward_bwt = get_configuration_file(env,bwtFiles,"forwardBWTFile");
|
||||||
|
if(java_forward_bwt == NULL) return 0L;
|
||||||
|
jstring java_forward_sa = get_configuration_file(env,bwtFiles,"forwardSAFile");
|
||||||
|
if(java_forward_sa == NULL) return 0L;
|
||||||
|
jstring java_reverse_bwt = get_configuration_file(env,bwtFiles,"reverseBWTFile");
|
||||||
|
if(java_reverse_bwt == NULL) return 0L;
|
||||||
|
jstring java_reverse_sa = get_configuration_file(env,bwtFiles,"reverseSAFile");
|
||||||
|
if(java_reverse_sa == NULL) return 0L;
|
||||||
|
|
||||||
|
const char* ann_filename = env->GetStringUTFChars(java_ann,JNI_FALSE);
|
||||||
|
if(env->ExceptionCheck()) return 0L;
|
||||||
|
const char* amb_filename = env->GetStringUTFChars(java_amb,JNI_FALSE);
|
||||||
|
if(env->ExceptionCheck()) return 0L;
|
||||||
|
const char* pac_filename = env->GetStringUTFChars(java_pac,JNI_FALSE);
|
||||||
|
if(env->ExceptionCheck()) return 0L;
|
||||||
|
const char* forward_bwt_filename = env->GetStringUTFChars(java_forward_bwt,JNI_FALSE);
|
||||||
|
if(env->ExceptionCheck()) return 0L;
|
||||||
|
const char* forward_sa_filename = env->GetStringUTFChars(java_forward_sa,JNI_FALSE);
|
||||||
|
if(env->ExceptionCheck()) return 0L;
|
||||||
|
const char* reverse_bwt_filename = env->GetStringUTFChars(java_reverse_bwt,JNI_FALSE);
|
||||||
|
if(env->ExceptionCheck()) return 0L;
|
||||||
|
const char* reverse_sa_filename = env->GetStringUTFChars(java_reverse_sa,JNI_FALSE);
|
||||||
|
if(env->ExceptionCheck()) return 0L;
|
||||||
|
|
||||||
|
BWA* bwa = new BWA(ann_filename,
|
||||||
|
amb_filename,
|
||||||
|
pac_filename,
|
||||||
|
forward_bwt_filename,
|
||||||
|
forward_sa_filename,
|
||||||
|
reverse_bwt_filename,
|
||||||
|
reverse_sa_filename);
|
||||||
|
|
||||||
|
Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration(env,instance,(jlong)bwa,configuration);
|
||||||
|
if(env->ExceptionCheck()) return 0L;
|
||||||
|
|
||||||
|
env->ReleaseStringUTFChars(java_ann,ann_filename);
|
||||||
|
if(env->ExceptionCheck()) return 0L;
|
||||||
|
env->ReleaseStringUTFChars(java_amb,amb_filename);
|
||||||
|
if(env->ExceptionCheck()) return 0L;
|
||||||
|
env->ReleaseStringUTFChars(java_pac,pac_filename);
|
||||||
|
if(env->ExceptionCheck()) return 0L;
|
||||||
|
env->ReleaseStringUTFChars(java_forward_bwt,forward_bwt_filename);
|
||||||
|
if(env->ExceptionCheck()) return 0L;
|
||||||
|
env->ReleaseStringUTFChars(java_forward_sa,forward_sa_filename);
|
||||||
|
if(env->ExceptionCheck()) return 0L;
|
||||||
|
env->ReleaseStringUTFChars(java_reverse_bwt,reverse_bwt_filename);
|
||||||
|
if(env->ExceptionCheck()) return 0L;
|
||||||
|
env->ReleaseStringUTFChars(java_reverse_sa,reverse_sa_filename);
|
||||||
|
if(env->ExceptionCheck()) return 0L;
|
||||||
|
|
||||||
|
return (jlong)bwa;
|
||||||
|
}
|
||||||
|
|
||||||
|
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_destroy(JNIEnv* env, jobject instance, jlong java_bwa)
|
||||||
|
{
|
||||||
|
BWA* bwa = (BWA*)java_bwa;
|
||||||
|
delete bwa;
|
||||||
|
}
|
||||||
|
|
||||||
|
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration(JNIEnv *env, jobject instance, jlong java_bwa, jobject configuration) {
|
||||||
|
BWA* bwa = (BWA*)java_bwa;
|
||||||
|
set_float_configuration_param(env, configuration, "maximumEditDistance", bwa, &BWA::set_max_edit_distance);
|
||||||
|
if(env->ExceptionCheck()) return;
|
||||||
|
set_int_configuration_param(env, configuration, "maximumGapOpens", bwa, &BWA::set_max_gap_opens);
|
||||||
|
if(env->ExceptionCheck()) return;
|
||||||
|
set_int_configuration_param(env, configuration, "maximumGapExtensions", bwa, &BWA::set_max_gap_extensions);
|
||||||
|
if(env->ExceptionCheck()) return;
|
||||||
|
set_int_configuration_param(env, configuration, "disallowIndelWithinRange", bwa, &BWA::set_disallow_indel_within_range);
|
||||||
|
if(env->ExceptionCheck()) return;
|
||||||
|
set_int_configuration_param(env, configuration, "mismatchPenalty", bwa, &BWA::set_mismatch_penalty);
|
||||||
|
if(env->ExceptionCheck()) return;
|
||||||
|
set_int_configuration_param(env, configuration, "gapOpenPenalty", bwa, &BWA::set_gap_open_penalty);
|
||||||
|
if(env->ExceptionCheck()) return;
|
||||||
|
set_int_configuration_param(env, configuration, "gapExtensionPenalty", bwa, &BWA::set_gap_extension_penalty);
|
||||||
|
if(env->ExceptionCheck()) return;
|
||||||
|
}
|
||||||
|
|
||||||
|
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases)
|
||||||
|
{
|
||||||
|
BWA* bwa = (BWA*)java_bwa;
|
||||||
|
|
||||||
|
const jsize read_length = env->GetArrayLength(java_bases);
|
||||||
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
|
||||||
|
jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE);
|
||||||
|
if(read_bases == NULL) return NULL;
|
||||||
|
|
||||||
|
bwt_aln1_t* paths = NULL;
|
||||||
|
unsigned num_paths = 0;
|
||||||
|
|
||||||
|
unsigned best_path_count, second_best_path_count;
|
||||||
|
bwa->find_paths((const char*)read_bases,read_length,paths,num_paths,best_path_count,second_best_path_count);
|
||||||
|
|
||||||
|
jobjectArray java_paths = env->NewObjectArray(num_paths, env->FindClass("org/broadinstitute/sting/alignment/bwa/c/BWAPath"), NULL);
|
||||||
|
if(java_paths == NULL) return NULL;
|
||||||
|
|
||||||
|
for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) {
|
||||||
|
bwt_aln1_t& path = *(paths + path_idx);
|
||||||
|
|
||||||
|
jclass java_path_class = env->FindClass("org/broadinstitute/sting/alignment/bwa/c/BWAPath");
|
||||||
|
if(java_path_class == NULL) return NULL;
|
||||||
|
|
||||||
|
jmethodID java_path_constructor = env->GetMethodID(java_path_class, "<init>", "(IIIZJJIII)V");
|
||||||
|
if(java_path_constructor == NULL) return NULL;
|
||||||
|
|
||||||
|
// Note that k/l are being cast to long. Bad things will happen if JNI assumes that they're ints.
|
||||||
|
jobject java_path = env->NewObject(java_path_class,
|
||||||
|
java_path_constructor,
|
||||||
|
path.n_mm,
|
||||||
|
path.n_gapo,
|
||||||
|
path.n_gape,
|
||||||
|
path.a,
|
||||||
|
(jlong)path.k,
|
||||||
|
(jlong)path.l,
|
||||||
|
path.score,
|
||||||
|
best_path_count,
|
||||||
|
second_best_path_count);
|
||||||
|
if(java_path == NULL) return NULL;
|
||||||
|
|
||||||
|
env->SetObjectArrayElement(java_paths,path_idx,java_path);
|
||||||
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
|
||||||
|
env->DeleteLocalRef(java_path_class);
|
||||||
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] paths;
|
||||||
|
|
||||||
|
env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE);
|
||||||
|
|
||||||
|
return env->ExceptionCheck() ? NULL : java_paths;
|
||||||
|
}
|
||||||
|
|
||||||
|
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_convertPathsToAlignments(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases, jobjectArray java_paths)
|
||||||
|
{
|
||||||
|
BWA* bwa = (BWA*)java_bwa;
|
||||||
|
|
||||||
|
const jsize read_length = env->GetArrayLength(java_bases);
|
||||||
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
|
||||||
|
jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE);
|
||||||
|
if(read_bases == NULL) return NULL;
|
||||||
|
|
||||||
|
const jsize num_paths = env->GetArrayLength(java_paths);
|
||||||
|
bwt_aln1_t* paths = new bwt_aln1_t[num_paths];
|
||||||
|
unsigned best_count = 0, second_best_count = 0;
|
||||||
|
|
||||||
|
for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) {
|
||||||
|
jobject java_path = env->GetObjectArrayElement(java_paths,path_idx);
|
||||||
|
jclass java_path_class = env->GetObjectClass(java_path);
|
||||||
|
if(java_path_class == NULL) return NULL;
|
||||||
|
|
||||||
|
bwt_aln1_t& path = *(paths + path_idx);
|
||||||
|
|
||||||
|
jfieldID mismatches_field = env->GetFieldID(java_path_class, "numMismatches", "I");
|
||||||
|
if(mismatches_field == NULL) return NULL;
|
||||||
|
path.n_mm = env->GetIntField(java_path,mismatches_field);
|
||||||
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
|
||||||
|
jfieldID gap_opens_field = env->GetFieldID(java_path_class, "numGapOpens", "I");
|
||||||
|
if(gap_opens_field == NULL) return NULL;
|
||||||
|
path.n_gapo = env->GetIntField(java_path,gap_opens_field);
|
||||||
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
|
||||||
|
jfieldID gap_extensions_field = env->GetFieldID(java_path_class, "numGapExtensions", "I");
|
||||||
|
if(gap_extensions_field == NULL) return NULL;
|
||||||
|
path.n_gape = env->GetIntField(java_path,gap_extensions_field);
|
||||||
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
|
||||||
|
jfieldID negative_strand_field = env->GetFieldID(java_path_class, "negativeStrand", "Z");
|
||||||
|
if(negative_strand_field == NULL) return NULL;
|
||||||
|
path.a = env->GetBooleanField(java_path,negative_strand_field);
|
||||||
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
|
||||||
|
jfieldID k_field = env->GetFieldID(java_path_class, "k", "J");
|
||||||
|
if(k_field == NULL) return NULL;
|
||||||
|
path.k = env->GetLongField(java_path,k_field);
|
||||||
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
|
||||||
|
jfieldID l_field = env->GetFieldID(java_path_class, "l", "J");
|
||||||
|
if(l_field == NULL) return NULL;
|
||||||
|
path.l = env->GetLongField(java_path,l_field);
|
||||||
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
|
||||||
|
jfieldID score_field = env->GetFieldID(java_path_class, "score", "I");
|
||||||
|
if(score_field == NULL) return NULL;
|
||||||
|
path.score = env->GetIntField(java_path,score_field);
|
||||||
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
|
||||||
|
jfieldID best_count_field = env->GetFieldID(java_path_class, "bestCount", "I");
|
||||||
|
if(best_count_field == NULL) return NULL;
|
||||||
|
best_count = env->GetIntField(java_path,best_count_field);
|
||||||
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
|
||||||
|
jfieldID second_best_count_field = env->GetFieldID(java_path_class, "secondBestCount", "I");
|
||||||
|
if(second_best_count_field == NULL) return NULL;
|
||||||
|
second_best_count = env->GetIntField(java_path,second_best_count_field);
|
||||||
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
Alignment* alignments = NULL;
|
||||||
|
unsigned num_alignments = 0;
|
||||||
|
bwa->generate_alignments_from_paths((const char*)read_bases,read_length,paths,num_paths,best_count,second_best_count,alignments,num_alignments);
|
||||||
|
|
||||||
|
jobjectArray java_alignments = env->NewObjectArray(num_alignments, env->FindClass("org/broadinstitute/sting/alignment/Alignment"), NULL);
|
||||||
|
if(java_alignments == NULL) return NULL;
|
||||||
|
|
||||||
|
for(unsigned alignment_idx = 0; alignment_idx < (unsigned)num_alignments; alignment_idx++) {
|
||||||
|
Alignment& alignment = *(alignments + alignment_idx);
|
||||||
|
jobject java_alignment = convert_to_java_alignment(env,read_bases,read_length,alignment);
|
||||||
|
if(java_alignment == NULL) return NULL;
|
||||||
|
env->SetObjectArrayElement(java_alignments,alignment_idx,java_alignment);
|
||||||
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] alignments;
|
||||||
|
delete[] paths;
|
||||||
|
|
||||||
|
env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE);
|
||||||
|
|
||||||
|
return env->ExceptionCheck() ? NULL : java_alignments;
|
||||||
|
}
|
||||||
|
|
||||||
|
JNIEXPORT jobject JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getBestAlignment(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases) {
|
||||||
|
BWA* bwa = (BWA*)java_bwa;
|
||||||
|
|
||||||
|
const jsize read_length = env->GetArrayLength(java_bases);
|
||||||
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
|
||||||
|
jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE);
|
||||||
|
if(read_bases == NULL) return NULL;
|
||||||
|
|
||||||
|
Alignment* best_alignment = bwa->generate_single_alignment((const char*)read_bases,read_length);
|
||||||
|
jobject java_best_alignment = (best_alignment != NULL) ? convert_to_java_alignment(env,read_bases,read_length,*best_alignment) : NULL;
|
||||||
|
delete best_alignment;
|
||||||
|
|
||||||
|
env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE);
|
||||||
|
|
||||||
|
return java_best_alignment;
|
||||||
|
}
|
||||||
|
|
||||||
|
static jobject convert_to_java_alignment(JNIEnv *env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment) {
|
||||||
|
unsigned cigar_length;
|
||||||
|
if(alignment.type == BWA_TYPE_NO_MATCH) cigar_length = 0;
|
||||||
|
else if(!alignment.cigar) cigar_length = 1;
|
||||||
|
else cigar_length = alignment.n_cigar;
|
||||||
|
|
||||||
|
jcharArray java_cigar_operators = env->NewCharArray(cigar_length);
|
||||||
|
if(java_cigar_operators == NULL) return NULL;
|
||||||
|
jintArray java_cigar_lengths = env->NewIntArray(cigar_length);
|
||||||
|
if(java_cigar_lengths == NULL) return NULL;
|
||||||
|
|
||||||
|
if(alignment.cigar) {
|
||||||
|
for(unsigned cigar_idx = 0; cigar_idx < (unsigned)alignment.n_cigar; ++cigar_idx) {
|
||||||
|
jchar cigar_operator = "MIDS"[alignment.cigar[cigar_idx]>>14];
|
||||||
|
jint cigar_length = alignment.cigar[cigar_idx]&0x3fff;
|
||||||
|
|
||||||
|
env->SetCharArrayRegion(java_cigar_operators,cigar_idx,1,&cigar_operator);
|
||||||
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
env->SetIntArrayRegion(java_cigar_lengths,cigar_idx,1,&cigar_length);
|
||||||
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if(alignment.type != BWA_TYPE_NO_MATCH) {
|
||||||
|
jchar cigar_operator = 'M';
|
||||||
|
env->SetCharArrayRegion(java_cigar_operators,0,1,&cigar_operator);
|
||||||
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
env->SetIntArrayRegion(java_cigar_lengths,0,1,&read_length);
|
||||||
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
delete[] alignment.cigar;
|
||||||
|
|
||||||
|
jclass java_alignment_class = env->FindClass("org/broadinstitute/sting/alignment/Alignment");
|
||||||
|
if(java_alignment_class == NULL) return NULL;
|
||||||
|
|
||||||
|
jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "<init>", "(IIZI[C[IILjava/lang/String;IIIII)V");
|
||||||
|
if(java_alignment_constructor == NULL) return NULL;
|
||||||
|
|
||||||
|
jstring java_md = env->NewStringUTF(alignment.md);
|
||||||
|
if(java_md == NULL) return NULL;
|
||||||
|
delete[] alignment.md;
|
||||||
|
|
||||||
|
jobject java_alignment = env->NewObject(java_alignment_class,
|
||||||
|
java_alignment_constructor,
|
||||||
|
alignment.contig,
|
||||||
|
alignment.pos,
|
||||||
|
alignment.negative_strand,
|
||||||
|
alignment.mapping_quality,
|
||||||
|
java_cigar_operators,
|
||||||
|
java_cigar_lengths,
|
||||||
|
alignment.edit_distance,
|
||||||
|
java_md,
|
||||||
|
alignment.num_mismatches,
|
||||||
|
alignment.num_gap_opens,
|
||||||
|
alignment.num_gap_extensions,
|
||||||
|
alignment.num_best,
|
||||||
|
alignment.num_second_best);
|
||||||
|
if(java_alignment == NULL) return NULL;
|
||||||
|
|
||||||
|
env->DeleteLocalRef(java_alignment_class);
|
||||||
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
|
||||||
|
return java_alignment;
|
||||||
|
}
|
||||||
|
|
||||||
|
static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name) {
|
||||||
|
jclass configuration_class = env->GetObjectClass(configuration);
|
||||||
|
if(configuration_class == NULL) return NULL;
|
||||||
|
|
||||||
|
jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/io/File;");
|
||||||
|
if(configuration_field == NULL) return NULL;
|
||||||
|
|
||||||
|
jobject configuration_file = (jobject)env->GetObjectField(configuration,configuration_field);
|
||||||
|
|
||||||
|
jclass file_class = env->FindClass("java/io/File");
|
||||||
|
if(file_class == NULL) return NULL;
|
||||||
|
|
||||||
|
jmethodID path_extractor = env->GetMethodID(file_class,"getAbsolutePath", "()Ljava/lang/String;");
|
||||||
|
if(path_extractor == NULL) return NULL;
|
||||||
|
|
||||||
|
jstring path = (jstring)env->CallObjectMethod(configuration_file,path_extractor);
|
||||||
|
if(path == NULL) return NULL;
|
||||||
|
|
||||||
|
env->DeleteLocalRef(configuration_class);
|
||||||
|
env->DeleteLocalRef(file_class);
|
||||||
|
env->DeleteLocalRef(configuration_file);
|
||||||
|
|
||||||
|
return path;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter) {
|
||||||
|
jclass configuration_class = env->GetObjectClass(configuration);
|
||||||
|
if(configuration_class == NULL) return;
|
||||||
|
|
||||||
|
jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/Integer;");
|
||||||
|
if(configuration_field == NULL) return;
|
||||||
|
|
||||||
|
jobject boxed_value = env->GetObjectField(configuration,configuration_field);
|
||||||
|
if(env->ExceptionCheck()) return;
|
||||||
|
|
||||||
|
if(boxed_value != NULL) {
|
||||||
|
jclass int_box_class = env->FindClass("java/lang/Integer");
|
||||||
|
if(int_box_class == NULL) return;
|
||||||
|
|
||||||
|
jmethodID int_extractor = env->GetMethodID(int_box_class,"intValue", "()I");
|
||||||
|
if(int_extractor == NULL) return;
|
||||||
|
|
||||||
|
jint value = env->CallIntMethod(boxed_value,int_extractor);
|
||||||
|
if(env->ExceptionCheck()) return;
|
||||||
|
|
||||||
|
if(value < 0)
|
||||||
|
{
|
||||||
|
throw_config_value_exception(env,field_name,"cannot be set to a negative value");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
(bwa->*setter)(value);
|
||||||
|
|
||||||
|
env->DeleteLocalRef(int_box_class);
|
||||||
|
}
|
||||||
|
|
||||||
|
env->DeleteLocalRef(boxed_value);
|
||||||
|
env->DeleteLocalRef(configuration_class);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter)
|
||||||
|
{
|
||||||
|
jclass configuration_class = env->GetObjectClass(configuration);
|
||||||
|
if(configuration_class == NULL) return;
|
||||||
|
|
||||||
|
jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/Float;");
|
||||||
|
if(configuration_field == NULL) return;
|
||||||
|
|
||||||
|
jobject boxed_value = env->GetObjectField(configuration,configuration_field);
|
||||||
|
if(boxed_value != NULL) {
|
||||||
|
jclass float_box_class = env->FindClass("java/lang/Float");
|
||||||
|
if(float_box_class == NULL) return;
|
||||||
|
|
||||||
|
jmethodID float_extractor = env->GetMethodID(float_box_class,"floatValue", "()F");
|
||||||
|
if(float_extractor == NULL) return;
|
||||||
|
|
||||||
|
jfloat value = env->CallFloatMethod(boxed_value,float_extractor);
|
||||||
|
if(env->ExceptionCheck()) return;
|
||||||
|
|
||||||
|
if(value < 0)
|
||||||
|
{
|
||||||
|
throw_config_value_exception(env,field_name,"cannot be set to a negative value");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
(bwa->*setter)(value);
|
||||||
|
|
||||||
|
env->DeleteLocalRef(float_box_class);
|
||||||
|
}
|
||||||
|
|
||||||
|
env->DeleteLocalRef(boxed_value);
|
||||||
|
env->DeleteLocalRef(configuration_class);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message)
|
||||||
|
{
|
||||||
|
char* buffer = new char[strlen(field_name)+1+strlen(message)+1];
|
||||||
|
sprintf(buffer,"%s %s",field_name,message);
|
||||||
|
jclass sting_exception_class = env->FindClass("org/broadinstitute/sting/utils/StingException");
|
||||||
|
if(sting_exception_class == NULL) return;
|
||||||
|
env->ThrowNew(sting_exception_class, buffer);
|
||||||
|
delete[] buffer;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,61 @@
|
||||||
|
/* DO NOT EDIT THIS FILE - it is machine generated */
|
||||||
|
#include <jni.h>
|
||||||
|
/* Header for class org_broadinstitute_sting_alignment_bwa_c_BWACAligner */
|
||||||
|
|
||||||
|
#ifndef _Included_org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
||||||
|
#define _Included_org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
/*
|
||||||
|
* Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
||||||
|
* Method: create
|
||||||
|
* Signature: (Lorg/broadinstitute/sting/alignment/bwa/BWTFiles;Lorg/broadinstitute/sting/alignment/bwa/BWAConfiguration;)J
|
||||||
|
*/
|
||||||
|
JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create
|
||||||
|
(JNIEnv *, jobject, jobject, jobject);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
||||||
|
* Method: updateConfiguration
|
||||||
|
* Signature: (JLorg/broadinstitute/sting/alignment/bwa/BWAConfiguration;)V
|
||||||
|
*/
|
||||||
|
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration
|
||||||
|
(JNIEnv *, jobject, jlong, jobject);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
||||||
|
* Method: destroy
|
||||||
|
* Signature: (J)V
|
||||||
|
*/
|
||||||
|
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_destroy
|
||||||
|
(JNIEnv *, jobject, jlong);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
||||||
|
* Method: getPaths
|
||||||
|
* Signature: (J[B)[Lorg/broadinstitute/sting/alignment/bwa/c/BWAPath;
|
||||||
|
*/
|
||||||
|
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths
|
||||||
|
(JNIEnv *, jobject, jlong, jbyteArray);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
||||||
|
* Method: convertPathsToAlignments
|
||||||
|
* Signature: (J[B[Lorg/broadinstitute/sting/alignment/bwa/c/BWAPath;)[Lorg/broadinstitute/sting/alignment/Alignment;
|
||||||
|
*/
|
||||||
|
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_convertPathsToAlignments
|
||||||
|
(JNIEnv *, jobject, jlong, jbyteArray, jobjectArray);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
||||||
|
* Method: getBestAlignment
|
||||||
|
* Signature: (J[B)Lorg/broadinstitute/sting/alignment/Alignment;
|
||||||
|
*/
|
||||||
|
JNIEXPORT jobject JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getBestAlignment
|
||||||
|
(JNIEnv *, jobject, jlong, jbyteArray);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,10 @@
|
||||||
|
CC=gcc
|
||||||
|
CCFLAGS=-Wall -dynamiclib -arch i386 -arch x86_64
|
||||||
|
|
||||||
|
libenvironhack.dylib: libenvironhack.c
|
||||||
|
$(CC) $(CCFLAGS) -init _init_environ $< -o $@
|
||||||
|
|
||||||
|
all: libenvironhack.dylib
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -f libenvironhack.dylib
|
||||||
|
|
@ -22,25 +22,16 @@
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
/*
|
||||||
|
LSF 7.0.6 on the mac is missing the unsatisfied exported symbol for environ which was removed on MacOS X 10.5+.
|
||||||
|
nm $LSF_LIBDIR/liblsf.dylib | grep environ
|
||||||
|
See "man environ" for more info, along with http://lists.apple.com/archives/java-dev/2007/Dec/msg00096.html
|
||||||
|
*/
|
||||||
|
|
||||||
import org.broadinstitute.sting.WalkerTest;
|
#include <crt_externs.h>
|
||||||
import org.testng.annotations.Test;
|
|
||||||
|
|
||||||
import java.io.File;
|
char **environ = (char **)0;
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
public class BatchMergeIntegrationTest extends WalkerTest {
|
void init_environ(void) {
|
||||||
@Test
|
environ = (*_NSGetEnviron());
|
||||||
public void testBatchMerge1() {
|
|
||||||
String bam = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam";
|
|
||||||
String alleles = validationDataLocation + "batch.merge.alleles.vcf";
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
|
||||||
"-T UnifiedGenotyper -NO_HEADER -BTI alleles -stand_call_conf 0.0 -glm BOTH -G none -nsl -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -o %s -R " + b37KGReference
|
|
||||||
+ " -B:alleles,VCF " + alleles
|
|
||||||
+ " -I " + bam,
|
|
||||||
1,
|
|
||||||
Arrays.asList("8bd114ceaf6a94e96a677fcc52350713"));
|
|
||||||
executeTest("testBatchMerge UG genotype given alleles:" + new File(bam).getName() + " with " + new File(alleles).getName(), spec);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Binary file not shown.
|
|
@ -0,0 +1,49 @@
|
||||||
|
package org.broadinstitute.sting.alignment;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create perfect alignments from the read to the genome represented by the given BWT / suffix array.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public interface Aligner {
|
||||||
|
/**
|
||||||
|
* Close this instance of the BWA pointer and delete its resources.
|
||||||
|
*/
|
||||||
|
public void close();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allow the aligner to choose one alignment randomly from the pile of best alignments.
|
||||||
|
* @param bases Bases to align.
|
||||||
|
* @return An align
|
||||||
|
*/
|
||||||
|
public Alignment getBestAlignment(final byte[] bases);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Align the read to the reference.
|
||||||
|
* @param read Read to align.
|
||||||
|
* @param header Optional header to drop in place.
|
||||||
|
* @return A list of the alignments.
|
||||||
|
*/
|
||||||
|
public SAMRecord align(final SAMRecord read, final SAMFileHeader header);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a iterator of alignments, batched by mapping quality.
|
||||||
|
* @param bases List of bases.
|
||||||
|
* @return Iterator to alignments.
|
||||||
|
*/
|
||||||
|
public Iterable<Alignment[]> getAllAlignments(final byte[] bases);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a iterator of aligned reads, batched by mapping quality.
|
||||||
|
* @param read Read to align.
|
||||||
|
* @param newHeader Optional new header to use when aligning the read. If present, it must be null.
|
||||||
|
* @return Iterator to alignments.
|
||||||
|
*/
|
||||||
|
public Iterable<SAMRecord[]> alignAll(final SAMRecord read, final SAMFileHeader newHeader);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,221 @@
|
||||||
|
package org.broadinstitute.sting.alignment;
|
||||||
|
|
||||||
|
import net.sf.samtools.*;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents an alignment of a read to a site in the reference genome.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class Alignment {
|
||||||
|
protected int contigIndex;
|
||||||
|
protected long alignmentStart;
|
||||||
|
protected boolean negativeStrand;
|
||||||
|
protected int mappingQuality;
|
||||||
|
|
||||||
|
protected char[] cigarOperators;
|
||||||
|
protected int[] cigarLengths;
|
||||||
|
|
||||||
|
protected int editDistance;
|
||||||
|
protected String mismatchingPositions;
|
||||||
|
|
||||||
|
protected int numMismatches;
|
||||||
|
protected int numGapOpens;
|
||||||
|
protected int numGapExtensions;
|
||||||
|
protected int bestCount;
|
||||||
|
protected int secondBestCount;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the index of the given contig.
|
||||||
|
* @return the inde
|
||||||
|
*/
|
||||||
|
public int getContigIndex() { return contigIndex; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the starting position for the given alignment.
|
||||||
|
* @return Starting position.
|
||||||
|
*/
|
||||||
|
public long getAlignmentStart() { return alignmentStart; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is the given alignment on the reverse strand?
|
||||||
|
* @return True if the alignment is on the reverse strand.
|
||||||
|
*/
|
||||||
|
public boolean isNegativeStrand() { return negativeStrand; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the score of this alignment.
|
||||||
|
* @return The score.
|
||||||
|
*/
|
||||||
|
public int getMappingQuality() { return mappingQuality; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the edit distance; will eventually end up in the NM SAM tag
|
||||||
|
* if this alignment makes it that far.
|
||||||
|
* @return The edit distance.
|
||||||
|
*/
|
||||||
|
public int getEditDistance() { return editDistance; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A string representation of which positions mismatch; contents of MD tag.
|
||||||
|
* @return String representation of mismatching positions.
|
||||||
|
*/
|
||||||
|
public String getMismatchingPositions() { return mismatchingPositions; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the number of mismatches in the read.
|
||||||
|
* @return Number of mismatches.
|
||||||
|
*/
|
||||||
|
public int getNumMismatches() { return numMismatches; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the number of gap opens.
|
||||||
|
* @return Number of gap opens.
|
||||||
|
*/
|
||||||
|
public int getNumGapOpens() { return numGapOpens; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the number of gap extensions.
|
||||||
|
* @return Number of gap extensions.
|
||||||
|
*/
|
||||||
|
public int getNumGapExtensions() { return numGapExtensions; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the number of best alignments.
|
||||||
|
* @return Number of top scoring alignments.
|
||||||
|
*/
|
||||||
|
public int getBestCount() { return bestCount; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the number of second best alignments.
|
||||||
|
* @return Number of second best scoring alignments.
|
||||||
|
*/
|
||||||
|
public int getSecondBestCount() { return secondBestCount; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the cigar for this alignment.
|
||||||
|
* @return sam-jdk formatted alignment.
|
||||||
|
*/
|
||||||
|
public Cigar getCigar() {
|
||||||
|
Cigar cigar = new Cigar();
|
||||||
|
for(int i = 0; i < cigarOperators.length; i++) {
|
||||||
|
CigarOperator operator = CigarOperator.characterToEnum(cigarOperators[i]);
|
||||||
|
cigar.add(new CigarElement(cigarLengths[i],operator));
|
||||||
|
}
|
||||||
|
return cigar;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Temporarily implement getCigarString() for debugging; the TextCigarCodec is unfortunately
|
||||||
|
* package-protected.
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public String getCigarString() {
|
||||||
|
Cigar cigar = getCigar();
|
||||||
|
if(cigar.isEmpty()) return "*";
|
||||||
|
|
||||||
|
StringBuilder cigarString = new StringBuilder();
|
||||||
|
for(CigarElement element: cigar.getCigarElements()) {
|
||||||
|
cigarString.append(element.getLength());
|
||||||
|
cigarString.append(element.getOperator());
|
||||||
|
}
|
||||||
|
return cigarString.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stub for inheritance.
|
||||||
|
*/
|
||||||
|
public Alignment() {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new alignment object.
|
||||||
|
* @param contigIndex The contig to which this read aligned.
|
||||||
|
* @param alignmentStart The point within the contig to which this read aligned.
|
||||||
|
* @param negativeStrand Forward or reverse alignment of the given read.
|
||||||
|
* @param mappingQuality How good does BWA think this mapping is?
|
||||||
|
* @param cigarOperators The ordered operators in the cigar string.
|
||||||
|
* @param cigarLengths The lengths to which each operator applies.
|
||||||
|
* @param editDistance The edit distance (cumulative) of the read.
|
||||||
|
* @param mismatchingPositions String representation of which bases in the read mismatch.
|
||||||
|
* @param numMismatches Number of total mismatches in the read.
|
||||||
|
* @param numGapOpens Number of gap opens in the read.
|
||||||
|
* @param numGapExtensions Number of gap extensions in the read.
|
||||||
|
* @param bestCount Number of best alignments in the read.
|
||||||
|
* @param secondBestCount Number of second best alignments in the read.
|
||||||
|
*/
|
||||||
|
public Alignment(int contigIndex,
|
||||||
|
int alignmentStart,
|
||||||
|
boolean negativeStrand,
|
||||||
|
int mappingQuality,
|
||||||
|
char[] cigarOperators,
|
||||||
|
int[] cigarLengths,
|
||||||
|
int editDistance,
|
||||||
|
String mismatchingPositions,
|
||||||
|
int numMismatches,
|
||||||
|
int numGapOpens,
|
||||||
|
int numGapExtensions,
|
||||||
|
int bestCount,
|
||||||
|
int secondBestCount) {
|
||||||
|
this.contigIndex = contigIndex;
|
||||||
|
this.alignmentStart = alignmentStart;
|
||||||
|
this.negativeStrand = negativeStrand;
|
||||||
|
this.mappingQuality = mappingQuality;
|
||||||
|
this.cigarOperators = cigarOperators;
|
||||||
|
this.cigarLengths = cigarLengths;
|
||||||
|
this.editDistance = editDistance;
|
||||||
|
this.mismatchingPositions = mismatchingPositions;
|
||||||
|
this.numMismatches = numMismatches;
|
||||||
|
this.numGapOpens = numGapOpens;
|
||||||
|
this.numGapExtensions = numGapExtensions;
|
||||||
|
this.bestCount = bestCount;
|
||||||
|
this.secondBestCount = secondBestCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a read directly from an alignment.
|
||||||
|
* @param alignment The alignment to convert to a read.
|
||||||
|
* @param unmappedRead Source of the unmapped read. Should have bases, quality scores, and flags.
|
||||||
|
* @param newSAMHeader The new SAM header to use in creating this read. Can be null, but if so, the sequence
|
||||||
|
* dictionary in the
|
||||||
|
* @return A mapped alignment.
|
||||||
|
*/
|
||||||
|
public static SAMRecord convertToRead(Alignment alignment, SAMRecord unmappedRead, SAMFileHeader newSAMHeader) {
|
||||||
|
SAMRecord read;
|
||||||
|
try {
|
||||||
|
read = (SAMRecord)unmappedRead.clone();
|
||||||
|
}
|
||||||
|
catch(CloneNotSupportedException ex) {
|
||||||
|
throw new ReviewedStingException("Unable to create aligned read from template.");
|
||||||
|
}
|
||||||
|
|
||||||
|
if(newSAMHeader != null)
|
||||||
|
read.setHeader(newSAMHeader);
|
||||||
|
|
||||||
|
// If we're realigning a previously aligned record, strip out the placement of the alignment.
|
||||||
|
read.setReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
|
||||||
|
read.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
|
||||||
|
read.setMateReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
|
||||||
|
read.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
|
||||||
|
|
||||||
|
if(alignment != null) {
|
||||||
|
read.setReadUnmappedFlag(false);
|
||||||
|
read.setReferenceIndex(alignment.getContigIndex());
|
||||||
|
read.setAlignmentStart((int)alignment.getAlignmentStart());
|
||||||
|
read.setReadNegativeStrandFlag(alignment.isNegativeStrand());
|
||||||
|
read.setMappingQuality(alignment.getMappingQuality());
|
||||||
|
read.setCigar(alignment.getCigar());
|
||||||
|
if(alignment.isNegativeStrand()) {
|
||||||
|
read.setReadBases(BaseUtils.simpleReverseComplement(read.getReadBases()));
|
||||||
|
read.setBaseQualities(Utils.reverse(read.getBaseQualities()));
|
||||||
|
}
|
||||||
|
read.setAttribute("NM",alignment.getEditDistance());
|
||||||
|
read.setAttribute("MD",alignment.getMismatchingPositions());
|
||||||
|
}
|
||||||
|
|
||||||
|
return read;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,157 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.alignment;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||||
|
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
||||||
|
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validates consistency of the aligner interface by taking reads already aligned by BWA in a BAM file, stripping them
|
||||||
|
* of their alignment data, realigning them, and making sure one of the best resulting realignments matches the original
|
||||||
|
* alignment from the input file.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
|
||||||
|
/**
|
||||||
|
* The supporting BWT index generated using BWT.
|
||||||
|
*/
|
||||||
|
@Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false)
|
||||||
|
private String prefix = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The instance used to generate alignments.
|
||||||
|
*/
|
||||||
|
private BWACAligner aligner = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create an aligner object. The aligner object will load and hold the BWT until close() is called.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void initialize() {
|
||||||
|
if(prefix == null)
|
||||||
|
prefix = getToolkit().getArguments().referenceFile.getAbsolutePath();
|
||||||
|
BWTFiles bwtFiles = new BWTFiles(prefix);
|
||||||
|
BWAConfiguration configuration = new BWAConfiguration();
|
||||||
|
aligner = new BWACAligner(bwtFiles,configuration);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Aligns a read to the given reference.
|
||||||
|
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
|
||||||
|
* @param read Read to align.
|
||||||
|
* @return Number of reads aligned by this map (aka 1).
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||||
|
//logger.info(String.format("examining read %s", read.getReadName()));
|
||||||
|
|
||||||
|
byte[] bases = read.getReadBases();
|
||||||
|
if(read.getReadNegativeStrandFlag()) bases = BaseUtils.simpleReverseComplement(bases);
|
||||||
|
|
||||||
|
boolean matches = true;
|
||||||
|
Iterable<Alignment[]> alignments = aligner.getAllAlignments(bases);
|
||||||
|
Iterator<Alignment[]> alignmentIterator = alignments.iterator();
|
||||||
|
|
||||||
|
if(!alignmentIterator.hasNext()) {
|
||||||
|
matches = read.getReadUnmappedFlag();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
Alignment[] alignmentsOfBestQuality = alignmentIterator.next();
|
||||||
|
for(Alignment alignment: alignmentsOfBestQuality) {
|
||||||
|
matches = (alignment.getContigIndex() == read.getReferenceIndex());
|
||||||
|
matches &= (alignment.getAlignmentStart() == read.getAlignmentStart());
|
||||||
|
matches &= (alignment.isNegativeStrand() == read.getReadNegativeStrandFlag());
|
||||||
|
matches &= (alignment.getCigar().equals(read.getCigar()));
|
||||||
|
matches &= (alignment.getMappingQuality() == read.getMappingQuality());
|
||||||
|
if(matches) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!matches) {
|
||||||
|
logger.error("Found mismatch!");
|
||||||
|
logger.error(String.format("Read %s:",read.getReadName()));
|
||||||
|
logger.error(String.format(" Contig index: %d",read.getReferenceIndex()));
|
||||||
|
logger.error(String.format(" Alignment start: %d", read.getAlignmentStart()));
|
||||||
|
logger.error(String.format(" Negative strand: %b", read.getReadNegativeStrandFlag()));
|
||||||
|
logger.error(String.format(" Cigar: %s%n", read.getCigarString()));
|
||||||
|
logger.error(String.format(" Mapping quality: %s%n", read.getMappingQuality()));
|
||||||
|
for(Alignment[] alignmentsByScore: alignments) {
|
||||||
|
for(int i = 0; i < alignmentsByScore.length; i++) {
|
||||||
|
logger.error(String.format("Alignment %d:",i));
|
||||||
|
logger.error(String.format(" Contig index: %d",alignmentsByScore[i].getContigIndex()));
|
||||||
|
logger.error(String.format(" Alignment start: %d", alignmentsByScore[i].getAlignmentStart()));
|
||||||
|
logger.error(String.format(" Negative strand: %b", alignmentsByScore[i].isNegativeStrand()));
|
||||||
|
logger.error(String.format(" Cigar: %s", alignmentsByScore[i].getCigarString()));
|
||||||
|
logger.error(String.format(" Mapping quality: %s%n", alignmentsByScore[i].getMappingQuality()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw new ReviewedStingException(String.format("Read %s mismatches!", read.getReadName()));
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initial value for reduce. In this case, validated reads will be counted.
|
||||||
|
* @return 0, indicating no reads yet validated.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Integer reduceInit() { return 0; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates the number of reads processed.
|
||||||
|
* @param value Number of reads processed by this map.
|
||||||
|
* @param sum Number of reads processed before this map.
|
||||||
|
* @return Number of reads processed up to and including this map.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
|
return value + sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cleanup.
|
||||||
|
* @param result Number of reads processed.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void onTraversalDone(Integer result) {
|
||||||
|
aligner.close();
|
||||||
|
super.onTraversalDone(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,134 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.alignment;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||||
|
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
||||||
|
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Aligns reads to a given reference using Heng Li's BWA aligner, presenting the resulting alignments in SAM or BAM format.
|
||||||
|
* Mimics the steps 'bwa aln' followed by 'bwa samse' using the BWA/C implementation.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
@WalkerName("Align")
|
||||||
|
public class AlignmentWalker extends ReadWalker<Integer,Integer> {
|
||||||
|
@Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " +
|
||||||
|
"generated by bwa index -d bwtsw. If unspecified, will default " +
|
||||||
|
"to the reference specified via the -R argument.",required=false)
|
||||||
|
private File targetReferenceFile = null;
|
||||||
|
|
||||||
|
@Output
|
||||||
|
private StingSAMFileWriter out = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The actual aligner.
|
||||||
|
*/
|
||||||
|
private BWACAligner aligner = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* New header to use, if desired.
|
||||||
|
*/
|
||||||
|
private SAMFileHeader header;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create an aligner object. The aligner object will load and hold the BWT until close() is called.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void initialize() {
|
||||||
|
if(targetReferenceFile == null)
|
||||||
|
targetReferenceFile = getToolkit().getArguments().referenceFile;
|
||||||
|
BWTFiles bwtFiles = new BWTFiles(targetReferenceFile.getAbsolutePath());
|
||||||
|
BWAConfiguration configuration = new BWAConfiguration();
|
||||||
|
aligner = new BWACAligner(bwtFiles,configuration);
|
||||||
|
|
||||||
|
// Take the header of the SAM file, tweak it by adding in the reference dictionary and specifying that the target file is unsorted.
|
||||||
|
header = getToolkit().getSAMFileHeader().clone();
|
||||||
|
SAMSequenceDictionary referenceDictionary =
|
||||||
|
ReferenceSequenceFileFactory.getReferenceSequenceFile(targetReferenceFile).getSequenceDictionary();
|
||||||
|
header.setSequenceDictionary(referenceDictionary);
|
||||||
|
header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
|
||||||
|
|
||||||
|
out.writeHeader(header);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Aligns a read to the given reference.
|
||||||
|
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
|
||||||
|
* @param read Read to align.
|
||||||
|
* @return Number of alignments found for this read.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||||
|
SAMRecord alignedRead = aligner.align(read,header);
|
||||||
|
out.addAlignment(alignedRead);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initial value for reduce. In this case, alignments will be counted.
|
||||||
|
* @return 0, indicating no alignments yet found.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Integer reduceInit() { return 0; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates the number of alignments found.
|
||||||
|
* @param value Number of alignments found by this map.
|
||||||
|
* @param sum Number of alignments found before this map.
|
||||||
|
* @return Number of alignments found up to and including this map.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
|
return value + sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cleanup.
|
||||||
|
* @param result Number of reads processed.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void onTraversalDone(Integer result) {
|
||||||
|
aligner.close();
|
||||||
|
super.onTraversalDone(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,128 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.alignment;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||||
|
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
||||||
|
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
|
|
||||||
|
import java.io.PrintStream;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.SortedMap;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Counts the number of best alignments as presented by BWA and outputs a histogram of number of placements vs. the
|
||||||
|
* frequency of that number of placements.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class CountBestAlignmentsWalker extends ReadWalker<Integer,Integer> {
|
||||||
|
/**
|
||||||
|
* The supporting BWT index generated using BWT.
|
||||||
|
*/
|
||||||
|
@Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false)
|
||||||
|
private String prefix = null;
|
||||||
|
|
||||||
|
@Output
|
||||||
|
private PrintStream out = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The actual aligner.
|
||||||
|
*/
|
||||||
|
private Aligner aligner = null;
|
||||||
|
|
||||||
|
private SortedMap<Integer,Integer> alignmentFrequencies = new TreeMap<Integer,Integer>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create an aligner object. The aligner object will load and hold the BWT until close() is called.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void initialize() {
|
||||||
|
if(prefix == null)
|
||||||
|
prefix = getToolkit().getArguments().referenceFile.getAbsolutePath();
|
||||||
|
BWTFiles bwtFiles = new BWTFiles(prefix);
|
||||||
|
BWAConfiguration configuration = new BWAConfiguration();
|
||||||
|
aligner = new BWACAligner(bwtFiles,configuration);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Aligns a read to the given reference.
|
||||||
|
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
|
||||||
|
* @param read Read to align.
|
||||||
|
* @return Number of alignments found for this read.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||||
|
Iterator<Alignment[]> alignmentIterator = aligner.getAllAlignments(read.getReadBases()).iterator();
|
||||||
|
if(alignmentIterator.hasNext()) {
|
||||||
|
int numAlignments = alignmentIterator.next().length;
|
||||||
|
if(alignmentFrequencies.containsKey(numAlignments))
|
||||||
|
alignmentFrequencies.put(numAlignments,alignmentFrequencies.get(numAlignments)+1);
|
||||||
|
else
|
||||||
|
alignmentFrequencies.put(numAlignments,1);
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initial value for reduce. In this case, validated reads will be counted.
|
||||||
|
* @return 0, indicating no reads yet validated.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Integer reduceInit() { return 0; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates the number of reads processed.
|
||||||
|
* @param value Number of reads processed by this map.
|
||||||
|
* @param sum Number of reads processed before this map.
|
||||||
|
* @return Number of reads processed up to and including this map.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
|
return value + sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cleanup.
|
||||||
|
* @param result Number of reads processed.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void onTraversalDone(Integer result) {
|
||||||
|
aligner.close();
|
||||||
|
for(Map.Entry<Integer,Integer> alignmentFrequency: alignmentFrequencies.entrySet())
|
||||||
|
out.printf("%d\t%d%n", alignmentFrequency.getKey(), alignmentFrequency.getValue());
|
||||||
|
super.onTraversalDone(result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,38 @@
|
||||||
|
package org.broadinstitute.sting.alignment.bwa;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.alignment.Aligner;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Align reads using BWA.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public abstract class BWAAligner implements Aligner {
|
||||||
|
/**
|
||||||
|
* The supporting files used by BWA.
|
||||||
|
*/
|
||||||
|
protected BWTFiles bwtFiles;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The current configuration for the BWA aligner.
|
||||||
|
*/
|
||||||
|
protected BWAConfiguration configuration;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new BWAAligner. Purpose of this call is to ensure that all BWA constructors accept the correct
|
||||||
|
* parameters.
|
||||||
|
* @param bwtFiles The many files representing BWTs persisted to disk.
|
||||||
|
* @param configuration Configuration parameters for the alignment.
|
||||||
|
*/
|
||||||
|
public BWAAligner(BWTFiles bwtFiles, BWAConfiguration configuration) {
|
||||||
|
this.bwtFiles = bwtFiles;
|
||||||
|
this.configuration = configuration;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update the configuration passed to the BWA aligner.
|
||||||
|
* @param configuration New configuration to set.
|
||||||
|
*/
|
||||||
|
public abstract void updateConfiguration(BWAConfiguration configuration);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,44 @@
|
||||||
|
package org.broadinstitute.sting.alignment.bwa;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Configuration for the BWA/C aligner.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class BWAConfiguration {
|
||||||
|
/**
|
||||||
|
* The maximum edit distance used by BWA.
|
||||||
|
*/
|
||||||
|
public Float maximumEditDistance = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How many gap opens are acceptable within this alignment?
|
||||||
|
*/
|
||||||
|
public Integer maximumGapOpens = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How many gap extensions are acceptable within this alignment?
|
||||||
|
*/
|
||||||
|
public Integer maximumGapExtensions = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Do we disallow indels within a certain range from the start / end?
|
||||||
|
*/
|
||||||
|
public Integer disallowIndelWithinRange = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* What is the scoring penalty for a mismatch?
|
||||||
|
*/
|
||||||
|
public Integer mismatchPenalty = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* What is the scoring penalty for a gap open?
|
||||||
|
*/
|
||||||
|
public Integer gapOpenPenalty = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* What is the scoring penalty for a gap extension?
|
||||||
|
*/
|
||||||
|
public Integer gapExtensionPenalty = null;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,234 @@
|
||||||
|
package org.broadinstitute.sting.alignment.bwa;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
import net.sf.samtools.util.StringUtil;
|
||||||
|
import org.broadinstitute.sting.alignment.reference.bwt.*;
|
||||||
|
import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Support files for BWT.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class BWTFiles {
|
||||||
|
/**
|
||||||
|
* ANN (?) file name.
|
||||||
|
*/
|
||||||
|
public final File annFile;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* AMB (?) file name.
|
||||||
|
*/
|
||||||
|
public final File ambFile;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Packed reference sequence file.
|
||||||
|
*/
|
||||||
|
public final File pacFile;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reverse of packed reference sequence file.
|
||||||
|
*/
|
||||||
|
public final File rpacFile;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Forward BWT file.
|
||||||
|
*/
|
||||||
|
public final File forwardBWTFile;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Forward suffix array file.
|
||||||
|
*/
|
||||||
|
public final File forwardSAFile;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reverse BWT file.
|
||||||
|
*/
|
||||||
|
public final File reverseBWTFile;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reverse suffix array file.
|
||||||
|
*/
|
||||||
|
public final File reverseSAFile;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Where these files autogenerated on the fly?
|
||||||
|
*/
|
||||||
|
public final boolean autogenerated;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new BWA configuration file using the given prefix.
|
||||||
|
* @param prefix Prefix to use when creating the configuration. Must not be null.
|
||||||
|
*/
|
||||||
|
public BWTFiles(String prefix) {
|
||||||
|
if(prefix == null)
|
||||||
|
throw new ReviewedStingException("Prefix must not be null.");
|
||||||
|
annFile = new File(prefix + ".ann");
|
||||||
|
ambFile = new File(prefix + ".amb");
|
||||||
|
pacFile = new File(prefix + ".pac");
|
||||||
|
rpacFile = new File(prefix + ".rpac");
|
||||||
|
forwardBWTFile = new File(prefix + ".bwt");
|
||||||
|
forwardSAFile = new File(prefix + ".sa");
|
||||||
|
reverseBWTFile = new File(prefix + ".rbwt");
|
||||||
|
reverseSAFile = new File(prefix + ".rsa");
|
||||||
|
autogenerated = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Hand-create a new BWTFiles object, specifying a unique file object for each type.
|
||||||
|
* @param annFile ANN (alternate dictionary) file.
|
||||||
|
* @param ambFile AMB (holes) files.
|
||||||
|
* @param pacFile Packed representation of the forward reference sequence.
|
||||||
|
* @param forwardBWTFile BWT representation of the forward reference sequence.
|
||||||
|
* @param forwardSAFile SA representation of the forward reference sequence.
|
||||||
|
* @param rpacFile Packed representation of the reversed reference sequence.
|
||||||
|
* @param reverseBWTFile BWT representation of the reversed reference sequence.
|
||||||
|
* @param reverseSAFile SA representation of the reversed reference sequence.
|
||||||
|
*/
|
||||||
|
private BWTFiles(File annFile,
|
||||||
|
File ambFile,
|
||||||
|
File pacFile,
|
||||||
|
File forwardBWTFile,
|
||||||
|
File forwardSAFile,
|
||||||
|
File rpacFile,
|
||||||
|
File reverseBWTFile,
|
||||||
|
File reverseSAFile) {
|
||||||
|
this.annFile = annFile;
|
||||||
|
this.ambFile = ambFile;
|
||||||
|
this.pacFile = pacFile;
|
||||||
|
this.forwardBWTFile = forwardBWTFile;
|
||||||
|
this.forwardSAFile = forwardSAFile;
|
||||||
|
this.rpacFile = rpacFile;
|
||||||
|
this.reverseBWTFile = reverseBWTFile;
|
||||||
|
this.reverseSAFile = reverseSAFile;
|
||||||
|
autogenerated = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close out this files object, in the process deleting any temporary filse
|
||||||
|
* that were created.
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
if(autogenerated) {
|
||||||
|
boolean success = true;
|
||||||
|
success = annFile.delete();
|
||||||
|
success &= ambFile.delete();
|
||||||
|
success &= pacFile.delete();
|
||||||
|
success &= forwardBWTFile.delete();
|
||||||
|
success &= forwardSAFile.delete();
|
||||||
|
success &= rpacFile.delete();
|
||||||
|
success &= reverseBWTFile.delete();
|
||||||
|
success &= reverseSAFile.delete();
|
||||||
|
|
||||||
|
if(!success)
|
||||||
|
throw new ReviewedStingException("Unable to clean up autogenerated representation");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new set of BWT files from the given reference sequence.
|
||||||
|
* @param referenceSequence Sequence from which to build metadata.
|
||||||
|
* @return A new object representing encoded representations of each sequence.
|
||||||
|
*/
|
||||||
|
public static BWTFiles createFromReferenceSequence(byte[] referenceSequence) {
|
||||||
|
byte[] normalizedReferenceSequence = new byte[referenceSequence.length];
|
||||||
|
System.arraycopy(referenceSequence,0,normalizedReferenceSequence,0,referenceSequence.length);
|
||||||
|
normalizeReferenceSequence(normalizedReferenceSequence);
|
||||||
|
|
||||||
|
File annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile;
|
||||||
|
try {
|
||||||
|
// Write the ann and amb for this reference sequence.
|
||||||
|
annFile = File.createTempFile("bwt",".ann");
|
||||||
|
ambFile = File.createTempFile("bwt",".amb");
|
||||||
|
|
||||||
|
SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
|
||||||
|
dictionary.addSequence(new SAMSequenceRecord("autogenerated",normalizedReferenceSequence.length));
|
||||||
|
|
||||||
|
ANNWriter annWriter = new ANNWriter(annFile);
|
||||||
|
annWriter.write(dictionary);
|
||||||
|
annWriter.close();
|
||||||
|
|
||||||
|
AMBWriter ambWriter = new AMBWriter(ambFile);
|
||||||
|
ambWriter.writeEmpty(dictionary);
|
||||||
|
ambWriter.close();
|
||||||
|
|
||||||
|
// Write the encoded files for the forward version of this reference sequence.
|
||||||
|
pacFile = File.createTempFile("bwt",".pac");
|
||||||
|
bwtFile = File.createTempFile("bwt",".bwt");
|
||||||
|
saFile = File.createTempFile("bwt",".sa");
|
||||||
|
|
||||||
|
writeEncodedReferenceSequence(normalizedReferenceSequence,pacFile,bwtFile,saFile);
|
||||||
|
|
||||||
|
// Write the encoded files for the reverse version of this reference sequence.
|
||||||
|
byte[] reverseReferenceSequence = Utils.reverse(normalizedReferenceSequence);
|
||||||
|
|
||||||
|
rpacFile = File.createTempFile("bwt",".rpac");
|
||||||
|
rbwtFile = File.createTempFile("bwt",".rbwt");
|
||||||
|
rsaFile = File.createTempFile("bwt",".rsa");
|
||||||
|
|
||||||
|
writeEncodedReferenceSequence(reverseReferenceSequence,rpacFile,rbwtFile,rsaFile);
|
||||||
|
}
|
||||||
|
catch(IOException ex) {
|
||||||
|
throw new ReviewedStingException("Unable to write autogenerated reference sequence to temporary files");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure that, at the very least, all temporary files are deleted on exit.
|
||||||
|
annFile.deleteOnExit();
|
||||||
|
ambFile.deleteOnExit();
|
||||||
|
pacFile.deleteOnExit();
|
||||||
|
bwtFile.deleteOnExit();
|
||||||
|
saFile.deleteOnExit();
|
||||||
|
rpacFile.deleteOnExit();
|
||||||
|
rbwtFile.deleteOnExit();
|
||||||
|
rsaFile.deleteOnExit();
|
||||||
|
|
||||||
|
return new BWTFiles(annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write the encoded form of the reference sequence. In the case of BWA, the encoded reference
|
||||||
|
* sequence is the reference itself in PAC format, the BWT, and the suffix array.
|
||||||
|
* @param referenceSequence The reference sequence to encode.
|
||||||
|
* @param pacFile Target for the PAC-encoded reference.
|
||||||
|
* @param bwtFile Target for the BWT representation of the reference.
|
||||||
|
* @param suffixArrayFile Target for the suffix array encoding of the reference.
|
||||||
|
* @throws java.io.IOException In case of issues writing to the file.
|
||||||
|
*/
|
||||||
|
private static void writeEncodedReferenceSequence(byte[] referenceSequence,
|
||||||
|
File pacFile,
|
||||||
|
File bwtFile,
|
||||||
|
File suffixArrayFile) throws IOException {
|
||||||
|
PackUtils.writeReferenceSequence(pacFile,referenceSequence);
|
||||||
|
|
||||||
|
BWT bwt = BWT.createFromReferenceSequence(referenceSequence);
|
||||||
|
BWTWriter bwtWriter = new BWTWriter(bwtFile);
|
||||||
|
bwtWriter.write(bwt);
|
||||||
|
bwtWriter.close();
|
||||||
|
|
||||||
|
SuffixArray suffixArray = SuffixArray.createFromReferenceSequence(referenceSequence);
|
||||||
|
SuffixArrayWriter suffixArrayWriter = new SuffixArrayWriter(suffixArrayFile);
|
||||||
|
suffixArrayWriter.write(suffixArray);
|
||||||
|
suffixArrayWriter.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert the given reference sequence into a form suitable for building into
|
||||||
|
* on-the-fly sequences.
|
||||||
|
* @param referenceSequence The reference sequence to normalize.
|
||||||
|
* @throws org.broadinstitute.sting.utils.exceptions.ReviewedStingException if normalized sequence cannot be generated.
|
||||||
|
*/
|
||||||
|
private static void normalizeReferenceSequence(byte[] referenceSequence) {
|
||||||
|
StringUtil.toUpperCase(referenceSequence);
|
||||||
|
for(byte base: referenceSequence) {
|
||||||
|
if(base != 'A' && base != 'C' && base != 'G' && base != 'T')
|
||||||
|
throw new ReviewedStingException(String.format("Base type %c is not supported when building references on-the-fly",(char)base));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,259 @@
|
||||||
|
package org.broadinstitute.sting.alignment.bwa.c;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.alignment.Alignment;
|
||||||
|
import org.broadinstitute.sting.alignment.bwa.BWAAligner;
|
||||||
|
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||||
|
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An aligner using the BWA/C implementation.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class BWACAligner extends BWAAligner {
|
||||||
|
static {
|
||||||
|
System.loadLibrary("bwa");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A pointer to the C++ object representing the BWA engine.
|
||||||
|
*/
|
||||||
|
private long thunkPointer = 0;
|
||||||
|
|
||||||
|
public BWACAligner(BWTFiles bwtFiles, BWAConfiguration configuration) {
|
||||||
|
super(bwtFiles,configuration);
|
||||||
|
if(thunkPointer != 0)
|
||||||
|
throw new ReviewedStingException("BWA/C attempting to reinitialize.");
|
||||||
|
|
||||||
|
if(!bwtFiles.annFile.exists()) throw new ReviewedStingException("ANN file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||||
|
if(!bwtFiles.ambFile.exists()) throw new ReviewedStingException("AMB file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||||
|
if(!bwtFiles.pacFile.exists()) throw new ReviewedStingException("PAC file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||||
|
if(!bwtFiles.forwardBWTFile.exists()) throw new ReviewedStingException("Forward BWT file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||||
|
if(!bwtFiles.forwardSAFile.exists()) throw new ReviewedStingException("Forward SA file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||||
|
if(!bwtFiles.reverseBWTFile.exists()) throw new ReviewedStingException("Reverse BWT file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||||
|
if(!bwtFiles.reverseSAFile.exists()) throw new ReviewedStingException("Reverse SA file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||||
|
|
||||||
|
thunkPointer = create(bwtFiles,configuration);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create an aligner object using an array of bytes as a reference.
|
||||||
|
* @param referenceSequence Reference sequence to encode ad-hoc.
|
||||||
|
* @param configuration Configuration for the given aligner.
|
||||||
|
*/
|
||||||
|
public BWACAligner(byte[] referenceSequence, BWAConfiguration configuration) {
|
||||||
|
this(BWTFiles.createFromReferenceSequence(referenceSequence),configuration);
|
||||||
|
// Now that the temporary files are created, the temporary files can be destroyed.
|
||||||
|
bwtFiles.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update the configuration passed to the BWA aligner.
|
||||||
|
* @param configuration New configuration to set.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void updateConfiguration(BWAConfiguration configuration) {
|
||||||
|
if(thunkPointer == 0)
|
||||||
|
throw new ReviewedStingException("BWA/C: attempting to update configuration of uninitialized aligner.");
|
||||||
|
updateConfiguration(thunkPointer,configuration);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close this instance of the BWA pointer and delete its resources.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void close() {
|
||||||
|
if(thunkPointer == 0)
|
||||||
|
throw new ReviewedStingException("BWA/C close attempted, but BWA/C is not properly initialized.");
|
||||||
|
destroy(thunkPointer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allow the aligner to choose one alignment randomly from the pile of best alignments.
|
||||||
|
* @param bases Bases to align.
|
||||||
|
* @return An align
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Alignment getBestAlignment(final byte[] bases) {
|
||||||
|
if(thunkPointer == 0)
|
||||||
|
throw new ReviewedStingException("BWA/C getBestAlignment attempted, but BWA/C is not properly initialized.");
|
||||||
|
return getBestAlignment(thunkPointer,bases);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the best aligned read, chosen randomly from the pile of best alignments.
|
||||||
|
* @param read Read to align.
|
||||||
|
* @param newHeader New header to apply to this SAM file. Can be null, but if so, read header must be valid.
|
||||||
|
* @return Read with injected alignment data.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public SAMRecord align(final SAMRecord read, final SAMFileHeader newHeader) {
|
||||||
|
if(bwtFiles.autogenerated)
|
||||||
|
throw new UnsupportedOperationException("Cannot create target alignment; source contig was generated ad-hoc and is not reliable");
|
||||||
|
return Alignment.convertToRead(getBestAlignment(read.getReadBases()),read,newHeader);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a iterator of alignments, batched by mapping quality.
|
||||||
|
* @param bases List of bases.
|
||||||
|
* @return Iterator to alignments.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Iterable<Alignment[]> getAllAlignments(final byte[] bases) {
|
||||||
|
final BWAPath[] paths = getPaths(bases);
|
||||||
|
return new Iterable<Alignment[]>() {
|
||||||
|
public Iterator<Alignment[]> iterator() {
|
||||||
|
return new Iterator<Alignment[]>() {
|
||||||
|
/**
|
||||||
|
* The last position accessed.
|
||||||
|
*/
|
||||||
|
private int position = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether all alignments have been seen based on the current position.
|
||||||
|
* @return True if any more alignments are pending. False otherwise.
|
||||||
|
*/
|
||||||
|
public boolean hasNext() { return position < paths.length; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the next cross-section of alignments, based on mapping quality.
|
||||||
|
* @return Array of the next set of alignments of a given mapping quality.
|
||||||
|
*/
|
||||||
|
public Alignment[] next() {
|
||||||
|
if(position >= paths.length)
|
||||||
|
throw new UnsupportedOperationException("Out of alignments to return.");
|
||||||
|
int score = paths[position].score;
|
||||||
|
int startingPosition = position;
|
||||||
|
while(position < paths.length && paths[position].score == score) position++;
|
||||||
|
return convertPathsToAlignments(bases,Arrays.copyOfRange(paths,startingPosition,position));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unsupported.
|
||||||
|
*/
|
||||||
|
public void remove() { throw new UnsupportedOperationException("Cannot remove from an alignment iterator"); }
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a iterator of aligned reads, batched by mapping quality.
|
||||||
|
* @param read Read to align.
|
||||||
|
* @param newHeader Optional new header to use when aligning the read. If present, it must be null.
|
||||||
|
* @return Iterator to alignments.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Iterable<SAMRecord[]> alignAll(final SAMRecord read, final SAMFileHeader newHeader) {
|
||||||
|
if(bwtFiles.autogenerated)
|
||||||
|
throw new UnsupportedOperationException("Cannot create target alignment; source contig was generated ad-hoc and is not reliable");
|
||||||
|
final Iterable<Alignment[]> alignments = getAllAlignments(read.getReadBases());
|
||||||
|
return new Iterable<SAMRecord[]>() {
|
||||||
|
public Iterator<SAMRecord[]> iterator() {
|
||||||
|
final Iterator<Alignment[]> alignmentIterator = alignments.iterator();
|
||||||
|
return new Iterator<SAMRecord[]>() {
|
||||||
|
/**
|
||||||
|
* Whether all alignments have been seen based on the current position.
|
||||||
|
* @return True if any more alignments are pending. False otherwise.
|
||||||
|
*/
|
||||||
|
public boolean hasNext() { return alignmentIterator.hasNext(); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the next cross-section of alignments, based on mapping quality.
|
||||||
|
* @return Array of the next set of alignments of a given mapping quality.
|
||||||
|
*/
|
||||||
|
public SAMRecord[] next() {
|
||||||
|
Alignment[] alignmentsOfQuality = alignmentIterator.next();
|
||||||
|
SAMRecord[] reads = new SAMRecord[alignmentsOfQuality.length];
|
||||||
|
for(int i = 0; i < alignmentsOfQuality.length; i++) {
|
||||||
|
reads[i] = Alignment.convertToRead(alignmentsOfQuality[i],read,newHeader);
|
||||||
|
}
|
||||||
|
return reads;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unsupported.
|
||||||
|
*/
|
||||||
|
public void remove() { throw new UnsupportedOperationException("Cannot remove from an alignment iterator"); }
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the paths associated with the given base string.
|
||||||
|
* @param bases List of bases.
|
||||||
|
* @return A set of paths through the BWA.
|
||||||
|
*/
|
||||||
|
public BWAPath[] getPaths(byte[] bases) {
|
||||||
|
if(thunkPointer == 0)
|
||||||
|
throw new ReviewedStingException("BWA/C getPaths attempted, but BWA/C is not properly initialized.");
|
||||||
|
return getPaths(thunkPointer,bases);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a pointer to the BWA/C thunk.
|
||||||
|
* @param files BWT source files.
|
||||||
|
* @param configuration Configuration of the aligner.
|
||||||
|
* @return Pointer to the BWA/C thunk.
|
||||||
|
*/
|
||||||
|
protected native long create(BWTFiles files, BWAConfiguration configuration);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update the configuration passed to the BWA aligner. For internal use only.
|
||||||
|
* @param thunkPointer pointer to BWA object.
|
||||||
|
* @param configuration New configuration to set.
|
||||||
|
*/
|
||||||
|
protected native void updateConfiguration(long thunkPointer, BWAConfiguration configuration);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Destroy the BWA/C thunk.
|
||||||
|
* @param thunkPointer Pointer to the allocated thunk.
|
||||||
|
*/
|
||||||
|
protected native void destroy(long thunkPointer);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Do the extra steps involved in converting a local alignment to a global alignment.
|
||||||
|
* @param bases ASCII representation of byte array.
|
||||||
|
* @param paths Paths through the current BWT.
|
||||||
|
* @return A list of alignments.
|
||||||
|
*/
|
||||||
|
protected Alignment[] convertPathsToAlignments(byte[] bases, BWAPath[] paths) {
|
||||||
|
if(thunkPointer == 0)
|
||||||
|
throw new ReviewedStingException("BWA/C convertPathsToAlignments attempted, but BWA/C is not properly initialized.");
|
||||||
|
return convertPathsToAlignments(thunkPointer,bases,paths);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Caller to the path generation functionality within BWA/C. Call this method's getPaths() wrapper (above) instead.
|
||||||
|
* @param thunkPointer pointer to the C++ object managing BWA/C.
|
||||||
|
* @param bases ASCII representation of byte array.
|
||||||
|
* @return A list of paths through the specified BWT.
|
||||||
|
*/
|
||||||
|
protected native BWAPath[] getPaths(long thunkPointer, byte[] bases);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Do the extra steps involved in converting a local alignment to a global alignment.
|
||||||
|
* Call this method's convertPathsToAlignments() wrapper (above) instead.
|
||||||
|
* @param thunkPointer pointer to the C++ object managing BWA/C.
|
||||||
|
* @param bases ASCII representation of byte array.
|
||||||
|
* @param paths Paths through the current BWT.
|
||||||
|
* @return A list of alignments.
|
||||||
|
*/
|
||||||
|
protected native Alignment[] convertPathsToAlignments(long thunkPointer, byte[] bases, BWAPath[] paths);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the best alignment from BWA/C, randomly selected from all best-aligned reads.
|
||||||
|
* @param thunkPointer Pointer to BWA thunk.
|
||||||
|
* @param bases bases to align.
|
||||||
|
* @return The best alignment from BWA/C.
|
||||||
|
*/
|
||||||
|
protected native Alignment getBestAlignment(long thunkPointer, byte[] bases);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,101 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.alignment.bwa.c;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Models a BWA path.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class BWAPath {
|
||||||
|
/**
|
||||||
|
* Number of mismatches encountered along this path.
|
||||||
|
*/
|
||||||
|
public final int numMismatches;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Number of gap opens encountered along this path.
|
||||||
|
*/
|
||||||
|
public final int numGapOpens;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Number of gap extensions along this path.
|
||||||
|
*/
|
||||||
|
public final int numGapExtensions;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether this alignment was found on the positive or negative strand.
|
||||||
|
*/
|
||||||
|
public final boolean negativeStrand;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Starting coordinate in the BWT.
|
||||||
|
*/
|
||||||
|
public final long k;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ending coordinate in the BWT.
|
||||||
|
*/
|
||||||
|
public final long l;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The score of this path.
|
||||||
|
*/
|
||||||
|
public final int score;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The number of best alignments seen along this path.
|
||||||
|
*/
|
||||||
|
public final int bestCount;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The number of second best alignments seen along this path.
|
||||||
|
*/
|
||||||
|
public final int secondBestCount;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new path with the given attributes.
|
||||||
|
* @param numMismatches Number of mismatches along path.
|
||||||
|
* @param numGapOpens Number of gap opens along path.
|
||||||
|
* @param numGapExtensions Number of gap extensions along path.
|
||||||
|
* @param k Index to first coordinate within BWT.
|
||||||
|
* @param l Index to last coordinate within BWT.
|
||||||
|
* @param score Score of this alignment. Not the mapping quality.
|
||||||
|
*/
|
||||||
|
public BWAPath(int numMismatches, int numGapOpens, int numGapExtensions, boolean negativeStrand, long k, long l, int score, int bestCount, int secondBestCount) {
|
||||||
|
this.numMismatches = numMismatches;
|
||||||
|
this.numGapOpens = numGapOpens;
|
||||||
|
this.numGapExtensions = numGapExtensions;
|
||||||
|
this.negativeStrand = negativeStrand;
|
||||||
|
this.k = k;
|
||||||
|
this.l = l;
|
||||||
|
this.score = score;
|
||||||
|
this.bestCount = bestCount;
|
||||||
|
this.secondBestCount = secondBestCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,164 @@
|
||||||
|
package org.broadinstitute.sting.alignment.bwa.java;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
import net.sf.samtools.*;
|
||||||
|
import org.broadinstitute.sting.alignment.Aligner;
|
||||||
|
import org.broadinstitute.sting.alignment.Alignment;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A test harness to ensure that the perfect aligner works.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class AlignerTestHarness {
|
||||||
|
public static void main( String argv[] ) throws FileNotFoundException {
|
||||||
|
if( argv.length != 6 ) {
|
||||||
|
System.out.println("PerfectAlignerTestHarness <fasta> <bwt> <rbwt> <sa> <rsa> <bam>");
|
||||||
|
System.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
File referenceFile = new File(argv[0]);
|
||||||
|
File bwtFile = new File(argv[1]);
|
||||||
|
File rbwtFile = new File(argv[2]);
|
||||||
|
File suffixArrayFile = new File(argv[3]);
|
||||||
|
File reverseSuffixArrayFile = new File(argv[4]);
|
||||||
|
File bamFile = new File(argv[5]);
|
||||||
|
|
||||||
|
align(referenceFile,bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile,bamFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void align(File referenceFile, File bwtFile, File rbwtFile, File suffixArrayFile, File reverseSuffixArrayFile, File bamFile) throws FileNotFoundException {
|
||||||
|
Aligner aligner = new BWAJavaAligner(bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile);
|
||||||
|
int count = 0;
|
||||||
|
|
||||||
|
SAMFileReader reader = new SAMFileReader(bamFile);
|
||||||
|
reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
||||||
|
|
||||||
|
int mismatches = 0;
|
||||||
|
int failures = 0;
|
||||||
|
|
||||||
|
for(SAMRecord read: reader) {
|
||||||
|
count++;
|
||||||
|
if( count > 200000 ) break;
|
||||||
|
//if( count < 366000 ) continue;
|
||||||
|
//if( count > 2 ) break;
|
||||||
|
//if( !read.getReadName().endsWith("SL-XBC:1:82:506:404#0") )
|
||||||
|
// continue;
|
||||||
|
//if( !read.getReadName().endsWith("SL-XBC:1:36:30:1926#0") )
|
||||||
|
// continue;
|
||||||
|
//if( !read.getReadName().endsWith("SL-XBC:1:60:1342:1340#0") )
|
||||||
|
// continue;
|
||||||
|
|
||||||
|
SAMRecord alignmentCleaned = null;
|
||||||
|
try {
|
||||||
|
alignmentCleaned = (SAMRecord)read.clone();
|
||||||
|
}
|
||||||
|
catch( CloneNotSupportedException ex ) {
|
||||||
|
throw new ReviewedStingException("SAMRecord clone not supported", ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( alignmentCleaned.getReadNegativeStrandFlag() )
|
||||||
|
alignmentCleaned.setReadBases(BaseUtils.simpleReverseComplement(alignmentCleaned.getReadBases()));
|
||||||
|
|
||||||
|
alignmentCleaned.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
|
||||||
|
alignmentCleaned.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
|
||||||
|
alignmentCleaned.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY);
|
||||||
|
alignmentCleaned.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR);
|
||||||
|
|
||||||
|
// Clear everything except flags pertaining to pairing and set 'unmapped' status to true.
|
||||||
|
alignmentCleaned.setFlags(alignmentCleaned.getFlags() & 0x00A1 | 0x000C);
|
||||||
|
|
||||||
|
Iterable<Alignment[]> alignments = aligner.getAllAlignments(alignmentCleaned.getReadBases());
|
||||||
|
if(!alignments.iterator().hasNext() ) {
|
||||||
|
//throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count));
|
||||||
|
System.out.printf("Unable to align read %s to reference; count = %d%n",read.getReadName(),count);
|
||||||
|
failures++;
|
||||||
|
}
|
||||||
|
|
||||||
|
Alignment foundAlignment = null;
|
||||||
|
for(Alignment[] alignmentsOfQuality: alignments) {
|
||||||
|
for(Alignment alignment: alignmentsOfQuality) {
|
||||||
|
if( read.getReadNegativeStrandFlag() != alignment.isNegativeStrand() )
|
||||||
|
continue;
|
||||||
|
if( read.getAlignmentStart() != alignment.getAlignmentStart() )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
foundAlignment = alignment;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if( foundAlignment != null ) {
|
||||||
|
//System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), foundAlignment.getAlignmentStart(), foundAlignment.getMismatches(), foundAlignment.getGapOpens(), foundAlignment.getGapExtensions());
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
System.out.printf("Error aligning read %s%n", read.getReadName());
|
||||||
|
|
||||||
|
mismatches++;
|
||||||
|
|
||||||
|
IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile);
|
||||||
|
|
||||||
|
System.out.printf("read = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(read.getReadString(),read.getCigar(),CigarOperator.DELETION),
|
||||||
|
read.getAlignmentStart(),
|
||||||
|
read.getReadNegativeStrandFlag());
|
||||||
|
int numDeletions = numDeletionsInCigar(read.getCigar());
|
||||||
|
String expectedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),read.getAlignmentStart(),read.getAlignmentStart()+read.getReadLength()+numDeletions-1).getBases());
|
||||||
|
System.out.printf("expected ref = %s%n", formatBasesBasedOnCigar(expectedRef,read.getCigar(),CigarOperator.INSERTION));
|
||||||
|
|
||||||
|
for(Alignment[] alignmentsOfQuality: alignments) {
|
||||||
|
for(Alignment alignment: alignmentsOfQuality) {
|
||||||
|
System.out.println();
|
||||||
|
|
||||||
|
Cigar cigar = ((BWAAlignment)alignment).getCigar();
|
||||||
|
|
||||||
|
System.out.printf("read = %s%n", formatBasesBasedOnCigar(read.getReadString(),cigar,CigarOperator.DELETION));
|
||||||
|
|
||||||
|
int deletionCount = ((BWAAlignment)alignment).getNumberOfBasesMatchingState(AlignmentState.DELETION);
|
||||||
|
String alignedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),alignment.getAlignmentStart(),alignment.getAlignmentStart()+read.getReadLength()+deletionCount-1).getBases());
|
||||||
|
System.out.printf("actual ref = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(alignedRef,cigar,CigarOperator.INSERTION),
|
||||||
|
alignment.getAlignmentStart(),
|
||||||
|
alignment.isNegativeStrand());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//throw new StingException(String.format("Read %s was placed at incorrect location; count = %d%n",read.getReadName(),count));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if( count % 1000 == 0 )
|
||||||
|
System.out.printf("%d reads examined.%n",count);
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.printf("%d reads examined; %d mismatches; %d failures.%n",count,mismatches,failures);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String formatBasesBasedOnCigar( String bases, Cigar cigar, CigarOperator toBlank ) {
|
||||||
|
StringBuilder formatted = new StringBuilder();
|
||||||
|
int readIndex = 0;
|
||||||
|
for(CigarElement cigarElement: cigar.getCigarElements()) {
|
||||||
|
if(cigarElement.getOperator() == toBlank) {
|
||||||
|
int number = cigarElement.getLength();
|
||||||
|
while( number-- > 0 ) formatted.append(' ');
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
int number = cigarElement.getLength();
|
||||||
|
while( number-- > 0 ) formatted.append(bases.charAt(readIndex++));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return formatted.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int numDeletionsInCigar( Cigar cigar ) {
|
||||||
|
int numDeletions = 0;
|
||||||
|
for(CigarElement cigarElement: cigar.getCigarElements()) {
|
||||||
|
if(cigarElement.getOperator() == CigarOperator.DELETION)
|
||||||
|
numDeletions += cigarElement.getLength();
|
||||||
|
}
|
||||||
|
return numDeletions;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,150 @@
|
||||||
|
package org.broadinstitute.sting.alignment.bwa.java;
|
||||||
|
|
||||||
|
import net.sf.samtools.Cigar;
|
||||||
|
import net.sf.samtools.CigarElement;
|
||||||
|
import net.sf.samtools.CigarOperator;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.util.ArrayDeque;
|
||||||
|
import java.util.Deque;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents a sequence of matches.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class AlignmentMatchSequence implements Cloneable {
|
||||||
|
/**
|
||||||
|
* Stores the particular match entries in the order they occur.
|
||||||
|
*/
|
||||||
|
private Deque<AlignmentMatchSequenceEntry> entries = new ArrayDeque<AlignmentMatchSequenceEntry>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clone the given match sequence.
|
||||||
|
* @return A deep copy of the current match sequence.
|
||||||
|
*/
|
||||||
|
public AlignmentMatchSequence clone() {
|
||||||
|
AlignmentMatchSequence copy = null;
|
||||||
|
try {
|
||||||
|
copy = (AlignmentMatchSequence)super.clone();
|
||||||
|
}
|
||||||
|
catch( CloneNotSupportedException ex ) {
|
||||||
|
throw new ReviewedStingException("Unable to clone AlignmentMatchSequence.");
|
||||||
|
}
|
||||||
|
|
||||||
|
copy.entries = new ArrayDeque<AlignmentMatchSequenceEntry>();
|
||||||
|
for( AlignmentMatchSequenceEntry entry: entries )
|
||||||
|
copy.entries.add(entry.clone());
|
||||||
|
|
||||||
|
return copy;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Cigar convertToCigar(boolean negativeStrand) {
|
||||||
|
Cigar cigar = new Cigar();
|
||||||
|
Iterator<AlignmentMatchSequenceEntry> iterator = negativeStrand ? entries.descendingIterator() : entries.iterator();
|
||||||
|
while( iterator.hasNext() ) {
|
||||||
|
AlignmentMatchSequenceEntry entry = iterator.next();
|
||||||
|
CigarOperator operator;
|
||||||
|
switch( entry.getAlignmentState() ) {
|
||||||
|
case MATCH_MISMATCH: operator = CigarOperator.MATCH_OR_MISMATCH; break;
|
||||||
|
case INSERTION: operator = CigarOperator.INSERTION; break;
|
||||||
|
case DELETION: operator = CigarOperator.DELETION; break;
|
||||||
|
default: throw new ReviewedStingException("convertToCigar: cannot process state: " + entry.getAlignmentState());
|
||||||
|
}
|
||||||
|
cigar.add( new CigarElement(entry.count,operator) );
|
||||||
|
}
|
||||||
|
return cigar;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* All a new alignment of the given state.
|
||||||
|
* @param state State to add to the sequence.
|
||||||
|
*/
|
||||||
|
public void addNext( AlignmentState state ) {
|
||||||
|
AlignmentMatchSequenceEntry last = entries.peekLast();
|
||||||
|
// If the last entry is the same as this one, increment it. Otherwise, add a new entry.
|
||||||
|
if( last != null && last.alignmentState == state )
|
||||||
|
last.increment();
|
||||||
|
else
|
||||||
|
entries.add(new AlignmentMatchSequenceEntry(state));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the current state of this alignment (what's the state of the last base?)
|
||||||
|
* @return State of the most recently aligned base.
|
||||||
|
*/
|
||||||
|
public AlignmentState getCurrentState() {
|
||||||
|
if( entries.size() == 0 )
|
||||||
|
return AlignmentState.MATCH_MISMATCH;
|
||||||
|
return entries.peekLast().getAlignmentState();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How many bases in the read match the given state.
|
||||||
|
* @param state State to test.
|
||||||
|
* @return number of bases which match that state.
|
||||||
|
*/
|
||||||
|
public int getNumberOfBasesMatchingState(AlignmentState state) {
|
||||||
|
int matches = 0;
|
||||||
|
for( AlignmentMatchSequenceEntry entry: entries ) {
|
||||||
|
if( entry.getAlignmentState() == state )
|
||||||
|
matches += entry.count;
|
||||||
|
}
|
||||||
|
return matches;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stores an individual match sequence entry.
|
||||||
|
*/
|
||||||
|
private class AlignmentMatchSequenceEntry implements Cloneable {
|
||||||
|
/**
|
||||||
|
* The state of the alignment throughout a given point in the sequence.
|
||||||
|
*/
|
||||||
|
private final AlignmentState alignmentState;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The number of bases having this particular state.
|
||||||
|
*/
|
||||||
|
private int count;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new sequence entry with the given state.
|
||||||
|
* @param alignmentState The state that this sequence should contain.
|
||||||
|
*/
|
||||||
|
AlignmentMatchSequenceEntry( AlignmentState alignmentState ) {
|
||||||
|
this.alignmentState = alignmentState;
|
||||||
|
this.count = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clone the given match sequence entry.
|
||||||
|
* @return A deep copy of the current match sequence entry.
|
||||||
|
*/
|
||||||
|
public AlignmentMatchSequenceEntry clone() {
|
||||||
|
try {
|
||||||
|
return (AlignmentMatchSequenceEntry)super.clone();
|
||||||
|
}
|
||||||
|
catch( CloneNotSupportedException ex ) {
|
||||||
|
throw new ReviewedStingException("Unable to clone AlignmentMatchSequenceEntry.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves the current state of the alignment.
|
||||||
|
* @return The state of the current sequence.
|
||||||
|
*/
|
||||||
|
AlignmentState getAlignmentState() {
|
||||||
|
return alignmentState;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Increment the count of alignments having this particular state.
|
||||||
|
*/
|
||||||
|
void increment() {
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -0,0 +1,13 @@
|
||||||
|
package org.broadinstitute.sting.alignment.bwa.java;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The state of a given base in the alignment.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public enum AlignmentState {
|
||||||
|
MATCH_MISMATCH,
|
||||||
|
INSERTION,
|
||||||
|
DELETION
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,190 @@
|
||||||
|
package org.broadinstitute.sting.alignment.bwa.java;
|
||||||
|
|
||||||
|
import net.sf.samtools.Cigar;
|
||||||
|
import org.broadinstitute.sting.alignment.Alignment;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An alignment object to be used incrementally as the BWA aligner
|
||||||
|
* inspects the read.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class BWAAlignment extends Alignment implements Cloneable {
|
||||||
|
/**
|
||||||
|
* Track the number of alignments that have been created.
|
||||||
|
*/
|
||||||
|
private static long numCreated;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Which number alignment is this?
|
||||||
|
*/
|
||||||
|
private long creationNumber;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The aligner performing the alignments.
|
||||||
|
*/
|
||||||
|
protected BWAJavaAligner aligner;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The sequence of matches/mismatches/insertions/deletions.
|
||||||
|
*/
|
||||||
|
private AlignmentMatchSequence alignmentMatchSequence = new AlignmentMatchSequence();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Working variable. How many bases have been matched at this point.
|
||||||
|
*/
|
||||||
|
protected int position;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Working variable. How many mismatches have been encountered at this point.
|
||||||
|
*/
|
||||||
|
private int mismatches;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Number of gap opens in alignment.
|
||||||
|
*/
|
||||||
|
private int gapOpens;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Number of gap extensions in alignment.
|
||||||
|
*/
|
||||||
|
private int gapExtensions;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Working variable. The lower bound of the alignment within the BWT.
|
||||||
|
*/
|
||||||
|
protected long loBound;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Working variable. The upper bound of the alignment within the BWT.
|
||||||
|
*/
|
||||||
|
protected long hiBound;
|
||||||
|
|
||||||
|
protected void setAlignmentStart(long position) {
|
||||||
|
this.alignmentStart = position;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void setNegativeStrand(boolean negativeStrand) {
|
||||||
|
this.negativeStrand = negativeStrand;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cache the score.
|
||||||
|
*/
|
||||||
|
private int score;
|
||||||
|
|
||||||
|
public Cigar getCigar() {
|
||||||
|
return alignmentMatchSequence.convertToCigar(isNegativeStrand());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the current state of this alignment (state of the last base viewed)..
|
||||||
|
* @return Current state of the alignment.
|
||||||
|
*/
|
||||||
|
public AlignmentState getCurrentState() {
|
||||||
|
return alignmentMatchSequence.getCurrentState();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds the given state to the current alignment.
|
||||||
|
* @param state State to add to the given alignment.
|
||||||
|
*/
|
||||||
|
public void addState( AlignmentState state ) {
|
||||||
|
alignmentMatchSequence.addNext(state);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the BWA score of this alignment.
|
||||||
|
* @return BWA-style scores. 0 is best.
|
||||||
|
*/
|
||||||
|
public int getScore() {
|
||||||
|
return score;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getMismatches() { return mismatches; }
|
||||||
|
public int getGapOpens() { return gapOpens; }
|
||||||
|
public int getGapExtensions() { return gapExtensions; }
|
||||||
|
|
||||||
|
public void incrementMismatches() {
|
||||||
|
this.mismatches++;
|
||||||
|
updateScore();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrementGapOpens() {
|
||||||
|
this.gapOpens++;
|
||||||
|
updateScore();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrementGapExtensions() {
|
||||||
|
this.gapExtensions++;
|
||||||
|
updateScore();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Updates the score based on new information about matches / mismatches.
|
||||||
|
*/
|
||||||
|
private void updateScore() {
|
||||||
|
score = mismatches*aligner.MISMATCH_PENALTY + gapOpens*aligner.GAP_OPEN_PENALTY + gapExtensions*aligner.GAP_EXTENSION_PENALTY;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new alignment with the given parent aligner.
|
||||||
|
* @param aligner Aligner being used.
|
||||||
|
*/
|
||||||
|
public BWAAlignment( BWAJavaAligner aligner ) {
|
||||||
|
this.aligner = aligner;
|
||||||
|
this.creationNumber = numCreated++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clone the alignment.
|
||||||
|
* @return New instance of the alignment.
|
||||||
|
*/
|
||||||
|
public BWAAlignment clone() {
|
||||||
|
BWAAlignment newAlignment = null;
|
||||||
|
try {
|
||||||
|
newAlignment = (BWAAlignment)super.clone();
|
||||||
|
}
|
||||||
|
catch( CloneNotSupportedException ex ) {
|
||||||
|
throw new ReviewedStingException("Unable to clone BWAAlignment.");
|
||||||
|
}
|
||||||
|
newAlignment.creationNumber = numCreated++;
|
||||||
|
newAlignment.alignmentMatchSequence = alignmentMatchSequence.clone();
|
||||||
|
|
||||||
|
return newAlignment;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How many bases in the read match the given state.
|
||||||
|
* @param state State to test.
|
||||||
|
* @return number of bases which match that state.
|
||||||
|
*/
|
||||||
|
public int getNumberOfBasesMatchingState(AlignmentState state) {
|
||||||
|
return alignmentMatchSequence.getNumberOfBasesMatchingState(state);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compare this alignment to another alignment.
|
||||||
|
* @param rhs Other alignment to which to compare.
|
||||||
|
* @return < 0 if this < other, == 0 if this == other, > 0 if this > other
|
||||||
|
*/
|
||||||
|
public int compareTo(Alignment rhs) {
|
||||||
|
BWAAlignment other = (BWAAlignment)rhs;
|
||||||
|
|
||||||
|
// If the scores are different, disambiguate using the score.
|
||||||
|
if(score != other.score)
|
||||||
|
return score > other.score ? 1 : -1;
|
||||||
|
|
||||||
|
// Otherwise, use the order in which the elements were created.
|
||||||
|
if(creationNumber != other.creationNumber)
|
||||||
|
return creationNumber > other.creationNumber ? -1 : 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("position: %d, strand: %b, state: %s, mismatches: %d, gap opens: %d, gap extensions: %d, loBound: %d, hiBound: %d, score: %d, creationNumber: %d", position, negativeStrand, alignmentMatchSequence.getCurrentState(), mismatches, gapOpens, gapExtensions, loBound, hiBound, getScore(), creationNumber);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,393 @@
|
||||||
|
package org.broadinstitute.sting.alignment.bwa.java;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.alignment.Alignment;
|
||||||
|
import org.broadinstitute.sting.alignment.bwa.BWAAligner;
|
||||||
|
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||||
|
import org.broadinstitute.sting.alignment.reference.bwt.*;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.PriorityQueue;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create imperfect alignments from the read to the genome represented by the given BWT / suffix array.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class BWAJavaAligner extends BWAAligner {
|
||||||
|
/**
|
||||||
|
* BWT in the forward direction.
|
||||||
|
*/
|
||||||
|
private BWT forwardBWT;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* BWT in the reverse direction.
|
||||||
|
*/
|
||||||
|
private BWT reverseBWT;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Suffix array in the forward direction.
|
||||||
|
*/
|
||||||
|
private SuffixArray forwardSuffixArray;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Suffix array in the reverse direction.
|
||||||
|
*/
|
||||||
|
private SuffixArray reverseSuffixArray;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Maximum edit distance (-n option from original BWA).
|
||||||
|
*/
|
||||||
|
private final int MAXIMUM_EDIT_DISTANCE = 4;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Maximum number of gap opens (-o option from original BWA).
|
||||||
|
*/
|
||||||
|
private final int MAXIMUM_GAP_OPENS = 1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Maximum number of gap extensions (-e option from original BWA).
|
||||||
|
*/
|
||||||
|
private final int MAXIMUM_GAP_EXTENSIONS = 6;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Penalty for straight mismatches (-M option from original BWA).
|
||||||
|
*/
|
||||||
|
public final int MISMATCH_PENALTY = 3;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Penalty for gap opens (-O option from original BWA).
|
||||||
|
*/
|
||||||
|
public final int GAP_OPEN_PENALTY = 11;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Penalty for gap extensions (-E option from original BWA).
|
||||||
|
*/
|
||||||
|
public final int GAP_EXTENSION_PENALTY = 4;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Skip the ends of indels.
|
||||||
|
*/
|
||||||
|
public final int INDEL_END_SKIP = 5;
|
||||||
|
|
||||||
|
public BWAJavaAligner( File forwardBWTFile, File reverseBWTFile, File forwardSuffixArrayFile, File reverseSuffixArrayFile ) {
|
||||||
|
super(null,null);
|
||||||
|
forwardBWT = new BWTReader(forwardBWTFile).read();
|
||||||
|
reverseBWT = new BWTReader(reverseBWTFile).read();
|
||||||
|
forwardSuffixArray = new SuffixArrayReader(forwardSuffixArrayFile,forwardBWT).read();
|
||||||
|
reverseSuffixArray = new SuffixArrayReader(reverseSuffixArrayFile,reverseBWT).read();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close this instance of the BWA pointer and delete its resources.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void close() {
|
||||||
|
throw new UnsupportedOperationException("BWA aligner can't currently be closed.");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update the current parameters of this aligner.
|
||||||
|
* @param configuration New configuration to set.
|
||||||
|
*/
|
||||||
|
public void updateConfiguration(BWAConfiguration configuration) {
|
||||||
|
throw new UnsupportedOperationException("Configuration of the BWA aligner can't currently be changed.");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allow the aligner to choose one alignment randomly from the pile of best alignments.
|
||||||
|
* @param bases Bases to align.
|
||||||
|
* @return An align
|
||||||
|
*/
|
||||||
|
public Alignment getBestAlignment(final byte[] bases) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Align the read to the reference.
|
||||||
|
* @param read Read to align.
|
||||||
|
* @param header Optional header to drop in place.
|
||||||
|
* @return A list of the alignments.
|
||||||
|
*/
|
||||||
|
public SAMRecord align(final SAMRecord read, final SAMFileHeader header) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a iterator of alignments, batched by mapping quality.
|
||||||
|
* @param bases List of bases.
|
||||||
|
* @return Iterator to alignments.
|
||||||
|
*/
|
||||||
|
public Iterable<Alignment[]> getAllAlignments(final byte[] bases) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a iterator of aligned reads, batched by mapping quality.
|
||||||
|
* @param read Read to align.
|
||||||
|
* @param newHeader Optional new header to use when aligning the read. If present, it must be null.
|
||||||
|
* @return Iterator to alignments.
|
||||||
|
*/
|
||||||
|
public Iterable<SAMRecord[]> alignAll(final SAMRecord read, final SAMFileHeader newHeader) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); }
|
||||||
|
|
||||||
|
|
||||||
|
public List<Alignment> align( SAMRecord read ) {
|
||||||
|
List<Alignment> successfulMatches = new ArrayList<Alignment>();
|
||||||
|
|
||||||
|
Byte[] uncomplementedBases = normalizeBases(read.getReadBases());
|
||||||
|
Byte[] complementedBases = normalizeBases(Utils.reverse(BaseUtils.simpleReverseComplement(read.getReadBases())));
|
||||||
|
|
||||||
|
List<LowerBound> forwardLowerBounds = LowerBound.create(uncomplementedBases,forwardBWT);
|
||||||
|
List<LowerBound> reverseLowerBounds = LowerBound.create(complementedBases,reverseBWT);
|
||||||
|
|
||||||
|
// Seed the best score with any score that won't overflow on comparison.
|
||||||
|
int bestScore = Integer.MAX_VALUE - MISMATCH_PENALTY;
|
||||||
|
int bestDiff = MAXIMUM_EDIT_DISTANCE+1;
|
||||||
|
int maxDiff = MAXIMUM_EDIT_DISTANCE;
|
||||||
|
|
||||||
|
PriorityQueue<BWAAlignment> alignments = new PriorityQueue<BWAAlignment>();
|
||||||
|
|
||||||
|
// Create a fictional initial alignment, with the position just off the end of the read, and the limits
|
||||||
|
// set as the entire BWT.
|
||||||
|
alignments.add(createSeedAlignment(reverseBWT));
|
||||||
|
alignments.add(createSeedAlignment(forwardBWT));
|
||||||
|
|
||||||
|
while(!alignments.isEmpty()) {
|
||||||
|
BWAAlignment alignment = alignments.remove();
|
||||||
|
|
||||||
|
// From bwtgap.c in the original BWT; if the rank is worse than the best score + the mismatch PENALTY, move on.
|
||||||
|
if( alignment.getScore() > bestScore + MISMATCH_PENALTY )
|
||||||
|
break;
|
||||||
|
|
||||||
|
Byte[] bases = alignment.isNegativeStrand() ? complementedBases : uncomplementedBases;
|
||||||
|
BWT bwt = alignment.isNegativeStrand() ? forwardBWT : reverseBWT;
|
||||||
|
List<LowerBound> lowerBounds = alignment.isNegativeStrand() ? reverseLowerBounds : forwardLowerBounds;
|
||||||
|
|
||||||
|
// if z < D(i) then return {}
|
||||||
|
int mismatches = maxDiff - alignment.getMismatches() - alignment.getGapOpens() - alignment.getGapExtensions();
|
||||||
|
if( alignment.position < lowerBounds.size()-1 && mismatches < lowerBounds.get(alignment.position+1).value )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if(mismatches == 0) {
|
||||||
|
exactMatch(alignment,bases,bwt);
|
||||||
|
if(alignment.loBound > alignment.hiBound)
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Found a valid alignment; store it and move on.
|
||||||
|
if(alignment.position >= read.getReadLength()-1) {
|
||||||
|
for(long bwtIndex = alignment.loBound; bwtIndex <= alignment.hiBound; bwtIndex++) {
|
||||||
|
BWAAlignment finalAlignment = alignment.clone();
|
||||||
|
|
||||||
|
if( finalAlignment.isNegativeStrand() )
|
||||||
|
finalAlignment.setAlignmentStart(forwardSuffixArray.get(bwtIndex) + 1);
|
||||||
|
else {
|
||||||
|
int sizeAlongReference = read.getReadLength() -
|
||||||
|
finalAlignment.getNumberOfBasesMatchingState(AlignmentState.INSERTION) +
|
||||||
|
finalAlignment.getNumberOfBasesMatchingState(AlignmentState.DELETION);
|
||||||
|
finalAlignment.setAlignmentStart(reverseBWT.length() - reverseSuffixArray.get(bwtIndex) - sizeAlongReference + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
successfulMatches.add(finalAlignment);
|
||||||
|
|
||||||
|
bestScore = Math.min(finalAlignment.getScore(),bestScore);
|
||||||
|
bestDiff = Math.min(finalAlignment.getMismatches()+finalAlignment.getGapOpens()+finalAlignment.getGapExtensions(),bestDiff);
|
||||||
|
maxDiff = bestDiff + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
//System.out.printf("Processing alignments; queue size = %d, alignment = %s, bound = %d, base = %s%n", alignments.size(), alignment, lowerBounds.get(alignment.position+1).value, alignment.position >= 0 ? (char)bases[alignment.position].byteValue() : "");
|
||||||
|
/*
|
||||||
|
System.out.printf("#1\t[%d,%d,%d,%c]\t[%d,%d,%d]\t[%d,%d]\t[%d,%d]%n",alignments.size(),
|
||||||
|
alignment.negativeStrand?1:0,
|
||||||
|
bases.length-alignment.position-1,
|
||||||
|
alignment.getCurrentState().toString().charAt(0),
|
||||||
|
alignment.getMismatches(),
|
||||||
|
alignment.getGapOpens(),
|
||||||
|
alignment.getGapExtensions(),
|
||||||
|
lowerBounds.get(alignment.position+1).value,
|
||||||
|
lowerBounds.get(alignment.position+1).width,
|
||||||
|
alignment.loBound,
|
||||||
|
alignment.hiBound);
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Temporary -- look ahead to see if the next alignment is bounded.
|
||||||
|
boolean allowDifferences = mismatches > 0;
|
||||||
|
boolean allowMismatches = mismatches > 0;
|
||||||
|
|
||||||
|
if( allowDifferences &&
|
||||||
|
alignment.position+1 >= INDEL_END_SKIP-1+alignment.getGapOpens()+alignment.getGapExtensions() &&
|
||||||
|
read.getReadLength()-1-(alignment.position+1) >= INDEL_END_SKIP+alignment.getGapOpens()+alignment.getGapExtensions() ) {
|
||||||
|
if( alignment.getCurrentState() == AlignmentState.MATCH_MISMATCH ) {
|
||||||
|
if( alignment.getGapOpens() < MAXIMUM_GAP_OPENS ) {
|
||||||
|
// Add a potential insertion extension.
|
||||||
|
BWAAlignment insertionAlignment = createInsertionAlignment(alignment);
|
||||||
|
insertionAlignment.incrementGapOpens();
|
||||||
|
alignments.add(insertionAlignment);
|
||||||
|
|
||||||
|
// Add a potential deletion by marking a deletion and augmenting the position.
|
||||||
|
List<BWAAlignment> deletionAlignments = createDeletionAlignments(bwt,alignment);
|
||||||
|
for( BWAAlignment deletionAlignment: deletionAlignments )
|
||||||
|
deletionAlignment.incrementGapOpens();
|
||||||
|
alignments.addAll(deletionAlignments);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if( alignment.getCurrentState() == AlignmentState.INSERTION ) {
|
||||||
|
if( alignment.getGapExtensions() < MAXIMUM_GAP_EXTENSIONS && mismatches > 0 ) {
|
||||||
|
// Add a potential insertion extension.
|
||||||
|
BWAAlignment insertionAlignment = createInsertionAlignment(alignment);
|
||||||
|
insertionAlignment.incrementGapExtensions();
|
||||||
|
alignments.add(insertionAlignment);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if( alignment.getCurrentState() == AlignmentState.DELETION ) {
|
||||||
|
if( alignment.getGapExtensions() < MAXIMUM_GAP_EXTENSIONS && mismatches > 0 ) {
|
||||||
|
// Add a potential deletion by marking a deletion and augmenting the position.
|
||||||
|
List<BWAAlignment> deletionAlignments = createDeletionAlignments(bwt,alignment);
|
||||||
|
for( BWAAlignment deletionAlignment: deletionAlignments )
|
||||||
|
deletionAlignment.incrementGapExtensions();
|
||||||
|
alignments.addAll(deletionAlignments);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mismatches
|
||||||
|
alignments.addAll(createMatchedAlignments(bwt,alignment,bases,allowDifferences&&allowMismatches));
|
||||||
|
}
|
||||||
|
|
||||||
|
return successfulMatches;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create an seeding alignment to use as a starting point when traversing.
|
||||||
|
* @param bwt source BWT.
|
||||||
|
* @return Seed alignment.
|
||||||
|
*/
|
||||||
|
private BWAAlignment createSeedAlignment(BWT bwt) {
|
||||||
|
BWAAlignment seed = new BWAAlignment(this);
|
||||||
|
seed.setNegativeStrand(bwt == forwardBWT);
|
||||||
|
seed.position = -1;
|
||||||
|
seed.loBound = 0;
|
||||||
|
seed.hiBound = bwt.length();
|
||||||
|
return seed;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new alignments representing direct matches / mismatches.
|
||||||
|
* @param bwt Source BWT with which to work.
|
||||||
|
* @param alignment Alignment for the previous position.
|
||||||
|
* @param bases The bases in the read.
|
||||||
|
* @param allowMismatch Should mismatching bases be allowed?
|
||||||
|
* @return New alignment representing this position if valid; null otherwise.
|
||||||
|
*/
|
||||||
|
private List<BWAAlignment> createMatchedAlignments( BWT bwt, BWAAlignment alignment, Byte[] bases, boolean allowMismatch ) {
|
||||||
|
List<BWAAlignment> newAlignments = new ArrayList<BWAAlignment>();
|
||||||
|
|
||||||
|
List<Byte> baseChoices = new ArrayList<Byte>();
|
||||||
|
Byte thisBase = bases[alignment.position+1];
|
||||||
|
|
||||||
|
if( allowMismatch )
|
||||||
|
baseChoices.addAll(Bases.allOf());
|
||||||
|
else
|
||||||
|
baseChoices.add(thisBase);
|
||||||
|
|
||||||
|
if( thisBase != null ) {
|
||||||
|
// Keep rotating the current base to the last position until we've hit the current base.
|
||||||
|
for( ;; ) {
|
||||||
|
baseChoices.add(baseChoices.remove(0));
|
||||||
|
if( thisBase.equals(baseChoices.get(baseChoices.size()-1)) )
|
||||||
|
break;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(byte base: baseChoices) {
|
||||||
|
BWAAlignment newAlignment = alignment.clone();
|
||||||
|
|
||||||
|
newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
|
||||||
|
newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound);
|
||||||
|
|
||||||
|
// If this alignment is valid, skip it.
|
||||||
|
if( newAlignment.loBound > newAlignment.hiBound )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
newAlignment.position++;
|
||||||
|
newAlignment.addState(AlignmentState.MATCH_MISMATCH);
|
||||||
|
if( bases[newAlignment.position] == null || base != bases[newAlignment.position] )
|
||||||
|
newAlignment.incrementMismatches();
|
||||||
|
|
||||||
|
newAlignments.add(newAlignment);
|
||||||
|
}
|
||||||
|
|
||||||
|
return newAlignments;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new alignment representing an insertion at this point in the read.
|
||||||
|
* @param alignment Alignment from which to derive the insertion.
|
||||||
|
* @return New alignment reflecting the insertion.
|
||||||
|
*/
|
||||||
|
private BWAAlignment createInsertionAlignment( BWAAlignment alignment ) {
|
||||||
|
// Add a potential insertion extension.
|
||||||
|
BWAAlignment newAlignment = alignment.clone();
|
||||||
|
newAlignment.position++;
|
||||||
|
newAlignment.addState(AlignmentState.INSERTION);
|
||||||
|
return newAlignment;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create new alignments representing a deletion at this point in the read.
|
||||||
|
* @param bwt source BWT for inferring deletion info.
|
||||||
|
* @param alignment Alignment from which to derive the deletion.
|
||||||
|
* @return New alignments reflecting all possible deletions.
|
||||||
|
*/
|
||||||
|
private List<BWAAlignment> createDeletionAlignments( BWT bwt, BWAAlignment alignment) {
|
||||||
|
List<BWAAlignment> newAlignments = new ArrayList<BWAAlignment>();
|
||||||
|
for(byte base: Bases.instance) {
|
||||||
|
BWAAlignment newAlignment = alignment.clone();
|
||||||
|
|
||||||
|
newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
|
||||||
|
newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound);
|
||||||
|
|
||||||
|
// If this alignment is valid, skip it.
|
||||||
|
if( newAlignment.loBound > newAlignment.hiBound )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
newAlignment.addState(AlignmentState.DELETION);
|
||||||
|
|
||||||
|
newAlignments.add(newAlignment);
|
||||||
|
}
|
||||||
|
|
||||||
|
return newAlignments;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Exactly match the given alignment against the given BWT.
|
||||||
|
* @param alignment Alignment to match.
|
||||||
|
* @param bases Bases to use.
|
||||||
|
* @param bwt BWT to use.
|
||||||
|
*/
|
||||||
|
private void exactMatch( BWAAlignment alignment, Byte[] bases, BWT bwt ) {
|
||||||
|
while( ++alignment.position < bases.length ) {
|
||||||
|
byte base = bases[alignment.position];
|
||||||
|
alignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
|
||||||
|
alignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound);
|
||||||
|
if( alignment.loBound > alignment.hiBound )
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Make each base into A/C/G/T or null if unknown.
|
||||||
|
* @param bases Base string to normalize.
|
||||||
|
* @return Array of normalized bases.
|
||||||
|
*/
|
||||||
|
private Byte[] normalizeBases( byte[] bases ) {
|
||||||
|
Byte[] normalBases = new Byte[bases.length];
|
||||||
|
for(int i = 0; i < bases.length; i++)
|
||||||
|
normalBases[i] = Bases.fromASCII(bases[i]);
|
||||||
|
return normalBases;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,88 @@
|
||||||
|
package org.broadinstitute.sting.alignment.bwa.java;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.alignment.reference.bwt.BWT;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* At any point along the given read, what is a good lower bound for the
|
||||||
|
* total number of differences?
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class LowerBound {
|
||||||
|
/**
|
||||||
|
* Lower bound of the suffix array.
|
||||||
|
*/
|
||||||
|
public final long loIndex;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Upper bound of the suffix array.
|
||||||
|
*/
|
||||||
|
public final long hiIndex;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Width of the bwt from loIndex -> hiIndex, inclusive.
|
||||||
|
*/
|
||||||
|
public final long width;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The lower bound at the given point.
|
||||||
|
*/
|
||||||
|
public final int value;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new lower bound with the given value.
|
||||||
|
* @param loIndex The lower bound of the BWT.
|
||||||
|
* @param hiIndex The upper bound of the BWT.
|
||||||
|
* @param value Value for the lower bound at this site.
|
||||||
|
*/
|
||||||
|
private LowerBound(long loIndex, long hiIndex, int value) {
|
||||||
|
this.loIndex = loIndex;
|
||||||
|
this.hiIndex = hiIndex;
|
||||||
|
this.width = hiIndex - loIndex + 1;
|
||||||
|
this.value = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a non-optimal bound according to the algorithm specified in Figure 3 of the BWA paper.
|
||||||
|
* @param bases Bases of the read to use when creating a new BWT.
|
||||||
|
* @param bwt BWT to check against.
|
||||||
|
* @return A list of lower bounds at every point in the reference.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public static List<LowerBound> create(Byte[] bases, BWT bwt) {
|
||||||
|
List<LowerBound> bounds = new ArrayList<LowerBound>();
|
||||||
|
|
||||||
|
long loIndex = 0, hiIndex = bwt.length();
|
||||||
|
int mismatches = 0;
|
||||||
|
for( int i = bases.length-1; i >= 0; i-- ) {
|
||||||
|
Byte base = bases[i];
|
||||||
|
|
||||||
|
// Ignore non-ACGT bases.
|
||||||
|
if( base != null ) {
|
||||||
|
loIndex = bwt.counts(base) + bwt.occurrences(base,loIndex-1) + 1;
|
||||||
|
hiIndex = bwt.counts(base) + bwt.occurrences(base,hiIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( base == null || loIndex > hiIndex ) {
|
||||||
|
loIndex = 0;
|
||||||
|
hiIndex = bwt.length();
|
||||||
|
mismatches++;
|
||||||
|
}
|
||||||
|
bounds.add(0,new LowerBound(loIndex,hiIndex,mismatches));
|
||||||
|
}
|
||||||
|
|
||||||
|
return bounds;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a string representation of this bound.
|
||||||
|
* @return String version of this bound.
|
||||||
|
*/
|
||||||
|
public String toString() {
|
||||||
|
return String.format("LowerBound: w = %d, value = %d",width,value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
/**
|
||||||
|
* Analyses used to validate the correctness and performance the BWA Java bindings.
|
||||||
|
*/
|
||||||
|
package org.broadinstitute.sting.alignment;
|
||||||
|
|
@ -0,0 +1,68 @@
|
||||||
|
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.io.PrintStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes .amb files - a file indicating where 'holes' (indeterminant bases)
|
||||||
|
* exist in the contig. Currently, only empty, placeholder AMBs are supported.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class AMBWriter {
|
||||||
|
/**
|
||||||
|
* Number of holes is fixed at zero.
|
||||||
|
*/
|
||||||
|
private static final int NUM_HOLES = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Input stream from which to read BWT data.
|
||||||
|
*/
|
||||||
|
private final PrintStream out;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new ANNWriter targeting the given file.
|
||||||
|
* @param file file into which ANN data should be written.
|
||||||
|
* @throws java.io.IOException if there is a problem opening the output file.
|
||||||
|
*/
|
||||||
|
public AMBWriter(File file) throws IOException {
|
||||||
|
out = new PrintStream(file);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new ANNWriter targeting the given OutputStream.
|
||||||
|
* @param stream Stream into which ANN data should be written.
|
||||||
|
*/
|
||||||
|
public AMBWriter(OutputStream stream) {
|
||||||
|
out = new PrintStream(stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write the contents of the given dictionary into the AMB file.
|
||||||
|
* Assumes that there are no holes in the dictionary.
|
||||||
|
* @param dictionary Dictionary to write.
|
||||||
|
*/
|
||||||
|
public void writeEmpty(SAMSequenceDictionary dictionary) {
|
||||||
|
long genomeLength = 0L;
|
||||||
|
for(SAMSequenceRecord sequence: dictionary.getSequences())
|
||||||
|
genomeLength += sequence.getSequenceLength();
|
||||||
|
|
||||||
|
int sequences = dictionary.getSequences().size();
|
||||||
|
|
||||||
|
// Write the header
|
||||||
|
out.printf("%d %d %d%n",genomeLength,sequences,NUM_HOLES);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close the given output stream.
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
out.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,95 @@
|
||||||
|
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.io.PrintStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes .ann files - an alternate sequence dictionary format
|
||||||
|
* used by BWA/C. For best results, the input sequence dictionary
|
||||||
|
* should be created with Picard's CreateSequenceDictionary.jar,
|
||||||
|
* TRUNCATE_NAMES_AT_WHITESPACE=false.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class ANNWriter {
|
||||||
|
/**
|
||||||
|
* BWA uses a fixed seed of 11, written into every file.
|
||||||
|
*/
|
||||||
|
private static final int BNS_SEED = 11;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A seemingly unused value that appears in every contig in the ANN.
|
||||||
|
*/
|
||||||
|
private static final int GI = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Input stream from which to read BWT data.
|
||||||
|
*/
|
||||||
|
private final PrintStream out;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new ANNWriter targeting the given file.
|
||||||
|
* @param file file into which ANN data should be written.
|
||||||
|
* @throws IOException if there is a problem opening the output file.
|
||||||
|
*/
|
||||||
|
public ANNWriter(File file) throws IOException {
|
||||||
|
out = new PrintStream(file);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new ANNWriter targeting the given OutputStream.
|
||||||
|
* @param stream Stream into which ANN data should be written.
|
||||||
|
*/
|
||||||
|
public ANNWriter(OutputStream stream) {
|
||||||
|
out = new PrintStream(stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write the contents of the given dictionary into the ANN file.
|
||||||
|
* Assumes that no ambs (blocks of indeterminate base) are present in the dictionary.
|
||||||
|
* @param dictionary Dictionary to write.
|
||||||
|
*/
|
||||||
|
public void write(SAMSequenceDictionary dictionary) {
|
||||||
|
long genomeLength = 0L;
|
||||||
|
for(SAMSequenceRecord sequence: dictionary.getSequences())
|
||||||
|
genomeLength += sequence.getSequenceLength();
|
||||||
|
|
||||||
|
int sequences = dictionary.getSequences().size();
|
||||||
|
|
||||||
|
// Write the header
|
||||||
|
out.printf("%d %d %d%n",genomeLength,sequences,BNS_SEED);
|
||||||
|
|
||||||
|
for(SAMSequenceRecord sequence: dictionary.getSequences()) {
|
||||||
|
String fullSequenceName = sequence.getSequenceName();
|
||||||
|
String trimmedSequenceName = fullSequenceName;
|
||||||
|
String sequenceComment = "(null)";
|
||||||
|
|
||||||
|
long offset = 0;
|
||||||
|
|
||||||
|
// Separate the sequence name from the sequence comment, based on BWA's definition.
|
||||||
|
// BWA's definition appears to accept a zero-length contig name, so mimic that behavior.
|
||||||
|
if(fullSequenceName.indexOf(' ') >= 0) {
|
||||||
|
trimmedSequenceName = fullSequenceName.substring(0,fullSequenceName.indexOf(' '));
|
||||||
|
sequenceComment = fullSequenceName.substring(fullSequenceName.indexOf(' ')+1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write the sequence GI (?), name, and comment.
|
||||||
|
out.printf("%d %s %s%n",GI,trimmedSequenceName,sequenceComment);
|
||||||
|
// Write the sequence offset, length, and ambs (currently fixed at 0).
|
||||||
|
out.printf("%d %d %d%n",offset,sequence.getSequenceLength(),0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close the given output stream.
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
out.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,172 @@
|
||||||
|
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents the Burrows-Wheeler Transform of a reference sequence.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class BWT {
|
||||||
|
/**
|
||||||
|
* Write an occurrence table after every SEQUENCE_BLOCK_SIZE bases.
|
||||||
|
* For this implementation to behave correctly, SEQUENCE_BLOCK_SIZE % 8 == 0
|
||||||
|
*/
|
||||||
|
public static final int SEQUENCE_BLOCK_SIZE = 128;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The inverse SA, used as a placeholder for determining where the special EOL character sits.
|
||||||
|
*/
|
||||||
|
protected final long inverseSA0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cumulative counts for the entire BWT.
|
||||||
|
*/
|
||||||
|
protected final Counts counts;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The individual sequence blocks, modelling how they appear on disk.
|
||||||
|
*/
|
||||||
|
protected final SequenceBlock[] sequenceBlocks;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new BWT with the given inverse SA, counts, and sequence (in ASCII).
|
||||||
|
* @param inverseSA0 Inverse SA entry for the first element. Will be missing from the BWT sequence.
|
||||||
|
* @param counts Cumulative count of bases, in A,C,G,T order.
|
||||||
|
* @param sequenceBlocks The full BWT sequence, sans the '$'.
|
||||||
|
*/
|
||||||
|
public BWT( long inverseSA0, Counts counts, SequenceBlock[] sequenceBlocks ) {
|
||||||
|
this.inverseSA0 = inverseSA0;
|
||||||
|
this.counts = counts;
|
||||||
|
this.sequenceBlocks = sequenceBlocks;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new BWT with the given inverse SA, occurrences, and sequence (in ASCII).
|
||||||
|
* @param inverseSA0 Inverse SA entry for the first element. Will be missing from the BWT sequence.
|
||||||
|
* @param counts Count of bases, in A,C,G,T order.
|
||||||
|
* @param sequence The full BWT sequence, sans the '$'.
|
||||||
|
*/
|
||||||
|
public BWT( long inverseSA0, Counts counts, byte[] sequence ) {
|
||||||
|
this(inverseSA0,counts,generateSequenceBlocks(sequence));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract the full sequence from the list of block.
|
||||||
|
* @return The full BWT string as a byte array.
|
||||||
|
*/
|
||||||
|
public byte[] getSequence() {
|
||||||
|
byte[] sequence = new byte[(int)counts.getTotal()];
|
||||||
|
for( SequenceBlock block: sequenceBlocks )
|
||||||
|
System.arraycopy(block.sequence,0,sequence,block.sequenceStart,block.sequenceLength);
|
||||||
|
return sequence;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the total counts of bases lexicographically smaller than the given base, for Ferragina and Manzini's search.
|
||||||
|
* @param base The base.
|
||||||
|
* @return Total counts for all bases lexicographically smaller than this base.
|
||||||
|
*/
|
||||||
|
public long counts(byte base) {
|
||||||
|
return counts.getCumulative(base);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the total counts of bases lexicographically smaller than the given base, for Ferragina and Manzini's search.
|
||||||
|
* @param base The base.
|
||||||
|
* @param index The position to search within the BWT.
|
||||||
|
* @return Total counts for all bases lexicographically smaller than this base.
|
||||||
|
*/
|
||||||
|
public long occurrences(byte base,long index) {
|
||||||
|
SequenceBlock block = getSequenceBlock(index);
|
||||||
|
int position = getSequencePosition(index);
|
||||||
|
long accumulator = block.occurrences.get(base);
|
||||||
|
for(int i = 0; i <= position; i++) {
|
||||||
|
if(base == block.sequence[i])
|
||||||
|
accumulator++;
|
||||||
|
}
|
||||||
|
return accumulator;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The number of bases in the BWT as a whole.
|
||||||
|
* @return Number of bases.
|
||||||
|
*/
|
||||||
|
public long length() {
|
||||||
|
return counts.getTotal();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new BWT from the given reference sequence.
|
||||||
|
* @param referenceSequence Sequence from which to derive the BWT.
|
||||||
|
* @return reference sequence-derived BWT.
|
||||||
|
*/
|
||||||
|
public static BWT createFromReferenceSequence(byte[] referenceSequence) {
|
||||||
|
SuffixArray suffixArray = SuffixArray.createFromReferenceSequence(referenceSequence);
|
||||||
|
|
||||||
|
byte[] bwt = new byte[(int)suffixArray.length()-1];
|
||||||
|
int bwtIndex = 0;
|
||||||
|
for(long suffixArrayIndex = 0; suffixArrayIndex < suffixArray.length(); suffixArrayIndex++) {
|
||||||
|
if(suffixArray.get(suffixArrayIndex) == 0)
|
||||||
|
continue;
|
||||||
|
bwt[bwtIndex++] = referenceSequence[(int)suffixArray.get(suffixArrayIndex)-1];
|
||||||
|
}
|
||||||
|
|
||||||
|
return new BWT(suffixArray.inverseSA0,suffixArray.occurrences,bwt);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the base at a given position in the BWT.
|
||||||
|
* @param index The index to use.
|
||||||
|
* @return The base at that location.
|
||||||
|
*/
|
||||||
|
protected byte getBase(long index) {
|
||||||
|
if(index == inverseSA0)
|
||||||
|
throw new ReviewedStingException(String.format("Base at index %d does not have a text representation",index));
|
||||||
|
|
||||||
|
SequenceBlock block = getSequenceBlock(index);
|
||||||
|
int position = getSequencePosition(index);
|
||||||
|
return block.sequence[position];
|
||||||
|
}
|
||||||
|
|
||||||
|
private SequenceBlock getSequenceBlock(long index) {
|
||||||
|
// If the index is above the SA-1[0], remap it to the appropriate coordinate space.
|
||||||
|
if(index > inverseSA0) index--;
|
||||||
|
return sequenceBlocks[(int)(index/SEQUENCE_BLOCK_SIZE)];
|
||||||
|
}
|
||||||
|
|
||||||
|
private int getSequencePosition(long index) {
|
||||||
|
// If the index is above the SA-1[0], remap it to the appropriate coordinate space.
|
||||||
|
if(index > inverseSA0) index--;
|
||||||
|
return (int)(index%SEQUENCE_BLOCK_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a set of sequence blocks from one long sequence.
|
||||||
|
* @param sequence Sequence from which to derive blocks.
|
||||||
|
* @return Array of sequence blocks containing data from the sequence.
|
||||||
|
*/
|
||||||
|
private static SequenceBlock[] generateSequenceBlocks( byte[] sequence ) {
|
||||||
|
Counts occurrences = new Counts();
|
||||||
|
|
||||||
|
int numSequenceBlocks = PackUtils.numberOfPartitions(sequence.length,SEQUENCE_BLOCK_SIZE);
|
||||||
|
SequenceBlock[] sequenceBlocks = new SequenceBlock[numSequenceBlocks];
|
||||||
|
|
||||||
|
for( int block = 0; block < numSequenceBlocks; block++ ) {
|
||||||
|
int blockStart = block*SEQUENCE_BLOCK_SIZE;
|
||||||
|
int blockLength = Math.min(SEQUENCE_BLOCK_SIZE, sequence.length-blockStart);
|
||||||
|
byte[] subsequence = new byte[blockLength];
|
||||||
|
|
||||||
|
System.arraycopy(sequence,blockStart,subsequence,0,blockLength);
|
||||||
|
|
||||||
|
sequenceBlocks[block] = new SequenceBlock(blockStart,blockLength,occurrences.clone(),subsequence);
|
||||||
|
|
||||||
|
for( byte base: subsequence )
|
||||||
|
occurrences.increment(base);
|
||||||
|
}
|
||||||
|
|
||||||
|
return sequenceBlocks;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,89 @@
|
||||||
|
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.alignment.reference.packing.BasePackedInputStream;
|
||||||
|
import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
||||||
|
import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedInputStream;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
/**
|
||||||
|
* Reads a BWT from a given file.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class BWTReader {
|
||||||
|
/**
|
||||||
|
* Input stream from which to read BWT data.
|
||||||
|
*/
|
||||||
|
private FileInputStream inputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new BWT reader.
|
||||||
|
* @param inputFile File in which the BWT is stored.
|
||||||
|
*/
|
||||||
|
public BWTReader( File inputFile ) {
|
||||||
|
try {
|
||||||
|
this.inputStream = new FileInputStream(inputFile);
|
||||||
|
}
|
||||||
|
catch( FileNotFoundException ex ) {
|
||||||
|
throw new ReviewedStingException("Unable to open input file", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read a BWT from the input stream.
|
||||||
|
* @return The BWT stored in the input stream.
|
||||||
|
*/
|
||||||
|
public BWT read() {
|
||||||
|
UnsignedIntPackedInputStream uintPackedInputStream = new UnsignedIntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN);
|
||||||
|
BasePackedInputStream basePackedInputStream = new BasePackedInputStream<Integer>(Integer.class, inputStream, ByteOrder.LITTLE_ENDIAN);
|
||||||
|
|
||||||
|
long inverseSA0;
|
||||||
|
long[] count;
|
||||||
|
SequenceBlock[] sequenceBlocks;
|
||||||
|
|
||||||
|
try {
|
||||||
|
inverseSA0 = uintPackedInputStream.read();
|
||||||
|
count = new long[PackUtils.ALPHABET_SIZE];
|
||||||
|
uintPackedInputStream.read(count);
|
||||||
|
|
||||||
|
long bwtSize = count[PackUtils.ALPHABET_SIZE-1];
|
||||||
|
sequenceBlocks = new SequenceBlock[PackUtils.numberOfPartitions(bwtSize,BWT.SEQUENCE_BLOCK_SIZE)];
|
||||||
|
|
||||||
|
for( int block = 0; block < sequenceBlocks.length; block++ ) {
|
||||||
|
int sequenceStart = block* BWT.SEQUENCE_BLOCK_SIZE;
|
||||||
|
int sequenceLength = (int)Math.min(BWT.SEQUENCE_BLOCK_SIZE,bwtSize-sequenceStart);
|
||||||
|
|
||||||
|
long[] occurrences = new long[PackUtils.ALPHABET_SIZE];
|
||||||
|
byte[] bwt = new byte[sequenceLength];
|
||||||
|
|
||||||
|
uintPackedInputStream.read(occurrences);
|
||||||
|
basePackedInputStream.read(bwt);
|
||||||
|
|
||||||
|
sequenceBlocks[block] = new SequenceBlock(sequenceStart,sequenceLength,new Counts(occurrences,false),bwt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch( IOException ex ) {
|
||||||
|
throw new ReviewedStingException("Unable to read BWT from input stream.", ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
return new BWT(inverseSA0, new Counts(count,true), sequenceBlocks);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close the input stream.
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
try {
|
||||||
|
inputStream.close();
|
||||||
|
}
|
||||||
|
catch( IOException ex ) {
|
||||||
|
throw new ReviewedStingException("Unable to close input file", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,60 @@
|
||||||
|
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||||
|
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate BWA supplementary files (.ann, .amb) from the command line.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class BWTSupplementaryFileGenerator {
|
||||||
|
enum SupplementaryFileType { ANN, AMB }
|
||||||
|
|
||||||
|
public static void main(String[] args) throws IOException {
|
||||||
|
if(args.length < 3)
|
||||||
|
usage("Incorrect number of arguments supplied");
|
||||||
|
|
||||||
|
File fastaFile = new File(args[0]);
|
||||||
|
File outputFile = new File(args[1]);
|
||||||
|
SupplementaryFileType outputType = null;
|
||||||
|
try {
|
||||||
|
outputType = Enum.valueOf(SupplementaryFileType.class,args[2]);
|
||||||
|
}
|
||||||
|
catch(IllegalArgumentException ex) {
|
||||||
|
usage("Invalid output type: " + args[2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
ReferenceSequenceFile sequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(fastaFile);
|
||||||
|
SAMSequenceDictionary dictionary = sequenceFile.getSequenceDictionary();
|
||||||
|
|
||||||
|
switch(outputType) {
|
||||||
|
case ANN:
|
||||||
|
ANNWriter annWriter = new ANNWriter(outputFile);
|
||||||
|
annWriter.write(dictionary);
|
||||||
|
annWriter.close();
|
||||||
|
break;
|
||||||
|
case AMB:
|
||||||
|
AMBWriter ambWriter = new AMBWriter(outputFile);
|
||||||
|
ambWriter.writeEmpty(dictionary);
|
||||||
|
ambWriter.close();
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
usage("Unsupported output type: " + outputType);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Print usage information and exit.
|
||||||
|
*/
|
||||||
|
private static void usage(String message) {
|
||||||
|
System.err.println(message);
|
||||||
|
System.err.println("Usage: BWTSupplementaryFileGenerator <fasta> <output file> <output type>");
|
||||||
|
System.exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,71 @@
|
||||||
|
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.alignment.reference.packing.BasePackedOutputStream;
|
||||||
|
import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedOutputStream;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes an in-memory BWT to an outputstream.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class BWTWriter {
|
||||||
|
/**
|
||||||
|
* Input stream from which to read BWT data.
|
||||||
|
*/
|
||||||
|
private final OutputStream outputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new BWT writer.
|
||||||
|
* @param outputFile File in which the BWT is stored.
|
||||||
|
*/
|
||||||
|
public BWTWriter( File outputFile ) {
|
||||||
|
try {
|
||||||
|
this.outputStream = new BufferedOutputStream(new FileOutputStream(outputFile));
|
||||||
|
}
|
||||||
|
catch( FileNotFoundException ex ) {
|
||||||
|
throw new ReviewedStingException("Unable to open output file", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write a BWT to the output stream.
|
||||||
|
* @param bwt Transform to be written to the output stream.
|
||||||
|
*/
|
||||||
|
public void write( BWT bwt ) {
|
||||||
|
UnsignedIntPackedOutputStream intPackedOutputStream = new UnsignedIntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN);
|
||||||
|
BasePackedOutputStream basePackedOutputStream = new BasePackedOutputStream<Integer>(Integer.class, outputStream, ByteOrder.LITTLE_ENDIAN);
|
||||||
|
|
||||||
|
try {
|
||||||
|
intPackedOutputStream.write(bwt.inverseSA0);
|
||||||
|
intPackedOutputStream.write(bwt.counts.toArray(true));
|
||||||
|
|
||||||
|
for( SequenceBlock block: bwt.sequenceBlocks ) {
|
||||||
|
intPackedOutputStream.write(block.occurrences.toArray(false));
|
||||||
|
basePackedOutputStream.write(block.sequence);
|
||||||
|
}
|
||||||
|
|
||||||
|
// The last block is the last set of counts in the structure.
|
||||||
|
intPackedOutputStream.write(bwt.counts.toArray(false));
|
||||||
|
}
|
||||||
|
catch( IOException ex ) {
|
||||||
|
throw new ReviewedStingException("Unable to read BWT from input stream.", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close the input stream.
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
try {
|
||||||
|
outputStream.close();
|
||||||
|
}
|
||||||
|
catch( IOException ex ) {
|
||||||
|
throw new ReviewedStingException("Unable to close input file", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,108 @@
|
||||||
|
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Enhanced enum representation of a base.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class Bases implements Iterable<Byte>
|
||||||
|
{
|
||||||
|
public static byte A = 'A';
|
||||||
|
public static byte C = 'C';
|
||||||
|
public static byte G = 'G';
|
||||||
|
public static byte T = 'T';
|
||||||
|
|
||||||
|
public static final Bases instance = new Bases();
|
||||||
|
|
||||||
|
private static final List<Byte> allBases;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Representation of the base broken down by packed value.
|
||||||
|
*/
|
||||||
|
private static final Map<Integer,Byte> basesByPack = new HashMap<Integer,Byte>();
|
||||||
|
|
||||||
|
static {
|
||||||
|
List<Byte> bases = new ArrayList<Byte>();
|
||||||
|
bases.add(A);
|
||||||
|
bases.add(C);
|
||||||
|
bases.add(G);
|
||||||
|
bases.add(T);
|
||||||
|
allBases = Collections.unmodifiableList(bases);
|
||||||
|
|
||||||
|
for(int i = 0; i < allBases.size(); i++)
|
||||||
|
basesByPack.put(i,allBases.get(i));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new base with the given ascii representation and
|
||||||
|
* pack value.
|
||||||
|
*/
|
||||||
|
private Bases() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return all possible bases.
|
||||||
|
* @return Byte representation of all bases.
|
||||||
|
*/
|
||||||
|
public static Collection<Byte> allOf() {
|
||||||
|
return allBases;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the number of known bases.
|
||||||
|
* @return The number of known bases.
|
||||||
|
*/
|
||||||
|
public static int size() {
|
||||||
|
return allBases.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets an iterator over the total number of known base types.
|
||||||
|
* @return Iterator over all known bases.
|
||||||
|
*/
|
||||||
|
public Iterator<Byte> iterator() {
|
||||||
|
return basesByPack.values().iterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the given base from the packed representation.
|
||||||
|
* @param pack Packed representation.
|
||||||
|
* @return base.
|
||||||
|
*/
|
||||||
|
public static byte fromPack( int pack ) { return basesByPack.get(pack); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert the given base to its packed value.
|
||||||
|
* @param ascii ASCII representation of the base.
|
||||||
|
* @return Packed value.
|
||||||
|
*/
|
||||||
|
public static int toPack( byte ascii )
|
||||||
|
{
|
||||||
|
for( Map.Entry<Integer,Byte> entry: basesByPack.entrySet() ) {
|
||||||
|
if( entry.getValue().equals(ascii) )
|
||||||
|
return entry.getKey();
|
||||||
|
}
|
||||||
|
throw new ReviewedStingException(String.format("Base %c is an invalid base to pack", (char)ascii));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert the ASCII representation of a base to its 'normalized' representation.
|
||||||
|
* @param base The base itself.
|
||||||
|
* @return The byte, if present. Null if unknown.
|
||||||
|
*/
|
||||||
|
public static Byte fromASCII( byte base ) {
|
||||||
|
Byte found = null;
|
||||||
|
for( Byte normalized: allBases ) {
|
||||||
|
if( normalized.equals(base) ) {
|
||||||
|
found = normalized;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return found;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,151 @@
|
||||||
|
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Counts of how many bases of each type have been seen.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class Counts implements Cloneable {
|
||||||
|
/**
|
||||||
|
* Internal representation of counts, broken down by ASCII value.
|
||||||
|
*/
|
||||||
|
private Map<Byte,Long> counts = new HashMap<Byte,Long>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Internal representation of cumulative counts, broken down by ASCII value.
|
||||||
|
*/
|
||||||
|
private Map<Byte,Long> cumulativeCounts = new HashMap<Byte,Long>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create an empty Counts object with values A=0,C=0,G=0,T=0.
|
||||||
|
*/
|
||||||
|
public Counts()
|
||||||
|
{
|
||||||
|
for(byte base: Bases.instance) {
|
||||||
|
counts.put(base,0L);
|
||||||
|
cumulativeCounts.put(base,0L);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a counts data structure with the given initial values.
|
||||||
|
* @param data Count data, broken down by base.
|
||||||
|
* @param cumulative Whether the counts are cumulative, (count_G=numA+numC+numG,for example).
|
||||||
|
*/
|
||||||
|
public Counts( long[] data, boolean cumulative ) {
|
||||||
|
if(cumulative) {
|
||||||
|
long priorCount = 0;
|
||||||
|
for(byte base: Bases.instance) {
|
||||||
|
long count = data[Bases.toPack(base)];
|
||||||
|
counts.put(base,count-priorCount);
|
||||||
|
cumulativeCounts.put(base,priorCount);
|
||||||
|
priorCount = count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
long priorCount = 0;
|
||||||
|
for(byte base: Bases.instance) {
|
||||||
|
long count = data[Bases.toPack(base)];
|
||||||
|
counts.put(base,count);
|
||||||
|
cumulativeCounts.put(base,priorCount);
|
||||||
|
priorCount += count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert to an array for persistence.
|
||||||
|
* @param cumulative Use a cumulative representation.
|
||||||
|
* @return Array of count values.
|
||||||
|
*/
|
||||||
|
public long[] toArray(boolean cumulative) {
|
||||||
|
long[] countArray = new long[counts.size()];
|
||||||
|
if(cumulative) {
|
||||||
|
int index = 0;
|
||||||
|
boolean first = true;
|
||||||
|
for(byte base: Bases.instance) {
|
||||||
|
if(first) {
|
||||||
|
first = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
countArray[index++] = getCumulative(base);
|
||||||
|
}
|
||||||
|
countArray[countArray.length-1] = getTotal();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
int index = 0;
|
||||||
|
for(byte base: Bases.instance)
|
||||||
|
countArray[index++] = counts.get(base);
|
||||||
|
}
|
||||||
|
return countArray;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a unique copy of the current object.
|
||||||
|
* @return A duplicate of this object.
|
||||||
|
*/
|
||||||
|
public Counts clone() {
|
||||||
|
Counts other;
|
||||||
|
try {
|
||||||
|
other = (Counts)super.clone();
|
||||||
|
}
|
||||||
|
catch(CloneNotSupportedException ex) {
|
||||||
|
throw new ReviewedStingException("Unable to clone counts object", ex);
|
||||||
|
}
|
||||||
|
other.counts = new HashMap<Byte,Long>(counts);
|
||||||
|
other.cumulativeCounts = new HashMap<Byte,Long>(cumulativeCounts);
|
||||||
|
return other;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Increment the number of bases seen at the given location.
|
||||||
|
* @param base Base to increment.
|
||||||
|
*/
|
||||||
|
public void increment(byte base) {
|
||||||
|
counts.put(base,counts.get(base)+1);
|
||||||
|
boolean increment = false;
|
||||||
|
for(byte cumulative: Bases.instance) {
|
||||||
|
if(increment) cumulativeCounts.put(cumulative,cumulativeCounts.get(cumulative)+1);
|
||||||
|
increment |= (cumulative == base);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a count of the number of bases seen at a given location.
|
||||||
|
* Note that counts in this case are not cumulative (counts for A,C,G,T
|
||||||
|
* are independent).
|
||||||
|
* @param base Base for which to query counts.
|
||||||
|
* @return Number of bases of this type seen.
|
||||||
|
*/
|
||||||
|
public long get(byte base) {
|
||||||
|
return counts.get(base);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a count of the number of bases seen before this base.
|
||||||
|
* Note that counts in this case are cumulative.
|
||||||
|
* @param base Base for which to query counts.
|
||||||
|
* @return Number of bases of this type seen.
|
||||||
|
*/
|
||||||
|
public long getCumulative(byte base) {
|
||||||
|
return cumulativeCounts.get(base);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How many total bases are represented by this count structure?
|
||||||
|
* @return Total bases represented.
|
||||||
|
*/
|
||||||
|
public long getTotal() {
|
||||||
|
int accumulator = 0;
|
||||||
|
for(byte base: Bases.instance) {
|
||||||
|
accumulator += get(base);
|
||||||
|
}
|
||||||
|
return accumulator;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,200 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITHoc THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.ReferenceSequence;
|
||||||
|
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||||
|
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
||||||
|
import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a suffix array data structure.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class CreateBWTFromReference {
|
||||||
|
private byte[] loadReference( File inputFile ) {
|
||||||
|
// Read in the first sequence in the input file
|
||||||
|
ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile);
|
||||||
|
ReferenceSequence sequence = reference.nextSequence();
|
||||||
|
return sequence.getBases();
|
||||||
|
}
|
||||||
|
|
||||||
|
private byte[] loadReverseReference( File inputFile ) {
|
||||||
|
ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile);
|
||||||
|
ReferenceSequence sequence = reference.nextSequence();
|
||||||
|
PackUtils.reverse(sequence.getBases());
|
||||||
|
return sequence.getBases();
|
||||||
|
}
|
||||||
|
|
||||||
|
private Counts countOccurrences( byte[] sequence ) {
|
||||||
|
Counts occurrences = new Counts();
|
||||||
|
for( byte base: sequence )
|
||||||
|
occurrences.increment(base);
|
||||||
|
return occurrences;
|
||||||
|
}
|
||||||
|
|
||||||
|
private long[] createSuffixArray( byte[] sequence ) {
|
||||||
|
return SuffixArray.createFromReferenceSequence(sequence).sequence;
|
||||||
|
}
|
||||||
|
|
||||||
|
private long[] invertSuffixArray( long[] suffixArray ) {
|
||||||
|
long[] inverseSuffixArray = new long[suffixArray.length];
|
||||||
|
for( int i = 0; i < suffixArray.length; i++ )
|
||||||
|
inverseSuffixArray[(int)suffixArray[i]] = i;
|
||||||
|
return inverseSuffixArray;
|
||||||
|
}
|
||||||
|
|
||||||
|
private long[] createCompressedSuffixArray( int[] suffixArray, int[] inverseSuffixArray ) {
|
||||||
|
long[] compressedSuffixArray = new long[suffixArray.length];
|
||||||
|
compressedSuffixArray[0] = inverseSuffixArray[0];
|
||||||
|
for( int i = 1; i < suffixArray.length; i++ )
|
||||||
|
compressedSuffixArray[i] = inverseSuffixArray[suffixArray[i]+1];
|
||||||
|
return compressedSuffixArray;
|
||||||
|
}
|
||||||
|
|
||||||
|
private long[] createInversedCompressedSuffixArray( int[] compressedSuffixArray ) {
|
||||||
|
long[] inverseCompressedSuffixArray = new long[compressedSuffixArray.length];
|
||||||
|
for( int i = 0; i < compressedSuffixArray.length; i++ )
|
||||||
|
inverseCompressedSuffixArray[compressedSuffixArray[i]] = i;
|
||||||
|
return inverseCompressedSuffixArray;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main( String argv[] ) throws IOException {
|
||||||
|
if( argv.length != 5 ) {
|
||||||
|
System.out.println("USAGE: CreateBWTFromReference <input>.fasta <output bwt> <output rbwt> <output sa> <output rsa>");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
String inputFileName = argv[0];
|
||||||
|
File inputFile = new File(inputFileName);
|
||||||
|
|
||||||
|
String bwtFileName = argv[1];
|
||||||
|
File bwtFile = new File(bwtFileName);
|
||||||
|
|
||||||
|
String rbwtFileName = argv[2];
|
||||||
|
File rbwtFile = new File(rbwtFileName);
|
||||||
|
|
||||||
|
String saFileName = argv[3];
|
||||||
|
File saFile = new File(saFileName);
|
||||||
|
|
||||||
|
String rsaFileName = argv[4];
|
||||||
|
File rsaFile = new File(rsaFileName);
|
||||||
|
|
||||||
|
CreateBWTFromReference creator = new CreateBWTFromReference();
|
||||||
|
|
||||||
|
byte[] sequence = creator.loadReference(inputFile);
|
||||||
|
byte[] reverseSequence = creator.loadReverseReference(inputFile);
|
||||||
|
|
||||||
|
// Count the occurences of each given base.
|
||||||
|
Counts occurrences = creator.countOccurrences(sequence);
|
||||||
|
System.out.printf("Occurrences: a=%d, c=%d, g=%d, t=%d%n",occurrences.getCumulative(Bases.A),
|
||||||
|
occurrences.getCumulative(Bases.C),
|
||||||
|
occurrences.getCumulative(Bases.G),
|
||||||
|
occurrences.getCumulative(Bases.T));
|
||||||
|
|
||||||
|
// Generate the suffix array and print diagnostics.
|
||||||
|
long[] suffixArrayData = creator.createSuffixArray(sequence);
|
||||||
|
long[] reverseSuffixArrayData = creator.createSuffixArray(reverseSequence);
|
||||||
|
|
||||||
|
// Invert the suffix array and print diagnostics.
|
||||||
|
long[] inverseSuffixArray = creator.invertSuffixArray(suffixArrayData);
|
||||||
|
long[] reverseInverseSuffixArray = creator.invertSuffixArray(reverseSuffixArrayData);
|
||||||
|
|
||||||
|
SuffixArray suffixArray = new SuffixArray( inverseSuffixArray[0], occurrences, suffixArrayData );
|
||||||
|
SuffixArray reverseSuffixArray = new SuffixArray( reverseInverseSuffixArray[0], occurrences, reverseSuffixArrayData );
|
||||||
|
|
||||||
|
/*
|
||||||
|
// Create the data structure for the compressed suffix array and print diagnostics.
|
||||||
|
int[] compressedSuffixArray = creator.createCompressedSuffixArray(suffixArray.sequence,inverseSuffixArray);
|
||||||
|
int reconstructedInverseSA = compressedSuffixArray[0];
|
||||||
|
for( int i = 0; i < 8; i++ ) {
|
||||||
|
System.out.printf("compressedSuffixArray[%d] = %d (SA-1[%d] = %d)%n", i, compressedSuffixArray[i], i, reconstructedInverseSA);
|
||||||
|
reconstructedInverseSA = compressedSuffixArray[reconstructedInverseSA];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create the data structure for the inverse compressed suffix array and print diagnostics.
|
||||||
|
int[] inverseCompressedSuffixArray = creator.createInversedCompressedSuffixArray(compressedSuffixArray);
|
||||||
|
for( int i = 0; i < 8; i++ ) {
|
||||||
|
System.out.printf("inverseCompressedSuffixArray[%d] = %d%n", i, inverseCompressedSuffixArray[i]);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Create the BWT.
|
||||||
|
BWT bwt = BWT.createFromReferenceSequence(sequence);
|
||||||
|
BWT reverseBWT = BWT.createFromReferenceSequence(reverseSequence);
|
||||||
|
|
||||||
|
byte[] bwtSequence = bwt.getSequence();
|
||||||
|
System.out.printf("BWT: %s... (length = %d)%n", new String(bwtSequence,0,80),bwt.length());
|
||||||
|
|
||||||
|
BWTWriter bwtWriter = new BWTWriter(bwtFile);
|
||||||
|
bwtWriter.write(bwt);
|
||||||
|
bwtWriter.close();
|
||||||
|
|
||||||
|
BWTWriter reverseBWTWriter = new BWTWriter(rbwtFile);
|
||||||
|
reverseBWTWriter.write(reverseBWT);
|
||||||
|
reverseBWTWriter.close();
|
||||||
|
|
||||||
|
/*
|
||||||
|
SuffixArrayWriter saWriter = new SuffixArrayWriter(saFile);
|
||||||
|
saWriter.write(suffixArray);
|
||||||
|
saWriter.close();
|
||||||
|
|
||||||
|
SuffixArrayWriter reverseSAWriter = new SuffixArrayWriter(rsaFile);
|
||||||
|
reverseSAWriter.write(reverseSuffixArray);
|
||||||
|
reverseSAWriter.close();
|
||||||
|
*/
|
||||||
|
|
||||||
|
File existingBWTFile = new File(inputFileName+".bwt");
|
||||||
|
BWTReader existingBWTReader = new BWTReader(existingBWTFile);
|
||||||
|
BWT existingBWT = existingBWTReader.read();
|
||||||
|
|
||||||
|
byte[] existingBWTSequence = existingBWT.getSequence();
|
||||||
|
System.out.printf("Existing BWT: %s... (length = %d)%n",new String(existingBWTSequence,0,80),existingBWT.length());
|
||||||
|
|
||||||
|
for( int i = 0; i < bwt.length(); i++ ) {
|
||||||
|
if( bwtSequence[i] != existingBWTSequence[i] )
|
||||||
|
throw new ReviewedStingException("BWT mismatch at " + i);
|
||||||
|
}
|
||||||
|
|
||||||
|
File existingSAFile = new File(inputFileName+".sa");
|
||||||
|
SuffixArrayReader existingSuffixArrayReader = new SuffixArrayReader(existingSAFile,existingBWT);
|
||||||
|
SuffixArray existingSuffixArray = existingSuffixArrayReader.read();
|
||||||
|
|
||||||
|
for(int i = 0; i < suffixArray.length(); i++) {
|
||||||
|
if( i % 10000 == 0 )
|
||||||
|
System.out.printf("Validating suffix array entry %d%n", i);
|
||||||
|
if( suffixArray.get(i) != existingSuffixArray.get(i) )
|
||||||
|
throw new ReviewedStingException(String.format("Suffix array mismatch at %d; SA is %d; should be %d",i,existingSuffixArray.get(i),suffixArray.get(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,41 @@
|
||||||
|
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Models a block of bases within the BWT.
|
||||||
|
*/
|
||||||
|
public class SequenceBlock {
|
||||||
|
/**
|
||||||
|
* Start position of this sequence within the BWT.
|
||||||
|
*/
|
||||||
|
public final int sequenceStart;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Length of this sequence within the BWT.
|
||||||
|
*/
|
||||||
|
public final int sequenceLength;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Occurrences of each letter up to this sequence block.
|
||||||
|
*/
|
||||||
|
public final Counts occurrences;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sequence for this segment.
|
||||||
|
*/
|
||||||
|
public final byte[] sequence;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new block within this BWT.
|
||||||
|
* @param sequenceStart Starting position of this sequence within the BWT.
|
||||||
|
* @param sequenceLength Length of this sequence.
|
||||||
|
* @param occurrences How many of each base has been seen before this sequence began.
|
||||||
|
* @param sequence The actual sequence from the BWT.
|
||||||
|
*/
|
||||||
|
public SequenceBlock( int sequenceStart, int sequenceLength, Counts occurrences, byte[] sequence ) {
|
||||||
|
this.sequenceStart = sequenceStart;
|
||||||
|
this.sequenceLength = sequenceLength;
|
||||||
|
this.occurrences = occurrences;
|
||||||
|
this.sequence = sequence;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,158 @@
|
||||||
|
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||||
|
|
||||||
|
import net.sf.samtools.util.StringUtil;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An in-memory representation of a suffix array.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class SuffixArray {
|
||||||
|
public final long inverseSA0;
|
||||||
|
public final Counts occurrences;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The elements of the sequence actually stored in memory.
|
||||||
|
*/
|
||||||
|
protected final long[] sequence;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How often are individual elements in the sequence actually stored
|
||||||
|
* in memory, as opposed to being calculated on the fly?
|
||||||
|
*/
|
||||||
|
protected final int sequenceInterval;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The BWT used to calculate missing portions of the sequence.
|
||||||
|
*/
|
||||||
|
protected final BWT bwt;
|
||||||
|
|
||||||
|
public SuffixArray(long inverseSA0, Counts occurrences, long[] sequence) {
|
||||||
|
this(inverseSA0,occurrences,sequence,1,null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new sequence array with the given inverse SA, occurrences, and values.
|
||||||
|
* @param inverseSA0 Inverse SA entry for the first element.
|
||||||
|
* @param occurrences Cumulative number of occurrences of A,C,G,T, in order.
|
||||||
|
* @param sequence The full suffix array.
|
||||||
|
* @param sequenceInterval How frequently is the sequence interval stored.
|
||||||
|
* @param bwt bwt used to infer the remaining entries in the BWT.
|
||||||
|
*/
|
||||||
|
public SuffixArray(long inverseSA0, Counts occurrences, long[] sequence, int sequenceInterval, BWT bwt) {
|
||||||
|
this.inverseSA0 = inverseSA0;
|
||||||
|
this.occurrences = occurrences;
|
||||||
|
this.sequence = sequence;
|
||||||
|
this.sequenceInterval = sequenceInterval;
|
||||||
|
this.bwt = bwt;
|
||||||
|
|
||||||
|
if(sequenceInterval != 1 && bwt == null)
|
||||||
|
throw new ReviewedStingException("A BWT must be provided if the sequence interval is not 1");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves the length of the sequence array.
|
||||||
|
* @return Length of the suffix array.
|
||||||
|
*/
|
||||||
|
public long length() {
|
||||||
|
if( bwt != null )
|
||||||
|
return bwt.length()+1;
|
||||||
|
else
|
||||||
|
return sequence.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the suffix array value at a given sequence.
|
||||||
|
* @param index Index at which to retrieve the suffix array vaule.
|
||||||
|
* @return The suffix array value at that entry.
|
||||||
|
*/
|
||||||
|
public long get(long index) {
|
||||||
|
int iterations = 0;
|
||||||
|
while(index%sequenceInterval != 0) {
|
||||||
|
// The inverseSA0 ('$') doesn't have a usable ASCII representation; it must be treated as a special case.
|
||||||
|
if(index == inverseSA0)
|
||||||
|
index = 0;
|
||||||
|
else {
|
||||||
|
byte base = bwt.getBase(index);
|
||||||
|
index = bwt.counts(base) + bwt.occurrences(base,index);
|
||||||
|
}
|
||||||
|
iterations++;
|
||||||
|
}
|
||||||
|
return (sequence[(int)(index/sequenceInterval)]+iterations) % length();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a suffix array from a given reference sequence.
|
||||||
|
* @param sequence The reference sequence to use when building the suffix array.
|
||||||
|
* @return a constructed suffix array.
|
||||||
|
*/
|
||||||
|
public static SuffixArray createFromReferenceSequence(byte[] sequence) {
|
||||||
|
// The builder for the suffix array. Use an integer in this case because
|
||||||
|
// Java arrays can only hold an integer.
|
||||||
|
TreeSet<Integer> suffixArrayBuilder = new TreeSet<Integer>(new SuffixArrayComparator(sequence));
|
||||||
|
|
||||||
|
Counts occurrences = new Counts();
|
||||||
|
for( byte base: sequence )
|
||||||
|
occurrences.increment(base);
|
||||||
|
|
||||||
|
// Build out the suffix array using a custom comparator.
|
||||||
|
for( int i = 0; i <= sequence.length; i++ )
|
||||||
|
suffixArrayBuilder.add(i);
|
||||||
|
|
||||||
|
// Copy the suffix array into an array.
|
||||||
|
long[] suffixArray = new long[suffixArrayBuilder.size()];
|
||||||
|
int i = 0;
|
||||||
|
for( Integer element: suffixArrayBuilder )
|
||||||
|
suffixArray[i++] = element;
|
||||||
|
|
||||||
|
// Find the first element in the inverse suffix array.
|
||||||
|
long inverseSA0 = -1;
|
||||||
|
for(i = 0; i < suffixArray.length; i++) {
|
||||||
|
if(suffixArray[i] == 0)
|
||||||
|
inverseSA0 = i;
|
||||||
|
}
|
||||||
|
if(inverseSA0 < 0)
|
||||||
|
throw new ReviewedStingException("Unable to find first inverse SA entry in generated suffix array.");
|
||||||
|
|
||||||
|
return new SuffixArray(inverseSA0,occurrences,suffixArray);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compares two suffix arrays of the given sequence. Will return whichever string appears
|
||||||
|
* first in lexicographic order.
|
||||||
|
*/
|
||||||
|
private static class SuffixArrayComparator implements Comparator<Integer> {
|
||||||
|
/**
|
||||||
|
* The data source for all suffix arrays.
|
||||||
|
*/
|
||||||
|
private final String sequence;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new comparator.
|
||||||
|
* @param sequence Reference sequence to use as basis for comparison.
|
||||||
|
*/
|
||||||
|
public SuffixArrayComparator( byte[] sequence ) {
|
||||||
|
// Processing the suffix array tends to be easier as a string.
|
||||||
|
this.sequence = StringUtil.bytesToString(sequence);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compare the two given suffix arrays. Criteria for comparison is the lexicographic order of
|
||||||
|
* the two substrings sequence[lhs:], sequence[rhs:].
|
||||||
|
* @param lhs Left-hand side of comparison.
|
||||||
|
* @param rhs Right-hand side of comparison.
|
||||||
|
* @return How the suffix arrays represented by lhs, rhs compare.
|
||||||
|
*/
|
||||||
|
public int compare( Integer lhs, Integer rhs ) {
|
||||||
|
String lhsSuffixArray = sequence.substring(lhs);
|
||||||
|
String rhsSuffixArray = sequence.substring(rhs);
|
||||||
|
return lhsSuffixArray.compareTo(rhsSuffixArray);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,85 @@
|
||||||
|
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
||||||
|
import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedInputStream;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A reader for suffix arrays in permanent storage.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class SuffixArrayReader {
|
||||||
|
/**
|
||||||
|
* Input stream from which to read suffix array data.
|
||||||
|
*/
|
||||||
|
private FileInputStream inputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* BWT to use to fill in missing data.
|
||||||
|
*/
|
||||||
|
private BWT bwt;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new suffix array reader.
|
||||||
|
* @param inputFile File in which the suffix array is stored.
|
||||||
|
* @param bwt BWT to use when filling in missing data.
|
||||||
|
*/
|
||||||
|
public SuffixArrayReader(File inputFile, BWT bwt) {
|
||||||
|
try {
|
||||||
|
this.inputStream = new FileInputStream(inputFile);
|
||||||
|
this.bwt = bwt;
|
||||||
|
}
|
||||||
|
catch( FileNotFoundException ex ) {
|
||||||
|
throw new ReviewedStingException("Unable to open input file", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read a suffix array from the input stream.
|
||||||
|
* @return The suffix array stored in the input stream.
|
||||||
|
*/
|
||||||
|
public SuffixArray read() {
|
||||||
|
UnsignedIntPackedInputStream uintPackedInputStream = new UnsignedIntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN);
|
||||||
|
|
||||||
|
long inverseSA0;
|
||||||
|
long[] occurrences;
|
||||||
|
long[] suffixArray;
|
||||||
|
int suffixArrayInterval;
|
||||||
|
|
||||||
|
try {
|
||||||
|
inverseSA0 = uintPackedInputStream.read();
|
||||||
|
occurrences = new long[PackUtils.ALPHABET_SIZE];
|
||||||
|
uintPackedInputStream.read(occurrences);
|
||||||
|
// Throw away the suffix array size in bytes and use the occurrences table directly.
|
||||||
|
suffixArrayInterval = (int)uintPackedInputStream.read();
|
||||||
|
suffixArray = new long[(int)((occurrences[occurrences.length-1]+suffixArrayInterval-1)/suffixArrayInterval)];
|
||||||
|
uintPackedInputStream.read(suffixArray);
|
||||||
|
}
|
||||||
|
catch( IOException ex ) {
|
||||||
|
throw new ReviewedStingException("Unable to read BWT from input stream.", ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
return new SuffixArray(inverseSA0, new Counts(occurrences,true), suffixArray, suffixArrayInterval, bwt);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close the input stream.
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
try {
|
||||||
|
inputStream.close();
|
||||||
|
}
|
||||||
|
catch( IOException ex ) {
|
||||||
|
throw new ReviewedStingException("Unable to close input file", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,67 @@
|
||||||
|
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedOutputStream;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Javadoc goes here.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class SuffixArrayWriter {
|
||||||
|
/**
|
||||||
|
* Input stream from which to read suffix array data.
|
||||||
|
*/
|
||||||
|
private OutputStream outputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new suffix array reader.
|
||||||
|
* @param outputFile File in which the suffix array is stored.
|
||||||
|
*/
|
||||||
|
public SuffixArrayWriter( File outputFile ) {
|
||||||
|
try {
|
||||||
|
this.outputStream = new BufferedOutputStream(new FileOutputStream(outputFile));
|
||||||
|
}
|
||||||
|
catch( FileNotFoundException ex ) {
|
||||||
|
throw new ReviewedStingException("Unable to open input file", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write a suffix array to the output stream.
|
||||||
|
* @param suffixArray suffix array to write.
|
||||||
|
*/
|
||||||
|
public void write(SuffixArray suffixArray) {
|
||||||
|
UnsignedIntPackedOutputStream uintPackedOutputStream = new UnsignedIntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN);
|
||||||
|
|
||||||
|
try {
|
||||||
|
uintPackedOutputStream.write(suffixArray.inverseSA0);
|
||||||
|
uintPackedOutputStream.write(suffixArray.occurrences.toArray(true));
|
||||||
|
// How frequently the suffix array entry is placed.
|
||||||
|
uintPackedOutputStream.write(1);
|
||||||
|
// Length of the suffix array.
|
||||||
|
uintPackedOutputStream.write(suffixArray.length()-1);
|
||||||
|
uintPackedOutputStream.write(suffixArray.sequence,1,suffixArray.sequence.length-1);
|
||||||
|
}
|
||||||
|
catch( IOException ex ) {
|
||||||
|
throw new ReviewedStingException("Unable to read BWT from input stream.", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close the input stream.
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
try {
|
||||||
|
outputStream.close();
|
||||||
|
}
|
||||||
|
catch( IOException ex ) {
|
||||||
|
throw new ReviewedStingException("Unable to close input file", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,95 @@
|
||||||
|
package org.broadinstitute.sting.alignment.reference.packing;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
import java.nio.channels.FileChannel;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a packed version of the input stream.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class BasePackedInputStream<T> {
|
||||||
|
/**
|
||||||
|
* Type of object to unpack.
|
||||||
|
*/
|
||||||
|
private final Class<T> type;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ultimate source for packed bases.
|
||||||
|
*/
|
||||||
|
private final FileInputStream targetInputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Channel source for packed bases.
|
||||||
|
*/
|
||||||
|
private final FileChannel targetInputChannel;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A fixed-size buffer for word-packed data.
|
||||||
|
*/
|
||||||
|
private final ByteOrder byteOrder;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How many bases are in a given packed word.
|
||||||
|
*/
|
||||||
|
private final int basesPerPackedWord = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BASE;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How many bytes in an integer?
|
||||||
|
*/
|
||||||
|
private final int bytesPerInteger = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE;
|
||||||
|
|
||||||
|
|
||||||
|
public BasePackedInputStream( Class<T> type, File inputFile, ByteOrder byteOrder ) throws FileNotFoundException {
|
||||||
|
this(type,new FileInputStream(inputFile),byteOrder);
|
||||||
|
}
|
||||||
|
|
||||||
|
public BasePackedInputStream( Class<T> type, FileInputStream inputStream, ByteOrder byteOrder ) {
|
||||||
|
if( type != Integer.class )
|
||||||
|
throw new ReviewedStingException("Only bases packed into 32-bit words are currently supported by this input stream. Type specified: " + type.getName());
|
||||||
|
this.type = type;
|
||||||
|
this.targetInputStream = inputStream;
|
||||||
|
this.targetInputChannel = inputStream.getChannel();
|
||||||
|
this.byteOrder = byteOrder;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read the entire contents of the input stream.
|
||||||
|
* @param bwt array into which bases should be read.
|
||||||
|
* @throws IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public void read(byte[] bwt) throws IOException {
|
||||||
|
read(bwt,0,bwt.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read the next <code>length</code> bases into the bwt array, starting at the given offset.
|
||||||
|
* @param bwt array holding the given data.
|
||||||
|
* @param offset target position in the bases array into which bytes should be written.
|
||||||
|
* @param length number of bases to read from the stream.
|
||||||
|
* @throws IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public void read(byte[] bwt, int offset, int length) throws IOException {
|
||||||
|
int bufferWidth = ((bwt.length+basesPerPackedWord-1)/basesPerPackedWord)*bytesPerInteger;
|
||||||
|
ByteBuffer buffer = ByteBuffer.allocate(bufferWidth).order(byteOrder);
|
||||||
|
targetInputChannel.read(buffer);
|
||||||
|
targetInputChannel.position(targetInputChannel.position()+buffer.remaining());
|
||||||
|
buffer.flip();
|
||||||
|
|
||||||
|
int packedWord = 0;
|
||||||
|
int i = 0;
|
||||||
|
while(i < length) {
|
||||||
|
if(i % basesPerPackedWord == 0) packedWord = buffer.getInt();
|
||||||
|
int position = basesPerPackedWord - i%basesPerPackedWord - 1;
|
||||||
|
bwt[offset+i++] = PackUtils.unpackBase((byte)((packedWord >> position*PackUtils.BITS_PER_BASE) & 0x3));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,140 @@
|
||||||
|
package org.broadinstitute.sting.alignment.reference.packing;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A general-purpose stream for writing packed bases.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class BasePackedOutputStream<T> {
|
||||||
|
/**
|
||||||
|
* Type of object to pack.
|
||||||
|
*/
|
||||||
|
private final Class<T> type;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How many bases can be stored in the given data structure?
|
||||||
|
*/
|
||||||
|
private final int basesPerType;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ultimate target for the packed bases.
|
||||||
|
*/
|
||||||
|
private final OutputStream targetOutputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A fixed-size buffer for word-packed data.
|
||||||
|
*/
|
||||||
|
private final ByteBuffer buffer;
|
||||||
|
|
||||||
|
public BasePackedOutputStream( Class<T> type, File outputFile, ByteOrder byteOrder ) throws FileNotFoundException {
|
||||||
|
this(type,new BufferedOutputStream(new FileOutputStream(outputFile)),byteOrder);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write packed bases to the given output stream.
|
||||||
|
* @param type Type of data to pack bases into.
|
||||||
|
* @param outputStream Output stream to which to write packed bases.
|
||||||
|
* @param byteOrder Switch between big endian / little endian when reading / writing files.
|
||||||
|
*/
|
||||||
|
public BasePackedOutputStream( Class<T> type, OutputStream outputStream, ByteOrder byteOrder) {
|
||||||
|
this.targetOutputStream = outputStream;
|
||||||
|
this.type = type;
|
||||||
|
basesPerType = PackUtils.bitsInType(type)/PackUtils.BITS_PER_BASE;
|
||||||
|
this.buffer = ByteBuffer.allocate(basesPerType/PackUtils.ALPHABET_SIZE).order(byteOrder);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes the given base to the output stream. Will write only this base; no packing will be performed.
|
||||||
|
* @param base List of bases to write.
|
||||||
|
* @throws IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public void write( int base ) throws IOException {
|
||||||
|
write( new byte[] { (byte)base } );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes an array of bases to the target output stream.
|
||||||
|
* @param bases List of bases to write.
|
||||||
|
* @throws IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public void write( byte[] bases ) throws IOException {
|
||||||
|
write(bases,0,bases.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes a subset of the array of bases to the output stream.
|
||||||
|
* @param bases List of bases to write.
|
||||||
|
* @param offset site at which to start writing.
|
||||||
|
* @param length number of bases to write.
|
||||||
|
* @throws IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public void write( byte[] bases, int offset, int length ) throws IOException {
|
||||||
|
int packedBases = 0;
|
||||||
|
int positionInPack = 0;
|
||||||
|
|
||||||
|
for( int base = offset; base < offset+length; base++ ) {
|
||||||
|
packedBases = packBase(bases[base], packedBases, positionInPack);
|
||||||
|
|
||||||
|
// Increment the packed counter. If all possible bases have been squeezed into this byte, write it out.
|
||||||
|
positionInPack = ++positionInPack % basesPerType;
|
||||||
|
if( positionInPack == 0 ) {
|
||||||
|
writePackedBases(packedBases);
|
||||||
|
packedBases = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if( positionInPack > 0 )
|
||||||
|
writePackedBases(packedBases);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Flush the contents of the OutputStream to disk.
|
||||||
|
* @throws IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public void flush() throws IOException {
|
||||||
|
targetOutputStream.flush();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Closes the given output stream.
|
||||||
|
* @throws IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public void close() throws IOException {
|
||||||
|
targetOutputStream.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Pack the given base into the basepack.
|
||||||
|
* @param base The base to pack.
|
||||||
|
* @param basePack Target for the pack operation.
|
||||||
|
* @param position Position within the pack to which to add the base.
|
||||||
|
* @return The packed integer.
|
||||||
|
*/
|
||||||
|
private int packBase( byte base, int basePack, int position ) {
|
||||||
|
basePack |= (PackUtils.packBase(base) << 2*(basesPerType-position-1));
|
||||||
|
return basePack;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write the given packed base structure to the output file.
|
||||||
|
* @param packedBases Packed bases to write.
|
||||||
|
* @throws IOException on error writing to the file.
|
||||||
|
*/
|
||||||
|
private void writePackedBases(int packedBases) throws IOException {
|
||||||
|
buffer.rewind();
|
||||||
|
if( type == Integer.class )
|
||||||
|
buffer.putInt(packedBases);
|
||||||
|
else if( type == Byte.class )
|
||||||
|
buffer.put((byte)packedBases);
|
||||||
|
else
|
||||||
|
throw new ReviewedStingException("Cannot pack bases into type " + type.getName());
|
||||||
|
targetOutputStream.write(buffer.array());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,64 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.alignment.reference.packing;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.ReferenceSequence;
|
||||||
|
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||||
|
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a .PAC file from a given reference.
|
||||||
|
*
|
||||||
|
* @author hanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class CreatePACFromReference {
|
||||||
|
public static void main( String argv[] ) throws IOException {
|
||||||
|
if( argv.length != 3 ) {
|
||||||
|
System.out.println("USAGE: CreatePACFromReference <input>.fasta <output pac> <output rpac>");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read in the first sequence in the input file
|
||||||
|
String inputFileName = argv[0];
|
||||||
|
File inputFile = new File(inputFileName);
|
||||||
|
ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile);
|
||||||
|
ReferenceSequence sequence = reference.nextSequence();
|
||||||
|
|
||||||
|
// Target file for output
|
||||||
|
PackUtils.writeReferenceSequence( new File(argv[1]), sequence.getBases() );
|
||||||
|
|
||||||
|
// Reverse the bases in the reference
|
||||||
|
PackUtils.reverse(sequence.getBases());
|
||||||
|
|
||||||
|
// Target file for output
|
||||||
|
PackUtils.writeReferenceSequence( new File(argv[2]), sequence.getBases() );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,135 @@
|
||||||
|
package org.broadinstitute.sting.alignment.reference.packing;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utilities designed for packing / unpacking bases.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class PackUtils {
|
||||||
|
/**
|
||||||
|
* How many possible bases can be encoded?
|
||||||
|
*/
|
||||||
|
public static final int ALPHABET_SIZE = 4;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How many bits does it take to store a single base?
|
||||||
|
*/
|
||||||
|
public static final int BITS_PER_BASE = (int)(Math.log(ALPHABET_SIZE)/Math.log(2));
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How many bits fit into a single byte?
|
||||||
|
*/
|
||||||
|
public static final int BITS_PER_BYTE = 8;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes a reference sequence to a PAC file.
|
||||||
|
* @param outputFile Filename for the PAC file.
|
||||||
|
* @param referenceSequence Reference sequence to write.
|
||||||
|
* @throws IOException If there's a problem writing to the output file.
|
||||||
|
*/
|
||||||
|
public static void writeReferenceSequence( File outputFile, byte[] referenceSequence ) throws IOException {
|
||||||
|
OutputStream outputStream = new FileOutputStream(outputFile);
|
||||||
|
|
||||||
|
BasePackedOutputStream<Byte> basePackedOutputStream = new BasePackedOutputStream<Byte>(Byte.class, outputStream, ByteOrder.BIG_ENDIAN);
|
||||||
|
basePackedOutputStream.write(referenceSequence);
|
||||||
|
|
||||||
|
outputStream.write(referenceSequence.length%PackUtils.ALPHABET_SIZE);
|
||||||
|
|
||||||
|
outputStream.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How many bits can a given type hold?
|
||||||
|
* @param type Type to test.
|
||||||
|
* @return Number of bits that the given type can hold.
|
||||||
|
*/
|
||||||
|
public static int bitsInType( Class<?> type ) {
|
||||||
|
try {
|
||||||
|
long typeSize = type.getField("MAX_VALUE").getLong(null) - type.getField("MIN_VALUE").getLong(null)+1;
|
||||||
|
long intTypeSize = (long)Integer.MAX_VALUE - (long)Integer.MIN_VALUE + 1;
|
||||||
|
if( typeSize > intTypeSize )
|
||||||
|
throw new ReviewedStingException("Cannot determine number of bits available in type: " + type.getName());
|
||||||
|
return (int)(Math.log(typeSize)/Math.log(2));
|
||||||
|
}
|
||||||
|
catch( NoSuchFieldException ex ) {
|
||||||
|
throw new ReviewedStingException("Cannot determine number of bits available in type: " + type.getName(),ex);
|
||||||
|
}
|
||||||
|
catch( IllegalAccessException ex ) {
|
||||||
|
throw new ReviewedStingException("Cannot determine number of bits available in type: " + type.getName(),ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the two-bit representation of a base. A=00b, C=01b, G=10b, T=11b.
|
||||||
|
* @param base ASCII value for the base to pack.
|
||||||
|
* @return A byte from 0-3 indicating the base's packed value.
|
||||||
|
*/
|
||||||
|
public static byte packBase(byte base) {
|
||||||
|
switch( base ) {
|
||||||
|
case 'A':
|
||||||
|
return 0;
|
||||||
|
case 'C':
|
||||||
|
return 1;
|
||||||
|
case 'G':
|
||||||
|
return 2;
|
||||||
|
case 'T':
|
||||||
|
return 3;
|
||||||
|
default:
|
||||||
|
throw new ReviewedStingException("Unknown base type: " + base);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts a two-bit representation of a base into an ASCII representation of a base.
|
||||||
|
* @param pack Byte from 0-3 indicating which base is represented.
|
||||||
|
* @return An ASCII value representing the packed base.
|
||||||
|
*/
|
||||||
|
public static byte unpackBase(byte pack) {
|
||||||
|
switch( pack ) {
|
||||||
|
case 0:
|
||||||
|
return 'A';
|
||||||
|
case 1:
|
||||||
|
return 'C';
|
||||||
|
case 2:
|
||||||
|
return 'G';
|
||||||
|
case 3:
|
||||||
|
return 'T';
|
||||||
|
default:
|
||||||
|
throw new ReviewedStingException("Unknown pack type: " + pack);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reverses an unpacked sequence of bases.
|
||||||
|
* @param bases bases to reverse.
|
||||||
|
*/
|
||||||
|
public static void reverse( byte[] bases ) {
|
||||||
|
for( int i = 0, j = bases.length-1; i < j; i++, j-- ) {
|
||||||
|
byte temp = bases[j];
|
||||||
|
bases[j] = bases[i];
|
||||||
|
bases[i] = temp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a structure of size <code>size</code> that should be split
|
||||||
|
* into <code>partitionSize</code> partitions, how many partitions should
|
||||||
|
* be created? Size of last partition will be <= partitionSize.
|
||||||
|
* @param size Total size of the data structure.
|
||||||
|
* @param partitionSize Size of an individual partition.
|
||||||
|
* @return Number of partitions that would be created.
|
||||||
|
*/
|
||||||
|
public static int numberOfPartitions( long size, long partitionSize ) {
|
||||||
|
return (int)((size+partitionSize-1) / partitionSize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,104 @@
|
||||||
|
package org.broadinstitute.sting.alignment.reference.packing;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
import java.nio.channels.FileChannel;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read a set of integers packed into
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class UnsignedIntPackedInputStream {
|
||||||
|
/**
|
||||||
|
* Ultimate target for the occurrence array.
|
||||||
|
*/
|
||||||
|
private final FileInputStream targetInputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Target channel from which to pull file data.
|
||||||
|
*/
|
||||||
|
private final FileChannel targetInputChannel;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The byte order in which integer input data appears.
|
||||||
|
*/
|
||||||
|
private final ByteOrder byteOrder;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How many bytes are required to store an integer?
|
||||||
|
*/
|
||||||
|
private final int bytesPerInteger = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new PackedIntInputStream, writing to the given target file.
|
||||||
|
* @param inputFile target input file.
|
||||||
|
* @param byteOrder Endianness to use when writing a list of integers.
|
||||||
|
* @throws java.io.IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public UnsignedIntPackedInputStream(File inputFile, ByteOrder byteOrder) throws IOException {
|
||||||
|
this(new FileInputStream(inputFile),byteOrder);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read ints from the given InputStream.
|
||||||
|
* @param inputStream Input stream from which to read ints.
|
||||||
|
* @param byteOrder Endianness to use when writing a list of integers.
|
||||||
|
*/
|
||||||
|
public UnsignedIntPackedInputStream(FileInputStream inputStream, ByteOrder byteOrder) {
|
||||||
|
this.targetInputStream = inputStream;
|
||||||
|
this.targetInputChannel = inputStream.getChannel();
|
||||||
|
this.byteOrder = byteOrder;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read a datum from the input stream.
|
||||||
|
* @return The next input datum in the stream.
|
||||||
|
* @throws IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public long read() throws IOException {
|
||||||
|
long[] data = new long[1];
|
||||||
|
read(data);
|
||||||
|
return data[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read the data from the input stream.
|
||||||
|
* @param data placeholder for input data.
|
||||||
|
* @throws IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public void read( long[] data ) throws IOException {
|
||||||
|
read( data, 0, data.length );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read the data from the input stream, starting at the given offset.
|
||||||
|
* @param data placeholder for input data.
|
||||||
|
* @param offset place in the array to start reading in data.
|
||||||
|
* @param length number of ints to read in.
|
||||||
|
* @throws IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public void read( long[] data, int offset, int length ) throws IOException {
|
||||||
|
ByteBuffer readBuffer = ByteBuffer.allocate(bytesPerInteger*length).order(byteOrder);
|
||||||
|
|
||||||
|
targetInputChannel.read(readBuffer,targetInputChannel.position());
|
||||||
|
readBuffer.flip();
|
||||||
|
targetInputChannel.position(targetInputChannel.position()+readBuffer.remaining());
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
while(i < length)
|
||||||
|
data[offset+i++] = readBuffer.getInt() & 0xFFFFFFFFL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Closes the given output stream.
|
||||||
|
* @throws IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public void close() throws IOException {
|
||||||
|
targetInputStream.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,121 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.alignment.reference.packing;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes an list of integers to the output file.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class UnsignedIntPackedOutputStream {
|
||||||
|
/**
|
||||||
|
* Ultimate target for the occurrence array.
|
||||||
|
*/
|
||||||
|
private final OutputStream targetOutputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A fixed-size buffer for int-packed data.
|
||||||
|
*/
|
||||||
|
private final ByteBuffer buffer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new PackedIntOutputStream, writing to the given target file.
|
||||||
|
* @param outputFile target output file.
|
||||||
|
* @param byteOrder Endianness to use when writing a list of integers.
|
||||||
|
* @throws IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public UnsignedIntPackedOutputStream(File outputFile, ByteOrder byteOrder) throws IOException {
|
||||||
|
this(new FileOutputStream(outputFile),byteOrder);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write packed ints to the given OutputStream.
|
||||||
|
* @param outputStream Output stream to which to write packed ints.
|
||||||
|
* @param byteOrder Endianness to use when writing a list of integers.
|
||||||
|
*/
|
||||||
|
public UnsignedIntPackedOutputStream(OutputStream outputStream, ByteOrder byteOrder) {
|
||||||
|
this.targetOutputStream = outputStream;
|
||||||
|
buffer = ByteBuffer.allocate(PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE).order(byteOrder);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write the data to the output stream.
|
||||||
|
* @param datum datum to write.
|
||||||
|
* @throws IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public void write( long datum ) throws IOException {
|
||||||
|
buffer.rewind();
|
||||||
|
buffer.putInt((int)datum);
|
||||||
|
targetOutputStream.write(buffer.array());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write the data to the output stream.
|
||||||
|
* @param data data to write. occurrences.length must match alphabet size.
|
||||||
|
* @throws IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public void write( long[] data ) throws IOException {
|
||||||
|
for(long datum: data)
|
||||||
|
write(datum);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write the given chunk of data to the input stream.
|
||||||
|
* @param data data to write.
|
||||||
|
* @param offset position at which to start.
|
||||||
|
* @param length number of ints to write.
|
||||||
|
* @throws IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public void write( long[] data, int offset, int length ) throws IOException {
|
||||||
|
for( int i = offset; i < offset+length; i++ )
|
||||||
|
write(data[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Flush the contents of the OutputStream to disk.
|
||||||
|
* @throws IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public void flush() throws IOException {
|
||||||
|
targetOutputStream.flush();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Closes the given output stream.
|
||||||
|
* @throws IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public void close() throws IOException {
|
||||||
|
targetOutputStream.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -25,20 +25,21 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.analyzecovariates;
|
package org.broadinstitute.sting.analyzecovariates;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||||
import org.broadinstitute.sting.commandline.Input;
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
import org.broadinstitute.sting.gatk.walkers.recalibration.*;
|
import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection;
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
import java.io.*;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
|
||||||
|
|
@ -25,12 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.commandline;
|
package org.broadinstitute.sting.commandline;
|
||||||
|
|
||||||
import java.lang.annotation.Documented;
|
import java.lang.annotation.*;
|
||||||
import java.lang.annotation.ElementType;
|
|
||||||
import java.lang.annotation.Inherited;
|
|
||||||
import java.lang.annotation.Retention;
|
|
||||||
import java.lang.annotation.RetentionPolicy;
|
|
||||||
import java.lang.annotation.Target;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
|
||||||
|
|
@ -27,10 +27,10 @@ package org.broadinstitute.sting.commandline;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A group of argument definitions.
|
* A group of argument definitions.
|
||||||
|
|
|
||||||
|
|
@ -27,10 +27,10 @@ package org.broadinstitute.sting.commandline;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A collection of argument definitions.
|
* A collection of argument definitions.
|
||||||
|
|
|
||||||
|
|
@ -25,12 +25,12 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.commandline;
|
package org.broadinstitute.sting.commandline;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.Multiplex;
|
import org.broadinstitute.sting.gatk.walkers.Multiplex;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Multiplexer;
|
import org.broadinstitute.sting.gatk.walkers.Multiplexer;
|
||||||
import org.apache.log4j.Logger;
|
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
import java.lang.annotation.Annotation;
|
import java.lang.annotation.Annotation;
|
||||||
|
|
|
||||||
|
|
@ -25,14 +25,20 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.commandline;
|
package org.broadinstitute.sting.commandline;
|
||||||
|
|
||||||
import org.apache.log4j.*;
|
import org.apache.log4j.FileAppender;
|
||||||
|
import org.apache.log4j.Level;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.apache.log4j.PatternLayout;
|
||||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
||||||
import org.broadinstitute.sting.utils.help.HelpFormatter;
|
import org.broadinstitute.sting.utils.help.HelpFormatter;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.*;
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.EnumSet;
|
||||||
|
import java.util.Locale;
|
||||||
|
|
||||||
public abstract class CommandLineProgram {
|
public abstract class CommandLineProgram {
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,11 +25,17 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.commandline;
|
package org.broadinstitute.sting.commandline;
|
||||||
|
|
||||||
import org.apache.log4j.*;
|
import org.apache.log4j.Appender;
|
||||||
|
import org.apache.log4j.ConsoleAppender;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.apache.log4j.PatternLayout;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
import java.lang.annotation.Annotation;
|
import java.lang.annotation.Annotation;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Enumeration;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Static utility methods for working with command-line arguments.
|
* Static utility methods for working with command-line arguments.
|
||||||
|
|
|
||||||
|
|
@ -26,8 +26,6 @@ package org.broadinstitute.sting.commandline;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
||||||
import java.util.Collection;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Specifies that a value was missing when attempting to populate an argument.
|
* Specifies that a value was missing when attempting to populate an argument.
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -25,16 +25,16 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.commandline;
|
package org.broadinstitute.sting.commandline;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
|
||||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
||||||
import org.broadinstitute.sting.utils.help.HelpFormatter;
|
import org.broadinstitute.sting.utils.help.HelpFormatter;
|
||||||
import org.apache.log4j.Logger;
|
|
||||||
|
|
||||||
import java.lang.reflect.*;
|
import java.lang.reflect.Field;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -27,10 +27,8 @@ package org.broadinstitute.sting.commandline;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.List;
|
import java.util.regex.Pattern;
|
||||||
import java.util.ArrayList;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Holds a pattern, along with how to get to the argument definitions that could match that pattern.
|
* Holds a pattern, along with how to get to the argument definitions that could match that pattern.
|
||||||
|
|
|
||||||
|
|
@ -25,30 +25,21 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk;
|
package org.broadinstitute.sting.gatk;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.Tags;
|
|
||||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
|
||||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
|
||||||
import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor;
|
import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor;
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||||
|
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||||
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
|
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
|
||||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor;
|
import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor;
|
||||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
|
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
|
||||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
|
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
|
||||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
import net.sf.picard.filter.SamRecordFilter;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
|
||||||
import org.broadinstitute.sting.utils.text.ListFileUtils;
|
import org.broadinstitute.sting.utils.text.ListFileUtils;
|
||||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author aaron
|
* @author aaron
|
||||||
|
|
|
||||||
|
|
@ -26,13 +26,15 @@
|
||||||
package org.broadinstitute.sting.gatk;
|
package org.broadinstitute.sting.gatk;
|
||||||
|
|
||||||
import org.broad.tribble.TribbleException;
|
import org.broad.tribble.TribbleException;
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
||||||
|
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Attribution;
|
import org.broadinstitute.sting.gatk.walkers.Attribution;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
|
||||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
|
||||||
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
|
||||||
import org.broadinstitute.sting.commandline.*;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
||||||
|
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -28,24 +28,14 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.commandline.ArgumentException;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
import org.broadinstitute.sting.commandline.ArgumentSource;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
import org.broadinstitute.sting.commandline.CommandLineUtils;
|
|
||||||
import org.broadinstitute.sting.commandline.ParsingEngine;
|
|
||||||
import org.broadinstitute.sting.commandline.Tags;
|
|
||||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
import org.broadinstitute.sting.gatk.datasources.reads.*;
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
|
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
|
||||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||||
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
|
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.MonolithicShardStrategy;
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy;
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategyFactory;
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
|
||||||
import org.broadinstitute.sting.gatk.executive.MicroScheduler;
|
import org.broadinstitute.sting.gatk.executive.MicroScheduler;
|
||||||
import org.broadinstitute.sting.gatk.filters.FilterManager;
|
import org.broadinstitute.sting.gatk.filters.FilterManager;
|
||||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||||
|
|
@ -65,6 +55,7 @@ import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
|
||||||
|
|
@ -25,13 +25,12 @@
|
||||||
package org.broadinstitute.sting.gatk;
|
package org.broadinstitute.sting.gatk;
|
||||||
|
|
||||||
import net.sf.picard.filter.SamRecordFilter;
|
import net.sf.picard.filter.SamRecordFilter;
|
||||||
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Collections;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Holds a bunch of basic information about the traversal.
|
* Holds a bunch of basic information about the traversal.
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
package org.broadinstitute.sting.gatk;
|
package org.broadinstitute.sting.gatk;
|
||||||
|
|
||||||
import net.sf.picard.filter.SamRecordFilter;
|
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import net.sf.samtools.SAMFileReader;
|
import net.sf.samtools.SAMFileReader;
|
||||||
|
|
|
||||||
|
|
@ -25,19 +25,18 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk;
|
package org.broadinstitute.sting.gatk;
|
||||||
|
|
||||||
import net.sf.picard.filter.SamRecordFilter;
|
|
||||||
import org.broadinstitute.sting.commandline.Hidden;
|
import org.broadinstitute.sting.commandline.Hidden;
|
||||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||||
import org.broadinstitute.sting.gatk.filters.FilterManager;
|
import org.broadinstitute.sting.gatk.filters.FilterManager;
|
||||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.help.DescriptionTaglet;
|
import org.broadinstitute.sting.utils.help.DescriptionTaglet;
|
||||||
import org.broadinstitute.sting.utils.help.DisplayNameTaglet;
|
import org.broadinstitute.sting.utils.help.DisplayNameTaglet;
|
||||||
import org.broadinstitute.sting.utils.help.SummaryTaglet;
|
import org.broadinstitute.sting.utils.help.SummaryTaglet;
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,17 +26,16 @@
|
||||||
package org.broadinstitute.sting.gatk.arguments;
|
package org.broadinstitute.sting.gatk.arguments;
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileReader;
|
import net.sf.samtools.SAMFileReader;
|
||||||
import org.broadinstitute.sting.commandline.Hidden;
|
|
||||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.commandline.Hidden;
|
||||||
import org.broadinstitute.sting.commandline.Input;
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
import org.broadinstitute.sting.gatk.DownsampleType;
|
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||||
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||||
|
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||||
import org.simpleframework.xml.*;
|
import org.simpleframework.xml.*;
|
||||||
import org.simpleframework.xml.core.Persister;
|
import org.simpleframework.xml.core.Persister;
|
||||||
import org.simpleframework.xml.stream.Format;
|
import org.simpleframework.xml.stream.Format;
|
||||||
|
|
|
||||||
|
|
@ -26,13 +26,13 @@
|
||||||
package org.broadinstitute.sting.gatk.contexts;
|
package org.broadinstitute.sting.gatk.contexts;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Useful class for forwarding on locusContext data from this iterator
|
* Useful class for forwarding on locusContext data from this iterator
|
||||||
|
|
|
||||||
|
|
@ -27,8 +27,8 @@ package org.broadinstitute.sting.gatk.contexts;
|
||||||
|
|
||||||
import net.sf.samtools.SAMReadGroupRecord;
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.pileup.*;
|
import org.broadinstitute.sting.utils.pileup.*;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,11 +25,12 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.contexts;
|
package org.broadinstitute.sting.gatk.contexts;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import com.google.java.contract.Ensures;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import com.google.java.contract.Requires;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
|
||||||
import net.sf.samtools.util.StringUtil;
|
import net.sf.samtools.util.StringUtil;
|
||||||
import com.google.java.contract.*;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The section of the reference that overlaps with the given
|
* The section of the reference that overlaps with the given
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,14 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collections;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
* Date: May 13, 2009
|
* Date: May 13, 2009
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,12 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import net.sf.picard.reference.ReferenceSequence;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.gatk.walkers.Reference;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Reference;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import net.sf.picard.reference.ReferenceSequence;
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,15 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Presents data sharded by locus to the traversal engine.
|
* Presents data sharded by locus to the traversal engine.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
|
||||||
import org.broadinstitute.sting.gatk.DownsampleType;
|
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||||
|
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
|
||||||
|
|
@ -5,10 +5,10 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
* Date: May 21, 2009
|
* Date: May 21, 2009
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,8 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,13 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Present data sharded by read to a traversal engine.
|
* Present data sharded by read to a traversal engine.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,10 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
* Date: May 22, 2009
|
* Date: May 22, 2009
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,16 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
|
||||||
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import net.sf.picard.reference.ReferenceSequence;
|
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
import net.sf.picard.reference.ReferenceSequence;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
* Date: May 22, 2009
|
* Date: May 22, 2009
|
||||||
|
|
|
||||||
|
|
@ -25,9 +25,9 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.*;
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,14 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
* Date: May 8, 2009
|
* Date: May 8, 2009
|
||||||
|
|
|
||||||
|
|
@ -24,9 +24,6 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileReader;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import org.apache.commons.lang.ArrayUtils;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -35,9 +32,7 @@ import java.io.IOException;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.ByteOrder;
|
import java.nio.ByteOrder;
|
||||||
import java.nio.channels.FileChannel;
|
import java.nio.channels.FileChannel;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
|
||||||
|
|
@ -39,12 +39,7 @@ import java.io.RandomAccessFile;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.ByteOrder;
|
import java.nio.ByteOrder;
|
||||||
import java.nio.channels.FileChannel;
|
import java.nio.channels.FileChannel;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.BitSet;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Writes schedules for a single BAM file to a target output file.
|
* Writes schedules for a single BAM file to a target output file.
|
||||||
|
|
|
||||||
|
|
@ -26,20 +26,10 @@ package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
import net.sf.picard.util.PeekableIterator;
|
import net.sf.picard.util.PeekableIterator;
|
||||||
import net.sf.samtools.GATKBAMFileSpan;
|
import net.sf.samtools.GATKBAMFileSpan;
|
||||||
import org.apache.log4j.Logger;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
|
|
||||||
import java.io.File;
|
import java.util.*;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.NoSuchElementException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Assign intervals to the most appropriate blocks, keeping as little as possible in memory at once.
|
* Assign intervals to the most appropriate blocks, keeping as little as possible in memory at once.
|
||||||
|
|
|
||||||
|
|
@ -29,19 +29,11 @@ import net.sf.samtools.GATKBAMFileSpan;
|
||||||
import net.sf.samtools.SAMFileSpan;
|
import net.sf.samtools.SAMFileSpan;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.SortedMap;
|
|
||||||
import java.util.TreeMap;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Represents a small section of a BAM file, and every associated interval.
|
* Represents a small section of a BAM file, and every associated interval.
|
||||||
|
|
|
||||||
|
|
@ -23,24 +23,18 @@
|
||||||
*/
|
*/
|
||||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
import net.sf.samtools.Bin;
|
import net.sf.samtools.*;
|
||||||
|
|
||||||
import net.sf.samtools.GATKBAMFileSpan;
|
|
||||||
import net.sf.samtools.GATKBin;
|
|
||||||
import net.sf.samtools.GATKChunk;
|
|
||||||
import net.sf.samtools.LinearIndex;
|
|
||||||
import net.sf.samtools.SAMException;
|
|
||||||
import net.sf.samtools.util.RuntimeIOException;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.lang.ref.SoftReference;
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.ByteOrder;
|
import java.nio.ByteOrder;
|
||||||
import java.nio.channels.FileChannel;
|
import java.nio.channels.FileChannel;
|
||||||
import java.util.*;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A basic interface for querying BAM indices.
|
* A basic interface for querying BAM indices.
|
||||||
|
|
|
||||||
|
|
@ -24,11 +24,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
import net.sf.samtools.Bin;
|
import net.sf.samtools.*;
|
||||||
import net.sf.samtools.GATKBAMFileSpan;
|
|
||||||
import net.sf.samtools.GATKBin;
|
|
||||||
import net.sf.samtools.GATKChunk;
|
|
||||||
import net.sf.samtools.LinearIndex;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
|
|
||||||
|
|
@ -35,16 +35,7 @@ import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.NoSuchElementException;
|
|
||||||
import java.util.PriorityQueue;
|
|
||||||
import java.util.Queue;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Shard intervals based on position within the BAM file.
|
* Shard intervals based on position within the BAM file.
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMFileSpan;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
@ -7,8 +8,6 @@ import org.broadinstitute.sting.utils.Utils;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileSpan;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handles locus shards of BAM information.
|
* Handles locus shards of BAM information.
|
||||||
* @author aaron
|
* @author aaron
|
||||||
|
|
|
||||||
|
|
@ -24,17 +24,18 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
import net.sf.samtools.GATKBAMFileSpan;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.SAMFileSpan;
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import java.util.List;
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import java.util.Map;
|
||||||
import net.sf.samtools.SAMFileSpan;
|
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A sharding strategy for loci based on reading of the index.
|
* A sharding strategy for loci based on reading of the index.
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,8 +4,8 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.NoSuchElementException;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.NoSuchElementException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a giant shard representing all the data in the input BAM(s).
|
* Create a giant shard representing all the data in the input BAM(s).
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,17 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileSpan;
|
import net.sf.samtools.SAMFileSpan;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* User: aaron
|
* User: aaron
|
||||||
|
|
|
||||||
|
|
@ -25,12 +25,11 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileSpan;
|
import net.sf.samtools.SAMFileSpan;
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The sharding strategy for reads using a simple counting mechanism. Each read shard
|
* The sharding strategy for reads using a simple counting mechanism. Each read shard
|
||||||
* has a specific number of reads (default to 10K) which is configured in the constructor.
|
* has a specific number of reads (default to 10K) which is configured in the constructor.
|
||||||
|
|
|
||||||
|
|
@ -24,21 +24,19 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
import net.sf.picard.sam.MergingSamRecordIterator;
|
||||||
|
import net.sf.picard.sam.SamFileHeaderMerger;
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
import net.sf.picard.filter.SamRecordFilter;
|
|
||||||
import net.sf.picard.sam.SamFileHeaderMerger;
|
|
||||||
import net.sf.picard.sam.MergingSamRecordIterator;
|
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
||||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.*;
|
|
||||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
|
||||||
import org.broadinstitute.sting.gatk.ReadMetrics;
|
import org.broadinstitute.sting.gatk.ReadMetrics;
|
||||||
|
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||||
import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator;
|
import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.*;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
|
|
|
||||||
|
|
@ -2,17 +2,13 @@ package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileSpan;
|
import net.sf.samtools.SAMFileSpan;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.gatk.ReadMetrics;
|
import org.broadinstitute.sting.gatk.ReadMetrics;
|
||||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,10 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -25,17 +25,17 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.datasources.reads.utilities;
|
package org.broadinstitute.sting.gatk.datasources.reads.utilities;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
import net.sf.samtools.BAMIndex;
|
||||||
|
import net.sf.samtools.SAMFileReader;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.instrumentation.Sizeof;
|
import org.broadinstitute.sting.utils.instrumentation.Sizeof;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.lang.reflect.Field;
|
import java.lang.reflect.Field;
|
||||||
import java.util.Map;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import net.sf.samtools.*;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -24,12 +24,12 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.datasources.reads.utilities;
|
package org.broadinstitute.sting.gatk.datasources.reads.utilities;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
|
||||||
import net.sf.samtools.SAMFileWriter;
|
|
||||||
import net.sf.samtools.SAMFileReader;
|
import net.sf.samtools.SAMFileReader;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMFileWriter;
|
||||||
import net.sf.samtools.SAMFileWriterFactory;
|
import net.sf.samtools.SAMFileWriterFactory;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,6 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.datasources.reads.utilities;
|
package org.broadinstitute.sting.gatk.datasources.reads.utilities;
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileReader;
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,16 +25,15 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.datasources.reference;
|
package org.broadinstitute.sting.gatk.datasources.reference;
|
||||||
|
|
||||||
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|
||||||
import net.sf.picard.reference.FastaSequenceIndexBuilder;
|
|
||||||
import net.sf.picard.sam.CreateSequenceDictionary;
|
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
|
||||||
import net.sf.picard.reference.FastaSequenceIndex;
|
import net.sf.picard.reference.FastaSequenceIndex;
|
||||||
|
import net.sf.picard.reference.FastaSequenceIndexBuilder;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
import net.sf.picard.sam.CreateSequenceDictionary;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.sting.utils.file.FSLockWithShared;
|
import org.broadinstitute.sting.utils.file.FSLockWithShared;
|
||||||
import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException;
|
import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException;
|
||||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
||||||
|
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue