diff --git a/build.xml b/build.xml
index 91eb1f8e9..fe4c7a3f4 100644
--- a/build.xml
+++ b/build.xml
@@ -70,8 +70,6 @@
-
-
@@ -147,11 +145,7 @@
-
-
-
-
-
@@ -179,13 +173,23 @@
-
-
+
+
+
+
+
-
+
+
+
+
+
+
+
+
@@ -215,12 +219,6 @@
-
-
-
-
-
-
@@ -371,7 +369,6 @@
-
@@ -388,7 +385,6 @@
-
@@ -696,7 +692,6 @@
-
@@ -1046,6 +1041,7 @@
+
diff --git a/ivy.xml b/ivy.xml
index ce724bc3c..3f3d1c97f 100644
--- a/ivy.xml
+++ b/ivy.xml
@@ -48,6 +48,9 @@
+
+
+
diff --git a/public/c/SeparateQltout.cc b/public/c/SeparateQltout.cc
new file mode 100644
index 000000000..7644c9603
--- /dev/null
+++ b/public/c/SeparateQltout.cc
@@ -0,0 +1,70 @@
+#include "MainTools.h"
+#include "Basevector.h"
+#include "lookup/LookAlign.h"
+#include "lookup/SerialQltout.h"
+
+/**
+ * Work out which candidate read an alignment belongs to.
+ *
+ * Every candidate is re-scored against the alignment's target using a copy of
+ * the alignment; the first candidate whose error count equals the error count
+ * already stored in la is reported.
+ *
+ * Side effect: when the alignment is reverse-complemented (rc1), each candidate
+ * that gets examined is reverse-complemented in place.
+ *
+ * @param la         alignment to attribute to one of the candidates
+ * @param candidates reads that may have produced the alignment
+ * @param ref        reference sequences, indexed by la.target_id
+ * @return index of the matching candidate, or candidates.size() + 1 if none matches
+ */
+unsigned int MatchingEnd(look_align &la, vecbasevector &candidates, vecbasevector &ref) {
+    //la.PrintParseable(cout);
+
+    for (int i = 0; i < candidates.size(); i++) {
+        look_align newla = la;
+
+        if (newla.rc1) { candidates[i].ReverseComplement(); }
+        newla.ResetFromAlign(newla.a, candidates[i], ref[la.target_id]);
+
+        //newla.PrintParseable(cout, &candidates[i], &ref[newla.target_id]);
+        //cout << newla.Errors() << " " << la.Errors() << endl;
+
+        if (newla.Errors() == la.Errors()) {
+            return i;
+        }
+    }
+
+    //FatalErr("Query id " + ToString(la.query_id) + " had no matches.");
+
+    // Deliberately out of range: callers test "result < number of candidates".
+    return candidates.size() + 1;
+}
+
+/**
+ * Split a combined alignment file into one file per read end.
+ *
+ * Each alignment read from ALIGNS is attributed to end 1 or end 2 with
+ * MatchingEnd() and written to the corresponding output file; alignments that
+ * match neither end are dropped.
+ */
+int main(int argc, char **argv) {
+    RunTime();
+
+    BeginCommandArguments;
+    CommandArgument_String(ALIGNS);
+    CommandArgument_String(FASTB_END_1);
+    CommandArgument_String(FASTB_END_2);
+    CommandArgument_String(REFERENCE);
+
+    CommandArgument_String(ALIGNS_END_1_OUT);
+    CommandArgument_String(ALIGNS_END_2_OUT);
+    EndCommandArguments;
+
+    vecbasevector ref(REFERENCE);
+    vecbasevector reads1(FASTB_END_1);
+    vecbasevector reads2(FASTB_END_2);
+
+    ofstream aligns1stream(ALIGNS_END_1_OUT.c_str());
+    ofstream aligns2stream(ALIGNS_END_2_OUT.c_str());
+
+    // Without this check an unwritable output path silently discards every alignment.
+    if (!aligns1stream || !aligns2stream) {
+        cerr << "Unable to open " << ALIGNS_END_1_OUT << " and/or " << ALIGNS_END_2_OUT << " for writing." << endl;
+        return 1;
+    }
+
+    basevector bv;
+
+    SerialQltout sqltout(ALIGNS);
+    look_align la;
+    while (sqltout.Next(la)) {
+        // Fresh copies every time: MatchingEnd may reverse-complement its candidates.
+        vecbasevector candidates(2);
+        candidates[0] = reads1[la.query_id];
+        candidates[1] = reads2[la.query_id];
+
+        unsigned int matchingend = MatchingEnd(la, candidates, ref);
+        if (matchingend < 2) {
+            bv = (matchingend == 0) ? reads1[la.query_id] : reads2[la.query_id];
+
+            //la.PrintParseable(cout, &bv, &ref[la.target_id]);
+            la.PrintParseable(((matchingend == 0) ? aligns1stream : aligns2stream), &bv, &ref[la.target_id]);
+        }
+    }
+
+    aligns1stream.close();
+    aligns2stream.close();
+
+    return 0;
+}
diff --git a/public/c/bwa/Makefile b/public/c/bwa/Makefile
new file mode 100644
index 000000000..6399a0e6d
--- /dev/null
+++ b/public/c/bwa/Makefile
@@ -0,0 +1,21 @@
+# Builds the JNI shared library that wraps BWA.
+# BWA_HOME, JAVA_INCLUDE, TARGET_LIB, EXTRA_LIBS and LIBTOOL_COMMAND are expected
+# in the environment; build_linux.sh and build_mac.sh set them before calling make.
+CXX=g++
+CXXFLAGS=-g -Wall -O2 -m64 -fPIC
+
+# None of these targets produces a file of the same name.
+.PHONY: all init lib clean
+
+.cpp.o:
+	$(CXX) -c $(CXXFLAGS) -I$(BWA_HOME) -I$(JAVA_INCLUDE) $< -o $@
+
+all: init lib
+
+init:
+	@echo Please make sure the following platforms are set correctly on your machine.
+	@echo BWA_HOME=$(BWA_HOME)
+	@echo JAVA_INCLUDE=$(JAVA_INCLUDE)
+	@echo TARGET_LIB=$(TARGET_LIB)
+	@echo EXTRA_LIBS=$(EXTRA_LIBS)
+	@echo LIBTOOL_COMMAND=$(LIBTOOL_COMMAND)
+
+# $? lists the prerequisites newer than the target; since no file called "lib"
+# is ever created, that is always both object files.
+lib: org_broadinstitute_sting_alignment_bwa_c_BWACAligner.o bwa_gateway.o
+	$(LIBTOOL_COMMAND) $? -o $(TARGET_LIB) -L$(BWA_HOME) -lbwacore $(EXTRA_LIBS)
+
+# -f keeps "make clean" from failing when nothing has been built yet.
+clean:
+	rm -f *.o libbwa.*
diff --git a/public/c/bwa/build_linux.sh b/public/c/bwa/build_linux.sh
new file mode 100755
index 000000000..b3631a28d
--- /dev/null
+++ b/public/c/bwa/build_linux.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# Linux build of libbwa.so.  The paths below are specific to one site; adjust
+# BWA_HOME and JAVA_INCLUDE before running elsewhere.
+export BWA_HOME="/humgen/gsa-scr1/hanna/src/bwa-trunk/bwa"
+# JAVA_INCLUDE is expanded after a single -I in the Makefile, so the second
+# include directory carries its own -I.
+export JAVA_INCLUDE="/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include -I/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include/linux"
+export TARGET_LIB="libbwa.so"
+export EXTRA_LIBS="-lc -lz -lstdc++ -lpthread"
+export LIBTOOL_COMMAND="g++ -shared -Wl,-soname,libbwa.so"
+make
diff --git a/public/c/bwa/build_mac.sh b/public/c/bwa/build_mac.sh
new file mode 100644
index 000000000..bfed900bb
--- /dev/null
+++ b/public/c/bwa/build_mac.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# Mac OS X build of libbwa.dylib.  The paths below are specific to one machine;
+# adjust BWA_HOME and JAVA_INCLUDE before running elsewhere.
+export BWA_HOME="/Users/mhanna/src/bwa"
+export JAVA_INCLUDE="/System/Library/Frameworks/JavaVM.framework/Headers"
+export TARGET_LIB="libbwa.dylib"
+export EXTRA_LIBS="-lc -lz -lsupc++"
+export LIBTOOL_COMMAND="libtool -dynamic"
+make
diff --git a/public/c/bwa/bwa_gateway.cpp b/public/c/bwa/bwa_gateway.cpp
new file mode 100644
index 000000000..00f5aa5bc
--- /dev/null
+++ b/public/c/bwa/bwa_gateway.cpp
@@ -0,0 +1,277 @@
+// NOTE(review): the three system include targets were lost when this patch was
+// extracted.  These are the headers the code below needs (fread/rewind/fclose,
+// free/srand48, memcpy/strlen/strcpy) -- confirm against the upstream file.
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+#include "bwase.h"
+#include "bwa_gateway.h"
+
+/**
+ * Load a BWA index into memory: the annotation/ambiguity data, the packed
+ * reference, and the forward and reverse BWTs with their suffix arrays.
+ * Alignment options are reset to their defaults and the random seed is
+ * re-initialised from the index.
+ */
+BWA::BWA(const char* ann_filename,
+         const char* amb_filename,
+         const char* pac_filename,
+         const char* forward_bwt_filename,
+         const char* forward_sa_filename,
+         const char* reverse_bwt_filename,
+         const char* reverse_sa_filename)
+{
+    // Load the bns (?) and reference
+    bns = bns_restore_core(ann_filename,amb_filename,pac_filename);
+    const size_t reference_bytes = bns->l_pac/4+1;
+    reference = new ubyte_t[reference_bytes];
+    rewind(bns->fp_pac);
+    // A truncated .pac file would otherwise leave part of the reference uninitialised.
+    if(fread(reference, 1, reference_bytes, bns->fp_pac) != reference_bytes)
+        fprintf(stderr, "[BWA] warning: short read while loading packed reference %s\n", pac_filename);
+    fclose(bns->fp_pac);
+    bns->fp_pac = NULL;
+
+    // Load the BWTs (both directions) and suffix arrays (both directions)
+    bwts[0] = bwt_restore_bwt(forward_bwt_filename);
+    bwt_restore_sa(forward_sa_filename, bwts[0]);
+    bwts[1] = bwt_restore_bwt(reverse_bwt_filename);
+    bwt_restore_sa(reverse_sa_filename, bwts[1]);
+    load_default_options();
+
+    // Always reinitialize the random seed whenever a new set of files are loaded.
+    initialize_random_seed();
+
+    // initialize the bwase subsystem
+    bwase_initialize();
+}
+
+/** Release the reference, the annotation data and both BWTs. */
+BWA::~BWA() {
+    delete[] reference;
+    bns_destroy(bns);
+    bwt_destroy(bwts[0]);
+    bwt_destroy(bwts[1]);
+}
+
+/**
+ * Find every suffix-array interval ("path") for a read.
+ *
+ * @param bases                  read bases
+ * @param read_length            number of bases
+ * @param paths                  out: array allocated with new[]; the caller must delete[] it
+ * @param num_paths              out: number of entries in paths
+ * @param best_path_count        out: c1 as computed by bwa_aln2seq
+ * @param second_best_path_count out: c2 as computed by bwa_aln2seq
+ */
+void BWA::find_paths(const char* bases, const unsigned read_length, bwt_aln1_t*& paths, unsigned& num_paths, unsigned& best_path_count, unsigned& second_best_path_count)
+{
+    bwa_seq_t* sequence = create_sequence(bases, read_length);
+
+    // Calculate the suffix array interval for each sequence, storing the result in sequence->aln (and sequence->n_aln).
+    // This method will destroy the contents of seq and rseq.
+    bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options);
+
+    num_paths = sequence->n_aln;
+    paths = new bwt_aln1_t[num_paths];
+    // With no hits sequence->aln may be null, and memcpy must not be handed a null source.
+    if(num_paths > 0)
+        memcpy(paths,sequence->aln,num_paths*sizeof(bwt_aln1_t));
+
+    // Call aln2seq to initialize the type of match present.
+    bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);
+    best_path_count = sequence->c1;
+    second_best_path_count = sequence->c2;
+
+    bwa_free_read_seq(1,sequence);
+}
+
+/**
+ * Align a read and return only the alignment BWA picks as best.
+ *
+ * @return a heap-allocated Alignment (caller deletes it), or NULL if the read has no alignment.
+ */
+Alignment* BWA::generate_single_alignment(const char* bases, const unsigned read_length) {
+    bwa_seq_t* sequence = create_sequence(bases,read_length);
+
+    // Calculate paths.
+    bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options);
+
+    // Check for no alignments found and return null.
+    if(sequence->n_aln == 0) {
+        bwa_free_read_seq(1,sequence);
+        return NULL;
+    }
+
+    // bwa_cal_sa_reg_gap destroys the bases / read length.  Copy them back in.
+    copy_bases_into_sequence(sequence,bases,read_length);
+
+    // Pick best alignment and propagate its information into the sequence.
+    bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);
+
+    // Generate the best alignment from the sequence.
+    Alignment* alignment = new Alignment;
+    *alignment = generate_final_alignment_from_sequence(sequence);
+
+    bwa_free_read_seq(1,sequence);
+
+    return alignment;
+}
+
+/**
+ * Expand a set of paths into one Alignment per suffix-array position.
+ *
+ * @param paths             paths to expand; still owned by the caller afterwards
+ * @param best_count        value stored as c1 for every alignment
+ * @param second_best_count value stored as c2 for every alignment
+ * @param alignments        out: array allocated with new[]; the caller must delete[] it
+ * @param num_alignments    out: sum of (l - k + 1) over all paths
+ */
+void BWA::generate_alignments_from_paths(const char* bases,
+                                         const unsigned read_length,
+                                         bwt_aln1_t* paths,
+                                         const unsigned num_paths,
+                                         const unsigned best_count,
+                                         const unsigned second_best_count,
+                                         Alignment*& alignments,
+                                         unsigned& num_alignments)
+{
+    bwa_seq_t* sequence = create_sequence(bases,read_length);
+
+    sequence->aln = paths;
+    sequence->n_aln = num_paths;
+
+    // (Ab)use bwa_aln2seq to propagate values stored in the path out into the sequence itself.
+    bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);
+
+    // But overwrite key parts of the sequence in case the user passed back only a smaller subset
+    // of the paths.
+    sequence->c1 = best_count;
+    sequence->c2 = second_best_count;
+    sequence->type = sequence->c1 > 1 ? BWA_TYPE_REPEAT : BWA_TYPE_UNIQUE;
+
+    num_alignments = 0;
+    for(unsigned i = 0; i < (unsigned)sequence->n_aln; i++)
+        num_alignments += (sequence->aln + i)->l - (sequence->aln + i)->k + 1;
+
+    alignments = new Alignment[num_alignments];
+    unsigned alignment_idx = 0;
+
+    for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) {
+        // Stub in a 'working' path, so that only the desired alignment is local-aligned.
+        const bwt_aln1_t* path = paths + path_idx;
+        bwt_aln1_t working_path = *path;
+
+        // Loop through all alignments, aligning each one individually.
+        // The index has the same type as k and l so that it cannot truncate them.
+        for(bwtint_t sa_idx = path->k; sa_idx <= path->l; sa_idx++) {
+            working_path.k = working_path.l = sa_idx;
+            sequence->aln = &working_path;
+            sequence->n_aln = 1;
+
+            sequence->sa = sa_idx;
+            sequence->strand = path->a;
+            sequence->score = path->score;
+
+            // Each time through bwa_refine_gapped, seq gets reversed.  Revert the reverse.
+            // TODO: Fix the interface to bwa_refine_gapped so its easier to work with.
+            if(alignment_idx > 0)
+                seq_reverse(sequence->len, sequence->seq, 0);
+
+            // Copy the local alignment data into the alignment object.
+            *(alignments + alignment_idx) = generate_final_alignment_from_sequence(sequence);
+
+            alignment_idx++;
+        }
+    }
+
+    // Detach the caller's paths (and the stack-allocated working path) so that
+    // bwa_free_read_seq does not try to release them.
+    sequence->aln = NULL;
+    sequence->n_aln = 0;
+
+    bwa_free_read_seq(1,sequence);
+}
+
+/**
+ * Turn the suffix-array hit currently described by sequence into an Alignment.
+ *
+ * The returned Alignment owns its cigar and md buffers; both are allocated with
+ * new[] because the JNI layer releases them with delete[].
+ */
+Alignment BWA::generate_final_alignment_from_sequence(bwa_seq_t* sequence) {
+    // Calculate the local coordinate and local alignment.
+    bwa_cal_pac_pos_core(bwts[0],bwts[1],sequence,options.max_diff,options.fnr);
+    bwa_refine_gapped(bns, 1, sequence, reference, NULL);
+
+    // Copy the local alignment data into the alignment object.
+    Alignment alignment;
+
+    // Populate basic path info
+    alignment.edit_distance = sequence->nm;
+    alignment.num_mismatches = sequence->n_mm;
+    alignment.num_gap_opens = sequence->n_gapo;
+    alignment.num_gap_extensions = sequence->n_gape;
+    alignment.num_best = sequence->c1;
+    alignment.num_second_best = sequence->c2;
+
+    // Final alignment position.
+    alignment.type = sequence->type;
+    bns_coor_pac2real(bns, sequence->pos, pos_end(sequence) - sequence->pos, &alignment.contig);
+    alignment.pos = sequence->pos - bns->anns[alignment.contig].offset + 1;
+    alignment.negative_strand = sequence->strand;
+    alignment.mapping_quality = sequence->mapQ;
+
+    // Cigar step.
+    alignment.cigar = NULL;
+    alignment.n_cigar = sequence->n_cigar;
+    if(sequence->cigar) {
+        alignment.cigar = new uint16_t[sequence->n_cigar];
+        memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t));
+
+        // This function runs once per suffix-array position on the same sequence.
+        // Hand the BWA-owned cigar back now so the next bwa_refine_gapped call
+        // cannot overwrite (and thereby leak) it.
+        // NOTE(review): assumes bwa_refine_gapped allocates the cigar with malloc
+        // and does not free the previous one -- confirm against the BWA version in use.
+        free(sequence->cigar);
+        sequence->cigar = NULL;
+        sequence->n_cigar = 0;
+    }
+
+    // MD tag with a better breakdown of differences in the cigar
+    alignment.md = NULL;
+    if(sequence->md) {
+        alignment.md = new char[strlen(sequence->md)+1];
+        strcpy(alignment.md,sequence->md);
+
+        // sequence->md belongs to the C library, so it is released with free(), not delete[].
+        free(sequence->md);
+        sequence->md = NULL;
+    }
+
+    return alignment;
+}
+
+/** Reset every alignment option to the built-in defaults listed below. */
+void BWA::load_default_options()
+{
+    options.s_mm = 3;
+    options.s_gapo = 11;
+    options.s_gape = 4;
+    options.mode = 3;
+    options.indel_end_skip = 5;
+    options.max_del_occ = 10;
+    options.max_entries = 2000000;
+    options.fnr = 0.04;
+    options.max_diff = -1;
+    options.max_gapo = 1;
+    options.max_gape = 6;
+    options.max_seed_diff = 2;
+    options.seed_len = 2147483647;
+    options.n_threads = 1;
+    options.max_top2 = 30;
+    options.trim_qual = 0;
+}
+
+/** Seed drand48 from the seed stored in the loaded index. */
+void BWA::initialize_random_seed()
+{
+    srand48(bns->seed);
+}
+
+/**
+ * Set the maximum edit distance.  A value strictly between 0 and 1 is stored in
+ * options.fnr (with max_diff disabled); any other value is truncated to an
+ * integer and stored in options.max_diff (with fnr disabled).
+ */
+void BWA::set_max_edit_distance(float edit_distance) {
+    if(edit_distance > 0 && edit_distance < 1) {
+        options.fnr = edit_distance;
+        options.max_diff = -1;
+    }
+    else {
+        options.fnr = -1.0;
+        options.max_diff = (int)edit_distance;
+    }
+}
+
+// Plain setters: each stores its argument in the corresponding gap_opt_t field.
+void BWA::set_max_gap_opens(int max_gap_opens) { options.max_gapo = max_gap_opens; }
+void BWA::set_max_gap_extensions(int max_gap_extensions) { options.max_gape = max_gap_extensions; }
+void BWA::set_disallow_indel_within_range(int indel_range) { options.indel_end_skip = indel_range; }
+void BWA::set_mismatch_penalty(int penalty) { options.s_mm = penalty; }
+void BWA::set_gap_open_penalty(int penalty) { options.s_gapo = penalty; }
+void BWA::set_gap_extension_penalty(int penalty) { options.s_gape = penalty; }
+
+/**
+ * Create a sequence with a set of reasonable initial defaults.
+ * Will leave seq and rseq empty.
+ */ +bwa_seq_t* BWA::create_sequence(const char* bases, const unsigned read_length) +{ + bwa_seq_t* sequence = new bwa_seq_t; + + sequence->tid = -1; + + sequence->name = 0; + + copy_bases_into_sequence(sequence, bases, read_length); + + sequence->qual = 0; + sequence->aln = 0; + sequence->md = 0; + + sequence->cigar = NULL; + sequence->n_cigar = 0; + + sequence->multi = NULL; + sequence->n_multi = 0; + + return sequence; +} + +void BWA::copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length) +{ + // seq, rseq will ultimately be freed by bwa_cal_sa_reg_gap + sequence->seq = new ubyte_t[read_length]; + sequence->rseq = new ubyte_t[read_length]; + for(unsigned i = 0; i < read_length; i++) sequence->seq[i] = nst_nt4_table[(unsigned)bases[i]]; + memcpy(sequence->rseq,sequence->seq,read_length); + + // BWA expects the read bases to arrive reversed. + seq_reverse(read_length,sequence->seq,0); + seq_reverse(read_length,sequence->rseq,1); + + sequence->full_len = sequence->len = read_length; +} diff --git a/public/c/bwa/bwa_gateway.h b/public/c/bwa/bwa_gateway.h new file mode 100644 index 000000000..2d26ec650 --- /dev/null +++ b/public/c/bwa/bwa_gateway.h @@ -0,0 +1,83 @@ +#ifndef BWA_GATEWAY +#define BWA_GATEWAY + +#include + +#include "bntseq.h" +#include "bwt.h" +#include "bwtaln.h" + +class Alignment { + public: + uint32_t type; + int contig; + bwtint_t pos; + bool negative_strand; + uint32_t mapping_quality; + + uint16_t *cigar; + int n_cigar; + + uint8_t num_mismatches; + uint8_t num_gap_opens; + uint8_t num_gap_extensions; + uint16_t edit_distance; + + uint32_t num_best; + uint32_t num_second_best; + + char* md; +}; + +class BWA { + private: + bntseq_t *bns; + ubyte_t* reference; + bwt_t* bwts[2]; + gap_opt_t options; + + void load_default_options(); + void initialize_random_seed(); + bwa_seq_t* create_sequence(const char* bases, const unsigned read_length); + void copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, 
const unsigned read_length); + Alignment generate_final_alignment_from_sequence(bwa_seq_t* sequence); + + public: + BWA(const char* ann_filename, + const char* amb_filename, + const char* pac_filename, + const char* forward_bwt_filename, + const char* forward_sa_filename, + const char* reverse_bwt_filename, + const char* reverse_sa_filename); + ~BWA(); + + // Parameterize the aligner. + void set_max_edit_distance(float edit_distance); + void set_max_gap_opens(int max_gap_opens); + void set_max_gap_extensions(int max_gap_extensions); + void set_disallow_indel_within_range(int indel_range); + void set_mismatch_penalty(int penalty); + void set_gap_open_penalty(int penalty); + void set_gap_extension_penalty(int penalty); + + // Perform the alignment + Alignment* generate_single_alignment(const char* bases, + const unsigned read_length); + void find_paths(const char* bases, + const unsigned read_length, + bwt_aln1_t*& paths, + unsigned& num_paths, + unsigned& best_path_count, + unsigned& second_best_path_count); + void generate_alignments_from_paths(const char* bases, + const unsigned read_length, + bwt_aln1_t* paths, + const unsigned num_paths, + const unsigned best_count, + const unsigned second_best_count, + Alignment*& alignments, + unsigned& num_alignments); +}; + +#endif // BWA_GATEWAY diff --git a/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp b/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp new file mode 100644 index 000000000..1ccbef0d4 --- /dev/null +++ b/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp @@ -0,0 +1,437 @@ +#include +#include +#include + +#include "bntseq.h" +#include "bwt.h" +#include "bwtaln.h" +#include "bwa_gateway.h" +#include "org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h" + +typedef void (BWA::*int_setter)(int value); +typedef void (BWA::*float_setter)(float value); + +static jobject convert_to_java_alignment(JNIEnv* env, const jbyte* read_bases, const jsize 
read_length, const Alignment& alignment); +static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name); +static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter); +static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter); +static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message); + +JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create(JNIEnv* env, jobject instance, jobject bwtFiles, jobject configuration) +{ + jstring java_ann = get_configuration_file(env,bwtFiles,"annFile"); + if(java_ann == NULL) return 0L; + jstring java_amb = get_configuration_file(env,bwtFiles,"ambFile"); + if(java_amb == NULL) return 0L; + jstring java_pac = get_configuration_file(env,bwtFiles,"pacFile"); + if(java_pac == NULL) return 0L; + jstring java_forward_bwt = get_configuration_file(env,bwtFiles,"forwardBWTFile"); + if(java_forward_bwt == NULL) return 0L; + jstring java_forward_sa = get_configuration_file(env,bwtFiles,"forwardSAFile"); + if(java_forward_sa == NULL) return 0L; + jstring java_reverse_bwt = get_configuration_file(env,bwtFiles,"reverseBWTFile"); + if(java_reverse_bwt == NULL) return 0L; + jstring java_reverse_sa = get_configuration_file(env,bwtFiles,"reverseSAFile"); + if(java_reverse_sa == NULL) return 0L; + + const char* ann_filename = env->GetStringUTFChars(java_ann,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + const char* amb_filename = env->GetStringUTFChars(java_amb,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + const char* pac_filename = env->GetStringUTFChars(java_pac,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + const char* forward_bwt_filename = env->GetStringUTFChars(java_forward_bwt,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + const char* forward_sa_filename = 
env->GetStringUTFChars(java_forward_sa,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + const char* reverse_bwt_filename = env->GetStringUTFChars(java_reverse_bwt,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + const char* reverse_sa_filename = env->GetStringUTFChars(java_reverse_sa,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + + BWA* bwa = new BWA(ann_filename, + amb_filename, + pac_filename, + forward_bwt_filename, + forward_sa_filename, + reverse_bwt_filename, + reverse_sa_filename); + + Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration(env,instance,(jlong)bwa,configuration); + if(env->ExceptionCheck()) return 0L; + + env->ReleaseStringUTFChars(java_ann,ann_filename); + if(env->ExceptionCheck()) return 0L; + env->ReleaseStringUTFChars(java_amb,amb_filename); + if(env->ExceptionCheck()) return 0L; + env->ReleaseStringUTFChars(java_pac,pac_filename); + if(env->ExceptionCheck()) return 0L; + env->ReleaseStringUTFChars(java_forward_bwt,forward_bwt_filename); + if(env->ExceptionCheck()) return 0L; + env->ReleaseStringUTFChars(java_forward_sa,forward_sa_filename); + if(env->ExceptionCheck()) return 0L; + env->ReleaseStringUTFChars(java_reverse_bwt,reverse_bwt_filename); + if(env->ExceptionCheck()) return 0L; + env->ReleaseStringUTFChars(java_reverse_sa,reverse_sa_filename); + if(env->ExceptionCheck()) return 0L; + + return (jlong)bwa; +} + +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_destroy(JNIEnv* env, jobject instance, jlong java_bwa) +{ + BWA* bwa = (BWA*)java_bwa; + delete bwa; +} + +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration(JNIEnv *env, jobject instance, jlong java_bwa, jobject configuration) { + BWA* bwa = (BWA*)java_bwa; + set_float_configuration_param(env, configuration, "maximumEditDistance", bwa, &BWA::set_max_edit_distance); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, 
"maximumGapOpens", bwa, &BWA::set_max_gap_opens); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "maximumGapExtensions", bwa, &BWA::set_max_gap_extensions); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "disallowIndelWithinRange", bwa, &BWA::set_disallow_indel_within_range); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "mismatchPenalty", bwa, &BWA::set_mismatch_penalty); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "gapOpenPenalty", bwa, &BWA::set_gap_open_penalty); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "gapExtensionPenalty", bwa, &BWA::set_gap_extension_penalty); + if(env->ExceptionCheck()) return; +} + +JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases) +{ + BWA* bwa = (BWA*)java_bwa; + + const jsize read_length = env->GetArrayLength(java_bases); + if(env->ExceptionCheck()) return NULL; + + jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE); + if(read_bases == NULL) return NULL; + + bwt_aln1_t* paths = NULL; + unsigned num_paths = 0; + + unsigned best_path_count, second_best_path_count; + bwa->find_paths((const char*)read_bases,read_length,paths,num_paths,best_path_count,second_best_path_count); + + jobjectArray java_paths = env->NewObjectArray(num_paths, env->FindClass("org/broadinstitute/sting/alignment/bwa/c/BWAPath"), NULL); + if(java_paths == NULL) return NULL; + + for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) { + bwt_aln1_t& path = *(paths + path_idx); + + jclass java_path_class = env->FindClass("org/broadinstitute/sting/alignment/bwa/c/BWAPath"); + if(java_path_class == NULL) return NULL; + + jmethodID java_path_constructor = env->GetMethodID(java_path_class, "", "(IIIZJJIII)V"); + 
if(java_path_constructor == NULL) return NULL; + + // Note that k/l are being cast to long. Bad things will happen if JNI assumes that they're ints. + jobject java_path = env->NewObject(java_path_class, + java_path_constructor, + path.n_mm, + path.n_gapo, + path.n_gape, + path.a, + (jlong)path.k, + (jlong)path.l, + path.score, + best_path_count, + second_best_path_count); + if(java_path == NULL) return NULL; + + env->SetObjectArrayElement(java_paths,path_idx,java_path); + if(env->ExceptionCheck()) return NULL; + + env->DeleteLocalRef(java_path_class); + if(env->ExceptionCheck()) return NULL; + } + + delete[] paths; + + env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE); + + return env->ExceptionCheck() ? NULL : java_paths; +} + +JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_convertPathsToAlignments(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases, jobjectArray java_paths) +{ + BWA* bwa = (BWA*)java_bwa; + + const jsize read_length = env->GetArrayLength(java_bases); + if(env->ExceptionCheck()) return NULL; + + jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE); + if(read_bases == NULL) return NULL; + + const jsize num_paths = env->GetArrayLength(java_paths); + bwt_aln1_t* paths = new bwt_aln1_t[num_paths]; + unsigned best_count = 0, second_best_count = 0; + + for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) { + jobject java_path = env->GetObjectArrayElement(java_paths,path_idx); + jclass java_path_class = env->GetObjectClass(java_path); + if(java_path_class == NULL) return NULL; + + bwt_aln1_t& path = *(paths + path_idx); + + jfieldID mismatches_field = env->GetFieldID(java_path_class, "numMismatches", "I"); + if(mismatches_field == NULL) return NULL; + path.n_mm = env->GetIntField(java_path,mismatches_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID gap_opens_field = env->GetFieldID(java_path_class, "numGapOpens", "I"); + if(gap_opens_field 
== NULL) return NULL; + path.n_gapo = env->GetIntField(java_path,gap_opens_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID gap_extensions_field = env->GetFieldID(java_path_class, "numGapExtensions", "I"); + if(gap_extensions_field == NULL) return NULL; + path.n_gape = env->GetIntField(java_path,gap_extensions_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID negative_strand_field = env->GetFieldID(java_path_class, "negativeStrand", "Z"); + if(negative_strand_field == NULL) return NULL; + path.a = env->GetBooleanField(java_path,negative_strand_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID k_field = env->GetFieldID(java_path_class, "k", "J"); + if(k_field == NULL) return NULL; + path.k = env->GetLongField(java_path,k_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID l_field = env->GetFieldID(java_path_class, "l", "J"); + if(l_field == NULL) return NULL; + path.l = env->GetLongField(java_path,l_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID score_field = env->GetFieldID(java_path_class, "score", "I"); + if(score_field == NULL) return NULL; + path.score = env->GetIntField(java_path,score_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID best_count_field = env->GetFieldID(java_path_class, "bestCount", "I"); + if(best_count_field == NULL) return NULL; + best_count = env->GetIntField(java_path,best_count_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID second_best_count_field = env->GetFieldID(java_path_class, "secondBestCount", "I"); + if(second_best_count_field == NULL) return NULL; + second_best_count = env->GetIntField(java_path,second_best_count_field); + if(env->ExceptionCheck()) return NULL; + } + + Alignment* alignments = NULL; + unsigned num_alignments = 0; + bwa->generate_alignments_from_paths((const char*)read_bases,read_length,paths,num_paths,best_count,second_best_count,alignments,num_alignments); + + jobjectArray java_alignments = env->NewObjectArray(num_alignments, 
env->FindClass("org/broadinstitute/sting/alignment/Alignment"), NULL); + if(java_alignments == NULL) return NULL; + + for(unsigned alignment_idx = 0; alignment_idx < (unsigned)num_alignments; alignment_idx++) { + Alignment& alignment = *(alignments + alignment_idx); + jobject java_alignment = convert_to_java_alignment(env,read_bases,read_length,alignment); + if(java_alignment == NULL) return NULL; + env->SetObjectArrayElement(java_alignments,alignment_idx,java_alignment); + if(env->ExceptionCheck()) return NULL; + } + + delete[] alignments; + delete[] paths; + + env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE); + + return env->ExceptionCheck() ? NULL : java_alignments; +} + +JNIEXPORT jobject JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getBestAlignment(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases) { + BWA* bwa = (BWA*)java_bwa; + + const jsize read_length = env->GetArrayLength(java_bases); + if(env->ExceptionCheck()) return NULL; + + jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE); + if(read_bases == NULL) return NULL; + + Alignment* best_alignment = bwa->generate_single_alignment((const char*)read_bases,read_length); + jobject java_best_alignment = (best_alignment != NULL) ? 
convert_to_java_alignment(env,read_bases,read_length,*best_alignment) : NULL; + delete best_alignment; + + env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE); + + return java_best_alignment; +} + +static jobject convert_to_java_alignment(JNIEnv *env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment) { + unsigned cigar_length; + if(alignment.type == BWA_TYPE_NO_MATCH) cigar_length = 0; + else if(!alignment.cigar) cigar_length = 1; + else cigar_length = alignment.n_cigar; + + jcharArray java_cigar_operators = env->NewCharArray(cigar_length); + if(java_cigar_operators == NULL) return NULL; + jintArray java_cigar_lengths = env->NewIntArray(cigar_length); + if(java_cigar_lengths == NULL) return NULL; + + if(alignment.cigar) { + for(unsigned cigar_idx = 0; cigar_idx < (unsigned)alignment.n_cigar; ++cigar_idx) { + jchar cigar_operator = "MIDS"[alignment.cigar[cigar_idx]>>14]; + jint cigar_length = alignment.cigar[cigar_idx]&0x3fff; + + env->SetCharArrayRegion(java_cigar_operators,cigar_idx,1,&cigar_operator); + if(env->ExceptionCheck()) return NULL; + env->SetIntArrayRegion(java_cigar_lengths,cigar_idx,1,&cigar_length); + if(env->ExceptionCheck()) return NULL; + } + } + else { + if(alignment.type != BWA_TYPE_NO_MATCH) { + jchar cigar_operator = 'M'; + env->SetCharArrayRegion(java_cigar_operators,0,1,&cigar_operator); + if(env->ExceptionCheck()) return NULL; + env->SetIntArrayRegion(java_cigar_lengths,0,1,&read_length); + if(env->ExceptionCheck()) return NULL; + } + } + delete[] alignment.cigar; + + jclass java_alignment_class = env->FindClass("org/broadinstitute/sting/alignment/Alignment"); + if(java_alignment_class == NULL) return NULL; + + jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "", "(IIZI[C[IILjava/lang/String;IIIII)V"); + if(java_alignment_constructor == NULL) return NULL; + + jstring java_md = env->NewStringUTF(alignment.md); + if(java_md == NULL) return NULL; + delete[] alignment.md; + + 
jobject java_alignment = env->NewObject(java_alignment_class, + java_alignment_constructor, + alignment.contig, + alignment.pos, + alignment.negative_strand, + alignment.mapping_quality, + java_cigar_operators, + java_cigar_lengths, + alignment.edit_distance, + java_md, + alignment.num_mismatches, + alignment.num_gap_opens, + alignment.num_gap_extensions, + alignment.num_best, + alignment.num_second_best); + if(java_alignment == NULL) return NULL; + + env->DeleteLocalRef(java_alignment_class); + if(env->ExceptionCheck()) return NULL; + + return java_alignment; +} + +static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name) { + jclass configuration_class = env->GetObjectClass(configuration); + if(configuration_class == NULL) return NULL; + + jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/io/File;"); + if(configuration_field == NULL) return NULL; + + jobject configuration_file = (jobject)env->GetObjectField(configuration,configuration_field); + + jclass file_class = env->FindClass("java/io/File"); + if(file_class == NULL) return NULL; + + jmethodID path_extractor = env->GetMethodID(file_class,"getAbsolutePath", "()Ljava/lang/String;"); + if(path_extractor == NULL) return NULL; + + jstring path = (jstring)env->CallObjectMethod(configuration_file,path_extractor); + if(path == NULL) return NULL; + + env->DeleteLocalRef(configuration_class); + env->DeleteLocalRef(file_class); + env->DeleteLocalRef(configuration_file); + + return path; +} + +static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter) { + jclass configuration_class = env->GetObjectClass(configuration); + if(configuration_class == NULL) return; + + jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/Integer;"); + if(configuration_field == NULL) return; + + jobject boxed_value = 
env->GetObjectField(configuration,configuration_field); + if(env->ExceptionCheck()) return; + + if(boxed_value != NULL) { + jclass int_box_class = env->FindClass("java/lang/Integer"); + if(int_box_class == NULL) return; + + jmethodID int_extractor = env->GetMethodID(int_box_class,"intValue", "()I"); + if(int_extractor == NULL) return; + + jint value = env->CallIntMethod(boxed_value,int_extractor); + if(env->ExceptionCheck()) return; + + if(value < 0) + { + throw_config_value_exception(env,field_name,"cannot be set to a negative value"); + return; + } + + (bwa->*setter)(value); + + env->DeleteLocalRef(int_box_class); + } + + env->DeleteLocalRef(boxed_value); + env->DeleteLocalRef(configuration_class); +} + +static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter) +{ + jclass configuration_class = env->GetObjectClass(configuration); + if(configuration_class == NULL) return; + + jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/Float;"); + if(configuration_field == NULL) return; + + jobject boxed_value = env->GetObjectField(configuration,configuration_field); + if(boxed_value != NULL) { + jclass float_box_class = env->FindClass("java/lang/Float"); + if(float_box_class == NULL) return; + + jmethodID float_extractor = env->GetMethodID(float_box_class,"floatValue", "()F"); + if(float_extractor == NULL) return; + + jfloat value = env->CallFloatMethod(boxed_value,float_extractor); + if(env->ExceptionCheck()) return; + + if(value < 0) + { + throw_config_value_exception(env,field_name,"cannot be set to a negative value"); + return; + } + + (bwa->*setter)(value); + + env->DeleteLocalRef(float_box_class); + } + + env->DeleteLocalRef(boxed_value); + env->DeleteLocalRef(configuration_class); +} + +static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message) +{ + char* buffer = new char[strlen(field_name)+1+strlen(message)+1]; 
+ sprintf(buffer,"%s %s",field_name,message); + jclass sting_exception_class = env->FindClass("org/broadinstitute/sting/utils/StingException"); + if(sting_exception_class == NULL) { delete[] buffer; return; } /* free the message buffer on this early exit too; previously it leaked when the exception class could not be found */ + env->ThrowNew(sting_exception_class, buffer); + delete[] buffer; +} diff --git a/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h b/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h new file mode 100644 index 000000000..0c44e430a --- /dev/null +++ b/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h @@ -0,0 +1,61 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_broadinstitute_sting_alignment_bwa_c_BWACAligner */ + +#ifndef _Included_org_broadinstitute_sting_alignment_bwa_c_BWACAligner +#define _Included_org_broadinstitute_sting_alignment_bwa_c_BWACAligner +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner + * Method: create + * Signature: (Lorg/broadinstitute/sting/alignment/bwa/BWTFiles;Lorg/broadinstitute/sting/alignment/bwa/BWAConfiguration;)J + */ +JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create + (JNIEnv *, jobject, jobject, jobject); + +/* + * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner + * Method: updateConfiguration + * Signature: (JLorg/broadinstitute/sting/alignment/bwa/BWAConfiguration;)V + */ +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration + (JNIEnv *, jobject, jlong, jobject); + +/* + * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner + * Method: destroy + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_destroy + (JNIEnv *, jobject, jlong); + +/* + * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner + * Method: getPaths + * Signature: (J[B)[Lorg/broadinstitute/sting/alignment/bwa/c/BWAPath; + */ +JNIEXPORT jobjectArray JNICALL 
Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths + (JNIEnv *, jobject, jlong, jbyteArray); + +/* + * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner + * Method: convertPathsToAlignments + * Signature: (J[B[Lorg/broadinstitute/sting/alignment/bwa/c/BWAPath;)[Lorg/broadinstitute/sting/alignment/Alignment; + */ +JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_convertPathsToAlignments + (JNIEnv *, jobject, jlong, jbyteArray, jobjectArray); + +/* + * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner + * Method: getBestAlignment + * Signature: (J[B)Lorg/broadinstitute/sting/alignment/Alignment; + */ +JNIEXPORT jobject JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getBestAlignment + (JNIEnv *, jobject, jlong, jbyteArray); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/public/c/libenvironhack/Makefile b/public/c/libenvironhack/Makefile new file mode 100644 index 000000000..302ff8e31 --- /dev/null +++ b/public/c/libenvironhack/Makefile @@ -0,0 +1,10 @@ +CC=gcc +CCFLAGS=-Wall -dynamiclib -arch i386 -arch x86_64 + +libenvironhack.dylib: libenvironhack.c + $(CC) $(CCFLAGS) -init _init_environ $< -o $@ + +all: libenvironhack.dylib + +clean: + rm -f libenvironhack.dylib diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/BatchMergeIntegrationTest.java b/public/c/libenvironhack/libenvironhack.c old mode 100755 new mode 100644 similarity index 52% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/BatchMergeIntegrationTest.java rename to public/c/libenvironhack/libenvironhack.c index 7e1d86105..8b2a2640e --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/BatchMergeIntegrationTest.java +++ b/public/c/libenvironhack/libenvironhack.c @@ -22,25 +22,16 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.gatk.walkers.variantutils; +/* +LSF 7.0.6 on the mac is missing the unsatisfied exported symbol for environ which was removed on MacOS X 10.5+. +nm $LSF_LIBDIR/liblsf.dylib | grep environ +See "man environ" for more info, along with http://lists.apple.com/archives/java-dev/2007/Dec/msg00096.html +*/ -import org.broadinstitute.sting.WalkerTest; -import org.testng.annotations.Test; +#include -import java.io.File; -import java.util.Arrays; +char **environ = (char **)0; -public class BatchMergeIntegrationTest extends WalkerTest { - @Test - public void testBatchMerge1() { - String bam = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam"; - String alleles = validationDataLocation + "batch.merge.alleles.vcf"; - WalkerTestSpec spec = new WalkerTestSpec( - "-T UnifiedGenotyper -NO_HEADER -BTI alleles -stand_call_conf 0.0 -glm BOTH -G none -nsl -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -o %s -R " + b37KGReference - + " -B:alleles,VCF " + alleles - + " -I " + bam, - 1, - Arrays.asList("f4ed8f4ef2cba96823c06e90e9d0de35")); - executeTest("testBatchMerge UG genotype given alleles:" + new File(bam).getName() + " with " + new File(alleles).getName(), spec); - } -} \ No newline at end of file +void init_environ(void) { + environ = (*_NSGetEnviron()); +} diff --git a/public/c/libenvironhack/libenvironhack.dylib b/public/c/libenvironhack/libenvironhack.dylib new file mode 100755 index 000000000..a45e038b4 Binary files /dev/null and b/public/c/libenvironhack/libenvironhack.dylib differ diff --git a/public/java/src/org/broadinstitute/sting/alignment/Aligner.java b/public/java/src/org/broadinstitute/sting/alignment/Aligner.java new file mode 100644 index 000000000..4bf05cb75 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/Aligner.java @@ -0,0 +1,49 @@ +package org.broadinstitute.sting.alignment; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; + +/** + * Create perfect 
alignments from the read to the genome represented by the given BWT / suffix array. + * + * @author mhanna + * @version 0.1 + */ +public interface Aligner { + /** + * Close this instance of the BWA pointer and delete its resources. + */ + public void close(); + + /** + * Allow the aligner to choose one alignment randomly from the pile of best alignments. + * @param bases Bases to align. + * @return One of the best alignments for the given bases. + */ + public Alignment getBestAlignment(final byte[] bases); + + /** + * Align the read to the reference. + * @param read Read to align. + * @param header Optional header to drop in place. + * @return The read produced by the alignment. + */ + public SAMRecord align(final SAMRecord read, final SAMFileHeader header); + + /** + * Get an iterator of alignments, batched by mapping quality. + * @param bases List of bases. + * @return Iterator to alignments. + */ + public Iterable getAllAlignments(final byte[] bases); + + /** + * Get an iterator of aligned reads, batched by mapping quality. + * @param read Read to align. + * @param newHeader Optional new header to use when aligning the read. If not present, it must be null. + * @return Iterator to aligned reads. + */ + public Iterable alignAll(final SAMRecord read, final SAMFileHeader newHeader); +} + + diff --git a/public/java/src/org/broadinstitute/sting/alignment/Alignment.java b/public/java/src/org/broadinstitute/sting/alignment/Alignment.java new file mode 100644 index 000000000..c63f5615f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/Alignment.java @@ -0,0 +1,221 @@ +package org.broadinstitute.sting.alignment; + +import net.sf.samtools.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +/** + * Represents an alignment of a read to a site in the reference genome. 
+ * + * @author mhanna + * @version 0.1 + */ +public class Alignment { + protected int contigIndex; + protected long alignmentStart; + protected boolean negativeStrand; + protected int mappingQuality; + + protected char[] cigarOperators; + protected int[] cigarLengths; + + protected int editDistance; + protected String mismatchingPositions; + + protected int numMismatches; + protected int numGapOpens; + protected int numGapExtensions; + protected int bestCount; + protected int secondBestCount; + + /** + * Gets the index of the given contig. + * @return The contig index. + */ + public int getContigIndex() { return contigIndex; } + + /** + * Gets the starting position for the given alignment. + * @return Starting position. + */ + public long getAlignmentStart() { return alignmentStart; } + + /** + * Is the given alignment on the reverse strand? + * @return True if the alignment is on the reverse strand. + */ + public boolean isNegativeStrand() { return negativeStrand; } + + /** + * Gets the mapping quality of this alignment. + * @return The mapping quality. + */ + public int getMappingQuality() { return mappingQuality; } + + /** + * Gets the edit distance; will eventually end up in the NM SAM tag + * if this alignment makes it that far. + * @return The edit distance. + */ + public int getEditDistance() { return editDistance; } + + /** + * A string representation of which positions mismatch; contents of MD tag. + * @return String representation of mismatching positions. + */ + public String getMismatchingPositions() { return mismatchingPositions; } + + /** + * Gets the number of mismatches in the read. + * @return Number of mismatches. + */ + public int getNumMismatches() { return numMismatches; } + + /** + * Get the number of gap opens. + * @return Number of gap opens. + */ + public int getNumGapOpens() { return numGapOpens; } + + /** + * Get the number of gap extensions. + * @return Number of gap extensions. 
+ */ + public int getNumGapExtensions() { return numGapExtensions; } + + /** + * Get the number of best alignments. + * @return Number of top scoring alignments. + */ + public int getBestCount() { return bestCount; } + + /** + * Get the number of second best alignments. + * @return Number of second best scoring alignments. + */ + public int getSecondBestCount() { return secondBestCount; } + + /** + * Gets the cigar for this alignment. + * @return sam-jdk formatted alignment. + */ + public Cigar getCigar() { + Cigar cigar = new Cigar(); + for(int i = 0; i < cigarOperators.length; i++) { + CigarOperator operator = CigarOperator.characterToEnum(cigarOperators[i]); + cigar.add(new CigarElement(cigarLengths[i],operator)); + } + return cigar; + } + + /** + * Temporarily implement getCigarString() for debugging; the TextCigarCodec is unfortunately + * package-protected. + * @return Each cigar element's length followed by its operator, concatenated in order; the string "*" if the cigar is empty. + */ + public String getCigarString() { + Cigar cigar = getCigar(); + if(cigar.isEmpty()) return "*"; + + StringBuilder cigarString = new StringBuilder(); + for(CigarElement element: cigar.getCigarElements()) { + cigarString.append(element.getLength()); + cigarString.append(element.getOperator()); + } + return cigarString.toString(); + } + + /** + * Stub for inheritance. + */ + public Alignment() {} + + /** + * Create a new alignment object. + * @param contigIndex The contig to which this read aligned. + * @param alignmentStart The point within the contig to which this read aligned. + * @param negativeStrand Forward or reverse alignment of the given read. + * @param mappingQuality How good does BWA think this mapping is? + * @param cigarOperators The ordered operators in the cigar string. + * @param cigarLengths The lengths to which each operator applies. + * @param editDistance The edit distance (cumulative) of the read. + * @param mismatchingPositions String representation of which bases in the read mismatch. + * @param numMismatches Number of total mismatches in the read. 
+ * @param numGapOpens Number of gap opens in the read. + * @param numGapExtensions Number of gap extensions in the read. + * @param bestCount Number of best alignments in the read. + * @param secondBestCount Number of second best alignments in the read. + */ + public Alignment(int contigIndex, + int alignmentStart, + boolean negativeStrand, + int mappingQuality, + char[] cigarOperators, + int[] cigarLengths, + int editDistance, + String mismatchingPositions, + int numMismatches, + int numGapOpens, + int numGapExtensions, + int bestCount, + int secondBestCount) { + this.contigIndex = contigIndex; + this.alignmentStart = alignmentStart; + this.negativeStrand = negativeStrand; + this.mappingQuality = mappingQuality; + this.cigarOperators = cigarOperators; + this.cigarLengths = cigarLengths; + this.editDistance = editDistance; + this.mismatchingPositions = mismatchingPositions; + this.numMismatches = numMismatches; + this.numGapOpens = numGapOpens; + this.numGapExtensions = numGapExtensions; + this.bestCount = bestCount; + this.secondBestCount = secondBestCount; + } + + /** + * Creates a read directly from an alignment. + * @param alignment The alignment to convert to a read. + * @param unmappedRead Source of the unmapped read. Should have bases, quality scores, and flags. + * @param newSAMHeader The new SAM header to use in creating this read. Can be null, in which case + * the header of the cloned read is left unchanged. + * @return A copy of unmappedRead updated with the given alignment, if any. + */ + public static SAMRecord convertToRead(Alignment alignment, SAMRecord unmappedRead, SAMFileHeader newSAMHeader) { + SAMRecord read; + try { + read = (SAMRecord)unmappedRead.clone(); + } + catch(CloneNotSupportedException ex) { + throw new ReviewedStingException("Unable to create aligned read from template."); + } + + if(newSAMHeader != null) + read.setHeader(newSAMHeader); + + // If we're realigning a previously aligned record, strip out the placement of the alignment. 
+ read.setReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME); + read.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START); + read.setMateReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME); + read.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START); + + if(alignment != null) { + read.setReadUnmappedFlag(false); + read.setReferenceIndex(alignment.getContigIndex()); + read.setAlignmentStart((int)alignment.getAlignmentStart()); + read.setReadNegativeStrandFlag(alignment.isNegativeStrand()); + read.setMappingQuality(alignment.getMappingQuality()); + read.setCigar(alignment.getCigar()); + if(alignment.isNegativeStrand()) { + read.setReadBases(BaseUtils.simpleReverseComplement(read.getReadBases())); + read.setBaseQualities(Utils.reverse(read.getBaseQualities())); + } + read.setAttribute("NM",alignment.getEditDistance()); + read.setAttribute("MD",alignment.getMismatchingPositions()); + } + + return read; + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java new file mode 100644 index 000000000..c6755e878 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.alignment; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.bwa.BWTFiles; +import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.Iterator; + +/** + * Validates consistency of the aligner interface by taking reads already aligned by BWA in a BAM file, stripping them + * of their alignment data, realigning them, and making sure one of the best resulting realignments matches the original + * alignment from the input file. + * + * @author mhanna + * @version 0.1 + */ +public class AlignmentValidationWalker extends ReadWalker { + /** + * The supporting BWT index generated using BWT. + */ + @Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false) + private String prefix = null; + + /** + * The instance used to generate alignments. + */ + private BWACAligner aligner = null; + + /** + * Create an aligner object. The aligner object will load and hold the BWT until close() is called. 
+ */ + @Override + public void initialize() { + if(prefix == null) + prefix = getToolkit().getArguments().referenceFile.getAbsolutePath(); + BWTFiles bwtFiles = new BWTFiles(prefix); + BWAConfiguration configuration = new BWAConfiguration(); + aligner = new BWACAligner(bwtFiles,configuration); + } + + /** + * Aligns a read to the given reference. + * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. + * @param read Read to align. + * @return Number of reads aligned by this map (aka 1). + */ + @Override + public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { + //logger.info(String.format("examining read %s", read.getReadName())); + + byte[] bases = read.getReadBases(); + if(read.getReadNegativeStrandFlag()) bases = BaseUtils.simpleReverseComplement(bases); + + boolean matches = true; + Iterable alignments = aligner.getAllAlignments(bases); + Iterator alignmentIterator = alignments.iterator(); + + if(!alignmentIterator.hasNext()) { + matches = read.getReadUnmappedFlag(); + } + else { + Alignment[] alignmentsOfBestQuality = alignmentIterator.next(); + for(Alignment alignment: alignmentsOfBestQuality) { + matches = (alignment.getContigIndex() == read.getReferenceIndex()); + matches &= (alignment.getAlignmentStart() == read.getAlignmentStart()); + matches &= (alignment.isNegativeStrand() == read.getReadNegativeStrandFlag()); + matches &= (alignment.getCigar().equals(read.getCigar())); + matches &= (alignment.getMappingQuality() == read.getMappingQuality()); + if(matches) break; + } + } + + if(!matches) { + logger.error("Found mismatch!"); + logger.error(String.format("Read %s:",read.getReadName())); + logger.error(String.format(" Contig index: %d",read.getReferenceIndex())); + logger.error(String.format(" Alignment start: %d", read.getAlignmentStart())); + logger.error(String.format(" Negative strand: %b", read.getReadNegativeStrandFlag())); + logger.error(String.format(" Cigar: %s%n", 
read.getCigarString())); + logger.error(String.format(" Mapping quality: %s%n", read.getMappingQuality())); + for(Alignment[] alignmentsByScore: alignments) { + for(int i = 0; i < alignmentsByScore.length; i++) { + logger.error(String.format("Alignment %d:",i)); + logger.error(String.format(" Contig index: %d",alignmentsByScore[i].getContigIndex())); + logger.error(String.format(" Alignment start: %d", alignmentsByScore[i].getAlignmentStart())); + logger.error(String.format(" Negative strand: %b", alignmentsByScore[i].isNegativeStrand())); + logger.error(String.format(" Cigar: %s", alignmentsByScore[i].getCigarString())); + logger.error(String.format(" Mapping quality: %s%n", alignmentsByScore[i].getMappingQuality())); + } + } + throw new ReviewedStingException(String.format("Read %s mismatches!", read.getReadName())); + } + + return 1; + } + + /** + * Initial value for reduce. In this case, validated reads will be counted. + * @return 0, indicating no reads yet validated. + */ + @Override + public Integer reduceInit() { return 0; } + + /** + * Calculates the number of reads processed. + * @param value Number of reads processed by this map. + * @param sum Number of reads processed before this map. + * @return Number of reads processed up to and including this map. + */ + @Override + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + /** + * Cleanup. + * @param result Number of reads processed. 
+ */ + @Override + public void onTraversalDone(Integer result) { + aligner.close(); + super.onTraversalDone(result); + } + +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java b/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java new file mode 100644 index 000000000..7064e637f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.alignment; + +import net.sf.picard.reference.ReferenceSequenceFileFactory; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMSequenceDictionary; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.bwa.BWTFiles; +import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.walkers.WalkerName; + +import java.io.File; + +/** + * Aligns reads to a given reference using Heng Li's BWA aligner, presenting the resulting alignments in SAM or BAM format. + * Mimics the steps 'bwa aln' followed by 'bwa samse' using the BWA/C implementation. + * + * @author mhanna + * @version 0.1 + */ +@WalkerName("Align") +public class AlignmentWalker extends ReadWalker { + @Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " + + "generated by bwa index -d bwtsw. If unspecified, will default " + + "to the reference specified via the -R argument.",required=false) + private File targetReferenceFile = null; + + @Output + private StingSAMFileWriter out = null; + + /** + * The actual aligner. + */ + private BWACAligner aligner = null; + + /** + * New header to use, if desired. + */ + private SAMFileHeader header; + + /** + * Create an aligner object. The aligner object will load and hold the BWT until close() is called. 
+ */ + @Override + public void initialize() { + if(targetReferenceFile == null) + targetReferenceFile = getToolkit().getArguments().referenceFile; + BWTFiles bwtFiles = new BWTFiles(targetReferenceFile.getAbsolutePath()); + BWAConfiguration configuration = new BWAConfiguration(); + aligner = new BWACAligner(bwtFiles,configuration); + + // Take the header of the SAM file, tweak it by adding in the reference dictionary and specifying that the target file is unsorted. + header = getToolkit().getSAMFileHeader().clone(); + SAMSequenceDictionary referenceDictionary = + ReferenceSequenceFileFactory.getReferenceSequenceFile(targetReferenceFile).getSequenceDictionary(); + header.setSequenceDictionary(referenceDictionary); + header.setSortOrder(SAMFileHeader.SortOrder.unsorted); + + out.writeHeader(header); + } + + /** + * Aligns a read to the given reference. + * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. + * @param read Read to align. + * @return Number of alignments found for this read. + */ + @Override + public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { + SAMRecord alignedRead = aligner.align(read,header); + out.addAlignment(alignedRead); + return 1; + } + + /** + * Initial value for reduce. In this case, alignments will be counted. + * @return 0, indicating no alignments yet found. + */ + @Override + public Integer reduceInit() { return 0; } + + /** + * Calculates the number of alignments found. + * @param value Number of alignments found by this map. + * @param sum Number of alignments found before this map. + * @return Number of alignments found up to and including this map. + */ + @Override + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + /** + * Cleanup. + * @param result Number of reads processed. 
+ */ + @Override + public void onTraversalDone(Integer result) { + aligner.close(); + super.onTraversalDone(result); + } + +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java b/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java new file mode 100644 index 000000000..57d92319f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.alignment; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.bwa.BWTFiles; +import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; + +import java.io.PrintStream; +import java.util.Iterator; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; + +/** + * Counts the number of best alignments as presented by BWA and outputs a histogram of number of placements vs. the + * frequency of that number of placements. + * + * @author mhanna + * @version 0.1 + */ +public class CountBestAlignmentsWalker extends ReadWalker { + /** + * The supporting BWT index generated using BWT. + */ + @Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false) + private String prefix = null; + + @Output + private PrintStream out = null; + + /** + * The actual aligner. + */ + private Aligner aligner = null; + + private SortedMap alignmentFrequencies = new TreeMap(); + + /** + * Create an aligner object. The aligner object will load and hold the BWT until close() is called. + */ + @Override + public void initialize() { + if(prefix == null) + prefix = getToolkit().getArguments().referenceFile.getAbsolutePath(); + BWTFiles bwtFiles = new BWTFiles(prefix); + BWAConfiguration configuration = new BWAConfiguration(); + aligner = new BWACAligner(bwtFiles,configuration); + } + + /** + * Aligns a read to the given reference. + * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. + * @param read Read to align. 
+ * @return Number of alignments found for this read. + */ + @Override + public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { + Iterator alignmentIterator = aligner.getAllAlignments(read.getReadBases()).iterator(); + if(alignmentIterator.hasNext()) { + int numAlignments = alignmentIterator.next().length; + if(alignmentFrequencies.containsKey(numAlignments)) + alignmentFrequencies.put(numAlignments,alignmentFrequencies.get(numAlignments)+1); + else + alignmentFrequencies.put(numAlignments,1); + } + return 1; + } + + /** + * Initial value for reduce. In this case, validated reads will be counted. + * @return 0, indicating no reads yet validated. + */ + @Override + public Integer reduceInit() { return 0; } + + /** + * Calculates the number of reads processed. + * @param value Number of reads processed by this map. + * @param sum Number of reads processed before this map. + * @return Number of reads processed up to and including this map. + */ + @Override + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + /** + * Cleanup. + * @param result Number of reads processed. + */ + @Override + public void onTraversalDone(Integer result) { + aligner.close(); + for(Map.Entry alignmentFrequency: alignmentFrequencies.entrySet()) + out.printf("%d\t%d%n", alignmentFrequency.getKey(), alignmentFrequency.getValue()); + super.onTraversalDone(result); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java new file mode 100644 index 000000000..ddbf784f5 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java @@ -0,0 +1,38 @@ +package org.broadinstitute.sting.alignment.bwa; + +import org.broadinstitute.sting.alignment.Aligner; + +/** + * Align reads using BWA. 
+ * + * @author mhanna + * @version 0.1 + */ +public abstract class BWAAligner implements Aligner { + /** + * The supporting files used by BWA. + */ + protected BWTFiles bwtFiles; + + /** + * The current configuration for the BWA aligner. + */ + protected BWAConfiguration configuration; + + /** + * Create a new BWAAligner. Purpose of this call is to ensure that all BWA constructors accept the correct + * parameters. + * @param bwtFiles The many files representing BWTs persisted to disk. + * @param configuration Configuration parameters for the alignment. + */ + public BWAAligner(BWTFiles bwtFiles, BWAConfiguration configuration) { + this.bwtFiles = bwtFiles; + this.configuration = configuration; + } + + /** + * Update the configuration passed to the BWA aligner. + * @param configuration New configuration to set. + */ + public abstract void updateConfiguration(BWAConfiguration configuration); +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java new file mode 100644 index 000000000..73441cb6a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java @@ -0,0 +1,44 @@ +package org.broadinstitute.sting.alignment.bwa; + +/** + * Configuration for the BWA/C aligner. + * + * @author mhanna + * @version 0.1 + */ +public class BWAConfiguration { + /** + * The maximum edit distance used by BWA. + */ + public Float maximumEditDistance = null; + + /** + * How many gap opens are acceptable within this alignment? + */ + public Integer maximumGapOpens = null; + + /** + * How many gap extensions are acceptable within this alignment? + */ + public Integer maximumGapExtensions = null; + + /** + * Do we disallow indels within a certain range from the start / end? + */ + public Integer disallowIndelWithinRange = null; + + /** + * What is the scoring penalty for a mismatch? 
+ */ + public Integer mismatchPenalty = null; + + /** + * What is the scoring penalty for a gap open? + */ + public Integer gapOpenPenalty = null; + + /** + * What is the scoring penalty for a gap extension? + */ + public Integer gapExtensionPenalty = null; +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java new file mode 100644 index 000000000..a0589ac84 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java @@ -0,0 +1,234 @@ +package org.broadinstitute.sting.alignment.bwa; + +import net.sf.samtools.SAMSequenceDictionary; +import net.sf.samtools.SAMSequenceRecord; +import net.sf.samtools.util.StringUtil; +import org.broadinstitute.sting.alignment.reference.bwt.*; +import org.broadinstitute.sting.alignment.reference.packing.PackUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.IOException; + +/** + * Support files for BWT. + * + * @author mhanna + * @version 0.1 + */ +public class BWTFiles { + /** + * ANN (alternate dictionary) file name. + */ + public final File annFile; + + /** + * AMB (holes) file name. + */ + public final File ambFile; + + /** + * Packed reference sequence file. + */ + public final File pacFile; + + /** + * Reverse of packed reference sequence file. + */ + public final File rpacFile; + + /** + * Forward BWT file. + */ + public final File forwardBWTFile; + + /** + * Forward suffix array file. + */ + public final File forwardSAFile; + + /** + * Reverse BWT file. + */ + public final File reverseBWTFile; + + /** + * Reverse suffix array file. + */ + public final File reverseSAFile; + + /** + * Were these files autogenerated on the fly? + */ + public final boolean autogenerated; + + /** + * Create a new BWA configuration file using the given prefix. + * @param prefix Prefix to use when creating the configuration. 
Must not be null. + */ + public BWTFiles(String prefix) { + if(prefix == null) + throw new ReviewedStingException("Prefix must not be null."); + annFile = new File(prefix + ".ann"); + ambFile = new File(prefix + ".amb"); + pacFile = new File(prefix + ".pac"); + rpacFile = new File(prefix + ".rpac"); + forwardBWTFile = new File(prefix + ".bwt"); + forwardSAFile = new File(prefix + ".sa"); + reverseBWTFile = new File(prefix + ".rbwt"); + reverseSAFile = new File(prefix + ".rsa"); + autogenerated = false; + } + + /** + * Hand-create a new BWTFiles object, specifying a unique file object for each type. + * @param annFile ANN (alternate dictionary) file. + * @param ambFile AMB (holes) file. + * @param pacFile Packed representation of the forward reference sequence. + * @param forwardBWTFile BWT representation of the forward reference sequence. + * @param forwardSAFile SA representation of the forward reference sequence. + * @param rpacFile Packed representation of the reversed reference sequence. + * @param reverseBWTFile BWT representation of the reversed reference sequence. + * @param reverseSAFile SA representation of the reversed reference sequence. + */ + private BWTFiles(File annFile, + File ambFile, + File pacFile, + File forwardBWTFile, + File forwardSAFile, + File rpacFile, + File reverseBWTFile, + File reverseSAFile) { + this.annFile = annFile; + this.ambFile = ambFile; + this.pacFile = pacFile; + this.forwardBWTFile = forwardBWTFile; + this.forwardSAFile = forwardSAFile; + this.rpacFile = rpacFile; + this.reverseBWTFile = reverseBWTFile; + this.reverseSAFile = reverseSAFile; + autogenerated = true; + } + + /** + * Close out this files object, in the process deleting any temporary files + * that were created. 
+ */ + public void close() { + if(autogenerated) { + boolean success = true; + success = annFile.delete(); + success &= ambFile.delete(); + success &= pacFile.delete(); + success &= forwardBWTFile.delete(); + success &= forwardSAFile.delete(); + success &= rpacFile.delete(); + success &= reverseBWTFile.delete(); + success &= reverseSAFile.delete(); + + if(!success) + throw new ReviewedStingException("Unable to clean up autogenerated representation"); + } + } + + /** + * Create a new set of BWT files from the given reference sequence. + * @param referenceSequence Sequence from which to build metadata. + * @return A new object representing encoded representations of each sequence. + */ + public static BWTFiles createFromReferenceSequence(byte[] referenceSequence) { + byte[] normalizedReferenceSequence = new byte[referenceSequence.length]; + System.arraycopy(referenceSequence,0,normalizedReferenceSequence,0,referenceSequence.length); + normalizeReferenceSequence(normalizedReferenceSequence); + + File annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile; + try { + // Write the ann and amb for this reference sequence. + annFile = File.createTempFile("bwt",".ann"); + ambFile = File.createTempFile("bwt",".amb"); + + SAMSequenceDictionary dictionary = new SAMSequenceDictionary(); + dictionary.addSequence(new SAMSequenceRecord("autogenerated",normalizedReferenceSequence.length)); + + ANNWriter annWriter = new ANNWriter(annFile); + annWriter.write(dictionary); + annWriter.close(); + + AMBWriter ambWriter = new AMBWriter(ambFile); + ambWriter.writeEmpty(dictionary); + ambWriter.close(); + + // Write the encoded files for the forward version of this reference sequence. 
+ pacFile = File.createTempFile("bwt",".pac"); + bwtFile = File.createTempFile("bwt",".bwt"); + saFile = File.createTempFile("bwt",".sa"); + + writeEncodedReferenceSequence(normalizedReferenceSequence,pacFile,bwtFile,saFile); + + // Write the encoded files for the reverse version of this reference sequence. + byte[] reverseReferenceSequence = Utils.reverse(normalizedReferenceSequence); + + rpacFile = File.createTempFile("bwt",".rpac"); + rbwtFile = File.createTempFile("bwt",".rbwt"); + rsaFile = File.createTempFile("bwt",".rsa"); + + writeEncodedReferenceSequence(reverseReferenceSequence,rpacFile,rbwtFile,rsaFile); + } + catch(IOException ex) { + throw new ReviewedStingException("Unable to write autogenerated reference sequence to temporary files"); + } + + // Make sure that, at the very least, all temporary files are deleted on exit. + annFile.deleteOnExit(); + ambFile.deleteOnExit(); + pacFile.deleteOnExit(); + bwtFile.deleteOnExit(); + saFile.deleteOnExit(); + rpacFile.deleteOnExit(); + rbwtFile.deleteOnExit(); + rsaFile.deleteOnExit(); + + return new BWTFiles(annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile); + } + + /** + * Write the encoded form of the reference sequence. In the case of BWA, the encoded reference + * sequence is the reference itself in PAC format, the BWT, and the suffix array. + * @param referenceSequence The reference sequence to encode. + * @param pacFile Target for the PAC-encoded reference. + * @param bwtFile Target for the BWT representation of the reference. + * @param suffixArrayFile Target for the suffix array encoding of the reference. + * @throws java.io.IOException In case of issues writing to the file. 
+ */ + private static void writeEncodedReferenceSequence(byte[] referenceSequence, + File pacFile, + File bwtFile, + File suffixArrayFile) throws IOException { + PackUtils.writeReferenceSequence(pacFile,referenceSequence); + + BWT bwt = BWT.createFromReferenceSequence(referenceSequence); + BWTWriter bwtWriter = new BWTWriter(bwtFile); + bwtWriter.write(bwt); + bwtWriter.close(); + + SuffixArray suffixArray = SuffixArray.createFromReferenceSequence(referenceSequence); + SuffixArrayWriter suffixArrayWriter = new SuffixArrayWriter(suffixArrayFile); + suffixArrayWriter.write(suffixArray); + suffixArrayWriter.close(); + } + + /** + * Convert the given reference sequence into a form suitable for building into + * on-the-fly sequences. + * @param referenceSequence The reference sequence to normalize. + * @throws org.broadinstitute.sting.utils.exceptions.ReviewedStingException if normalized sequence cannot be generated. + */ + private static void normalizeReferenceSequence(byte[] referenceSequence) { + StringUtil.toUpperCase(referenceSequence); + for(byte base: referenceSequence) { + if(base != 'A' && base != 'C' && base != 'G' && base != 'T') + throw new ReviewedStingException(String.format("Base type %c is not supported when building references on-the-fly",(char)base)); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java new file mode 100644 index 000000000..165314259 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java @@ -0,0 +1,259 @@ +package org.broadinstitute.sting.alignment.bwa.c; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.alignment.Alignment; +import org.broadinstitute.sting.alignment.bwa.BWAAligner; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.bwa.BWTFiles; +import 
org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.Arrays; +import java.util.Iterator; + +/** + * An aligner using the BWA/C implementation. + * + * @author mhanna + * @version 0.1 + */ +public class BWACAligner extends BWAAligner { + static { + System.loadLibrary("bwa"); + } + + /** + * A pointer to the C++ object representing the BWA engine. + */ + private long thunkPointer = 0; + + public BWACAligner(BWTFiles bwtFiles, BWAConfiguration configuration) { + super(bwtFiles,configuration); + if(thunkPointer != 0) + throw new ReviewedStingException("BWA/C attempting to reinitialize."); + + if(!bwtFiles.annFile.exists()) throw new ReviewedStingException("ANN file is missing; please rerun 'bwa aln' to regenerate it."); + if(!bwtFiles.ambFile.exists()) throw new ReviewedStingException("AMB file is missing; please rerun 'bwa aln' to regenerate it."); + if(!bwtFiles.pacFile.exists()) throw new ReviewedStingException("PAC file is missing; please rerun 'bwa aln' to regenerate it."); + if(!bwtFiles.forwardBWTFile.exists()) throw new ReviewedStingException("Forward BWT file is missing; please rerun 'bwa aln' to regenerate it."); + if(!bwtFiles.forwardSAFile.exists()) throw new ReviewedStingException("Forward SA file is missing; please rerun 'bwa aln' to regenerate it."); + if(!bwtFiles.reverseBWTFile.exists()) throw new ReviewedStingException("Reverse BWT file is missing; please rerun 'bwa aln' to regenerate it."); + if(!bwtFiles.reverseSAFile.exists()) throw new ReviewedStingException("Reverse SA file is missing; please rerun 'bwa aln' to regenerate it."); + + thunkPointer = create(bwtFiles,configuration); + } + + /** + * Create an aligner object using an array of bytes as a reference. + * @param referenceSequence Reference sequence to encode ad-hoc. + * @param configuration Configuration for the given aligner. 
+ */ + public BWACAligner(byte[] referenceSequence, BWAConfiguration configuration) { + this(BWTFiles.createFromReferenceSequence(referenceSequence),configuration); + // Now that the temporary files are created, the temporary files can be destroyed. + bwtFiles.close(); + } + + /** + * Update the configuration passed to the BWA aligner. + * @param configuration New configuration to set. + */ + @Override + public void updateConfiguration(BWAConfiguration configuration) { + if(thunkPointer == 0) + throw new ReviewedStingException("BWA/C: attempting to update configuration of uninitialized aligner."); + updateConfiguration(thunkPointer,configuration); + } + + /** + * Close this instance of the BWA pointer and delete its resources. + */ + @Override + public void close() { + if(thunkPointer == 0) + throw new ReviewedStingException("BWA/C close attempted, but BWA/C is not properly initialized."); + destroy(thunkPointer); + } + + /** + * Allow the aligner to choose one alignment randomly from the pile of best alignments. + * @param bases Bases to align. + * @return An alignment chosen randomly from the pile of best alignments. + */ + @Override + public Alignment getBestAlignment(final byte[] bases) { + if(thunkPointer == 0) + throw new ReviewedStingException("BWA/C getBestAlignment attempted, but BWA/C is not properly initialized."); + return getBestAlignment(thunkPointer,bases); + } + + /** + * Get the best aligned read, chosen randomly from the pile of best alignments. + * @param read Read to align. + * @param newHeader New header to apply to this SAM file. Can be null, but if so, read header must be valid. + * @return Read with injected alignment data. 
+ */ + @Override + public SAMRecord align(final SAMRecord read, final SAMFileHeader newHeader) { + if(bwtFiles.autogenerated) + throw new UnsupportedOperationException("Cannot create target alignment; source contig was generated ad-hoc and is not reliable"); + return Alignment.convertToRead(getBestAlignment(read.getReadBases()),read,newHeader); + } + + /** + * Get an iterator of alignments, batched by mapping quality. + * @param bases List of bases. + * @return Iterator to alignments. + */ + @Override + public Iterable getAllAlignments(final byte[] bases) { + final BWAPath[] paths = getPaths(bases); + return new Iterable() { + public Iterator iterator() { + return new Iterator() { + /** + * The last position accessed. + */ + private int position = 0; + + /** + * Whether all alignments have been seen based on the current position. + * @return True if any more alignments are pending. False otherwise. + */ + public boolean hasNext() { return position < paths.length; } + + /** + * Return the next cross-section of alignments, based on mapping quality. + * @return Array of the next set of alignments of a given mapping quality. + */ + public Alignment[] next() { + if(position >= paths.length) + throw new UnsupportedOperationException("Out of alignments to return."); + int score = paths[position].score; + int startingPosition = position; + while(position < paths.length && paths[position].score == score) position++; + return convertPathsToAlignments(bases,Arrays.copyOfRange(paths,startingPosition,position)); + } + + /** + * Unsupported. + */ + public void remove() { throw new UnsupportedOperationException("Cannot remove from an alignment iterator"); } + }; + } + }; + } + + /** + * Get an iterator of aligned reads, batched by mapping quality. + * @param read Read to align. + * @param newHeader Optional new header to use when aligning the read. Can be null, but if so, read header must be valid. + * @return Iterator to alignments. 
+ */ + @Override + public Iterable alignAll(final SAMRecord read, final SAMFileHeader newHeader) { + if(bwtFiles.autogenerated) + throw new UnsupportedOperationException("Cannot create target alignment; source contig was generated ad-hoc and is not reliable"); + final Iterable alignments = getAllAlignments(read.getReadBases()); + return new Iterable() { + public Iterator iterator() { + final Iterator alignmentIterator = alignments.iterator(); + return new Iterator() { + /** + * Whether all alignments have been seen based on the current position. + * @return True if any more alignments are pending. False otherwise. + */ + public boolean hasNext() { return alignmentIterator.hasNext(); } + + /** + * Return the next cross-section of alignments, based on mapping quality. + * @return Array of the next set of alignments of a given mapping quality. + */ + public SAMRecord[] next() { + Alignment[] alignmentsOfQuality = alignmentIterator.next(); + SAMRecord[] reads = new SAMRecord[alignmentsOfQuality.length]; + for(int i = 0; i < alignmentsOfQuality.length; i++) { + reads[i] = Alignment.convertToRead(alignmentsOfQuality[i],read,newHeader); + } + return reads; + } + + /** + * Unsupported. + */ + public void remove() { throw new UnsupportedOperationException("Cannot remove from an alignment iterator"); } + }; + } + }; + } + + /** + * Get the paths associated with the given base string. + * @param bases List of bases. + * @return A set of paths through the BWA. + */ + public BWAPath[] getPaths(byte[] bases) { + if(thunkPointer == 0) + throw new ReviewedStingException("BWA/C getPaths attempted, but BWA/C is not properly initialized."); + return getPaths(thunkPointer,bases); + } + + /** + * Create a pointer to the BWA/C thunk. + * @param files BWT source files. + * @param configuration Configuration of the aligner. + * @return Pointer to the BWA/C thunk. 
+ */ + protected native long create(BWTFiles files, BWAConfiguration configuration); + + /** + * Update the configuration passed to the BWA aligner. For internal use only. + * @param thunkPointer pointer to BWA object. + * @param configuration New configuration to set. + */ + protected native void updateConfiguration(long thunkPointer, BWAConfiguration configuration); + + /** + * Destroy the BWA/C thunk. + * @param thunkPointer Pointer to the allocated thunk. + */ + protected native void destroy(long thunkPointer); + + /** + * Do the extra steps involved in converting a local alignment to a global alignment. + * @param bases ASCII representation of byte array. + * @param paths Paths through the current BWT. + * @return A list of alignments. + */ + protected Alignment[] convertPathsToAlignments(byte[] bases, BWAPath[] paths) { + if(thunkPointer == 0) + throw new ReviewedStingException("BWA/C convertPathsToAlignments attempted, but BWA/C is not properly initialized."); + return convertPathsToAlignments(thunkPointer,bases,paths); + } + + /** + * Caller to the path generation functionality within BWA/C. Call this method's getPaths() wrapper (above) instead. + * @param thunkPointer pointer to the C++ object managing BWA/C. + * @param bases ASCII representation of byte array. + * @return A list of paths through the specified BWT. + */ + protected native BWAPath[] getPaths(long thunkPointer, byte[] bases); + + /** + * Do the extra steps involved in converting a local alignment to a global alignment. + * Call this method's convertPathsToAlignments() wrapper (above) instead. + * @param thunkPointer pointer to the C++ object managing BWA/C. + * @param bases ASCII representation of byte array. + * @param paths Paths through the current BWT. + * @return A list of alignments. 
+ */ + protected native Alignment[] convertPathsToAlignments(long thunkPointer, byte[] bases, BWAPath[] paths); + + /** + * Gets the best alignment from BWA/C, randomly selected from all best-aligned reads. + * @param thunkPointer Pointer to BWA thunk. + * @param bases bases to align. + * @return The best alignment from BWA/C. + */ + protected native Alignment getBestAlignment(long thunkPointer, byte[] bases); +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWAPath.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWAPath.java new file mode 100755 index 000000000..347d4344f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWAPath.java @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.alignment.bwa.c; + +/** + * Models a BWA path. 
+ * + * @author mhanna + * @version 0.1 + */ +public class BWAPath { + /** + * Number of mismatches encountered along this path. + */ + public final int numMismatches; + + /** + * Number of gap opens encountered along this path. + */ + public final int numGapOpens; + + /** + * Number of gap extensions along this path. + */ + public final int numGapExtensions; + + /** + * Whether this alignment was found on the positive or negative strand. + */ + public final boolean negativeStrand; + + /** + * Starting coordinate in the BWT. + */ + public final long k; + + /** + * Ending coordinate in the BWT. + */ + public final long l; + + /** + * The score of this path. + */ + public final int score; + + /** + * The number of best alignments seen along this path. + */ + public final int bestCount; + + /** + * The number of second best alignments seen along this path. + */ + public final int secondBestCount; + + /** + * Create a new path with the given attributes. + * @param numMismatches Number of mismatches along path. + * @param numGapOpens Number of gap opens along path. + * @param numGapExtensions Number of gap extensions along path. + * @param k Index to first coordinate within BWT. + * @param l Index to last coordinate within BWT. + * @param score Score of this alignment. Not the mapping quality. 
+ */ + public BWAPath(int numMismatches, int numGapOpens, int numGapExtensions, boolean negativeStrand, long k, long l, int score, int bestCount, int secondBestCount) { + this.numMismatches = numMismatches; + this.numGapOpens = numGapOpens; + this.numGapExtensions = numGapExtensions; + this.negativeStrand = negativeStrand; + this.k = k; + this.l = l; + this.score = score; + this.bestCount = bestCount; + this.secondBestCount = secondBestCount; + } + +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java new file mode 100644 index 000000000..2d568a96a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java @@ -0,0 +1,164 @@ +package org.broadinstitute.sting.alignment.bwa.java; + +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.*; +import org.broadinstitute.sting.alignment.Aligner; +import org.broadinstitute.sting.alignment.Alignment; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.FileNotFoundException; + +/** + * A test harness to ensure that the perfect aligner works. 
+ * + * @author mhanna + * @version 0.1 + */ +public class AlignerTestHarness { + public static void main( String argv[] ) throws FileNotFoundException { + if( argv.length != 6 ) { + System.out.println("PerfectAlignerTestHarness "); + System.exit(1); + } + + File referenceFile = new File(argv[0]); + File bwtFile = new File(argv[1]); + File rbwtFile = new File(argv[2]); + File suffixArrayFile = new File(argv[3]); + File reverseSuffixArrayFile = new File(argv[4]); + File bamFile = new File(argv[5]); + + align(referenceFile,bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile,bamFile); + } + + private static void align(File referenceFile, File bwtFile, File rbwtFile, File suffixArrayFile, File reverseSuffixArrayFile, File bamFile) throws FileNotFoundException { + Aligner aligner = new BWAJavaAligner(bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile); + int count = 0; + + SAMFileReader reader = new SAMFileReader(bamFile); + reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT); + + int mismatches = 0; + int failures = 0; + + for(SAMRecord read: reader) { + count++; + if( count > 200000 ) break; + //if( count < 366000 ) continue; + //if( count > 2 ) break; + //if( !read.getReadName().endsWith("SL-XBC:1:82:506:404#0") ) + // continue; + //if( !read.getReadName().endsWith("SL-XBC:1:36:30:1926#0") ) + // continue; + //if( !read.getReadName().endsWith("SL-XBC:1:60:1342:1340#0") ) + // continue; + + SAMRecord alignmentCleaned = null; + try { + alignmentCleaned = (SAMRecord)read.clone(); + } + catch( CloneNotSupportedException ex ) { + throw new ReviewedStingException("SAMRecord clone not supported", ex); + } + + if( alignmentCleaned.getReadNegativeStrandFlag() ) + alignmentCleaned.setReadBases(BaseUtils.simpleReverseComplement(alignmentCleaned.getReadBases())); + + alignmentCleaned.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX); + alignmentCleaned.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START); + 
alignmentCleaned.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY); + alignmentCleaned.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR); + + // Clear everything except flags pertaining to pairing and set 'unmapped' status to true. + alignmentCleaned.setFlags(alignmentCleaned.getFlags() & 0x00A1 | 0x000C); + + Iterable alignments = aligner.getAllAlignments(alignmentCleaned.getReadBases()); + if(!alignments.iterator().hasNext() ) { + //throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count)); + System.out.printf("Unable to align read %s to reference; count = %d%n",read.getReadName(),count); + failures++; + } + + Alignment foundAlignment = null; + for(Alignment[] alignmentsOfQuality: alignments) { + for(Alignment alignment: alignmentsOfQuality) { + if( read.getReadNegativeStrandFlag() != alignment.isNegativeStrand() ) + continue; + if( read.getAlignmentStart() != alignment.getAlignmentStart() ) + continue; + + foundAlignment = alignment; + } + } + + if( foundAlignment != null ) { + //System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), foundAlignment.getAlignmentStart(), foundAlignment.getMismatches(), foundAlignment.getGapOpens(), foundAlignment.getGapExtensions()); + } + else { + System.out.printf("Error aligning read %s%n", read.getReadName()); + + mismatches++; + + IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile); + + System.out.printf("read = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(read.getReadString(),read.getCigar(),CigarOperator.DELETION), + read.getAlignmentStart(), + read.getReadNegativeStrandFlag()); + int numDeletions = numDeletionsInCigar(read.getCigar()); + String expectedRef = new 
String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),read.getAlignmentStart(),read.getAlignmentStart()+read.getReadLength()+numDeletions-1).getBases()); + System.out.printf("expected ref = %s%n", formatBasesBasedOnCigar(expectedRef,read.getCigar(),CigarOperator.INSERTION)); + + for(Alignment[] alignmentsOfQuality: alignments) { + for(Alignment alignment: alignmentsOfQuality) { + System.out.println(); + + Cigar cigar = ((BWAAlignment)alignment).getCigar(); + + System.out.printf("read = %s%n", formatBasesBasedOnCigar(read.getReadString(),cigar,CigarOperator.DELETION)); + + int deletionCount = ((BWAAlignment)alignment).getNumberOfBasesMatchingState(AlignmentState.DELETION); + String alignedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),alignment.getAlignmentStart(),alignment.getAlignmentStart()+read.getReadLength()+deletionCount-1).getBases()); + System.out.printf("actual ref = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(alignedRef,cigar,CigarOperator.INSERTION), + alignment.getAlignmentStart(), + alignment.isNegativeStrand()); + } + } + + //throw new StingException(String.format("Read %s was placed at incorrect location; count = %d%n",read.getReadName(),count)); + } + + + if( count % 1000 == 0 ) + System.out.printf("%d reads examined.%n",count); + } + + System.out.printf("%d reads examined; %d mismatches; %d failures.%n",count,mismatches,failures); + } + + private static String formatBasesBasedOnCigar( String bases, Cigar cigar, CigarOperator toBlank ) { + StringBuilder formatted = new StringBuilder(); + int readIndex = 0; + for(CigarElement cigarElement: cigar.getCigarElements()) { + if(cigarElement.getOperator() == toBlank) { + int number = cigarElement.getLength(); + while( number-- > 0 ) formatted.append(' '); + } + else { + int number = cigarElement.getLength(); + while( number-- > 0 ) 
formatted.append(bases.charAt(readIndex++)); + } + } + return formatted.toString(); + } + + private static int numDeletionsInCigar( Cigar cigar ) { + int numDeletions = 0; + for(CigarElement cigarElement: cigar.getCigarElements()) { + if(cigarElement.getOperator() == CigarOperator.DELETION) + numDeletions += cigarElement.getLength(); + } + return numDeletions; + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentMatchSequence.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentMatchSequence.java new file mode 100644 index 000000000..f1e3c31b6 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentMatchSequence.java @@ -0,0 +1,150 @@ +package org.broadinstitute.sting.alignment.bwa.java; + +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.Iterator; + +/** + * Represents a sequence of matches. + * + * @author mhanna + * @version 0.1 + */ +public class AlignmentMatchSequence implements Cloneable { + /** + * Stores the particular match entries in the order they occur. + */ + private Deque entries = new ArrayDeque(); + + /** + * Clone the given match sequence. + * @return A deep copy of the current match sequence. + */ + public AlignmentMatchSequence clone() { + AlignmentMatchSequence copy = null; + try { + copy = (AlignmentMatchSequence)super.clone(); + } + catch( CloneNotSupportedException ex ) { + throw new ReviewedStingException("Unable to clone AlignmentMatchSequence."); + } + + copy.entries = new ArrayDeque(); + for( AlignmentMatchSequenceEntry entry: entries ) + copy.entries.add(entry.clone()); + + return copy; + } + + public Cigar convertToCigar(boolean negativeStrand) { + Cigar cigar = new Cigar(); + Iterator iterator = negativeStrand ? 
entries.descendingIterator() : entries.iterator(); + while( iterator.hasNext() ) { + AlignmentMatchSequenceEntry entry = iterator.next(); + CigarOperator operator; + switch( entry.getAlignmentState() ) { + case MATCH_MISMATCH: operator = CigarOperator.MATCH_OR_MISMATCH; break; + case INSERTION: operator = CigarOperator.INSERTION; break; + case DELETION: operator = CigarOperator.DELETION; break; + default: throw new ReviewedStingException("convertToCigar: cannot process state: " + entry.getAlignmentState()); + } + cigar.add( new CigarElement(entry.count,operator) ); + } + return cigar; + } + + /** + * Add a new alignment of the given state. + * @param state State to add to the sequence. + */ + public void addNext( AlignmentState state ) { + AlignmentMatchSequenceEntry last = entries.peekLast(); + // If the last entry is the same as this one, increment it. Otherwise, add a new entry. + if( last != null && last.alignmentState == state ) + last.increment(); + else + entries.add(new AlignmentMatchSequenceEntry(state)); + } + + /** + * Gets the current state of this alignment (what's the state of the last base?) + * @return State of the most recently aligned base. + */ + public AlignmentState getCurrentState() { + if( entries.size() == 0 ) + return AlignmentState.MATCH_MISMATCH; + return entries.peekLast().getAlignmentState(); + } + + /** + * How many bases in the read match the given state. + * @param state State to test. + * @return number of bases which match that state. + */ + public int getNumberOfBasesMatchingState(AlignmentState state) { + int matches = 0; + for( AlignmentMatchSequenceEntry entry: entries ) { + if( entry.getAlignmentState() == state ) + matches += entry.count; + } + return matches; + } + + /** + * Stores an individual match sequence entry. + */ + private class AlignmentMatchSequenceEntry implements Cloneable { + /** + * The state of the alignment throughout a given point in the sequence. 
+ */ + private final AlignmentState alignmentState; + + /** + * The number of bases having this particular state. + */ + private int count; + + /** + * Create a new sequence entry with the given state. + * @param alignmentState The state that this sequence should contain. + */ + AlignmentMatchSequenceEntry( AlignmentState alignmentState ) { + this.alignmentState = alignmentState; + this.count = 1; + } + + /** + * Clone the given match sequence entry. + * @return A deep copy of the current match sequence entry. + */ + public AlignmentMatchSequenceEntry clone() { + try { + return (AlignmentMatchSequenceEntry)super.clone(); + } + catch( CloneNotSupportedException ex ) { + throw new ReviewedStingException("Unable to clone AlignmentMatchSequenceEntry."); + } + } + + /** + * Retrieves the current state of the alignment. + * @return The state of the current sequence. + */ + AlignmentState getAlignmentState() { + return alignmentState; + } + + /** + * Increment the count of alignments having this particular state. + */ + void increment() { + count++; + } + } +} + diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentState.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentState.java new file mode 100644 index 000000000..92c603335 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentState.java @@ -0,0 +1,13 @@ +package org.broadinstitute.sting.alignment.bwa.java; + +/** + * The state of a given base in the alignment. 
+ * + * @author mhanna + * @version 0.1 + */ +public enum AlignmentState { + MATCH_MISMATCH, + INSERTION, + DELETION +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAAlignment.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAAlignment.java new file mode 100644 index 000000000..f3b515dba --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAAlignment.java @@ -0,0 +1,190 @@ +package org.broadinstitute.sting.alignment.bwa.java; + +import net.sf.samtools.Cigar; +import org.broadinstitute.sting.alignment.Alignment; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +/** + * An alignment object to be used incrementally as the BWA aligner + * inspects the read. + * + * @author mhanna + * @version 0.1 + */ +public class BWAAlignment extends Alignment implements Cloneable { + /** + * Track the number of alignments that have been created. + */ + private static long numCreated; + + /** + * Which number alignment is this? + */ + private long creationNumber; + + /** + * The aligner performing the alignments. + */ + protected BWAJavaAligner aligner; + + /** + * The sequence of matches/mismatches/insertions/deletions. + */ + private AlignmentMatchSequence alignmentMatchSequence = new AlignmentMatchSequence(); + + /** + * Working variable. How many bases have been matched at this point. + */ + protected int position; + + /** + * Working variable. How many mismatches have been encountered at this point. + */ + private int mismatches; + + /** + * Number of gap opens in alignment. + */ + private int gapOpens; + + /** + * Number of gap extensions in alignment. + */ + private int gapExtensions; + + /** + * Working variable. The lower bound of the alignment within the BWT. + */ + protected long loBound; + + /** + * Working variable. The upper bound of the alignment within the BWT. 
+ */ + protected long hiBound; + + protected void setAlignmentStart(long position) { + this.alignmentStart = position; + } + + protected void setNegativeStrand(boolean negativeStrand) { + this.negativeStrand = negativeStrand; + } + + /** + * Cached BWA-style score; recomputed by updateScore() each time a mismatch or gap counter is incremented. + */ + private int score; + + public Cigar getCigar() { + return alignmentMatchSequence.convertToCigar(isNegativeStrand()); + } + + /** + * Gets the current state of this alignment (state of the last base viewed). + * @return Current state of the alignment. + */ + public AlignmentState getCurrentState() { + return alignmentMatchSequence.getCurrentState(); + } + + /** + * Adds the given state to the current alignment. + * @param state State to add to the given alignment. + */ + public void addState( AlignmentState state ) { + alignmentMatchSequence.addNext(state); + } + + /** + * Gets the BWA score of this alignment. + * @return BWA-style scores. 0 is best. + */ + public int getScore() { + return score; + } + + public int getMismatches() { return mismatches; } + public int getGapOpens() { return gapOpens; } + public int getGapExtensions() { return gapExtensions; } + + public void incrementMismatches() { + this.mismatches++; + updateScore(); + } + + public void incrementGapOpens() { + this.gapOpens++; + updateScore(); + } + + public void incrementGapExtensions() { + this.gapExtensions++; + updateScore(); + } + + /** + * Recomputes the cached score as the penalty-weighted sum of mismatches, gap opens and gap extensions, using the aligner's penalty constants. + */ + private void updateScore() { + score = mismatches*aligner.MISMATCH_PENALTY + gapOpens*aligner.GAP_OPEN_PENALTY + gapExtensions*aligner.GAP_EXTENSION_PENALTY; + } + + /** + * Create a new alignment with the given parent aligner, assigning it the next creation number. + * @param aligner Aligner being used. + */ + public BWAAlignment( BWAJavaAligner aligner ) { + this.aligner = aligner; + this.creationNumber = numCreated++; + } + + /** + * Clone the alignment; the copy gets a fresh creation number and its own copy of the match sequence. + * @return New instance of the alignment.
+ */ + public BWAAlignment clone() { + BWAAlignment newAlignment = null; + try { + newAlignment = (BWAAlignment)super.clone(); + } + catch( CloneNotSupportedException ex ) { + throw new ReviewedStingException("Unable to clone BWAAlignment."); + } + newAlignment.creationNumber = numCreated++; + newAlignment.alignmentMatchSequence = alignmentMatchSequence.clone(); + + return newAlignment; + } + + /** + * How many bases in the read match the given state. + * @param state State to test. + * @return number of bases which match that state. + */ + public int getNumberOfBasesMatchingState(AlignmentState state) { + return alignmentMatchSequence.getNumberOfBasesMatchingState(state); + } + + /** + * Compare this alignment to another alignment: lower scores order first, and equal scores are ordered by creation number with the more recently created alignment first. + * @param rhs Other alignment to which to compare; it is cast to BWAAlignment without a type check. + * @return < 0 if this < other, == 0 if this == other, > 0 if this > other + */ + public int compareTo(Alignment rhs) { + BWAAlignment other = (BWAAlignment)rhs; + + // If the scores are different, disambiguate using the score. + if(score != other.score) + return score > other.score ? 1 : -1; + + // Otherwise, use the order in which the elements were created (most recently created first). + if(creationNumber != other.creationNumber) + return creationNumber > other.creationNumber ? 
-1 : 1; + + return 0; + } + + public String toString() { + return String.format("position: %d, strand: %b, state: %s, mismatches: %d, gap opens: %d, gap extensions: %d, loBound: %d, hiBound: %d, score: %d, creationNumber: %d", position, negativeStrand, alignmentMatchSequence.getCurrentState(), mismatches, gapOpens, gapExtensions, loBound, hiBound, getScore(), creationNumber); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java new file mode 100644 index 000000000..fbeac9192 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java @@ -0,0 +1,393 @@ +package org.broadinstitute.sting.alignment.bwa.java; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.alignment.Alignment; +import org.broadinstitute.sting.alignment.bwa.BWAAligner; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.reference.bwt.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; +import java.util.PriorityQueue; + +/** + * Create imperfect alignments from the read to the genome represented by the given BWT / suffix array. + * + * @author mhanna + * @version 0.1 + */ +public class BWAJavaAligner extends BWAAligner { + /** + * BWT in the forward direction. + */ + private BWT forwardBWT; + + /** + * BWT in the reverse direction. + */ + private BWT reverseBWT; + + /** + * Suffix array in the forward direction. + */ + private SuffixArray forwardSuffixArray; + + /** + * Suffix array in the reverse direction. + */ + private SuffixArray reverseSuffixArray; + + /** + * Maximum edit distance (-n option from original BWA). 
+ */ + private final int MAXIMUM_EDIT_DISTANCE = 4; + + /** + * Maximum number of gap opens (-o option from original BWA). + */ + private final int MAXIMUM_GAP_OPENS = 1; + + /** + * Maximum number of gap extensions (-e option from original BWA). + */ + private final int MAXIMUM_GAP_EXTENSIONS = 6; + + /** + * Penalty for straight mismatches (-M option from original BWA). + */ + public final int MISMATCH_PENALTY = 3; + + /** + * Penalty for gap opens (-O option from original BWA). + */ + public final int GAP_OPEN_PENALTY = 11; + + /** + * Penalty for gap extensions (-E option from original BWA). + */ + public final int GAP_EXTENSION_PENALTY = 4; + + /** + * Base distance from either end of the read inside which align() will not open or extend gaps. + */ + public final int INDEL_END_SKIP = 5; + + public BWAJavaAligner( File forwardBWTFile, File reverseBWTFile, File forwardSuffixArrayFile, File reverseSuffixArrayFile ) { + super(null,null); + forwardBWT = new BWTReader(forwardBWTFile).read(); + reverseBWT = new BWTReader(reverseBWTFile).read(); + forwardSuffixArray = new SuffixArrayReader(forwardSuffixArrayFile,forwardBWT).read(); + reverseSuffixArray = new SuffixArrayReader(reverseSuffixArrayFile,reverseBWT).read(); + } + + /** + * Close this instance of the BWA pointer and delete its resources. Not currently supported: always throws UnsupportedOperationException. + */ + @Override + public void close() { + throw new UnsupportedOperationException("BWA aligner can't currently be closed."); + } + + /** + * Update the current parameters of this aligner. Not currently supported: always throws UnsupportedOperationException. + * @param configuration New configuration to set. + */ + public void updateConfiguration(BWAConfiguration configuration) { + throw new UnsupportedOperationException("Configuration of the BWA aligner can't currently be changed."); + } + + /** + * Allow the aligner to choose one alignment randomly from the pile of best alignments. Not yet supported: always throws UnsupportedOperationException. + * @param bases Bases to align.
+ * @return An alignment chosen from the best alignments (once supported). + */ + public Alignment getBestAlignment(final byte[] bases) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); } + + /** + * Align the read to the reference. Not yet supported: always throws UnsupportedOperationException. + * @param read Read to align. + * @param header Optional header to drop in place. + * @return The aligned read (once supported). + */ + public SAMRecord align(final SAMRecord read, final SAMFileHeader header) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); } + + /** + * Get an iterable of alignments, batched by mapping quality. Not yet supported: always throws UnsupportedOperationException. + * @param bases List of bases. + * @return Iterable over alignments (once supported). + */ + public Iterable getAllAlignments(final byte[] bases) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); } + + /** + * Get an iterable of aligned reads, batched by mapping quality. Not yet supported: always throws UnsupportedOperationException. + * @param read Read to align. + * @param newHeader Optional new header to use when aligning the read; unused here because the method always throws. + * @return Iterable over alignments (once supported). + */ + public Iterable alignAll(final SAMRecord read, final SAMFileHeader newHeader) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); } + + + public List align( SAMRecord read ) { + List successfulMatches = new ArrayList(); + + Byte[] uncomplementedBases = normalizeBases(read.getReadBases()); + Byte[] complementedBases = normalizeBases(Utils.reverse(BaseUtils.simpleReverseComplement(read.getReadBases()))); + + List forwardLowerBounds = LowerBound.create(uncomplementedBases,forwardBWT); + List reverseLowerBounds = LowerBound.create(complementedBases,reverseBWT); + + // Seed the best score with any score that won't overflow on comparison.
+ int bestScore = Integer.MAX_VALUE - MISMATCH_PENALTY; + int bestDiff = MAXIMUM_EDIT_DISTANCE+1; + int maxDiff = MAXIMUM_EDIT_DISTANCE; + + PriorityQueue alignments = new PriorityQueue(); + + // Create a fictional initial alignment, with the position just before the first base of the read, and the limits + // set as the entire BWT. + alignments.add(createSeedAlignment(reverseBWT)); + alignments.add(createSeedAlignment(forwardBWT)); + + while(!alignments.isEmpty()) { + BWAAlignment alignment = alignments.remove(); + + // From bwtgap.c in the original BWA; if the score is worse than the best score plus the mismatch penalty, stop searching. + if( alignment.getScore() > bestScore + MISMATCH_PENALTY ) + break; + + Byte[] bases = alignment.isNegativeStrand() ? complementedBases : uncomplementedBases; + BWT bwt = alignment.isNegativeStrand() ? forwardBWT : reverseBWT; + List lowerBounds = alignment.isNegativeStrand() ? reverseLowerBounds : forwardLowerBounds; + + // if z < D(i) then return {} + int mismatches = maxDiff - alignment.getMismatches() - alignment.getGapOpens() - alignment.getGapExtensions(); + if( alignment.position < lowerBounds.size()-1 && mismatches < lowerBounds.get(alignment.position+1).value ) + continue; + + if(mismatches == 0) { + exactMatch(alignment,bases,bwt); + if(alignment.loBound > alignment.hiBound) + continue; + } + + // Found a valid alignment; store it and move on.
+ if(alignment.position >= read.getReadLength()-1) { + for(long bwtIndex = alignment.loBound; bwtIndex <= alignment.hiBound; bwtIndex++) { + BWAAlignment finalAlignment = alignment.clone(); + + if( finalAlignment.isNegativeStrand() ) + finalAlignment.setAlignmentStart(forwardSuffixArray.get(bwtIndex) + 1); + else { + int sizeAlongReference = read.getReadLength() - + finalAlignment.getNumberOfBasesMatchingState(AlignmentState.INSERTION) + + finalAlignment.getNumberOfBasesMatchingState(AlignmentState.DELETION); + finalAlignment.setAlignmentStart(reverseBWT.length() - reverseSuffixArray.get(bwtIndex) - sizeAlongReference + 1); + } + + successfulMatches.add(finalAlignment); + + bestScore = Math.min(finalAlignment.getScore(),bestScore); + bestDiff = Math.min(finalAlignment.getMismatches()+finalAlignment.getGapOpens()+finalAlignment.getGapExtensions(),bestDiff); + maxDiff = bestDiff + 1; + } + + continue; + } + + //System.out.printf("Processing alignments; queue size = %d, alignment = %s, bound = %d, base = %s%n", alignments.size(), alignment, lowerBounds.get(alignment.position+1).value, alignment.position >= 0 ? (char)bases[alignment.position].byteValue() : ""); + /* + System.out.printf("#1\t[%d,%d,%d,%c]\t[%d,%d,%d]\t[%d,%d]\t[%d,%d]%n",alignments.size(), + alignment.negativeStrand?1:0, + bases.length-alignment.position-1, + alignment.getCurrentState().toString().charAt(0), + alignment.getMismatches(), + alignment.getGapOpens(), + alignment.getGapExtensions(), + lowerBounds.get(alignment.position+1).value, + lowerBounds.get(alignment.position+1).width, + alignment.loBound, + alignment.hiBound); + */ + + // Temporary -- look ahead to see if the next alignment is bounded. 
+ boolean allowDifferences = mismatches > 0; + boolean allowMismatches = mismatches > 0; + + if( allowDifferences && + alignment.position+1 >= INDEL_END_SKIP-1+alignment.getGapOpens()+alignment.getGapExtensions() && + read.getReadLength()-1-(alignment.position+1) >= INDEL_END_SKIP+alignment.getGapOpens()+alignment.getGapExtensions() ) { + if( alignment.getCurrentState() == AlignmentState.MATCH_MISMATCH ) { + if( alignment.getGapOpens() < MAXIMUM_GAP_OPENS ) { + // Open a potential insertion (counted as a gap open). + BWAAlignment insertionAlignment = createInsertionAlignment(alignment); + insertionAlignment.incrementGapOpens(); + alignments.add(insertionAlignment); + + // Open potential deletions: one candidate per reference base, narrowing the BWT interval without consuming a read base. + List deletionAlignments = createDeletionAlignments(bwt,alignment); + for( BWAAlignment deletionAlignment: deletionAlignments ) + deletionAlignment.incrementGapOpens(); + alignments.addAll(deletionAlignments); + } + } + else if( alignment.getCurrentState() == AlignmentState.INSERTION ) { + if( alignment.getGapExtensions() < MAXIMUM_GAP_EXTENSIONS && mismatches > 0 ) { + // Add a potential insertion extension. + BWAAlignment insertionAlignment = createInsertionAlignment(alignment); + insertionAlignment.incrementGapExtensions(); + alignments.add(insertionAlignment); + } + } + else if( alignment.getCurrentState() == AlignmentState.DELETION ) { + if( alignment.getGapExtensions() < MAXIMUM_GAP_EXTENSIONS && mismatches > 0 ) { + // Add potential deletion extensions; the BWT interval narrows but the read position is not advanced.
+ List deletionAlignments = createDeletionAlignments(bwt,alignment); + for( BWAAlignment deletionAlignment: deletionAlignments ) + deletionAlignment.incrementGapExtensions(); + alignments.addAll(deletionAlignments); + } + } + } + + // Mismatches + alignments.addAll(createMatchedAlignments(bwt,alignment,bases,allowDifferences&&allowMismatches)); + } + + return successfulMatches; + } + + /** + * Create a seed alignment to use as a starting point when traversing: position -1, bounds spanning the whole BWT, negative strand when the forward BWT is given. + * @param bwt source BWT. + * @return Seed alignment. + */ + private BWAAlignment createSeedAlignment(BWT bwt) { + BWAAlignment seed = new BWAAlignment(this); + seed.setNegativeStrand(bwt == forwardBWT); + seed.position = -1; + seed.loBound = 0; + seed.hiBound = bwt.length(); + return seed; + } + + /** + * Creates new alignments representing a direct match / mismatch at the next position in the read. + * @param bwt Source BWT with which to work. + * @param alignment Alignment for the previous position. + * @param bases The bases in the read. + * @param allowMismatch Should mismatching bases be allowed? + * @return List of new alignments, one per candidate base whose BWT interval is non-empty; empty (never null) when no candidate is valid. + */ + private List createMatchedAlignments( BWT bwt, BWAAlignment alignment, Byte[] bases, boolean allowMismatch ) { + List newAlignments = new ArrayList(); + + List baseChoices = new ArrayList(); + Byte thisBase = bases[alignment.position+1]; + + if( allowMismatch ) + baseChoices.addAll(Bases.allOf()); + else + baseChoices.add(thisBase); + + if( thisBase != null ) { + // Keep rotating the current base to the last position until we've hit the current base.
+ for( ;; ) { + baseChoices.add(baseChoices.remove(0)); + if( thisBase.equals(baseChoices.get(baseChoices.size()-1)) ) + break; + + } + } + + for(byte base: baseChoices) { + BWAAlignment newAlignment = alignment.clone(); + + newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1; + newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound); + + // If this alignment is invalid (its BWT interval is empty), skip it. + if( newAlignment.loBound > newAlignment.hiBound ) + continue; + + newAlignment.position++; + newAlignment.addState(AlignmentState.MATCH_MISMATCH); + if( bases[newAlignment.position] == null || base != bases[newAlignment.position] ) + newAlignment.incrementMismatches(); + + newAlignments.add(newAlignment); + } + + return newAlignments; + } + + /** + * Create a new alignment representing an insertion at this point in the read: the read position advances while the BWT bounds are left unchanged. + * @param alignment Alignment from which to derive the insertion. + * @return New alignment reflecting the insertion. + */ + private BWAAlignment createInsertionAlignment( BWAAlignment alignment ) { + // Consume one read base as an insertion. + BWAAlignment newAlignment = alignment.clone(); + newAlignment.position++; + newAlignment.addState(AlignmentState.INSERTION); + return newAlignment; + } + + /** + * Create new alignments representing a deletion at this point in the read: one candidate per base in Bases.instance, without advancing the read position. + * @param bwt source BWT for inferring deletion info. + * @param alignment Alignment from which to derive the deletion. + * @return New alignments reflecting all possible deletions. + */ + private List createDeletionAlignments( BWT bwt, BWAAlignment alignment) { + List newAlignments = new ArrayList(); + for(byte base: Bases.instance) { + BWAAlignment newAlignment = alignment.clone(); + + newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1; + newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound); + + // If this alignment is invalid (its BWT interval is empty), skip it.
+ if( newAlignment.loBound > newAlignment.hiBound ) + continue; + + newAlignment.addState(AlignmentState.DELETION); + + newAlignments.add(newAlignment); + } + + return newAlignments; + } + + /** + * Exactly match the given alignment against the given BWT. + * @param alignment Alignment to match. + * @param bases Bases to use. + * @param bwt BWT to use. + */ + private void exactMatch( BWAAlignment alignment, Byte[] bases, BWT bwt ) { + while( ++alignment.position < bases.length ) { + byte base = bases[alignment.position]; + alignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1; + alignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound); + if( alignment.loBound > alignment.hiBound ) + return; + } + } + + /** + * Make each base into A/C/G/T or null if unknown. + * @param bases Base string to normalize. + * @return Array of normalized bases. + */ + private Byte[] normalizeBases( byte[] bases ) { + Byte[] normalBases = new Byte[bases.length]; + for(int i = 0; i < bases.length; i++) + normalBases[i] = Bases.fromASCII(bases[i]); + return normalBases; + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/LowerBound.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/LowerBound.java new file mode 100644 index 000000000..be7514255 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/LowerBound.java @@ -0,0 +1,88 @@ +package org.broadinstitute.sting.alignment.bwa.java; + +import org.broadinstitute.sting.alignment.reference.bwt.BWT; + +import java.util.ArrayList; +import java.util.List; + +/** + * At any point along the given read, what is a good lower bound for the + * total number of differences? + * + * @author mhanna + * @version 0.1 + */ +public class LowerBound { + /** + * Lower bound of the suffix array. + */ + public final long loIndex; + + /** + * Upper bound of the suffix array. 
+ */ + public final long hiIndex; + + /** + * Width of the bwt from loIndex -> hiIndex, inclusive. + */ + public final long width; + + /** + * The lower bound on the number of differences at the given point in the read. + */ + public final int value; + + /** + * Create a new lower bound with the given value. + * @param loIndex The lower bound of the BWT. + * @param hiIndex The upper bound of the BWT. + * @param value Value for the lower bound at this site. + */ + private LowerBound(long loIndex, long hiIndex, int value) { + this.loIndex = loIndex; + this.hiIndex = hiIndex; + this.width = hiIndex - loIndex + 1; + this.value = value; + } + + /** + * Create a non-optimal bound according to the algorithm specified in Figure 3 of the BWA paper. + * @param bases Bases of the read for which to compute bounds; a null entry is a non-ACGT base and always counts as a difference. + * @param bwt BWT to check against. + * @return A list with one lower bound per read position, in read order (element i covers bases[i] through the end of the read). + * + */ + public static List create(Byte[] bases, BWT bwt) { + List bounds = new ArrayList(); + + long loIndex = 0, hiIndex = bwt.length(); + int mismatches = 0; + for( int i = bases.length-1; i >= 0; i-- ) { + Byte base = bases[i]; + + // Non-ACGT (null) bases cannot extend the match; they are counted as a difference below. + if( base != null ) { + loIndex = bwt.counts(base) + bwt.occurrences(base,loIndex-1) + 1; + hiIndex = bwt.counts(base) + bwt.occurrences(base,hiIndex); + } + + if( base == null || loIndex > hiIndex ) { + loIndex = 0; + hiIndex = bwt.length(); + mismatches++; + } + bounds.add(0,new LowerBound(loIndex,hiIndex,mismatches)); + } + + return bounds; + } + + /** + * Create a string representation of this bound. + * @return String version of this bound.
+ */ + public String toString() { + return String.format("LowerBound: w = %d, value = %d",width,value); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/package-info.java b/public/java/src/org/broadinstitute/sting/alignment/package-info.java new file mode 100644 index 000000000..60cf1e425 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/package-info.java @@ -0,0 +1,4 @@ +/** + * Analyses used to validate the correctness and performance of the BWA Java bindings. + */ +package org.broadinstitute.sting.alignment; \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/AMBWriter.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/AMBWriter.java new file mode 100644 index 000000000..ec10415dd --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/AMBWriter.java @@ -0,0 +1,68 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import net.sf.samtools.SAMSequenceDictionary; +import net.sf.samtools.SAMSequenceRecord; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintStream; + +/** + * Writes .amb files - a file indicating where 'holes' (indeterminate bases) + * exist in the contig. Currently, only empty, placeholder AMBs are supported. + * + * @author mhanna + * @version 0.1 + */ +public class AMBWriter { + /** + * Number of holes is fixed at zero. + */ + private static final int NUM_HOLES = 0; + + /** + * Output stream to which AMB data is written. + */ + private final PrintStream out; + + /** + * Create a new AMBWriter targeting the given file. + * @param file file into which AMB data should be written. + * @throws java.io.IOException if there is a problem opening the output file. + */ + public AMBWriter(File file) throws IOException { + out = new PrintStream(file); + } + + /** + * Create a new AMBWriter targeting the given OutputStream.
+ * @param stream Stream into which AMB data should be written. + */ + public AMBWriter(OutputStream stream) { + out = new PrintStream(stream); + } + + /** + * Write the AMB header line (total genome length, number of sequences, hole count of zero) for the given dictionary. + * Assumes that there are no holes in the dictionary. + * @param dictionary Dictionary to write. + */ + public void writeEmpty(SAMSequenceDictionary dictionary) { + long genomeLength = 0L; + for(SAMSequenceRecord sequence: dictionary.getSequences()) + genomeLength += sequence.getSequenceLength(); + + int sequences = dictionary.getSequences().size(); + + // Write the header + out.printf("%d %d %d%n",genomeLength,sequences,NUM_HOLES); + } + + /** + * Close the given output stream. + */ + public void close() { + out.close(); + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/ANNWriter.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/ANNWriter.java new file mode 100644 index 000000000..8d692a9e7 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/ANNWriter.java @@ -0,0 +1,95 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import net.sf.samtools.SAMSequenceDictionary; +import net.sf.samtools.SAMSequenceRecord; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintStream; + +/** + * Writes .ann files - an alternate sequence dictionary format + * used by BWA/C. For best results, the input sequence dictionary + * should be created with Picard's CreateSequenceDictionary.jar, + * TRUNCATE_NAMES_AT_WHITESPACE=false. + * + * @author mhanna + * @version 0.1 + */ +public class ANNWriter { + /** + * BWA uses a fixed seed of 11, written into every file. + */ + private static final int BNS_SEED = 11; + + /** + * A seemingly unused value that appears in every contig in the ANN. + */ + private static final int GI = 0; + + /** + * Output stream to which ANN data is written.
+ */ + private final PrintStream out; + + /** + * Create a new ANNWriter targeting the given file. + * @param file file into which ANN data should be written. + * @throws IOException if there is a problem opening the output file. + */ + public ANNWriter(File file) throws IOException { + out = new PrintStream(file); + } + + /** + * Create a new ANNWriter targeting the given OutputStream. + * @param stream Stream into which ANN data should be written. + */ + public ANNWriter(OutputStream stream) { + out = new PrintStream(stream); + } + + /** + * Write the contents of the given dictionary into the ANN file: one header line, then two lines per sequence. + * Each sequence's offset is the summed length of the sequences written before it. Assumes that no ambs (blocks of indeterminate base) are present in the dictionary. + * @param dictionary Dictionary to write. + */ + public void write(SAMSequenceDictionary dictionary) { + long genomeLength = 0L; + for(SAMSequenceRecord sequence: dictionary.getSequences()) + genomeLength += sequence.getSequenceLength(); + + int sequences = dictionary.getSequences().size(); + + // Write the header + out.printf("%d %d %d%n",genomeLength,sequences,BNS_SEED); + + long offset = 0; + for(SAMSequenceRecord sequence: dictionary.getSequences()) { + String fullSequenceName = sequence.getSequenceName(); + String trimmedSequenceName = fullSequenceName; + String sequenceComment = "(null)"; + + // Separate the sequence name from the sequence comment, based on BWA's definition. + // BWA's definition appears to accept a zero-length contig name, so mimic that behavior. + if(fullSequenceName.indexOf(' ') >= 0) { + trimmedSequenceName = fullSequenceName.substring(0,fullSequenceName.indexOf(' ')); + sequenceComment = fullSequenceName.substring(fullSequenceName.indexOf(' ')+1); + } + + // Write the sequence GI (?), name, and comment. + out.printf("%d %s %s%n",GI,trimmedSequenceName,sequenceComment); + // Write the sequence offset (its start within the concatenated genome), length, and ambs (currently fixed at 0), then advance the offset.
+ out.printf("%d %d %d%n",offset,sequence.getSequenceLength(),0); + offset += sequence.getSequenceLength(); + } + } + + /** + * Close the given output stream. + */ + public void close() { + out.close(); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWT.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWT.java new file mode 100644 index 000000000..7f8c48253 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWT.java @@ -0,0 +1,172 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.alignment.reference.packing.PackUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +/** + * Represents the Burrows-Wheeler Transform of a reference sequence. + * + * @author mhanna + * @version 0.1 + */ +public class BWT { + /** + * Write an occurrence table after every SEQUENCE_BLOCK_SIZE bases. + * For this implementation to behave correctly, SEQUENCE_BLOCK_SIZE % 8 == 0 + */ + public static final int SEQUENCE_BLOCK_SIZE = 128; + + /** + * The inverse SA, used as a placeholder for determining where the special EOL character sits. + */ + protected final long inverseSA0; + + /** + * Cumulative counts for the entire BWT. + */ + protected final Counts counts; + + /** + * The individual sequence blocks, modelling how they appear on disk. + */ + protected final SequenceBlock[] sequenceBlocks; + + /** + * Creates a new BWT with the given inverse SA, counts, and sequence (in ASCII). + * @param inverseSA0 Inverse SA entry for the first element. Will be missing from the BWT sequence. + * @param counts Cumulative count of bases, in A,C,G,T order. + * @param sequenceBlocks The full BWT sequence, sans the '$'.
+ */ + public BWT( long inverseSA0, Counts counts, SequenceBlock[] sequenceBlocks ) { + this.inverseSA0 = inverseSA0; + this.counts = counts; + this.sequenceBlocks = sequenceBlocks; + } + + /** + * Creates a new BWT with the given inverse SA, occurrences, and sequence (in ASCII). + * @param inverseSA0 Inverse SA entry for the first element. Will be missing from the BWT sequence. + * @param counts Count of bases, in A,C,G,T order. + * @param sequence The full BWT sequence, sans the '$'. + */ + public BWT( long inverseSA0, Counts counts, byte[] sequence ) { + this(inverseSA0,counts,generateSequenceBlocks(sequence)); + } + + /** + * Extract the full sequence from the list of blocks. + * @return The full BWT string as a byte array. + */ + public byte[] getSequence() { + byte[] sequence = new byte[(int)counts.getTotal()]; + for( SequenceBlock block: sequenceBlocks ) + System.arraycopy(block.sequence,0,sequence,block.sequenceStart,block.sequenceLength); + return sequence; + } + + /** + * Get the total counts of bases lexicographically smaller than the given base, for Ferragina and Manzini's search. + * @param base The base. + * @return Total counts for all bases lexicographically smaller than this base. + */ + public long counts(byte base) { + return counts.getCumulative(base); + } + + /** + * Get the number of occurrences of the given base in the BWT up to and including the given index, for Ferragina and Manzini's search. + * @param base The base. + * @param index The last position (inclusive) to count within the BWT. + * @return The block's starting occurrence count for this base plus the matches found within the block through index. + */ + public long occurrences(byte base,long index) { + SequenceBlock block = getSequenceBlock(index); + int position = getSequencePosition(index); + long accumulator = block.occurrences.get(base); + for(int i = 0; i <= position; i++) { + if(base == block.sequence[i]) + accumulator++; + } + return accumulator; + } + + /** + * The number of bases in the BWT as a whole. + * @return Number of bases.
+ */ + public long length() { + return counts.getTotal(); + } + + /** + * Create a new BWT from the given reference sequence. + * @param referenceSequence Sequence from which to derive the BWT. + * @return reference sequence-derived BWT. + */ + public static BWT createFromReferenceSequence(byte[] referenceSequence) { + SuffixArray suffixArray = SuffixArray.createFromReferenceSequence(referenceSequence); + + byte[] bwt = new byte[(int)suffixArray.length()-1]; + int bwtIndex = 0; + for(long suffixArrayIndex = 0; suffixArrayIndex < suffixArray.length(); suffixArrayIndex++) { + if(suffixArray.get(suffixArrayIndex) == 0) + continue; + bwt[bwtIndex++] = referenceSequence[(int)suffixArray.get(suffixArrayIndex)-1]; + } + + return new BWT(suffixArray.inverseSA0,suffixArray.occurrences,bwt); + } + + /** + * Gets the base at a given position in the BWT. + * @param index The index to use. + * @return The base at that location. + */ + protected byte getBase(long index) { + if(index == inverseSA0) + throw new ReviewedStingException(String.format("Base at index %d does not have a text representation",index)); + + SequenceBlock block = getSequenceBlock(index); + int position = getSequencePosition(index); + return block.sequence[position]; + } + + private SequenceBlock getSequenceBlock(long index) { + // If the index is above the SA-1[0], remap it to the appropriate coordinate space. + if(index > inverseSA0) index--; + return sequenceBlocks[(int)(index/SEQUENCE_BLOCK_SIZE)]; + } + + private int getSequencePosition(long index) { + // If the index is above the SA-1[0], remap it to the appropriate coordinate space. + if(index > inverseSA0) index--; + return (int)(index%SEQUENCE_BLOCK_SIZE); + } + + /** + * Create a set of sequence blocks from one long sequence. + * @param sequence Sequence from which to derive blocks. + * @return Array of sequence blocks containing data from the sequence. 
+ */ + private static SequenceBlock[] generateSequenceBlocks( byte[] sequence ) { + Counts occurrences = new Counts(); + + int numSequenceBlocks = PackUtils.numberOfPartitions(sequence.length,SEQUENCE_BLOCK_SIZE); + SequenceBlock[] sequenceBlocks = new SequenceBlock[numSequenceBlocks]; + + for( int block = 0; block < numSequenceBlocks; block++ ) { + int blockStart = block*SEQUENCE_BLOCK_SIZE; + int blockLength = Math.min(SEQUENCE_BLOCK_SIZE, sequence.length-blockStart); + byte[] subsequence = new byte[blockLength]; + + System.arraycopy(sequence,blockStart,subsequence,0,blockLength); + + sequenceBlocks[block] = new SequenceBlock(blockStart,blockLength,occurrences.clone(),subsequence); + + for( byte base: subsequence ) + occurrences.increment(base); + } + + return sequenceBlocks; + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTReader.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTReader.java new file mode 100644 index 000000000..5c4f6d39d --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTReader.java @@ -0,0 +1,89 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.alignment.reference.packing.BasePackedInputStream; +import org.broadinstitute.sting.alignment.reference.packing.PackUtils; +import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedInputStream; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.ByteOrder; +/** + * Reads a BWT from a given file. + * + * @author mhanna + * @version 0.1 + */ +public class BWTReader { + /** + * Input stream from which to read BWT data. + */ + private FileInputStream inputStream; + + /** + * Create a new BWT reader. + * @param inputFile File in which the BWT is stored. 
+ */ + public BWTReader( File inputFile ) { + try { + this.inputStream = new FileInputStream(inputFile); + } + catch( FileNotFoundException ex ) { + throw new ReviewedStingException("Unable to open input file", ex); + } + } + + /** + * Read a BWT from the input stream. + * @return The BWT stored in the input stream. + */ + public BWT read() { + UnsignedIntPackedInputStream uintPackedInputStream = new UnsignedIntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN); + BasePackedInputStream basePackedInputStream = new BasePackedInputStream(Integer.class, inputStream, ByteOrder.LITTLE_ENDIAN); + + long inverseSA0; + long[] count; + SequenceBlock[] sequenceBlocks; + + try { + inverseSA0 = uintPackedInputStream.read(); + count = new long[PackUtils.ALPHABET_SIZE]; + uintPackedInputStream.read(count); + + long bwtSize = count[PackUtils.ALPHABET_SIZE-1]; + sequenceBlocks = new SequenceBlock[PackUtils.numberOfPartitions(bwtSize,BWT.SEQUENCE_BLOCK_SIZE)]; + + for( int block = 0; block < sequenceBlocks.length; block++ ) { + int sequenceStart = block* BWT.SEQUENCE_BLOCK_SIZE; + int sequenceLength = (int)Math.min(BWT.SEQUENCE_BLOCK_SIZE,bwtSize-sequenceStart); + + long[] occurrences = new long[PackUtils.ALPHABET_SIZE]; + byte[] bwt = new byte[sequenceLength]; + + uintPackedInputStream.read(occurrences); + basePackedInputStream.read(bwt); + + sequenceBlocks[block] = new SequenceBlock(sequenceStart,sequenceLength,new Counts(occurrences,false),bwt); + } + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to read BWT from input stream.", ex); + } + + return new BWT(inverseSA0, new Counts(count,true), sequenceBlocks); + } + + /** + * Close the input stream. 
+ */ + public void close() { + try { + inputStream.close(); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to close input file", ex); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTSupplementaryFileGenerator.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTSupplementaryFileGenerator.java new file mode 100644 index 000000000..3370f79c8 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTSupplementaryFileGenerator.java @@ -0,0 +1,60 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import net.sf.picard.reference.ReferenceSequenceFile; +import net.sf.picard.reference.ReferenceSequenceFileFactory; +import net.sf.samtools.SAMSequenceDictionary; + +import java.io.File; +import java.io.IOException; + +/** + * Generate BWA supplementary files (.ann, .amb) from the command line. + * + * @author mhanna + * @version 0.1 + */ +public class BWTSupplementaryFileGenerator { + enum SupplementaryFileType { ANN, AMB } + + public static void main(String[] args) throws IOException { + if(args.length < 3) + usage("Incorrect number of arguments supplied"); + + File fastaFile = new File(args[0]); + File outputFile = new File(args[1]); + SupplementaryFileType outputType = null; + try { + outputType = Enum.valueOf(SupplementaryFileType.class,args[2]); + } + catch(IllegalArgumentException ex) { + usage("Invalid output type: " + args[2]); + } + + ReferenceSequenceFile sequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(fastaFile); + SAMSequenceDictionary dictionary = sequenceFile.getSequenceDictionary(); + + switch(outputType) { + case ANN: + ANNWriter annWriter = new ANNWriter(outputFile); + annWriter.write(dictionary); + annWriter.close(); + break; + case AMB: + AMBWriter ambWriter = new AMBWriter(outputFile); + ambWriter.writeEmpty(dictionary); + ambWriter.close(); + break; + default: + usage("Unsupported output type: " + 
outputType); + } + } + + /** + * Print usage information and exit. + */ + private static void usage(String message) { + System.err.println(message); + System.err.println("Usage: BWTSupplementaryFileGenerator "); + System.exit(1); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTWriter.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTWriter.java new file mode 100644 index 000000000..a813cdc9a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTWriter.java @@ -0,0 +1,71 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.alignment.reference.packing.BasePackedOutputStream; +import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedOutputStream; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.*; +import java.nio.ByteOrder; + +/** + * Writes an in-memory BWT to an outputstream. + * + * @author mhanna + * @version 0.1 + */ +public class BWTWriter { + /** + * Input stream from which to read BWT data. + */ + private final OutputStream outputStream; + + /** + * Create a new BWT writer. + * @param outputFile File in which the BWT is stored. + */ + public BWTWriter( File outputFile ) { + try { + this.outputStream = new BufferedOutputStream(new FileOutputStream(outputFile)); + } + catch( FileNotFoundException ex ) { + throw new ReviewedStingException("Unable to open output file", ex); + } + } + + /** + * Write a BWT to the output stream. + * @param bwt Transform to be written to the output stream. 
+ */ + public void write( BWT bwt ) { + UnsignedIntPackedOutputStream intPackedOutputStream = new UnsignedIntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN); + BasePackedOutputStream basePackedOutputStream = new BasePackedOutputStream(Integer.class, outputStream, ByteOrder.LITTLE_ENDIAN); + + try { + intPackedOutputStream.write(bwt.inverseSA0); + intPackedOutputStream.write(bwt.counts.toArray(true)); + + for( SequenceBlock block: bwt.sequenceBlocks ) { + intPackedOutputStream.write(block.occurrences.toArray(false)); + basePackedOutputStream.write(block.sequence); + } + + // The last block is the last set of counts in the structure. + intPackedOutputStream.write(bwt.counts.toArray(false)); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to read BWT from input stream.", ex); + } + } + + /** + * Close the input stream. + */ + public void close() { + try { + outputStream.close(); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to close input file", ex); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Bases.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Bases.java new file mode 100644 index 000000000..bc0a5b63d --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Bases.java @@ -0,0 +1,108 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.*; + +/** + * Enhanced enum representation of a base. + * + * @author mhanna + * @version 0.1 + */ +public class Bases implements Iterable +{ + public static byte A = 'A'; + public static byte C = 'C'; + public static byte G = 'G'; + public static byte T = 'T'; + + public static final Bases instance = new Bases(); + + private static final List allBases; + + /** + * Representation of the base broken down by packed value. 
+ */ + private static final Map basesByPack = new HashMap(); + + static { + List bases = new ArrayList(); + bases.add(A); + bases.add(C); + bases.add(G); + bases.add(T); + allBases = Collections.unmodifiableList(bases); + + for(int i = 0; i < allBases.size(); i++) + basesByPack.put(i,allBases.get(i)); + } + + /** + * Create a new base with the given ascii representation and + * pack value. + */ + private Bases() { + } + + /** + * Return all possible bases. + * @return Byte representation of all bases. + */ + public static Collection allOf() { + return allBases; + } + + /** + * Gets the number of known bases. + * @return The number of known bases. + */ + public static int size() { + return allBases.size(); + } + + /** + * Gets an iterator over the total number of known base types. + * @return Iterator over all known bases. + */ + public Iterator iterator() { + return basesByPack.values().iterator(); + } + + /** + * Get the given base from the packed representation. + * @param pack Packed representation. + * @return base. + */ + public static byte fromPack( int pack ) { return basesByPack.get(pack); } + + /** + * Convert the given base to its packed value. + * @param ascii ASCII representation of the base. + * @return Packed value. + */ + public static int toPack( byte ascii ) + { + for( Map.Entry entry: basesByPack.entrySet() ) { + if( entry.getValue().equals(ascii) ) + return entry.getKey(); + } + throw new ReviewedStingException(String.format("Base %c is an invalid base to pack", (char)ascii)); + } + + /** + * Convert the ASCII representation of a base to its 'normalized' representation. + * @param base The base itself. + * @return The byte, if present. Null if unknown. 
+ */ + public static Byte fromASCII( byte base ) { + Byte found = null; + for( Byte normalized: allBases ) { + if( normalized.equals(base) ) { + found = normalized; + break; + } + } + return found; + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Counts.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Counts.java new file mode 100644 index 000000000..268b11ac4 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Counts.java @@ -0,0 +1,151 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.HashMap; +import java.util.Map; + +/** + * Counts of how many bases of each type have been seen. + * + * @author mhanna + * @version 0.1 + */ +public class Counts implements Cloneable { + /** + * Internal representation of counts, broken down by ASCII value. + */ + private Map counts = new HashMap(); + + /** + * Internal representation of cumulative counts, broken down by ASCII value. + */ + private Map cumulativeCounts = new HashMap(); + + /** + * Create an empty Counts object with values A=0,C=0,G=0,T=0. + */ + public Counts() + { + for(byte base: Bases.instance) { + counts.put(base,0L); + cumulativeCounts.put(base,0L); + } + } + + /** + * Create a counts data structure with the given initial values. + * @param data Count data, broken down by base. + * @param cumulative Whether the counts are cumulative, (count_G=numA+numC+numG,for example). 
+ */ + public Counts( long[] data, boolean cumulative ) { + if(cumulative) { + long priorCount = 0; + for(byte base: Bases.instance) { + long count = data[Bases.toPack(base)]; + counts.put(base,count-priorCount); + cumulativeCounts.put(base,priorCount); + priorCount = count; + } + } + else { + long priorCount = 0; + for(byte base: Bases.instance) { + long count = data[Bases.toPack(base)]; + counts.put(base,count); + cumulativeCounts.put(base,priorCount); + priorCount += count; + } + } + } + + /** + * Convert to an array for persistence. + * @param cumulative Use a cumulative representation. + * @return Array of count values. + */ + public long[] toArray(boolean cumulative) { + long[] countArray = new long[counts.size()]; + if(cumulative) { + int index = 0; + boolean first = true; + for(byte base: Bases.instance) { + if(first) { + first = false; + continue; + } + countArray[index++] = getCumulative(base); + } + countArray[countArray.length-1] = getTotal(); + } + else { + int index = 0; + for(byte base: Bases.instance) + countArray[index++] = counts.get(base); + } + return countArray; + } + + /** + * Create a unique copy of the current object. + * @return A duplicate of this object. + */ + public Counts clone() { + Counts other; + try { + other = (Counts)super.clone(); + } + catch(CloneNotSupportedException ex) { + throw new ReviewedStingException("Unable to clone counts object", ex); + } + other.counts = new HashMap(counts); + other.cumulativeCounts = new HashMap(cumulativeCounts); + return other; + } + + /** + * Increment the number of bases seen at the given location. + * @param base Base to increment. + */ + public void increment(byte base) { + counts.put(base,counts.get(base)+1); + boolean increment = false; + for(byte cumulative: Bases.instance) { + if(increment) cumulativeCounts.put(cumulative,cumulativeCounts.get(cumulative)+1); + increment |= (cumulative == base); + } + } + + /** + * Gets a count of the number of bases seen at a given location. 
+ * Note that counts in this case are not cumulative (counts for A,C,G,T + * are independent). + * @param base Base for which to query counts. + * @return Number of bases of this type seen. + */ + public long get(byte base) { + return counts.get(base); + } + + /** + * Gets a count of the number of bases seen before this base. + * Note that counts in this case are cumulative. + * @param base Base for which to query counts. + * @return Number of bases of this type seen. + */ + public long getCumulative(byte base) { + return cumulativeCounts.get(base); + } + + /** + * How many total bases are represented by this count structure? + * @return Total bases represented. + */ + public long getTotal() { + int accumulator = 0; + for(byte base: Bases.instance) { + accumulator += get(base); + } + return accumulator; + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/CreateBWTFromReference.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/CreateBWTFromReference.java new file mode 100755 index 000000000..801ab3a0b --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/CreateBWTFromReference.java @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITHoc THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.alignment.reference.bwt; + +import net.sf.picard.reference.ReferenceSequence; +import net.sf.picard.reference.ReferenceSequenceFile; +import net.sf.picard.reference.ReferenceSequenceFileFactory; +import org.broadinstitute.sting.alignment.reference.packing.PackUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.IOException; + +/** + * Create a suffix array data structure. + * + * @author mhanna + * @version 0.1 + */ +public class CreateBWTFromReference { + private byte[] loadReference( File inputFile ) { + // Read in the first sequence in the input file + ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile); + ReferenceSequence sequence = reference.nextSequence(); + return sequence.getBases(); + } + + private byte[] loadReverseReference( File inputFile ) { + ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile); + ReferenceSequence sequence = reference.nextSequence(); + PackUtils.reverse(sequence.getBases()); + return sequence.getBases(); + } + + private Counts countOccurrences( byte[] sequence ) { + Counts occurrences = new Counts(); + for( byte base: sequence ) + occurrences.increment(base); + return occurrences; + } + + private long[] createSuffixArray( byte[] sequence ) { + return SuffixArray.createFromReferenceSequence(sequence).sequence; + } + + private long[] 
invertSuffixArray( long[] suffixArray ) { + long[] inverseSuffixArray = new long[suffixArray.length]; + for( int i = 0; i < suffixArray.length; i++ ) + inverseSuffixArray[(int)suffixArray[i]] = i; + return inverseSuffixArray; + } + + private long[] createCompressedSuffixArray( int[] suffixArray, int[] inverseSuffixArray ) { + long[] compressedSuffixArray = new long[suffixArray.length]; + compressedSuffixArray[0] = inverseSuffixArray[0]; + for( int i = 1; i < suffixArray.length; i++ ) + compressedSuffixArray[i] = inverseSuffixArray[suffixArray[i]+1]; + return compressedSuffixArray; + } + + private long[] createInversedCompressedSuffixArray( int[] compressedSuffixArray ) { + long[] inverseCompressedSuffixArray = new long[compressedSuffixArray.length]; + for( int i = 0; i < compressedSuffixArray.length; i++ ) + inverseCompressedSuffixArray[compressedSuffixArray[i]] = i; + return inverseCompressedSuffixArray; + } + + public static void main( String argv[] ) throws IOException { + if( argv.length != 5 ) { + System.out.println("USAGE: CreateBWTFromReference .fasta "); + return; + } + + String inputFileName = argv[0]; + File inputFile = new File(inputFileName); + + String bwtFileName = argv[1]; + File bwtFile = new File(bwtFileName); + + String rbwtFileName = argv[2]; + File rbwtFile = new File(rbwtFileName); + + String saFileName = argv[3]; + File saFile = new File(saFileName); + + String rsaFileName = argv[4]; + File rsaFile = new File(rsaFileName); + + CreateBWTFromReference creator = new CreateBWTFromReference(); + + byte[] sequence = creator.loadReference(inputFile); + byte[] reverseSequence = creator.loadReverseReference(inputFile); + + // Count the occurences of each given base. 
+ Counts occurrences = creator.countOccurrences(sequence); + System.out.printf("Occurrences: a=%d, c=%d, g=%d, t=%d%n",occurrences.getCumulative(Bases.A), + occurrences.getCumulative(Bases.C), + occurrences.getCumulative(Bases.G), + occurrences.getCumulative(Bases.T)); + + // Generate the suffix array and print diagnostics. + long[] suffixArrayData = creator.createSuffixArray(sequence); + long[] reverseSuffixArrayData = creator.createSuffixArray(reverseSequence); + + // Invert the suffix array and print diagnostics. + long[] inverseSuffixArray = creator.invertSuffixArray(suffixArrayData); + long[] reverseInverseSuffixArray = creator.invertSuffixArray(reverseSuffixArrayData); + + SuffixArray suffixArray = new SuffixArray( inverseSuffixArray[0], occurrences, suffixArrayData ); + SuffixArray reverseSuffixArray = new SuffixArray( reverseInverseSuffixArray[0], occurrences, reverseSuffixArrayData ); + + /* + // Create the data structure for the compressed suffix array and print diagnostics. + int[] compressedSuffixArray = creator.createCompressedSuffixArray(suffixArray.sequence,inverseSuffixArray); + int reconstructedInverseSA = compressedSuffixArray[0]; + for( int i = 0; i < 8; i++ ) { + System.out.printf("compressedSuffixArray[%d] = %d (SA-1[%d] = %d)%n", i, compressedSuffixArray[i], i, reconstructedInverseSA); + reconstructedInverseSA = compressedSuffixArray[reconstructedInverseSA]; + } + + // Create the data structure for the inverse compressed suffix array and print diagnostics. + int[] inverseCompressedSuffixArray = creator.createInversedCompressedSuffixArray(compressedSuffixArray); + for( int i = 0; i < 8; i++ ) { + System.out.printf("inverseCompressedSuffixArray[%d] = %d%n", i, inverseCompressedSuffixArray[i]); + } + */ + + // Create the BWT. + BWT bwt = BWT.createFromReferenceSequence(sequence); + BWT reverseBWT = BWT.createFromReferenceSequence(reverseSequence); + + byte[] bwtSequence = bwt.getSequence(); + System.out.printf("BWT: %s... 
(length = %d)%n", new String(bwtSequence,0,80),bwt.length()); + + BWTWriter bwtWriter = new BWTWriter(bwtFile); + bwtWriter.write(bwt); + bwtWriter.close(); + + BWTWriter reverseBWTWriter = new BWTWriter(rbwtFile); + reverseBWTWriter.write(reverseBWT); + reverseBWTWriter.close(); + + /* + SuffixArrayWriter saWriter = new SuffixArrayWriter(saFile); + saWriter.write(suffixArray); + saWriter.close(); + + SuffixArrayWriter reverseSAWriter = new SuffixArrayWriter(rsaFile); + reverseSAWriter.write(reverseSuffixArray); + reverseSAWriter.close(); + */ + + File existingBWTFile = new File(inputFileName+".bwt"); + BWTReader existingBWTReader = new BWTReader(existingBWTFile); + BWT existingBWT = existingBWTReader.read(); + + byte[] existingBWTSequence = existingBWT.getSequence(); + System.out.printf("Existing BWT: %s... (length = %d)%n",new String(existingBWTSequence,0,80),existingBWT.length()); + + for( int i = 0; i < bwt.length(); i++ ) { + if( bwtSequence[i] != existingBWTSequence[i] ) + throw new ReviewedStingException("BWT mismatch at " + i); + } + + File existingSAFile = new File(inputFileName+".sa"); + SuffixArrayReader existingSuffixArrayReader = new SuffixArrayReader(existingSAFile,existingBWT); + SuffixArray existingSuffixArray = existingSuffixArrayReader.read(); + + for(int i = 0; i < suffixArray.length(); i++) { + if( i % 10000 == 0 ) + System.out.printf("Validating suffix array entry %d%n", i); + if( suffixArray.get(i) != existingSuffixArray.get(i) ) + throw new ReviewedStingException(String.format("Suffix array mismatch at %d; SA is %d; should be %d",i,existingSuffixArray.get(i),suffixArray.get(i))); + } + } + +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SequenceBlock.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SequenceBlock.java new file mode 100644 index 000000000..13714de1e --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SequenceBlock.java @@ -0,0 +1,41 @@ 
+package org.broadinstitute.sting.alignment.reference.bwt; + +/** + * Models a block of bases within the BWT. + */ +public class SequenceBlock { + /** + * Start position of this sequence within the BWT. + */ + public final int sequenceStart; + + /** + * Length of this sequence within the BWT. + */ + public final int sequenceLength; + + + /** + * Occurrences of each letter up to this sequence block. + */ + public final Counts occurrences; + + /** + * Sequence for this segment. + */ + public final byte[] sequence; + + /** + * Create a new block within this BWT. + * @param sequenceStart Starting position of this sequence within the BWT. + * @param sequenceLength Length of this sequence. + * @param occurrences How many of each base has been seen before this sequence began. + * @param sequence The actual sequence from the BWT. + */ + public SequenceBlock( int sequenceStart, int sequenceLength, Counts occurrences, byte[] sequence ) { + this.sequenceStart = sequenceStart; + this.sequenceLength = sequenceLength; + this.occurrences = occurrences; + this.sequence = sequence; + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArray.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArray.java new file mode 100644 index 000000000..49af98bb9 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArray.java @@ -0,0 +1,158 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import net.sf.samtools.util.StringUtil; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.Comparator; +import java.util.TreeSet; + +/** + * An in-memory representation of a suffix array. + * + * @author mhanna + * @version 0.1 + */ +public class SuffixArray { + public final long inverseSA0; + public final Counts occurrences; + + /** + * The elements of the sequence actually stored in memory. 
/**
 * Creates a new suffix array with the given inverse SA, occurrences, and values.
 * A BWT is mandatory whenever the stored entries are sampled (interval other than 1).
 * @param inverseSA0 Inverse SA entry for the first element.
 * @param occurrences Cumulative number of occurrences of A,C,G,T, in order.
 * @param sequence The suffix array entries held in memory.
 * @param sequenceInterval How frequently entries of the suffix array are stored.
 * @param bwt BWT used to compute the entries that are not stored; may be null when the interval is 1.
 */
public SuffixArray(long inverseSA0, Counts occurrences, long[] sequence, int sequenceInterval, BWT bwt) {
    final boolean fullyStored = (sequenceInterval == 1);
    if( !(fullyStored || bwt != null) )
        throw new ReviewedStingException("A BWT must be provided if the sequence interval is not 1");

    this.bwt = bwt;
    this.sequenceInterval = sequenceInterval;
    this.sequence = sequence;
    this.occurrences = occurrences;
    this.inverseSA0 = inverseSA0;
}
+ if(index == inverseSA0) + index = 0; + else { + byte base = bwt.getBase(index); + index = bwt.counts(base) + bwt.occurrences(base,index); + } + iterations++; + } + return (sequence[(int)(index/sequenceInterval)]+iterations) % length(); + } + + /** + * Create a suffix array from a given reference sequence. + * @param sequence The reference sequence to use when building the suffix array. + * @return a constructed suffix array. + */ + public static SuffixArray createFromReferenceSequence(byte[] sequence) { + // The builder for the suffix array. Use an integer in this case because + // Java arrays can only hold an integer. + TreeSet suffixArrayBuilder = new TreeSet(new SuffixArrayComparator(sequence)); + + Counts occurrences = new Counts(); + for( byte base: sequence ) + occurrences.increment(base); + + // Build out the suffix array using a custom comparator. + for( int i = 0; i <= sequence.length; i++ ) + suffixArrayBuilder.add(i); + + // Copy the suffix array into an array. + long[] suffixArray = new long[suffixArrayBuilder.size()]; + int i = 0; + for( Integer element: suffixArrayBuilder ) + suffixArray[i++] = element; + + // Find the first element in the inverse suffix array. + long inverseSA0 = -1; + for(i = 0; i < suffixArray.length; i++) { + if(suffixArray[i] == 0) + inverseSA0 = i; + } + if(inverseSA0 < 0) + throw new ReviewedStingException("Unable to find first inverse SA entry in generated suffix array."); + + return new SuffixArray(inverseSA0,occurrences,suffixArray); + } + + /** + * Compares two suffix arrays of the given sequence. Will return whichever string appears + * first in lexicographic order. + */ + private static class SuffixArrayComparator implements Comparator { + /** + * The data source for all suffix arrays. + */ + private final String sequence; + + /** + * Create a new comparator. + * @param sequence Reference sequence to use as basis for comparison. 
+ */ + public SuffixArrayComparator( byte[] sequence ) { + // Processing the suffix array tends to be easier as a string. + this.sequence = StringUtil.bytesToString(sequence); + } + + /** + * Compare the two given suffix arrays. Criteria for comparison is the lexicographic order of + * the two substrings sequence[lhs:], sequence[rhs:]. + * @param lhs Left-hand side of comparison. + * @param rhs Right-hand side of comparison. + * @return How the suffix arrays represented by lhs, rhs compare. + */ + public int compare( Integer lhs, Integer rhs ) { + String lhsSuffixArray = sequence.substring(lhs); + String rhsSuffixArray = sequence.substring(rhs); + return lhsSuffixArray.compareTo(rhsSuffixArray); + } + } + +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArrayReader.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArrayReader.java new file mode 100644 index 000000000..b48e4c69c --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArrayReader.java @@ -0,0 +1,85 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.alignment.reference.packing.PackUtils; +import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedInputStream; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.ByteOrder; + +/** + * A reader for suffix arrays in permanent storage. + * + * @author mhanna + * @version 0.1 + */ +public class SuffixArrayReader { + /** + * Input stream from which to read suffix array data. + */ + private FileInputStream inputStream; + + /** + * BWT to use to fill in missing data. + */ + private BWT bwt; + + /** + * Create a new suffix array reader. + * @param inputFile File in which the suffix array is stored. 
+ * @param bwt BWT to use when filling in missing data. + */ + public SuffixArrayReader(File inputFile, BWT bwt) { + try { + this.inputStream = new FileInputStream(inputFile); + this.bwt = bwt; + } + catch( FileNotFoundException ex ) { + throw new ReviewedStingException("Unable to open input file", ex); + } + } + + /** + * Read a suffix array from the input stream. + * @return The suffix array stored in the input stream. + */ + public SuffixArray read() { + UnsignedIntPackedInputStream uintPackedInputStream = new UnsignedIntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN); + + long inverseSA0; + long[] occurrences; + long[] suffixArray; + int suffixArrayInterval; + + try { + inverseSA0 = uintPackedInputStream.read(); + occurrences = new long[PackUtils.ALPHABET_SIZE]; + uintPackedInputStream.read(occurrences); + // Throw away the suffix array size in bytes and use the occurrences table directly. + suffixArrayInterval = (int)uintPackedInputStream.read(); + suffixArray = new long[(int)((occurrences[occurrences.length-1]+suffixArrayInterval-1)/suffixArrayInterval)]; + uintPackedInputStream.read(suffixArray); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to read BWT from input stream.", ex); + } + + return new SuffixArray(inverseSA0, new Counts(occurrences,true), suffixArray, suffixArrayInterval, bwt); + } + + + /** + * Close the input stream. 
+ */ + public void close() { + try { + inputStream.close(); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to close input file", ex); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArrayWriter.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArrayWriter.java new file mode 100644 index 000000000..b6f79be2f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArrayWriter.java @@ -0,0 +1,67 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedOutputStream; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.*; +import java.nio.ByteOrder; + +/** + * Javadoc goes here. + * + * @author mhanna + * @version 0.1 + */ +public class SuffixArrayWriter { + /** + * Input stream from which to read suffix array data. + */ + private OutputStream outputStream; + + /** + * Create a new suffix array reader. + * @param outputFile File in which the suffix array is stored. + */ + public SuffixArrayWriter( File outputFile ) { + try { + this.outputStream = new BufferedOutputStream(new FileOutputStream(outputFile)); + } + catch( FileNotFoundException ex ) { + throw new ReviewedStingException("Unable to open input file", ex); + } + } + + /** + * Write a suffix array to the output stream. + * @param suffixArray suffix array to write. + */ + public void write(SuffixArray suffixArray) { + UnsignedIntPackedOutputStream uintPackedOutputStream = new UnsignedIntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN); + + try { + uintPackedOutputStream.write(suffixArray.inverseSA0); + uintPackedOutputStream.write(suffixArray.occurrences.toArray(true)); + // How frequently the suffix array entry is placed. + uintPackedOutputStream.write(1); + // Length of the suffix array. 
+ uintPackedOutputStream.write(suffixArray.length()-1); + uintPackedOutputStream.write(suffixArray.sequence,1,suffixArray.sequence.length-1); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to read BWT from input stream.", ex); + } + } + + + /** + * Close the input stream. + */ + public void close() { + try { + outputStream.close(); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to close input file", ex); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/packing/BasePackedInputStream.java b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/BasePackedInputStream.java new file mode 100644 index 000000000..174a9853b --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/BasePackedInputStream.java @@ -0,0 +1,95 @@ +package org.broadinstitute.sting.alignment.reference.packing; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; + +/** + * Reads a packed version of the input stream. + * + * @author mhanna + * @version 0.1 + */ +public class BasePackedInputStream { + /** + * Type of object to unpack. + */ + private final Class type; + + /** + * Ultimate source for packed bases. + */ + private final FileInputStream targetInputStream; + + /** + * Channel source for packed bases. + */ + private final FileChannel targetInputChannel; + + /** + * A fixed-size buffer for word-packed data. + */ + private final ByteOrder byteOrder; + + /** + * How many bases are in a given packed word. + */ + private final int basesPerPackedWord = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BASE; + + /** + * How many bytes in an integer? 
+ */ + private final int bytesPerInteger = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE; + + + public BasePackedInputStream( Class type, File inputFile, ByteOrder byteOrder ) throws FileNotFoundException { + this(type,new FileInputStream(inputFile),byteOrder); + } + + public BasePackedInputStream( Class type, FileInputStream inputStream, ByteOrder byteOrder ) { + if( type != Integer.class ) + throw new ReviewedStingException("Only bases packed into 32-bit words are currently supported by this input stream. Type specified: " + type.getName()); + this.type = type; + this.targetInputStream = inputStream; + this.targetInputChannel = inputStream.getChannel(); + this.byteOrder = byteOrder; + } + + /** + * Read the entire contents of the input stream. + * @param bwt array into which bases should be read. + * @throws IOException if an I/O error occurs. + */ + public void read(byte[] bwt) throws IOException { + read(bwt,0,bwt.length); + } + + /** + * Read the next length bases into the bwt array, starting at the given offset. + * @param bwt array holding the given data. + * @param offset target position in the bases array into which bytes should be written. + * @param length number of bases to read from the stream. + * @throws IOException if an I/O error occurs. 
+ */ + public void read(byte[] bwt, int offset, int length) throws IOException { + int bufferWidth = ((bwt.length+basesPerPackedWord-1)/basesPerPackedWord)*bytesPerInteger; + ByteBuffer buffer = ByteBuffer.allocate(bufferWidth).order(byteOrder); + targetInputChannel.read(buffer); + targetInputChannel.position(targetInputChannel.position()+buffer.remaining()); + buffer.flip(); + + int packedWord = 0; + int i = 0; + while(i < length) { + if(i % basesPerPackedWord == 0) packedWord = buffer.getInt(); + int position = basesPerPackedWord - i%basesPerPackedWord - 1; + bwt[offset+i++] = PackUtils.unpackBase((byte)((packedWord >> position*PackUtils.BITS_PER_BASE) & 0x3)); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/packing/BasePackedOutputStream.java b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/BasePackedOutputStream.java new file mode 100644 index 000000000..c62f40e51 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/BasePackedOutputStream.java @@ -0,0 +1,140 @@ +package org.broadinstitute.sting.alignment.reference.packing; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.*; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +/** + * A general-purpose stream for writing packed bases. + * + * @author mhanna + * @version 0.1 + */ +public class BasePackedOutputStream { + /** + * Type of object to pack. + */ + private final Class type; + + /** + * How many bases can be stored in the given data structure? + */ + private final int basesPerType; + + /** + * Ultimate target for the packed bases. + */ + private final OutputStream targetOutputStream; + + /** + * A fixed-size buffer for word-packed data. 
/**
 * Write packed bases to the given output stream.
 * @param type Type of data to pack bases into.
 * @param outputStream Output stream to which to write packed bases.
 * @param byteOrder Switch between big endian / little endian when reading / writing files.
 */
public BasePackedOutputStream( Class type, OutputStream outputStream, ByteOrder byteOrder) {
    this.targetOutputStream = outputStream;
    this.type = type;
    final int bitsPerType = PackUtils.bitsInType(type);
    this.basesPerType = bitsPerType/PackUtils.BITS_PER_BASE;
    // The buffer holds exactly one packed word, so size it in bytes directly.
    // Dividing basesPerType by ALPHABET_SIZE yields the same number only because
    // BITS_PER_BYTE/BITS_PER_BASE happens to equal ALPHABET_SIZE today.
    this.buffer = ByteBuffer.allocate(bitsPerType/PackUtils.BITS_PER_BYTE).order(byteOrder);
}
If all possible bases have been squeezed into this byte, write it out. + positionInPack = ++positionInPack % basesPerType; + if( positionInPack == 0 ) { + writePackedBases(packedBases); + packedBases = 0; + } + } + + if( positionInPack > 0 ) + writePackedBases(packedBases); + } + + /** + * Flush the contents of the OutputStream to disk. + * @throws IOException if an I/O error occurs. + */ + public void flush() throws IOException { + targetOutputStream.flush(); + } + + /** + * Closes the given output stream. + * @throws IOException if an I/O error occurs. + */ + public void close() throws IOException { + targetOutputStream.close(); + } + + /** + * Pack the given base into the basepack. + * @param base The base to pack. + * @param basePack Target for the pack operation. + * @param position Position within the pack to which to add the base. + * @return The packed integer. + */ + private int packBase( byte base, int basePack, int position ) { + basePack |= (PackUtils.packBase(base) << 2*(basesPerType-position-1)); + return basePack; + } + + /** + * Write the given packed base structure to the output file. + * @param packedBases Packed bases to write. + * @throws IOException on error writing to the file. 
+ */ + private void writePackedBases(int packedBases) throws IOException { + buffer.rewind(); + if( type == Integer.class ) + buffer.putInt(packedBases); + else if( type == Byte.class ) + buffer.put((byte)packedBases); + else + throw new ReviewedStingException("Cannot pack bases into type " + type.getName()); + targetOutputStream.write(buffer.array()); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/packing/CreatePACFromReference.java b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/CreatePACFromReference.java new file mode 100755 index 000000000..561535e29 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/CreatePACFromReference.java @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.alignment.reference.packing; + +import net.sf.picard.reference.ReferenceSequence; +import net.sf.picard.reference.ReferenceSequenceFile; +import net.sf.picard.reference.ReferenceSequenceFileFactory; + +import java.io.File; +import java.io.IOException; + +/** + * Generate a .PAC file from a given reference. + * + * @author hanna + * @version 0.1 + */ + +public class CreatePACFromReference { + public static void main( String argv[] ) throws IOException { + if( argv.length != 3 ) { + System.out.println("USAGE: CreatePACFromReference .fasta "); + return; + } + + // Read in the first sequence in the input file + String inputFileName = argv[0]; + File inputFile = new File(inputFileName); + ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile); + ReferenceSequence sequence = reference.nextSequence(); + + // Target file for output + PackUtils.writeReferenceSequence( new File(argv[1]), sequence.getBases() ); + + // Reverse the bases in the reference + PackUtils.reverse(sequence.getBases()); + + // Target file for output + PackUtils.writeReferenceSequence( new File(argv[2]), sequence.getBases() ); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/packing/PackUtils.java b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/PackUtils.java new file mode 100644 index 000000000..972e31cf0 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/PackUtils.java @@ -0,0 +1,135 @@ +package org.broadinstitute.sting.alignment.reference.packing; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteOrder; + +/** + * Utilities designed for packing / unpacking bases. 
+ * + * @author mhanna + * @version 0.1 + */ +public class PackUtils { + /** + * How many possible bases can be encoded? + */ + public static final int ALPHABET_SIZE = 4; + + /** + * How many bits does it take to store a single base? + */ + public static final int BITS_PER_BASE = (int)(Math.log(ALPHABET_SIZE)/Math.log(2)); + + /** + * How many bits fit into a single byte? + */ + public static final int BITS_PER_BYTE = 8; + + /** + * Writes a reference sequence to a PAC file. + * @param outputFile Filename for the PAC file. + * @param referenceSequence Reference sequence to write. + * @throws IOException If there's a problem writing to the output file. + */ + public static void writeReferenceSequence( File outputFile, byte[] referenceSequence ) throws IOException { + OutputStream outputStream = new FileOutputStream(outputFile); + + BasePackedOutputStream basePackedOutputStream = new BasePackedOutputStream(Byte.class, outputStream, ByteOrder.BIG_ENDIAN); + basePackedOutputStream.write(referenceSequence); + + outputStream.write(referenceSequence.length%PackUtils.ALPHABET_SIZE); + + outputStream.close(); + } + + + /** + * How many bits can a given type hold? + * @param type Type to test. + * @return Number of bits that the given type can hold. 
+ */ + public static int bitsInType( Class type ) { + try { + long typeSize = type.getField("MAX_VALUE").getLong(null) - type.getField("MIN_VALUE").getLong(null)+1; + long intTypeSize = (long)Integer.MAX_VALUE - (long)Integer.MIN_VALUE + 1; + if( typeSize > intTypeSize ) + throw new ReviewedStingException("Cannot determine number of bits available in type: " + type.getName()); + return (int)(Math.log(typeSize)/Math.log(2)); + } + catch( NoSuchFieldException ex ) { + throw new ReviewedStingException("Cannot determine number of bits available in type: " + type.getName(),ex); + } + catch( IllegalAccessException ex ) { + throw new ReviewedStingException("Cannot determine number of bits available in type: " + type.getName(),ex); + } + } + + /** + * Gets the two-bit representation of a base. A=00b, C=01b, G=10b, T=11b. + * @param base ASCII value for the base to pack. + * @return A byte from 0-3 indicating the base's packed value. + */ + public static byte packBase(byte base) { + switch( base ) { + case 'A': + return 0; + case 'C': + return 1; + case 'G': + return 2; + case 'T': + return 3; + default: + throw new ReviewedStingException("Unknown base type: " + base); + } + } + + /** + * Converts a two-bit representation of a base into an ASCII representation of a base. + * @param pack Byte from 0-3 indicating which base is represented. + * @return An ASCII value representing the packed base. + */ + public static byte unpackBase(byte pack) { + switch( pack ) { + case 0: + return 'A'; + case 1: + return 'C'; + case 2: + return 'G'; + case 3: + return 'T'; + default: + throw new ReviewedStingException("Unknown pack type: " + pack); + } + } + + /** + * Reverses an unpacked sequence of bases. + * @param bases bases to reverse. 
+ */ + public static void reverse( byte[] bases ) { + for( int i = 0, j = bases.length-1; i < j; i++, j-- ) { + byte temp = bases[j]; + bases[j] = bases[i]; + bases[i] = temp; + } + } + + /** + * Given a structure of size size that should be split + * into partitionSize partitions, how many partitions should + * be created? Size of last partition will be <= partitionSize. + * @param size Total size of the data structure. + * @param partitionSize Size of an individual partition. + * @return Number of partitions that would be created. + */ + public static int numberOfPartitions( long size, long partitionSize ) { + return (int)((size+partitionSize-1) / partitionSize); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/packing/UnsignedIntPackedInputStream.java b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/UnsignedIntPackedInputStream.java new file mode 100644 index 000000000..999e54451 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/UnsignedIntPackedInputStream.java @@ -0,0 +1,104 @@ +package org.broadinstitute.sting.alignment.reference.packing; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; + +/** + * Read a set of integers packed into + * + * @author mhanna + * @version 0.1 + */ +public class UnsignedIntPackedInputStream { + /** + * Ultimate target for the occurrence array. + */ + private final FileInputStream targetInputStream; + + /** + * Target channel from which to pull file data. + */ + private final FileChannel targetInputChannel; + + /** + * The byte order in which integer input data appears. + */ + private final ByteOrder byteOrder; + + /** + * How many bytes are required to store an integer? 
+ */ + private final int bytesPerInteger = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE; + + /** + * Create a new PackedIntInputStream, writing to the given target file. + * @param inputFile target input file. + * @param byteOrder Endianness to use when writing a list of integers. + * @throws java.io.IOException if an I/O error occurs. + */ + public UnsignedIntPackedInputStream(File inputFile, ByteOrder byteOrder) throws IOException { + this(new FileInputStream(inputFile),byteOrder); + } + + /** + * Read ints from the given InputStream. + * @param inputStream Input stream from which to read ints. + * @param byteOrder Endianness to use when writing a list of integers. + */ + public UnsignedIntPackedInputStream(FileInputStream inputStream, ByteOrder byteOrder) { + this.targetInputStream = inputStream; + this.targetInputChannel = inputStream.getChannel(); + this.byteOrder = byteOrder; + } + + /** + * Read a datum from the input stream. + * @return The next input datum in the stream. + * @throws IOException if an I/O error occurs. + */ + public long read() throws IOException { + long[] data = new long[1]; + read(data); + return data[0]; + } + + /** + * Read the data from the input stream. + * @param data placeholder for input data. + * @throws IOException if an I/O error occurs. + */ + public void read( long[] data ) throws IOException { + read( data, 0, data.length ); + } + + /** + * Read the data from the input stream, starting at the given offset. + * @param data placeholder for input data. + * @param offset place in the array to start reading in data. + * @param length number of ints to read in. + * @throws IOException if an I/O error occurs. 
+ */ + public void read( long[] data, int offset, int length ) throws IOException { + ByteBuffer readBuffer = ByteBuffer.allocate(bytesPerInteger*length).order(byteOrder); + + targetInputChannel.read(readBuffer,targetInputChannel.position()); + readBuffer.flip(); + targetInputChannel.position(targetInputChannel.position()+readBuffer.remaining()); + + int i = 0; + while(i < length) + data[offset+i++] = readBuffer.getInt() & 0xFFFFFFFFL; + } + + /** + * Closes the given output stream. + * @throws IOException if an I/O error occurs. + */ + public void close() throws IOException { + targetInputStream.close(); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/packing/UnsignedIntPackedOutputStream.java b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/UnsignedIntPackedOutputStream.java new file mode 100755 index 000000000..b02024366 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/UnsignedIntPackedOutputStream.java @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.alignment.reference.packing; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +/** + * Writes an list of integers to the output file. + * + * @author mhanna + * @version 0.1 + */ +public class UnsignedIntPackedOutputStream { + /** + * Ultimate target for the occurrence array. + */ + private final OutputStream targetOutputStream; + + /** + * A fixed-size buffer for int-packed data. + */ + private final ByteBuffer buffer; + + /** + * Create a new PackedIntOutputStream, writing to the given target file. + * @param outputFile target output file. + * @param byteOrder Endianness to use when writing a list of integers. + * @throws IOException if an I/O error occurs. + */ + public UnsignedIntPackedOutputStream(File outputFile, ByteOrder byteOrder) throws IOException { + this(new FileOutputStream(outputFile),byteOrder); + } + + /** + * Write packed ints to the given OutputStream. + * @param outputStream Output stream to which to write packed ints. + * @param byteOrder Endianness to use when writing a list of integers. + */ + public UnsignedIntPackedOutputStream(OutputStream outputStream, ByteOrder byteOrder) { + this.targetOutputStream = outputStream; + buffer = ByteBuffer.allocate(PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE).order(byteOrder); + } + + /** + * Write the data to the output stream. + * @param datum datum to write. + * @throws IOException if an I/O error occurs. 
+ */ + public void write( long datum ) throws IOException { + buffer.rewind(); + buffer.putInt((int)datum); + targetOutputStream.write(buffer.array()); + } + + /** + * Write the data to the output stream. + * @param data data to write. occurrences.length must match alphabet size. + * @throws IOException if an I/O error occurs. + */ + public void write( long[] data ) throws IOException { + for(long datum: data) + write(datum); + } + + /** + * Write the given chunk of data to the input stream. + * @param data data to write. + * @param offset position at which to start. + * @param length number of ints to write. + * @throws IOException if an I/O error occurs. + */ + public void write( long[] data, int offset, int length ) throws IOException { + for( int i = offset; i < offset+length; i++ ) + write(data[i]); + } + + /** + * Flush the contents of the OutputStream to disk. + * @throws IOException if an I/O error occurs. + */ + public void flush() throws IOException { + targetOutputStream.flush(); + } + + /** + * Closes the given output stream. + * @throws IOException if an I/O error occurs. 
+ */ + public void close() throws IOException { + targetOutputStream.close(); + } + +} diff --git a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java index f8e298d88..b9e380295 100755 --- a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java +++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java @@ -25,20 +25,21 @@ package org.broadinstitute.sting.analyzecovariates; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.gatk.walkers.recalibration.*; +import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate; +import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum; +import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.commandline.CommandLineProgram; -import org.broadinstitute.sting.commandline.Argument; +import java.io.*; import java.util.ArrayList; import java.util.Collection; -import java.util.List; import java.util.Map; import java.util.regex.Pattern; -import java.io.*; /** * Created by IntelliJ IDEA. 
diff --git a/public/java/src/org/broadinstitute/sting/commandline/Argument.java b/public/java/src/org/broadinstitute/sting/commandline/Argument.java index b2ee9d1fc..33592287d 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/Argument.java +++ b/public/java/src/org/broadinstitute/sting/commandline/Argument.java @@ -25,12 +25,7 @@ package org.broadinstitute.sting.commandline; -import java.lang.annotation.Documented; -import java.lang.annotation.ElementType; -import java.lang.annotation.Inherited; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; +import java.lang.annotation.*; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitionGroup.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitionGroup.java index c36a8e04f..b47677b08 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitionGroup.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitionGroup.java @@ -27,10 +27,10 @@ package org.broadinstitute.sting.commandline; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.List; -import java.util.Collections; import java.util.ArrayList; +import java.util.Collections; import java.util.Iterator; +import java.util.List; /** * A group of argument definitions. 
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java index 39e698ca3..9f92df6e0 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java @@ -27,10 +27,10 @@ package org.broadinstitute.sting.commandline; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.Set; -import java.util.HashSet; import java.util.Collection; +import java.util.HashSet; import java.util.Iterator; +import java.util.Set; /** * A collection of argument definitions. diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 6c50e1784..9c33e084d 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -25,12 +25,12 @@ package org.broadinstitute.sting.commandline; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.walkers.Multiplex; import org.broadinstitute.sting.gatk.walkers.Multiplexer; -import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.lang.annotation.Annotation; diff --git a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java index d404a2b6e..aba4fc109 
100644 --- a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java +++ b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java @@ -25,14 +25,20 @@ package org.broadinstitute.sting.commandline; -import org.apache.log4j.*; +import org.apache.log4j.FileAppender; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.PatternLayout; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.help.ApplicationDetails; import org.broadinstitute.sting.utils.help.HelpFormatter; import java.io.IOException; -import java.util.*; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.Locale; public abstract class CommandLineProgram { diff --git a/public/java/src/org/broadinstitute/sting/commandline/CommandLineUtils.java b/public/java/src/org/broadinstitute/sting/commandline/CommandLineUtils.java index 99608f167..bd2006388 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/CommandLineUtils.java +++ b/public/java/src/org/broadinstitute/sting/commandline/CommandLineUtils.java @@ -25,11 +25,17 @@ package org.broadinstitute.sting.commandline; -import org.apache.log4j.*; +import org.apache.log4j.Appender; +import org.apache.log4j.ConsoleAppender; +import org.apache.log4j.Logger; +import org.apache.log4j.PatternLayout; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.*; import java.lang.annotation.Annotation; +import java.util.Collections; +import java.util.Enumeration; +import java.util.LinkedHashMap; +import java.util.Map; /** * Static utility methods for working with command-line arguments. 
diff --git a/public/java/src/org/broadinstitute/sting/commandline/MissingArgumentValueException.java b/public/java/src/org/broadinstitute/sting/commandline/MissingArgumentValueException.java index 8029db7b3..4e6c3a16f 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/MissingArgumentValueException.java +++ b/public/java/src/org/broadinstitute/sting/commandline/MissingArgumentValueException.java @@ -26,8 +26,6 @@ package org.broadinstitute.sting.commandline; import org.broadinstitute.sting.utils.Utils; -import java.util.Collection; - /** * Specifies that a value was missing when attempting to populate an argument. */ diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index 717c5c522..8423bb2f2 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -25,16 +25,16 @@ package org.broadinstitute.sting.commandline; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.ApplicationDetails; import org.broadinstitute.sting.utils.help.HelpFormatter; -import org.apache.log4j.Logger; -import java.lang.reflect.*; +import java.lang.reflect.Field; import java.util.*; /** diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java index 
d1cda3ed9..a070cb5a1 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java @@ -27,10 +27,8 @@ package org.broadinstitute.sting.commandline; import org.broadinstitute.sting.utils.Utils; -import java.util.regex.Pattern; import java.util.regex.Matcher; -import java.util.List; -import java.util.ArrayList; +import java.util.regex.Pattern; /** * Holds a pattern, along with how to get to the argument definitions that could match that pattern. diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index fd7e749c3..a080ab439 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -25,30 +25,21 @@ package org.broadinstitute.sting.gatk; -import org.broadinstitute.sting.commandline.Tags; -import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; -import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; -import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; +import org.broadinstitute.sting.commandline.CommandLineProgram; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; -import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; -import 
org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.Walker; - -import java.io.File; -import java.io.FileNotFoundException; -import java.util.*; - -import net.sf.picard.filter.SamRecordFilter; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.ListFileUtils; -import org.broadinstitute.sting.utils.text.XReadLines; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; /** * @author aaron diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index 7982f61e2..da2be74bf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -26,13 +26,15 @@ package org.broadinstitute.sting.gatk; import org.broad.tribble.TribbleException; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.ArgumentCollection; +import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.walkers.Attribution; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.text.TextFormattingUtils; -import org.broadinstitute.sting.utils.help.ApplicationDetails; -import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.ApplicationDetails; +import org.broadinstitute.sting.utils.text.TextFormattingUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java 
b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index f8527c33b..918bc1251 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -28,24 +28,14 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.*; import org.apache.log4j.Logger; -import org.broadinstitute.sting.commandline.ArgumentException; -import org.broadinstitute.sting.commandline.ArgumentSource; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.commandline.CommandLineUtils; -import org.broadinstitute.sting.commandline.ParsingEngine; -import org.broadinstitute.sting.commandline.Tags; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; -import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; -import org.broadinstitute.sting.gatk.datasources.reads.Shard; +import org.broadinstitute.sting.gatk.datasources.reads.*; import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource; -import org.broadinstitute.sting.gatk.datasources.reads.MonolithicShardStrategy; -import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy; -import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategyFactory; -import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; import org.broadinstitute.sting.gatk.executive.MicroScheduler; import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.filters.ReadFilter; @@ -65,6 +55,7 @@ 
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.interval.IntervalUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java b/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java index 27a86ab3f..7cb615f7f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java +++ b/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java @@ -25,13 +25,12 @@ package org.broadinstitute.sting.gatk; import net.sf.picard.filter.SamRecordFilter; - -import java.util.Map; -import java.util.HashMap; -import java.util.Collections; - import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + /** * Holds a bunch of basic information about the traversal. 
*/ diff --git a/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java b/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java index 1a361029a..93fa2d146 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java +++ b/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk; -import net.sf.picard.filter.SamRecordFilter; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileReader; diff --git a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java index 9553f651e..cf190835e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java @@ -25,19 +25,18 @@ package org.broadinstitute.sting.gatk; -import net.sf.picard.filter.SamRecordFilter; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.text.TextFormattingUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.help.DescriptionTaglet; import org.broadinstitute.sting.utils.help.DisplayNameTaglet; import org.broadinstitute.sting.utils.help.SummaryTaglet; -import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.text.TextFormattingUtils; import java.util.*; diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 93638f21d..ee2e85025 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -26,17 +26,16 @@ package org.broadinstitute.sting.gatk.arguments; import net.sf.samtools.SAMFileReader; -import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; -import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.DownsamplingMethod; -import org.broadinstitute.sting.utils.interval.IntervalSetRule; +import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; +import org.broadinstitute.sting.utils.interval.IntervalSetRule; import org.simpleframework.xml.*; import org.simpleframework.xml.core.Persister; import org.simpleframework.xml.stream.Format; diff --git a/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java b/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java index 337c2664c..17e4a0743 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java @@ -26,13 +26,13 @@ package 
org.broadinstitute.sting.gatk.contexts; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.HasGenomeLocation; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import java.util.*; +import java.util.List; /** * Useful class for forwarding on locusContext data from this iterator diff --git a/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java b/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java index 6a0d30837..1f9a7d705 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java @@ -27,8 +27,8 @@ package org.broadinstitute.sting.gatk.contexts; import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java index 760b3a7bc..376064cdb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java @@ -25,11 +25,12 @@ package org.broadinstitute.sting.gatk.contexts; -import org.broadinstitute.sting.utils.GenomeLocParser; -import 
org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.BaseUtils; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; import net.sf.samtools.util.StringUtil; -import com.google.java.contract.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; /** * The section of the reference that overlaps with the given diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java index b36c59a2c..e92599494 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java @@ -1,16 +1,14 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; + +import java.util.Collections; import java.util.List; import java.util.NoSuchElementException; -import java.util.ArrayList; -import java.util.Collections; - -import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.utils.GenomeLoc; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; /** * User: hanna * Date: May 13, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java index 
330a9e4f7..ff312bcac 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java @@ -1,13 +1,12 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; +import net.sf.picard.reference.ReferenceSequence; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.Window; -import org.broadinstitute.sting.gatk.walkers.Reference; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import net.sf.picard.reference.ReferenceSequence; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; /* * Copyright (c) 2009 The Broad Institute * diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java index 72b962522..55304da34 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java @@ -1,16 +1,15 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.iterators.LocusIterator; 
-import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.Collection; -import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.GenomeLocParser; - /** * Presents data sharded by locus to the traversal engine. * diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java index b467a2ab5..f9ed0cb74 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.DownsampleType; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.iterators.LocusIterator; import org.broadinstitute.sting.utils.GenomeLoc; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java index a91e169c1..223659a46 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java @@ -5,10 +5,10 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.utils.GenomeLoc; -import java.util.List; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.List; /** 
* User: hanna * Date: May 21, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java index d2c097f5d..3d62faf49 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java @@ -1,9 +1,8 @@ package org.broadinstitute.sting.gatk.datasources.providers; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.utils.GenomeLoc; /* * Copyright (c) 2009 The Broad Institute * diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java index 5a672b09f..7843e7518 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java @@ -1,14 +1,13 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.Collection; -import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.GenomeLocParser; - /** * Present data sharded by read to a traversal engine. 
* diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadView.java index 0c4b78a7c..e809092d4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadView.java @@ -1,11 +1,10 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import java.util.Collection; import java.util.Arrays; - -import net.sf.samtools.SAMRecord; +import java.util.Collection; /** * User: hanna * Date: May 22, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java index d6c938f36..efb92235f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java @@ -1,16 +1,16 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import org.broadinstitute.sting.utils.*; - -import java.util.Collections; -import java.util.Collection; -import java.util.Arrays; - -import net.sf.samtools.SAMSequenceRecord; -import net.sf.samtools.SAMRecord; -import net.sf.picard.reference.ReferenceSequence; import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.picard.reference.ReferenceSequence; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMSequenceRecord; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; /** * User: hanna * Date: May 22, 2009 diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java index feed2ab85..39c632539 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java @@ -25,9 +25,9 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java index a0ea32f9b..803bd885b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java @@ -1,15 +1,14 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; -import java.util.List; import java.util.Collection; - -import net.sf.picard.reference.IndexedFastaSequenceFile; +import 
java.util.List; /** * User: hanna * Date: May 8, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMBlockStartIterator.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMBlockStartIterator.java index a9e04e357..de938e845 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMBlockStartIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMBlockStartIterator.java @@ -24,9 +24,6 @@ package org.broadinstitute.sting.gatk.datasources.reads; -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMRecord; -import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.utils.exceptions.StingException; import java.io.File; @@ -35,9 +32,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.FileChannel; -import java.util.ArrayList; import java.util.Iterator; -import java.util.List; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMSchedule.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMSchedule.java index 34693d501..521bcd5a3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMSchedule.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMSchedule.java @@ -39,12 +39,7 @@ import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.FileChannel; -import java.util.ArrayList; -import java.util.BitSet; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; +import java.util.*; /** * Writes schedules for a single BAM file to a target output file. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java index 266232c0f..467aebac5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java @@ -26,20 +26,10 @@ package org.broadinstitute.sting.gatk.datasources.reads; import net.sf.picard.util.PeekableIterator; import net.sf.samtools.GATKBAMFileSpan; -import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import java.io.File; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; +import java.util.*; /** * Assign intervals to the most appropriate blocks, keeping as little as possible in memory at once. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java index c014c1995..e4141f61c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java @@ -29,19 +29,11 @@ import net.sf.samtools.GATKBAMFileSpan; import net.sf.samtools.SAMFileSpan; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalUtils; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.SortedMap; -import java.util.TreeMap; +import java.util.*; /** * Represents a small section of a BAM file, and every associated interval. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java index 8ebb8b1a8..5d0c38b78 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java @@ -23,24 +23,18 @@ */ package org.broadinstitute.sting.gatk.datasources.reads; -import net.sf.samtools.Bin; - -import net.sf.samtools.GATKBAMFileSpan; -import net.sf.samtools.GATKBin; -import net.sf.samtools.GATKChunk; -import net.sf.samtools.LinearIndex; -import net.sf.samtools.SAMException; -import net.sf.samtools.util.RuntimeIOException; +import net.sf.samtools.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; import java.io.FileInputStream; import java.io.IOException; -import java.lang.ref.SoftReference; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.FileChannel; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; /** * A basic interface for querying BAM indices. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndexData.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndexData.java index f9b998a60..daf1b77e3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndexData.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndexData.java @@ -24,11 +24,7 @@ package org.broadinstitute.sting.gatk.datasources.reads; -import net.sf.samtools.Bin; -import net.sf.samtools.GATKBAMFileSpan; -import net.sf.samtools.GATKBin; -import net.sf.samtools.GATKChunk; -import net.sf.samtools.LinearIndex; +import net.sf.samtools.*; import java.util.ArrayList; import java.util.Collections; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java index fc3f76ab7..4ddf28dce 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java @@ -35,16 +35,7 @@ import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.PriorityQueue; -import java.util.Queue; +import java.util.*; /** * Shard intervals based on position within the BAM file. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShard.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShard.java index 26af890b4..19d33aa6b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShard.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShard.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.datasources.reads; +import net.sf.samtools.SAMFileSpan; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; @@ -7,8 +8,6 @@ import org.broadinstitute.sting.utils.Utils; import java.util.List; import java.util.Map; -import net.sf.samtools.SAMFileSpan; - /** * Handles locus shards of BAM information. * @author aaron diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShardStrategy.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShardStrategy.java index 950d67428..a5ca07853 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShardStrategy.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShardStrategy.java @@ -24,17 +24,18 @@ package org.broadinstitute.sting.gatk.datasources.reads; -import net.sf.samtools.GATKBAMFileSpan; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMFileSpan; +import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import java.util.*; - -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMSequenceRecord; -import net.sf.samtools.SAMFileSpan; -import net.sf.picard.reference.IndexedFastaSequenceFile; +import 
java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; /** * A sharding strategy for loci based on reading of the index. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShard.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShard.java index 7579c22f6..278eeb898 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShard.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShard.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.gatk.datasources.reads; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.GenomeLoc; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShardStrategy.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShardStrategy.java index 10228ecd7..28b737f28 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShardStrategy.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShardStrategy.java @@ -4,8 +4,8 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.Iterator; -import java.util.NoSuchElementException; import java.util.List; +import java.util.NoSuchElementException; /** * Create a giant shard representing all the data in the input BAM(s). 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java index 9aecd7779..4d9c9092d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java @@ -1,15 +1,17 @@ package org.broadinstitute.sting.gatk.datasources.reads; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter; - -import java.util.*; - import net.sf.samtools.SAMFileSpan; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; + /** * * User: aaron diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardStrategy.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardStrategy.java index da70a615b..c2235ec73 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardStrategy.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardStrategy.java @@ -25,12 +25,11 @@ package org.broadinstitute.sting.gatk.datasources.reads; import net.sf.samtools.SAMFileSpan; - -import java.util.*; - import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import java.util.*; + /** * The sharding strategy for reads using a simple counting mechanism. Each read shard * has a specific number of reads (default to 10K) which is configured in the constructor. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index c2aa5f18e..6064806f3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -24,21 +24,19 @@ package org.broadinstitute.sting.gatk.datasources.reads; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.picard.sam.MergingSamRecordIterator; +import net.sf.picard.sam.SamFileHeaderMerger; import net.sf.samtools.*; import net.sf.samtools.util.CloseableIterator; -import net.sf.picard.filter.SamRecordFilter; -import net.sf.picard.sam.SamFileHeaderMerger; -import net.sf.picard.sam.MergingSamRecordIterator; -import net.sf.picard.reference.IndexedFastaSequenceFile; - import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.DownsamplingMethod; -import org.broadinstitute.sting.gatk.filters.ReadFilter; -import org.broadinstitute.sting.gatk.iterators.*; -import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.ReadMetrics; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator; +import org.broadinstitute.sting.gatk.filters.ReadFilter; +import org.broadinstitute.sting.gatk.iterators.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.baq.BAQ; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java index 418f5d3ee..f8d941784 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java @@ -2,17 +2,13 @@ package org.broadinstitute.sting.gatk.datasources.reads; import net.sf.samtools.SAMFileSpan; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; -import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.HasGenomeLocation; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.io.Serializable; import java.util.Collections; import java.util.List; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ShardStrategyFactory.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ShardStrategyFactory.java index fa733ce12..780b41ef7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ShardStrategyFactory.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ShardStrategyFactory.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.datasources.reads; -import net.sf.samtools.SAMSequenceDictionary; import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.SAMSequenceDictionary; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; /** * diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMFileStat.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMFileStat.java index ae0f1cf43..07c13a76e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMFileStat.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMFileStat.java @@ -25,17 +25,17 @@ package org.broadinstitute.sting.gatk.datasources.reads.utilities; -import org.broadinstitute.sting.commandline.CommandLineProgram; +import net.sf.samtools.BAMIndex; +import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.instrumentation.Sizeof; import java.io.File; import java.lang.reflect.Field; -import java.util.Map; import java.util.List; - -import net.sf.samtools.*; +import java.util.Map; /** * diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMTagRenamer.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMTagRenamer.java index f03e2a44f..54de04379 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMTagRenamer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMTagRenamer.java @@ -24,12 +24,12 @@ package org.broadinstitute.sting.gatk.datasources.reads.utilities; -import org.broadinstitute.sting.commandline.CommandLineProgram; -import org.broadinstitute.sting.commandline.Argument; -import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMFileWriterFactory; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Argument; +import 
org.broadinstitute.sting.commandline.CommandLineProgram; import java.io.File; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/PrintBGZFBounds.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/PrintBGZFBounds.java index df7dccaa9..773541d11 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/PrintBGZFBounds.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/PrintBGZFBounds.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.datasources.reads.utilities; -import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.CommandLineProgram; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java index c2d64ddd8..ef69a8e5f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java @@ -25,16 +25,15 @@ package org.broadinstitute.sting.gatk.datasources.reference; -import net.sf.picard.reference.ReferenceSequenceFileFactory; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import net.sf.picard.reference.FastaSequenceIndexBuilder; -import net.sf.picard.sam.CreateSequenceDictionary; -import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.FastaSequenceIndex; +import net.sf.picard.reference.FastaSequenceIndexBuilder; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.picard.sam.CreateSequenceDictionary; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import 
org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.file.FSLockWithShared; import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException; -import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import java.io.File; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ResourcePool.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ResourcePool.java index 934f4f997..21f58d480 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ResourcePool.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ResourcePool.java @@ -28,11 +28,7 @@ import net.sf.samtools.SAMSequenceDictionary; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; +import java.util.*; /** * A pool of open resources, all of which can create a closeable iterator. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/sample/SampleDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/sample/SampleDataSource.java index 0a5981a1b..067bf3f72 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/sample/SampleDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/sample/SampleDataSource.java @@ -3,11 +3,10 @@ package org.broadinstitute.sting.gatk.datasources.sample; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.yaml.snakeyaml.TypeDescription; import org.yaml.snakeyaml.Yaml; import org.yaml.snakeyaml.constructor.Constructor; diff --git a/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java b/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java index a1419ba70..5dbd90405 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java @@ -1,15 +1,20 @@ package org.broadinstitute.sting.gatk.examples; -import net.sf.samtools.*; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.contexts.*; -import org.broadinstitute.sting.utils.pileup.*; +import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; 
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import java.util.*; import java.io.PrintStream; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; /** * Computes the coverage per sample. diff --git a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java index b96d0ffbf..26205a203 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk.examples; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -32,8 +34,6 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidSNPGenotypePriors; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java b/public/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java index 3e335733d..b23782563 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java @@ -25,18 +25,18 @@ package org.broadinstitute.sting.gatk.executive; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; -import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; -import java.util.List; import java.util.Iterator; +import java.util.List; /** * Manages the */ diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index 8a27e008c..59fb4aa9e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -1,27 +1,27 @@ package org.broadinstitute.sting.gatk.executive; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.walkers.TreeReducible; import 
org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.io.*; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.io.OutputTracker; +import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker; +import org.broadinstitute.sting.gatk.walkers.TreeReducible; +import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor; -import java.util.Queue; -import java.util.LinkedList; import java.util.Collection; -import java.util.concurrent.Executors; +import java.util.LinkedList; +import java.util.Queue; import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; -import net.sf.picard.reference.IndexedFastaSequenceFile; - /** * A microscheduler that schedules shards according to a tree-like structure. * Requires a special walker tagged with a 'TreeReducible' interface. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index 4cb571c45..9466fdf75 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -1,22 +1,21 @@ package org.broadinstitute.sting.gatk.executive; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.io.DirectOutputTracker; import org.broadinstitute.sting.gatk.io.OutputTracker; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.Walker; import java.util.Collection; -import net.sf.picard.reference.IndexedFastaSequenceFile; - /** A micro-scheduling manager for single-threaded execution of a traversal. 
*/ public class LinearMicroScheduler extends MicroScheduler { diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index cfe0f8187..23e5769f1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -25,26 +25,18 @@ package org.broadinstitute.sting.gatk.executive; +import net.sf.picard.reference.IndexedFastaSequenceFile; import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.io.OutputTracker; +import org.broadinstitute.sting.gatk.iterators.NullSAMIterator; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.traversals.*; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.io.OutputTracker; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.iterators.NullSAMIterator; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.ReadMetrics; - -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.PrintStream; -import java.lang.management.ManagementFactory; -import java.util.*; - -import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import 
org.broadinstitute.sting.utils.threading.*; @@ -52,6 +44,11 @@ import org.broadinstitute.sting.utils.threading.*; import javax.management.JMException; import javax.management.MBeanServer; import javax.management.ObjectName; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.PrintStream; +import java.lang.management.ManagementFactory; +import java.util.Collection; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/OutputMergeTask.java b/public/java/src/org/broadinstitute/sting/gatk/executive/OutputMergeTask.java index 76e0c1c8a..7be37a616 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/OutputMergeTask.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/OutputMergeTask.java @@ -1,10 +1,9 @@ package org.broadinstitute.sting.gatk.executive; import org.broadinstitute.sting.gatk.io.storage.Storage; -import org.broadinstitute.sting.gatk.io.OutputTracker; -import java.util.Collection; import java.util.ArrayList; +import java.util.Collection; /** * User: hanna diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/ReduceTree.java b/public/java/src/org/broadinstitute/sting/gatk/executive/ReduceTree.java index 151a1ba26..7aac70b47 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/ReduceTree.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/ReduceTree.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.executive; -import java.util.Queue; -import java.util.List; import java.util.ArrayList; import java.util.LinkedList; +import java.util.List; +import java.util.Queue; import java.util.concurrent.Future; /** * User: hanna diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java b/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java index b78a4edc9..6136bd68d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.executive; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker; -import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.concurrent.Callable; diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java b/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java index 8fb4adb5d..d36a3b576 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java @@ -4,8 +4,8 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.concurrent.Callable; -import java.util.concurrent.Future; import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; /** * User: hanna * Date: Apr 29, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java b/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java index 5c341bb02..cfbce58ee 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java @@ -1,17 +1,20 @@ package org.broadinstitute.sting.gatk.executive; -import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.iterators.*; +import net.sf.picard.util.PeekableIterator; import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; - -import java.util.*; - -import net.sf.picard.util.PeekableIterator; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; +import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource; +import org.broadinstitute.sting.gatk.iterators.LocusIterator; +import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + /** * Buffer shards of data which may or may not contain multiple loci into * iterators of all data which cover an interval. Its existence is an homage diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java index b8a3ee977..0987c5d74 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java @@ -24,8 +24,10 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.*; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; /** * Filter out reads with wonky cigar strings. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/BadMateFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/BadMateFilter.java index 3b988c8fb..8596e18eb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/BadMateFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/BadMateFilter.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java b/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java index 84390c173..03fc2063b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java @@ -23,17 +23,16 @@ */ package org.broadinstitute.sting.gatk.filters; -import net.sf.samtools.util.CloserUtil; - -import java.util.Iterator; -import java.util.NoSuchElementException; -import java.util.Collection; - +import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; -import net.sf.picard.filter.SamRecordFilter; +import net.sf.samtools.util.CloserUtil; import org.broadinstitute.sting.gatk.ReadMetrics; +import java.util.Collection; +import java.util.Iterator; +import java.util.NoSuchElementException; + /** * Filtering Iterator which takes a filter and an iterator and iterates * through only those records which are not rejected by the filter. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/DuplicateReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/DuplicateReadFilter.java index fb3c38582..589910fc7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/DuplicateReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/DuplicateReadFilter.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /* diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java index b7806524a..cd77a9e7e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java b/public/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java index cda08fb66..67f82235d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java @@ -25,11 +25,8 @@ package org.broadinstitute.sting.gatk.filters; -import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.classloader.PluginManager; -import net.sf.picard.filter.SamRecordFilter; - import java.util.Collection; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java index 4deeb09ee..74deace9a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java @@ -24,8 +24,8 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java index bcd473b15..75369b306 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java new file mode 100644 index 000000000..1afec36d1 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2009 The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.filters; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.QualityUtils; + +/** + * Filter out reads whose mapping quality is unavailable. + * + * @author ebanks + * @version 0.1 + */ + +public class MappingQualityUnavailableReadFilter extends ReadFilter { + public boolean filterOut(SAMRecord rec) { + return (rec.getMappingQuality() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE); + } +} + diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ZeroMappingQualityReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java similarity index 90% rename from public/java/src/org/broadinstitute/sting/gatk/filters/ZeroMappingQualityReadFilter.java rename to public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java index 7e6fc5e82..e49d4117c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ZeroMappingQualityReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java @@ -24,17 +24,16 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /** - * Filter out zero mapping quality reads. + * Filter out mapping quality zero reads. 
* * @author hanna * @version 0.1 */ -public class ZeroMappingQualityReadFilter extends ReadFilter { +public class MappingQualityZeroReadFilter extends ReadFilter { public boolean filterOut(SAMRecord rec) { return (rec.getMappingQuality() == 0); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MaxInsertSizeFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MaxInsertSizeFilter.java index 584783d34..7bcee033f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MaxInsertSizeFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MaxInsertSizeFilter.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MaxReadLengthFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MaxReadLengthFilter.java index 8a2fd5af3..cd31da61a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MaxReadLengthFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MaxReadLengthFilter.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MissingReadGroupFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MissingReadGroupFilter.java index d5d40ec38..490a55040 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MissingReadGroupFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MissingReadGroupFilter.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /** diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/filters/NoOriginalQualityScoresFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/NoOriginalQualityScoresFilter.java index fc21538e8..29738e499 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/NoOriginalQualityScoresFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/NoOriginalQualityScoresFilter.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /* diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java index cfbba0383..31c2144ce 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java index dcddebd55..8ad91ac1c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java @@ -25,9 +25,7 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; - import org.broadinstitute.sting.utils.sam.ReadUtils; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java index 691fd95f0..30b2f828d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java @@ -25,11 +25,9 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; - -import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.utils.sam.ReadUtils; /** * Filter out PL matching reads. diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilter.java index 4ef88402d..81044b888 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilter.java @@ -1,14 +1,12 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMReadGroupRecord; - -import java.util.Set; -import java.util.HashSet; - +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.exceptions.UserException; +import java.util.HashSet; +import java.util.Set; + /** * Created by IntelliJ IDEA. 
* User: asivache diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java index 1d74ee20e..227637761 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.gatk.filters; import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilter.java index 69ebc8b04..0e5e8800c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilter.java @@ -25,17 +25,16 @@ package org.broadinstitute.sting.gatk.filters; -import java.util.*; -import java.util.Map.Entry; -import java.io.File; -import java.io.FileNotFoundException; - -import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; +import java.io.File; +import java.io.FileNotFoundException; +import java.util.*; +import java.util.Map.Entry; + /** * Removes records matching the read group tag and exact match string. 
* For example, this filter value: diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadStrandFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadStrandFilter.java index 1b2a77f45..16eeed3cc 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadStrandFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadStrandFilter.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/SampleFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/SampleFilter.java index 682b22b1f..99d6bc154 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/SampleFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/SampleFilter.java @@ -25,9 +25,8 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/SingleReadGroupFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/SingleReadGroupFilter.java index 05b472036..2f93cbcae 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/SingleReadGroupFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/SingleReadGroupFilter.java @@ -25,9 +25,8 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMReadGroupRecord; -import net.sf.picard.filter.SamRecordFilter; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; /** diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/filters/UnmappedReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/UnmappedReadFilter.java index ac4f4853a..e7ee345d2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/UnmappedReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/UnmappedReadFilter.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/DirectOutputTracker.java b/public/java/src/org/broadinstitute/sting/gatk/io/DirectOutputTracker.java index 865528688..658a28fbd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/DirectOutputTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/DirectOutputTracker.java @@ -25,9 +25,9 @@ package org.broadinstitute.sting.gatk.io; -import org.broadinstitute.sting.gatk.io.stubs.Stub; -import org.broadinstitute.sting.gatk.io.storage.StorageFactory; import org.broadinstitute.sting.gatk.io.storage.Storage; +import org.broadinstitute.sting.gatk.io.storage.StorageFactory; +import org.broadinstitute.sting.gatk.io.stubs.Stub; /** * Maps creation of storage directly to output streams in parent. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/OutputTracker.java b/public/java/src/org/broadinstitute/sting/gatk/io/OutputTracker.java index b68013aa4..f39ba2d8d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/io/OutputTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/OutputTracker.java @@ -26,20 +26,20 @@ package org.broadinstitute.sting.gatk.io; import net.sf.samtools.SAMFileReader; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.commandline.ArgumentSource; -import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; -import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.gatk.io.storage.Storage; +import org.broadinstitute.sting.gatk.io.storage.StorageFactory; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamStub; import org.broadinstitute.sting.gatk.io.stubs.Stub; -import org.broadinstitute.sting.gatk.io.storage.StorageFactory; -import org.broadinstitute.sting.gatk.io.storage.Storage; +import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; -import java.io.*; +import java.io.OutputStream; import java.lang.reflect.Field; -import java.util.Map; import java.util.HashMap; +import java.util.Map; /** * Manages the output and err streams that are created specifically for walker diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/StingSAMFileWriter.java b/public/java/src/org/broadinstitute/sting/gatk/io/StingSAMFileWriter.java index 8701ecf3c..a9a74925d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/StingSAMFileWriter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/StingSAMFileWriter.java @@ -1,7 +1,7 @@ package 
org.broadinstitute.sting.gatk.io; -import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMFileWriter; /** * A writer that will allow unsorted BAM files to be written diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java b/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java index 36960246a..999deddd1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java @@ -25,15 +25,16 @@ package org.broadinstitute.sting.gatk.io; -import org.broadinstitute.sting.gatk.io.stubs.Stub; -import org.broadinstitute.sting.gatk.io.storage.StorageFactory; -import org.broadinstitute.sting.gatk.io.storage.Storage; import org.broadinstitute.sting.gatk.executive.OutputMergeTask; +import org.broadinstitute.sting.gatk.io.storage.Storage; +import org.broadinstitute.sting.gatk.io.storage.StorageFactory; +import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.utils.exceptions.UserException; -import java.util.*; import java.io.File; import java.io.IOException; +import java.util.HashMap; +import java.util.Map; /** * An output tracker that can either track its output per-thread or directly, diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/OutputStreamStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/OutputStreamStorage.java index 4dc976289..56c9c0465 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/OutputStreamStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/OutputStreamStorage.java @@ -25,14 +25,14 @@ package org.broadinstitute.sting.gatk.io.storage; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamStub; +import 
org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.*; +import java.nio.channels.Channels; import java.nio.channels.FileChannel; import java.nio.channels.WritableByteChannel; -import java.nio.channels.Channels; public class OutputStreamStorage extends OutputStream implements Storage { /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java index 610db1d76..cb8786be1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java @@ -27,17 +27,16 @@ package org.broadinstitute.sting.gatk.io.storage; import net.sf.samtools.*; import net.sf.samtools.util.CloseableIterator; - -import java.io.*; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; - import net.sf.samtools.util.RuntimeIOException; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterStub; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.SimplifyingSAMFileWriter; +import java.io.File; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; + /** * Provides temporary storage for SAMFileWriters. 
* diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java index ee5c56524..66907dd6b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java @@ -25,9 +25,9 @@ package org.broadinstitute.sting.gatk.io.storage; -import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamStub; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterStub; +import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java index 74176ec35..1da03e9c2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java @@ -1,16 +1,21 @@ package org.broadinstitute.sting.gatk.io.storage; +import net.sf.samtools.util.BlockCompressedOutputStream; import org.apache.log4j.Logger; import org.broad.tribble.source.BasicFeatureSource; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub; - -import java.io.*; - -import net.sf.samtools.util.BlockCompressedOutputStream; +import org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter; +import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import 
org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintStream; /** * Provides temporary and permanent storage for genotypes in VCF format. diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java index 00e78f391..8bc97c886 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java @@ -30,8 +30,8 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.io.OutputStream; import java.io.File; +import java.io.OutputStream; import java.lang.reflect.Constructor; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamStub.java index 5cf84c5a2..27bcb8a1c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamStub.java @@ -27,9 +27,9 @@ package org.broadinstitute.sting.gatk.io.stubs; import org.broadinstitute.sting.gatk.io.OutputTracker; -import java.io.OutputStream; -import java.io.IOException; import java.io.File; +import java.io.IOException; +import java.io.OutputStream; /** * A stub for routing and management of anything backed by an OutputStream. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java index d847015ed..f124c2302 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java @@ -24,14 +24,14 @@ package org.broadinstitute.sting.gatk.io.stubs; -import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; -import org.broadinstitute.sting.commandline.ArgumentSource; +import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.commandline.ArgumentMatches; +import org.broadinstitute.sting.commandline.ArgumentSource; +import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; import org.broadinstitute.sting.commandline.ParsingEngine; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import net.sf.samtools.SAMFileReader; import java.io.File; diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java index a9a272220..38640eda0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java @@ -25,18 +25,17 @@ package org.broadinstitute.sting.gatk.io.stubs; +import net.sf.samtools.SAMFileWriter; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; -import net.sf.samtools.SAMFileWriter; 
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import java.lang.annotation.Annotation; -import java.util.List; -import java.util.Arrays; import java.io.File; import java.io.OutputStream; +import java.lang.annotation.Annotation; +import java.util.Arrays; +import java.util.List; /** * Insert a SAMFileWriterStub instead of a full-fledged concrete OutputStream implementations. diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java index f5c1e0efc..d8e59a3dd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java @@ -25,20 +25,19 @@ package org.broadinstitute.sting.gatk.io.stubs; +import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMFileHeader; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.io.OutputTracker; +import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; +import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; import java.io.OutputStream; -import org.broadinstitute.sting.gatk.io.OutputTracker; -import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.baq.BAQ; - /** * A stub for routing and management of SAM file reading and writing. 
* diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java index 7521e754d..615841f02 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java @@ -25,14 +25,17 @@ package org.broadinstitute.sting.gatk.io.stubs; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; import java.io.OutputStream; -import java.util.*; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; /** * Injects new command-line arguments into the system providing support for the genotype writer. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java index 1a79d2785..bb84f9457 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java @@ -25,19 +25,19 @@ package org.broadinstitute.sting.gatk.io.stubs; -import java.io.File; -import java.io.PrintStream; -import java.io.OutputStream; -import java.util.Collection; - -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.gatk.CommandLineExecutable; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.io.File; +import java.io.OutputStream; +import java.io.PrintStream; +import java.util.Collection; /** * A stub for routing and management of genotype reading and writing. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java index b5643f834..478675f9d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java @@ -1,13 +1,11 @@ package org.broadinstitute.sting.gatk.iterators; +import net.sf.picard.sam.MergingSamRecordIterator; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; -import net.sf.picard.sam.MergingSamRecordIterator; import java.util.Iterator; -import org.broadinstitute.sting.gatk.ReadProperties; - /* * Copyright (c) 2009 The Broad Institute * diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/BufferingReadIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/BufferingReadIterator.java index f3a060be1..7eaf4be41 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/BufferingReadIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/BufferingReadIterator.java @@ -26,12 +26,11 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.Queue; import java.util.LinkedList; import java.util.NoSuchElementException; - -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import java.util.Queue; /** * Buffers access to a large stream of reads, replenishing the buffer only when the reads diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/DownsampleIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/DownsampleIterator.java index 1342f11fd..835748ff0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/DownsampleIterator.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/iterators/DownsampleIterator.java @@ -1,11 +1,10 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import java.util.Iterator; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; - public class DownsampleIterator implements StingSAMIterator { diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java index aa376a12a..240564d34 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java @@ -3,8 +3,8 @@ package org.broadinstitute.sting.gatk.iterators; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import java.util.NoSuchElementException; import java.util.Iterator; +import java.util.NoSuchElementException; /** * User: hanna * Date: May 12, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIterator.java index 30c1cf512..e177984ca 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIterator.java @@ -1,11 +1,10 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.util.CloseableIterator; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import java.util.Iterator; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; - /** * Iterator that traverses a SAM File, accumulating information on a per-locus basis */ diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java 
b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index 4e58813f5..e13c5a764 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -25,19 +25,27 @@ package org.broadinstitute.sting.gatk.iterators; -import net.sf.samtools.*; import net.sf.picard.util.PeekableIterator; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.ReadProperties; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.DownsamplingMethod; import org.broadinstitute.sting.gatk.DownsampleType; +import org.broadinstitute.sting.gatk.DownsamplingMethod; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.ReservoirDownsampler; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.pileup.*; +import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileupImpl; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.sting.utils.sam.ReadUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/NullSAMIterator.java 
b/public/java/src/org/broadinstitute/sting/gatk/iterators/NullSAMIterator.java index ff458467f..21b71c9e6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/NullSAMIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/NullSAMIterator.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.iterators; -import org.broadinstitute.sting.gatk.ReadProperties; import net.sf.samtools.SAMRecord; import java.util.Iterator; diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/PositionTrackingIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/PositionTrackingIterator.java index c2d3976ea..cc499b247 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/PositionTrackingIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/PositionTrackingIterator.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.gatk.iterators; -import org.broadinstitute.sting.gatk.ReadProperties; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java index 239392eec..2f30d12a8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java @@ -1,13 +1,8 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMTag; -import net.sf.samtools.SAMReadGroupRecord; -import org.broadinstitute.sting.gatk.ReadProperties; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.apache.log4j.Logger; - -import java.util.List; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /** * An iterator which does post-processing of a read, including potentially wrapping diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java index d294993d4..1b248d097 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; -import org.broadinstitute.sting.gatk.ReadProperties; /** * * User: aaron diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java index a51ca9292..69c0b3e0a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java @@ -26,19 +26,16 @@ package org.broadinstitute.sting.gatk.phonehome; import org.apache.log4j.Level; import org.apache.log4j.Logger; -import org.broadinstitute.sting.commandline.CommandLineUtils; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.jets3t.service.S3Service; import org.jets3t.service.S3ServiceException; import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.S3Bucket; import org.jets3t.service.model.S3Object; import org.jets3t.service.security.AWSCredentials; import 
org.simpleframework.xml.Element; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java index fa0c323b5..ce924fd87 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java @@ -25,17 +25,17 @@ package org.broadinstitute.sting.gatk.refdata; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.gatk.iterators.PushbackIterator; +import java.io.File; +import java.io.FileNotFoundException; +import java.lang.reflect.Constructor; import java.util.Iterator; import java.util.regex.Pattern; -import java.io.FileNotFoundException; -import java.io.File; -import java.lang.reflect.Constructor; /** * This is a low-level iterator designed to provide system-wide generic support for reading record-oriented data diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordListImpl.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordListImpl.java index cff97e4ee..59b273d38 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordListImpl.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordListImpl.java @@ -2,9 +2,9 @@ package org.broadinstitute.sting.gatk.refdata; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.HasGenomeLocation; 
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.GenomeLoc; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index 43bf6f8e0..d03b122e2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.refdata; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java index b3cb22a03..b7437e6e8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java @@ -6,9 +6,9 @@ import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import 
org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.Iterator; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index c7c0468e7..1d622e2c7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -3,17 +3,13 @@ package org.broadinstitute.sting.gatk.refdata; import org.broad.tribble.Feature; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.gelitext.GeliTextFeature; -import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.MutableGenotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.*; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java 
b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java index 59cd14a22..6bba754be 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java @@ -25,12 +25,6 @@ package org.broadinstitute.sting.gatk.refdata.features.annotator; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.StringTokenizer; - import org.apache.log4j.Logger; import org.broad.tribble.Feature; import org.broad.tribble.exception.CodecLineParsingException; @@ -41,6 +35,12 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.StringTokenizer; + public class AnnotatorInputTableCodec implements ReferenceDependentFeatureCodec { private static Logger logger = Logger.getLogger(AnnotatorInputTableCodec.class); diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java index d97e378fb..d12badd28 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java @@ -25,13 +25,13 @@ package org.broadinstitute.sting.gatk.refdata.features.annotator; +import org.broad.tribble.Feature; + import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.Map; -import org.broad.tribble.Feature; - /** * This class represents a single record in an 
AnnotatorInputTable. */ diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleCodec.java index 7f97451cf..5e536d4c1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleCodec.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleCodec.java @@ -26,19 +26,19 @@ package org.broadinstitute.sting.gatk.refdata.features.beagle; import org.broad.tribble.Feature; +import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.LineReader; +import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; + import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; -import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.broad.tribble.exception.CodecLineParsingException; -import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; public class BeagleCodec implements ReferenceDependentFeatureCodec { private String[] header; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleFeature.java index c7bf86569..e6832754d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleFeature.java @@ -25,14 +25,11 @@ package org.broadinstitute.sting.gatk.refdata.features.beagle; import org.broad.tribble.Feature; - +import 
org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.ArrayList; import java.util.Map; -import net.sf.samtools.util.StringUtil; -import org.broadinstitute.sting.utils.variantcontext.Allele; - public class BeagleFeature implements Feature { private String chr; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java index 4648efd1e..d12114f9a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java @@ -4,9 +4,8 @@ import org.broad.tribble.Feature; import org.broadinstitute.sting.gatk.refdata.Transcript; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupCodec.java index 00b7c45d9..43e2c3ff5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupCodec.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupCodec.java @@ -25,15 +25,15 @@ package org.broadinstitute.sting.gatk.refdata.features.sampileup; -import org.broad.tribble.FeatureCodec; import org.broad.tribble.Feature; +import org.broad.tribble.FeatureCodec; import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.readers.LineReader; import 
org.broad.tribble.util.ParsingUtils; import java.util.ArrayList; -import java.util.regex.Pattern; import java.util.regex.Matcher; +import java.util.regex.Pattern; import static org.broadinstitute.sting.gatk.refdata.features.sampileup.SAMPileupFeature.VariantType; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupFeature.java index a794c2704..378f26934 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupFeature.java @@ -25,12 +25,11 @@ package org.broadinstitute.sting.gatk.refdata.features.sampileup; +import net.sf.samtools.util.StringUtil; import org.broad.tribble.Feature; import java.util.List; -import net.sf.samtools.util.StringUtil; - /** * A tribble feature representing a SAM pileup. * diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/samread/SAMReadCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/samread/SAMReadCodec.java index 15f559d46..039b8adde 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/samread/SAMReadCodec.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/samread/SAMReadCodec.java @@ -24,14 +24,14 @@ package org.broadinstitute.sting.gatk.refdata.features.samread; +import net.sf.samtools.Cigar; +import net.sf.samtools.TextCigarCodec; +import net.sf.samtools.util.StringUtil; import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.readers.LineReader; import org.broad.tribble.util.ParsingUtils; -import net.sf.samtools.util.StringUtil; -import net.sf.samtools.TextCigarCodec; -import net.sf.samtools.Cigar; /** * Decodes a simple SAM text string. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/BedTableCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/BedTableCodec.java index b831606a3..745ccdd9f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/BedTableCodec.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/BedTableCodec.java @@ -1,13 +1,8 @@ package org.broadinstitute.sting.gatk.refdata.features.table; import org.broad.tribble.Feature; -import org.broad.tribble.readers.LineReader; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.UserException; -import java.io.IOException; -import java.util.ArrayList; import java.util.Arrays; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java old mode 100644 new mode 100755 index 6f0a712bf..ab1ac59d8 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java @@ -1,16 +1,14 @@ package org.broadinstitute.sting.gatk.refdata.features.table; import org.broad.tribble.Feature; -import org.broad.tribble.FeatureCodec; import org.broad.tribble.readers.LineReader; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.interval.IntervalUtils; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; /** * implementation of a simple table (tab or comma delimited format) input files diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableFeature.java old mode 100644 new mode 100755 index 6ff0384a0..ca73ee960 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableFeature.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.refdata.features.table; import org.broad.tribble.Feature; import org.broadinstitute.sting.utils.GenomeLoc; -import java.util.*; +import java.util.List; /** * A feature representing a single row out of a text table @@ -55,10 +55,14 @@ public class TableFeature implements Feature { } public List getAllValues() { - return getValuesTo(values.size()-1); + return getValuesTo(values.size()); } public List getValuesTo(int columnPosition) { return values.subList(0,columnPosition); } + + public List getHeader() { + return keys; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java index 5bb65f9a2..085d6b5b3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java @@ -10,7 +10,6 @@ import org.broad.tribble.util.LittleEndianOutputStream; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java 
b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java index 19050ae11..731df997d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java @@ -28,7 +28,6 @@ import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.utils.GenomeLoc; import java.io.IOException; -import java.util.Iterator; /** * @author aaron diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java index 3b9f8243f..ba1ca674e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java @@ -35,8 +35,6 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; import java.io.IOException; -import java.lang.reflect.Type; -import java.util.Iterator; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java index c2057ad5e..19c91be1b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java @@ -42,15 +42,17 @@ import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SequenceDictionaryUtils; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.exceptions.UserException; +import 
org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.file.FSLockWithShared; import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException; import org.broadinstitute.sting.utils.instrumentation.Sizeof; -import java.io.*; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java index 462bf98df..104ba87b5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java @@ -28,8 +28,6 @@ import org.broad.tribble.Feature; import org.broad.tribble.iterators.CloseableTribbleIterator; import org.broadinstitute.sting.utils.GenomeLocParser; -import java.util.Iterator; - /** * diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java index 1553402a5..59e8471a3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java @@ -28,8 +28,6 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.HasGenomeLocation; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; /** diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/LocationAwareSeekableRODIterator.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/LocationAwareSeekableRODIterator.java index 83aa5f056..96086598a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/LocationAwareSeekableRODIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/LocationAwareSeekableRODIterator.java @@ -2,12 +2,8 @@ package org.broadinstitute.sting.gatk.refdata.utils; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.util.CloseableIterator; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.utils.GenomeLoc; -import java.util.Iterator; -import java.util.List; - /** * @author aaron *

diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java index 101784d97..fc7f7c58f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java @@ -25,10 +25,10 @@ package org.broadinstitute.sting.gatk.refdata.utils; +import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.bed.BedParser; -import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import java.util.Iterator; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java index 75e7c1a32..3201769e0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java @@ -3,8 +3,8 @@ package org.broadinstitute.sting.gatk.refdata.utils.helpers; import net.sf.samtools.util.SequenceUtil; import org.broad.tribble.annotation.Strand; import org.broad.tribble.dbsnp.DbSNPFeature; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Arrays; diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index f4c565318..59d496828 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -100,7 +100,11 @@ public class GATKReport { * @param tableDescription the description of the table */ public void addTable(String tableName, String tableDescription) { - GATKReportTable table = new GATKReportTable(tableName, tableDescription); + addTable(tableName, tableDescription, true); + } + + public void addTable(String tableName, String tableDescription, boolean sortByPrimaryKey) { + GATKReportTable table = new GATKReportTable(tableName, tableDescription, sortByPrimaryKey); tables.put(tableName, table); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 0e503f92a..f7ea25696 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -3,9 +3,7 @@ package org.broadinstitute.sting.gatk.report; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.PrintStream; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.TreeSet; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -96,8 +94,9 @@ public class GATKReportTable { private String tableDescription; private String primaryKeyName; - private TreeSet primaryKeyColumn; + private Collection primaryKeyColumn; private boolean primaryKeyDisplay; + boolean sortByPrimaryKey = true; private LinkedHashMap columns; @@ -121,12 +120,17 @@ public class GATKReportTable { * @param tableDescription the description of the table */ public GATKReportTable(String tableName, String tableDescription) { - if (!isValidName(tableName)) { + this(tableName, tableDescription, true); + } + + public GATKReportTable(String tableName, String tableDescription, boolean sortByPrimaryKey) { + if (!isValidName(tableName)) { throw new 
ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed."); } this.tableName = tableName; this.tableDescription = tableDescription; + this.sortByPrimaryKey = sortByPrimaryKey; columns = new LinkedHashMap(); } @@ -137,20 +141,14 @@ public class GATKReportTable { * @param primaryKeyName the name of the primary key column */ public void addPrimaryKey(String primaryKeyName) { - if (!isValidName(primaryKeyName)) { - throw new ReviewedStingException("Attempted to set a GATKReportTable primary key name of '" + primaryKeyName + "'. GATKReportTable primary key names must be purely alphanumeric - no spaces or special characters are allowed."); - } - - this.primaryKeyName = primaryKeyName; - - primaryKeyColumn = new TreeSet(); - primaryKeyDisplay = true; + addPrimaryKey(primaryKeyName, true); } /** * Add an optionally visible primary key column. This becomes the unique identifier for every column in the table, and will always be printed as the first column. * * @param primaryKeyName the name of the primary key column + * @param display should this primary key be displayed? */ public void addPrimaryKey(String primaryKeyName, boolean display) { if (!isValidName(primaryKeyName)) { @@ -159,7 +157,7 @@ public class GATKReportTable { this.primaryKeyName = primaryKeyName; - primaryKeyColumn = new TreeSet(); + primaryKeyColumn = sortByPrimaryKey ? 
new TreeSet() : new LinkedList(); primaryKeyDisplay = display; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index e1085c6b2..89a179d0e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -25,11 +25,11 @@ package org.broadinstitute.sting.gatk.traversals; import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.ReadMetrics; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -38,7 +38,10 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.PrintStream; -import java.util.*; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; public abstract class TraversalEngine,ProviderType extends ShardDataProvider> { // Time in milliseconds since we initialized this engine diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java index 89ff688a7..1ba48ca5f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java @@ -28,12 +28,11 @@ package org.broadinstitute.sting.gatk.traversals; import 
net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.datasources.providers.ReadView; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ReadView; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.gatk.walkers.DuplicateWalker; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java index 240176f2f..232989fb0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java @@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java index f15a20cd3..196d54036 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java @@ -1,16 +1,18 @@ package org.broadinstitute.sting.gatk.traversals; -import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import 
org.broadinstitute.sting.gatk.walkers.ReadPairWalker; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMRecordCoordinateComparator; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.ReadView; import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.apache.log4j.Logger; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMRecordCoordinateComparator; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.ReadPairWalker; +import org.broadinstitute.sting.gatk.walkers.Requires; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; /** * Traverse over a collection of read pairs, assuming that a given shard will contain all pairs. diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java index 670676b48..06e4539c4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java @@ -2,14 +2,16 @@ package org.broadinstitute.sting.gatk.traversals; import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.ReadMetrics; +import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.providers.*; +import org.broadinstitute.sting.gatk.datasources.providers.ReadBasedReferenceOrderedView; +import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView; +import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ReadView; 
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLoc; /* diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Allows.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Allows.java index a29e51189..2541921e9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Allows.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Allows.java @@ -1,11 +1,6 @@ package org.broadinstitute.sting.gatk.walkers; -import java.lang.annotation.Documented; -import java.lang.annotation.Inherited; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; -import java.lang.annotation.ElementType; +import java.lang.annotation.*; /** * User: hanna * Date: May 19, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/BAQMode.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/BAQMode.java index 99dd46cbe..03097887d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/BAQMode.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/BAQMode.java @@ -1,11 +1,6 @@ package org.broadinstitute.sting.gatk.walkers; -import java.lang.annotation.Documented; -import java.lang.annotation.Inherited; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; -import java.lang.annotation.ElementType; +import java.lang.annotation.*; /** * User: hanna diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/By.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/By.java index 25455b587..8fa6a4c1b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/By.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/By.java @@ 
-1,11 +1,6 @@ package org.broadinstitute.sting.gatk.walkers; -import java.lang.annotation.Documented; -import java.lang.annotation.Inherited; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; -import java.lang.annotation.ElementType; +import java.lang.annotation.*; /** * User: hanna * Date: May 14, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java index 1a3f87a7a..ca4e3f5e3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java @@ -25,29 +25,29 @@ package org.broadinstitute.sting.gatk.walkers; -import net.sf.samtools.*; -import net.sf.picard.reference.ReferenceSequenceFileFactory; -import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.picard.reference.ReferenceSequence; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import net.sf.picard.reference.ReferenceSequenceFile; +import net.sf.picard.reference.ReferenceSequenceFileFactory; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.util.StringUtil; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.clipreads.ClippingOp; import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation; import org.broadinstitute.sting.utils.clipreads.ReadClipper; import org.broadinstitute.sting.utils.collections.Pair; -import 
org.broadinstitute.sting.gatk.io.StingSAMFileWriter; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.utils.sam.ReadUtils; -import java.util.*; -import java.util.regex.Pattern; -import java.util.regex.Matcher; import java.io.File; import java.io.PrintStream; - -import net.sf.samtools.util.StringUtil; -import org.broadinstitute.sting.utils.sam.ReadUtils; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * This ReadWalker provides simple, yet powerful read clipping capabilities. It allows the user to clip bases in reads diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java index 1fc606f07..4bfedb672 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java @@ -1,16 +1,13 @@ package org.broadinstitute.sting.gatk.walkers; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter; +import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; import org.broadinstitute.sting.utils.GenomeLoc; import java.util.List; import java.util.Set; -import java.util.Arrays; - -import net.sf.samtools.SAMRecord; -import net.sf.picard.filter.SamRecordFilter; /** * Created by IntelliJ IDEA. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/FindReadsWithNamesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/FindReadsWithNamesWalker.java index a272150c7..56287df31 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/FindReadsWithNamesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/FindReadsWithNamesWalker.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMFileWriter; -import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java index 13a55eaac..fcfcb81b5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import java.io.PrintStream; import java.text.DecimalFormat; import java.text.NumberFormat; -import java.io.PrintStream; /* diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/GCContentByIntervalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/GCContentByIntervalWalker.java index c0f469973..68bea4dba 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/GCContentByIntervalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/GCContentByIntervalWalker.java @@ -28,13 +28,12 
@@ import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; import java.io.PrintStream; -import java.util.*; +import java.util.List; /** * Walks along reference and calculates the GC content for each interval. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java index fc4b403c5..b0b2687f4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java @@ -1,9 +1,12 @@ package org.broadinstitute.sting.gatk.walkers; -import org.broadinstitute.sting.gatk.filters.*; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter; +import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckReadFilter; +import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter; +import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; /** * Created by IntelliJ IDEA. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java index 84d868c1a..508d1f6ee 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java @@ -26,22 +26,22 @@ package org.broadinstitute.sting.gatk.walkers; import org.broad.tribble.dbsnp.DbSNPFeature; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import java.io.PrintStream; import java.util.ArrayList; import java.util.List; -import java.io.PrintStream; /** * Prints the alignment in the pileup format. 
In the pileup format, each line represents a genomic position, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java index 9ac3fc0e6..158992a22 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java @@ -25,15 +25,15 @@ package org.broadinstitute.sting.gatk.walkers; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.commandline.Output; -import java.util.Iterator; import java.io.PrintStream; +import java.util.Iterator; /** * Prints out all of the RODs in the input data set. 
Data is rendered using the toString() method diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index 07938d322..7e1dcd707 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -28,14 +28,18 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.baq.BAQ; -import java.io.PrintStream; +import java.io.File; +import java.util.Collection; +import java.util.Set; +import java.util.TreeSet; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; /** * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear * in the input file. It can dynamically merge the contents of multiple input BAM files, resulting @@ -54,6 +58,13 @@ public class PrintReadsWalker extends ReadWalker { String platform = null; // E.g. ILLUMINA, 454 @Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false) int nReadsToPrint = -1; + @Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line). Can be specified multiple times", required=false) + public Set sampleFile = new TreeSet(); + @Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. 
Can be specified multiple times.", required=false) + public Set sampleNames = new TreeSet(); + + private TreeSet samplesToChoose = new TreeSet(); + private boolean SAMPLES_SPECIFIED = false; /** * The initialize function. @@ -61,6 +72,20 @@ public class PrintReadsWalker extends ReadWalker { public void initialize() { if ( platform != null ) platform = platform.toUpperCase(); + + Collection samplesFromFile; + if (!sampleFile.isEmpty()) { + samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFile); + samplesToChoose.addAll(samplesFromFile); + } + + if (!sampleNames.isEmpty()) + samplesToChoose.addAll(sampleNames); + + if(!samplesToChoose.isEmpty()) { + SAMPLES_SPECIFIED = true; + } + } /** @@ -87,6 +112,14 @@ public class PrintReadsWalker extends ReadWalker { if ( readPlatformAttr == null || !readPlatformAttr.toString().toUpperCase().contains(platform)) return false; } + if (SAMPLES_SPECIFIED ) { + // user specified samples to select + // todo - should be case-agnostic but for simplicity and speed this is ignored. + // todo - can check at initialization intersection of requested samples and samples in BAM header to further speedup. + if (!samplesToChoose.contains(read.getReadGroup().getSample())) + return false; + } + // check if we've reached the output limit if ( nReadsToPrint == 0 ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java index a5486fd9a..db2038aa3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; /** * Created by IntelliJ IDEA. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Requires.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Requires.java index 6c1e64c4e..e9a381a85 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Requires.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Requires.java @@ -1,11 +1,6 @@ package org.broadinstitute.sting.gatk.walkers; -import java.lang.annotation.Documented; -import java.lang.annotation.Inherited; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; -import java.lang.annotation.ElementType; +import java.lang.annotation.*; /** * User: hanna * Date: May 19, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java index 014acff9c..486d233b7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java @@ -25,14 +25,20 @@ package org.broadinstitute.sting.gatk.walkers; -import net.sf.samtools.*; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.utils.sam.ReadUtils; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMFileWriter; +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.utils.sam.ReadUtils; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; /** * Divides the input 
data set into separate BAM files, one for each sample in the input data set. The split diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index f0ba8bb46..384742302 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -25,14 +25,14 @@ package org.broadinstitute.sting.gatk.walkers; -import java.util.List; - +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.filters.MalformedReadFilter; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.collections.Pair; -import org.apache.log4j.Logger; + +import java.util.List; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/WalkerName.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/WalkerName.java index 0e4d40675..4d46607e5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/WalkerName.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/WalkerName.java @@ -1,10 +1,6 @@ package org.broadinstitute.sting.gatk.walkers; -import java.lang.annotation.Documented; -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; +import java.lang.annotation.*; /** * Created by IntelliJ IDEA. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Window.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Window.java index 0b718071d..9827fdf09 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Window.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Window.java @@ -25,7 +25,10 @@ package org.broadinstitute.sting.gatk.walkers; -import java.lang.annotation.*; +import java.lang.annotation.Documented; +import java.lang.annotation.Inherited; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; /** * Describes the size of the window into the genome. Has differing semantics based on diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index b02dcd8e2..3144098a8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -25,21 +25,21 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import 
org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; public class AlleleBalance implements InfoFieldAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java index 0be737897..a99f87a70 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java @@ -1,12 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.gatk.contexts.*; -import org.broadinstitute.sting.utils.variantcontext.*; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import 
org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; @@ -62,5 +66,5 @@ public class AlleleBalanceBySample implements GenotypeAnnotation, ExperimentalAn public List getKeyNames() { return Arrays.asList("AB"); } - public List getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), -1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); } + public List getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java index 51b5381dc..6c14e7445 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java @@ -1,8 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.variantcontext.Genotype; + import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java index 244627154..66416ce11 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java @@ -31,19 +31,19 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; 
-import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; public class BaseCounts implements InfoFieldAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java index ff916bedd..2a5c996f7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java @@ -1,16 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import 
org.broadinstitute.sting.utils.variantcontext.Allele; +import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; -import java.util.Arrays; public class BaseQualityRankSumTest extends RankSumTest { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index 143722d7c..74f7f9d80 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -25,24 +25,29 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; -import java.util.*; +import java.util.Arrays; +import 
java.util.HashMap; +import java.util.List; +import java.util.Map; public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation { private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; - private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, -1, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"), - new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, -1, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"), + private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"), + new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"), new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") }; public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index e56825dbe..c384e0d09 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -1,18 +1,19 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import 
org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; public class DepthOfCoverage implements InfoFieldAnnotation, StandardAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 754d28dfd..e3e8bc258 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -1,20 +1,20 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCompoundHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import 
org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; @@ -142,5 +142,5 @@ public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnot // public String getIndelBases() public List getKeyNames() { return Arrays.asList("AD"); } - public List getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), VCFCompoundHeaderLine.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")); } + public List getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")); } } \ No newline at end of file diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index 2115526a6..97ed221e7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -24,20 +24,20 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import cern.jet.math.Arithmetic; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import cern.jet.math.Arithmetic; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java 
index 5eaa30bf3..48677bbe5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java @@ -1,18 +1,19 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; public class GCContent implements InfoFieldAnnotation, ExperimentalAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java index 3a5db2884..cca0ad4bc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java @@ -1,19 +1,20 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import 
org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; /** * Created by IntelliJ IDEA. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index bd8c51a41..b175579f1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -24,25 +24,29 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.genotype.Haplotype; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import 
org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; -import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.genotype.Haplotype; -import org.broadinstitute.sting.utils.pileup.*; import java.util.*; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.sam.AlignmentUtils; public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation { private final static boolean DEBUG = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java index ecebfbbd2..d86728d5e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java @@ -1,20 +1,21 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broad.tribble.util.popgen.HardyWeinbergCalculation; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import 
org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.WorkInProgressAnnotation; import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; public class HardyWeinberg implements InfoFieldAnnotation, WorkInProgressAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index 099780fa7..02efd854c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -1,19 +1,19 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import 
org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java index ee8b01d7d..2fd62ddf3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java @@ -1,14 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.IndelUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; @@ -24,11 +24,27 @@ public class IndelType implements InfoFieldAnnotation, ExperimentalAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { int run; - if ( vc.isIndel() && vc.isBiallelic() ) { + if (vc.isMixed()) { + Map map = new HashMap(); + map.put(getKeyNames().get(0), String.format("%s", "MIXED")); + return map; + + } + 
else if ( vc.isIndel() ) { String type=""; - ArrayList inds = IndelUtils.findEventClassificationIndex(vc, ref); - for (int k : inds) { - type = type+ IndelUtils.getIndelClassificationName(k)+"."; + if (!vc.isBiallelic()) + type = "MULTIALLELIC_INDEL"; + else { + if (vc.isInsertion()) + type = "INS."; + else if (vc.isDeletion()) + type = "DEL."; + else + type = "OTHER."; + ArrayList inds = IndelUtils.findEventClassificationIndex(vc, ref); + for (int k : inds) { + type = type+ IndelUtils.getIndelClassificationName(k)+"."; + } } Map map = new HashMap(); map.put(getKeyNames().get(0), String.format("%s", type)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java index 33069f1f5..1d999c531 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java @@ -1,19 +1,19 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import 
java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; public class LowMQ implements InfoFieldAnnotation { @@ -42,5 +42,5 @@ public class LowMQ implements InfoFieldAnnotation { public List getKeyNames() { return Arrays.asList("LowMQ"); } - public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 3, VCFHeaderLineType.Integer, "3-tuple: ,,")); } + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 3, VCFHeaderLineType.Float, "3-tuple: ,,")); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java index 11f86b972..cc62580a9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java @@ -1,16 +1,17 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; -import java.util.Arrays; public class MappingQualityRankSumTest extends RankSumTest { @@ -21,7 +22,7 @@ 
public class MappingQualityRankSumTest extends RankSumTest { protected void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List refQuals, List altQuals) { for ( final PileupElement p : pileup ) { - if( isUsableBase(p) && p.getMappingQual() < 254 ) { // 254 and 255 are special mapping qualities used as a code by aligners + if ( isUsableBase(p) ) { if ( p.getBase() == ref ) { refQuals.add((double)p.getMappingQual()); } else if ( p.getBase() == alt ) { @@ -34,7 +35,7 @@ public class MappingQualityRankSumTest extends RankSumTest { // equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ? HashMap> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(); for (final PileupElement p: pileup) { - if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() < 254) { + if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() != 0 && p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE) { // retrieve likelihood information corresponding to this read LinkedHashMap el = indelLikelihoodMap.get(p); // by design, first element in LinkedHashMap was ref allele @@ -54,8 +55,6 @@ public class MappingQualityRankSumTest extends RankSumTest { refQuals.add((double)p.getMappingQual()); else if (altLikelihood > refLikelihood + INDEL_LIKELIHOOD_THRESH) altQuals.add((double)p.getMappingQual()); - - } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java index 25a7b286d..f240d02bc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java @@ -1,16 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; 
-import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java index 00cc30309..0ca53adf2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java @@ -25,22 +25,22 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import 
org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; /** * Created by IntelliJ IDEA. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java index dc4934ade..08a25a7e3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java @@ -1,15 +1,15 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java index ba3e2cc8b..1c70a1b33 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java @@ -1,14 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import 
org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; @@ -47,5 +47,5 @@ public class NBaseCount implements InfoFieldAnnotation { public List getKeyNames() { return Arrays.asList("PercentNBaseSolid"); } - public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("PercentNBaseSolid", 4, VCFHeaderLineType.Float, "Percentage of N bases in the pileup (counting only SOLiD reads)")); } + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("PercentNBaseSolid", 1, VCFHeaderLineType.Float, "Percentage of N bases in the pileup (counting only SOLiD reads)")); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index 720984835..2175d39e6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -1,19 +1,19 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import 
org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; public class QualByDepth extends AnnotationByDepth implements InfoFieldAnnotation, StandardAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index 6e80c7555..d52f07b58 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -1,19 +1,23 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import 
org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; public class RMSMappingQuality implements InfoFieldAnnotation, StandardAnnotation { @@ -38,8 +42,10 @@ public class RMSMappingQuality implements InfoFieldAnnotation, StandardAnnotatio pileup = context.getBasePileup(); if (pileup != null) { - for (PileupElement p : pileup ) - qualities[index++] = p.getRead().getMappingQuality(); + for (PileupElement p : pileup ) { + if ( p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE ) + qualities[index++] = p.getMappingQual(); + } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index 1a967293f..5466828f6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -1,22 +1,23 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import 
org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.MannWhitneyU; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.List; import java.util.ArrayList; -import java.util.Map; import java.util.HashMap; +import java.util.List; +import java.util.Map; @@ -106,6 +107,9 @@ public abstract class RankSumTest implements InfoFieldAnnotation, StandardAnnota protected abstract void fillIndelQualsFromPileup(ReadBackedPileup pileup, List refQuals, List altQuals); protected static boolean isUsableBase( final PileupElement p ) { - return !( p.isDeletion() || p.getMappingQual() == 0 || ((int)p.getQual()) < 6 ); // need the unBAQed quality score here + return !( p.isDeletion() || + p.getMappingQual() == 0 || + p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE || + ((int)p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE ); // need the unBAQed quality score here } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java index f287549bb..c56e2622d 100644 --- 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java @@ -26,24 +26,24 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; -import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCompoundHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import 
java.util.Arrays; +import java.util.Map; /** * Created by IntelliJ IDEA. @@ -200,8 +200,8 @@ public class ReadDepthAndAllelicFractionBySample implements GenotypeAnnotation { 1, VCFHeaderLineType.Integer, "Total read depth per sample, including MQ0"), - new VCFFormatHeaderLine(getKeyNames().get(1), - VCFCompoundHeaderLine.UNBOUNDED, + new VCFFormatHeaderLine(getKeyNames().get(1), + VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Float, "Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample")); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java index 727904a3b..aabfb2970 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java @@ -1,14 +1,17 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import net.sf.samtools.*; +import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.Arrays; 
import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java index fc769ac54..a5ebd8db2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java index 82f16be42..ff9092a71 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java @@ -25,14 +25,15 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import 
org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; @@ -65,5 +66,5 @@ public class SampleList implements InfoFieldAnnotation { public List getKeyNames() { return Arrays.asList("Samples"); } - public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Samples", VCFInfoHeaderLine.UNBOUNDED, VCFHeaderLineType.String, "List of polymorphic samples")); } + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Samples", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "List of polymorphic samples")); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index 0b6cbcc2e..a4668eeb6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -1,14 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import 
org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java new file mode 100755 index 000000000..b46d82d8b --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java @@ -0,0 +1,79 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.sam.ReadUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Created by 
IntelliJ IDEA. + * User: delangel + * Date: 6/29/11 + * Time: 3:14 PM + * To change this template use File | Settings | File Templates. + */ +public class TechnologyComposition implements ExperimentalAnnotation,InfoFieldAnnotation { + private String nSLX = "NumSLX"; + private String n454 ="Num454"; + private String nSolid = "NumSOLiD"; + private String nOther = "NumOther"; + public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + if ( stratifiedContexts.size() == 0 ) + return null; + + int readsIllumina = 0; + int readsSolid = 0; + int reads454 = 0; + int readsOther = 0; + + for ( Map.Entry sample : stratifiedContexts.entrySet() ) { + AlignmentContext context = sample.getValue(); + + ReadBackedPileup pileup = null; + if (context.hasExtendedEventPileup()) + pileup = context.getExtendedEventPileup(); + else if (context.hasBasePileup()) + pileup = context.getBasePileup(); + + if (pileup != null) { + for (PileupElement p : pileup ) { + if(ReadUtils.is454Read(p.getRead())) + reads454++; + else if (ReadUtils.isSOLiDRead(p.getRead())) + readsSolid++; + else if (ReadUtils.isSLXRead(p.getRead())) + readsIllumina++; + else + readsOther++; + } + } + } + + Map map = new HashMap(); + map.put(nSLX, String.format("%d", readsIllumina)); + map.put(n454, String.format("%d", reads454)); + map.put(nSolid, String.format("%d", readsSolid)); + map.put(nOther, String.format("%d", readsOther)); + return map; + } + + public List getKeyNames() { return Arrays.asList(nSLX,n454,nSolid,nOther); } + + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(nSLX, 1, VCFHeaderLineType.Integer, "Number of SLX reads"), + new VCFInfoHeaderLine(n454, 1, VCFHeaderLineType.Integer, "Number of 454 reads"), + new VCFInfoHeaderLine(nSolid, 1, VCFHeaderLineType.Integer, "Number of SOLiD reads"), + new VCFInfoHeaderLine(nOther, 1, VCFHeaderLineType.Integer, "Number of Other technology reads")); } + +} diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index cd5b6694b..acbeee3b2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -25,24 +25,23 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import 
java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 54c8be73a..fdf498a3d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -25,30 +25,27 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.Map.Entry; - -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; -import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.*; +import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.GenomicAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.JoinTable; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationInterfaceManager; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; +import 
org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.*; +import java.util.Map.Entry; public class VariantAnnotatorEngine { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java index e02c62baf..05c1b3c52 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java @@ -25,13 +25,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; -import java.util.*; -import java.util.Map.Entry; - -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -39,7 +32,14 @@ import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTa import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import 
org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.*; +import java.util.Map.Entry; /** * This plugin for {@link VariantAnnotatorEngine} serves as the core diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java index 69a35a584..b42310780 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java @@ -26,28 +26,28 @@ package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; -import java.io.File; -import java.io.IOException; -import java.util.*; -import java.util.Map.Entry; - -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import 
org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableCodec; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.io.File; +import java.io.IOException; +import java.util.*; +import java.util.Map.Entry; /** * Annotates variant calls with information from user-specified tabular files. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTable.java index c57aacb5b..714f374cf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTable.java @@ -25,6 +25,9 @@ package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; + import java.io.BufferedReader; import java.io.File; import java.io.FileReader; @@ -33,9 +36,6 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; - /** * This is a container that holds all data corresponding to a single join table as specified by one -J arg (ex: 
-J bindingName1,/path/to/file,bindingName1.columnName=bindingName2.columnName2). * Some terminology: diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTableParser.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTableParser.java index d3fcfd42a..3b6c87f90 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTableParser.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTableParser.java @@ -25,16 +25,16 @@ package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; + import java.io.BufferedReader; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.exceptions.UserException; - /** * Used to parse files passed to the GenomicAnnotator via the -J arg. 
* The files must be tab-delimited, and the first non-empty/non-commented line diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java index 0ed61fc48..0bbfa51b4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java @@ -24,9 +24,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; -import java.io.*; -import java.util.*; - import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -36,17 +33,15 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableCodec; import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.gatk.walkers.By; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.RMD; -import org.broadinstitute.sting.gatk.walkers.Reference; -import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.gatk.walkers.Window; +import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import java.io.IOException; +import java.io.PrintStream; +import java.util.*; + /** * Takes a table of transcripts (eg. 
UCSC refGene, knownGene, and CCDS tables) and generates the big table which contains * annotations for each possible variant at each transcript position (eg. 4 variants at each genomic position). diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java index 29b256479..57bc44ab8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java @@ -1,14 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import java.util.Map; import java.util.List; +import java.util.Map; public interface GenotypeAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java index 5b33395b5..4e850d01b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java @@ -1,13 +1,13 @@ package 
org.broadinstitute.sting.gatk.walkers.annotator.interfaces; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; import java.util.List; +import java.util.Map; public interface InfoFieldAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index aa23abc67..21c8ec430 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -25,26 +25,26 @@ package org.broadinstitute.sting.gatk.walkers.beagle; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.refdata.features.beagle.BeagleFeature; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RodWalker; +import 
org.broadinstitute.sting.gatk.refdata.features.beagle.BeagleFeature; import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; + import static java.lang.Math.log10; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index 93ee0b085..3eed12992 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -25,10 +25,6 @@ package org.broadinstitute.sting.gatk.walkers.beagle; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Input; @@ -36,17 +32,20 @@ import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import 
org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VQSRCalibrationCurve; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.StingException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java index 18aa3e257..f6cd1d636 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java @@ -25,28 +25,29 @@ package org.broadinstitute.sting.gatk.walkers.beagle; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import 
org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.PrintStream; -import java.util.*; +import java.util.Arrays; +import java.util.Set; /** * Produces an input file to Beagle imputation engine, listing unphased, hard-called genotypes for a single sample diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java index 2c67265d6..90e6fcd77 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java @@ -22,6 +22,8 @@ package org.broadinstitute.sting.gatk.walkers.coverage; +import 
org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -31,12 +33,10 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; import java.io.File; -import java.io.PrintStream; import java.io.FileNotFoundException; +import java.io.PrintStream; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoarseCoverageWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoarseCoverageWalker.java index ae947eac1..405a44c29 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoarseCoverageWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoarseCoverageWalker.java @@ -26,12 +26,11 @@ package org.broadinstitute.sting.gatk.walkers.coverage; import net.sf.samtools.SAMRecord; - -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java index 5a9c62b7f..6b91b0198 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java @@ -22,19 +22,19 @@ package org.broadinstitute.sting.gatk.walkers.coverage; +import org.broad.tribble.bed.FullBEDFeature; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broad.tribble.bed.FullBEDFeature; import org.broadinstitute.sting.utils.exceptions.UserException; -import java.util.*; import java.io.PrintStream; +import java.util.Arrays; +import java.util.List; /** * Test routine for new VariantContext object diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java index 298aa90b9..a41e55166 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java @@ -8,7 +8,9 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; -import java.util.*; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; /** * IF THERE 
IS NO JAVADOC RIGHT HERE, YELL AT chartl diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java index 91ae81cd5..c1956f1d7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java @@ -26,6 +26,8 @@ package org.broadinstitute.sting.gatk.walkers.coverage; import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -34,12 +36,13 @@ import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; -import org.broadinstitute.sting.gatk.refdata.utils.*; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java new file mode 100644 index 000000000..a1c043365 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMRecordIterator; +import net.sf.samtools.util.BlockCompressedInputStream; + +import java.io.*; +import java.util.Arrays; + + +/** + * Created by IntelliJ IDEA. 
+ * User: depristo
+ * Date: 7/4/11
+ * Time: 1:09 PM
+ *
+ * Class implementing diffnode reader for BAM
+ *
+ * Each SAM record becomes one child node of the root; the record's standard
+ * fields and its optional tags become the leaves of that node.
+ */
+public class BAMDiffableReader implements DiffableReader {
+    /**
+     * @return the name of this reader, always "BAM"
+     */
+    @Override
+    public String getName() { return "BAM"; }
+
+    /**
+     * Reads records from a BAM file into a tree of diff nodes.
+     *
+     * @param file              the BAM file to read; its name becomes the name of the root node
+     * @param maxElementsToRead the maximum number of records to consume, or -1 for no limit
+     * @return the element bound to the root node of the tree that was built
+     */
+    @Override
+    public DiffElement readFromFile(File file, int maxElementsToRead) {
+        final SAMFileReader reader = new SAMFileReader(file, null); // null because we don't want it to look for the index
+        reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
+
+        DiffNode root = DiffNode.rooted(file.getName());
+
+        // the finally block releases the reader even if a record fails part-way through the file
+        try {
+            SAMRecordIterator iterator = reader.iterator();
+
+            int count = 0;
+            while ( iterator.hasNext() ) {
+                // stop after exactly maxElementsToRead records; the previous test
+                // (count++ > maxElementsToRead) let one extra record through
+                if ( maxElementsToRead != -1 && count >= maxElementsToRead )
+                    break;
+                count++;
+                final SAMRecord record = iterator.next();
+
+                // name is the read name + first of pair
+                // '.' is replaced because fully qualified element names are joined with '.'
+                String name = record.getReadName().replace('.', '_');
+                if ( record.getReadPairedFlag() ) {
+                    name += record.getFirstOfPairFlag() ? "_1" : "_2";
+                }
+
+                DiffNode readRoot = DiffNode.empty(name, root);
+
+                // add fields
+                readRoot.add("NAME", record.getReadName());
+                readRoot.add("FLAGS", record.getFlags());
+                readRoot.add("RNAME", record.getReferenceName());
+                readRoot.add("POS", record.getAlignmentStart());
+                readRoot.add("MAPQ", record.getMappingQuality());
+                readRoot.add("CIGAR", record.getCigarString());
+                readRoot.add("RNEXT", record.getMateReferenceName());
+                readRoot.add("PNEXT", record.getMateAlignmentStart());
+                readRoot.add("TLEN", record.getInferredInsertSize());
+                readRoot.add("SEQ", record.getReadString());
+                readRoot.add("QUAL", record.getBaseQualityString());
+
+                // optional tags are stored under their own tag names
+                for ( SAMRecord.SAMTagAndValue xt : record.getAttributes() ) {
+                    readRoot.add(xt.tag, xt.value);
+                }
+
+                // add record to root
+                if ( ! root.hasElement(name) )
+                    // protect ourselves from malformed files
+                    root.add(readRoot);
+            }
+        } finally {
+            reader.close();
+        }
+
+        return root.getBinding();
+    }
+
+    /**
+     * Tests whether a file looks like a BAM file by checking that it is a valid
+     * block-compressed file whose decompressed content starts with the BAM magic bytes.
+     *
+     * @param file the file to probe
+     * @return true if the magic bytes match; false if they do not, or if the file
+     *         cannot be read or is truncated
+     */
+    @Override
+    public boolean canRead(File file) {
+        final byte[] BAM_MAGIC = "BAM\1".getBytes();
+        final byte[] buffer = new byte[BAM_MAGIC.length];
+        InputStream fstream = null;
+        try {
+            fstream = new BufferedInputStream(new FileInputStream(file));
+            if ( !BlockCompressedInputStream.isValidFile(fstream) )
+                return false;
+            new BlockCompressedInputStream(fstream).read(buffer, 0, BAM_MAGIC.length);
+            return Arrays.equals(buffer, BAM_MAGIC);
+        } catch ( IOException e ) {
+            return false;
+        } catch ( net.sf.samtools.FileTruncatedException e ) {
+            return false;
+        } finally {
+            // always release the file handle; this method is only a probe
+            if ( fstream != null ) {
+                try {
+                    fstream.close();
+                } catch ( IOException e ) {
+                    // best effort: a failure to close does not change the answer
+                }
+            }
+        }
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java
new file mode 100644
index 000000000..eb8a71c2c
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.diffengine;
+
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Invariant;
+import com.google.java.contract.Requires;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: depristo
+ * Date: 7/4/11
+ * Time: 12:55 PM
+ *
+ * A named element of a tree of structured objects being compared. An element
+ * binds a name to a DiffValue and records the parent element it hangs from.
+ * The singleton ROOT is the only element without a parent.
+ */
+@Invariant({
+        "name != null",
+        "value != null",
+        "parent != null || name.equals(\"ROOT\")",
+        "value == null || value.getBinding() == this"})
+public class DiffElement {
+    /** The single root element; its name is the reserved name "ROOT" and its parent is null */
+    public final static DiffElement ROOT = new DiffElement();
+
+    final private String name;
+    final private DiffElement parent;
+    final private DiffValue value;
+
+    /**
+     * For ROOT only
+     */
+    private DiffElement() {
+        this.name = "ROOT";
+        this.parent = null;
+        this.value = new DiffValue(this, "ROOT");
+    }
+
+    /**
+     * Creates a non-root element and binds the value to it.
+     *
+     * @param name   the element's name; the reserved name "ROOT" is rejected
+     * @param parent the element this one hangs from
+     * @param value  the value held by this element; its binding is set to this element
+     * @throws IllegalArgumentException if name is "ROOT"
+     */
+    @Requires({"name != null", "parent != null", "value != null"})
+    public DiffElement(String name, DiffElement parent, DiffValue value) {
+        if ( name.equals("ROOT") ) throw new IllegalArgumentException("Cannot use reserved name ROOT");
+        this.name = name;
+        this.parent = parent;
+        this.value = value;
+        this.value.setBinding(this);
+    }
+
+    /** @return the name of this element */
+    @Ensures({"result != null"})
+    public String getName() {
+        return name;
+    }
+
+    /** @return the parent element, or null for ROOT */
+    public DiffElement getParent() {
+        return parent;
+    }
+
+    /** @return the value bound to this element */
+    @Ensures({"result != null"})
+    public DiffValue getValue() {
+        return value;
+    }
+
+    /** @return true only for the ROOT singleton */
+    public boolean isRoot() { return this == ROOT; }
+
+    /** @return this element rendered as name=value */
+    @Ensures({"result != null"})
+    @Override
+    public String toString() {
+        return getName() + "=" + getValue().toString();
+    }
+
+    /**
+     * Renders this element as name=value with a leading indent.
+     *
+     * @param offset the indent passed to Utils.dupString(' ', offset) and to the
+     *               value's own toString(offset); values of zero or less add no indent
+     * @return the indented rendering
+     */
+    public String toString(int offset) {
+        // the no-indent case must contribute an empty string; the integer 0 used
+        // here before was concatenated as the character "0"
+        return (offset > 0 ? Utils.dupString(' ', offset) : "") + getName() + "=" + getValue().toString(offset);
+    }
+
+    /**
+     * @return the dot-separated path of names from below ROOT down to this element;
+     *         the empty string for ROOT itself
+     */
+    @Ensures({"result != null"})
+    public final String fullyQualifiedName() {
+        if ( isRoot() )
+            return "";
+        else if ( parent.isRoot() )
+            return name;
+        else
+            return parent.fullyQualifiedName() + "." + name;
+    }
+
+    /** @return this element rendered as name=value using the value's one-line form */
+    @Ensures({"result != null"})
+    public String toOneLineString() {
+        return getName() + "=" + getValue().toOneLineString();
+    }
+
+    /**
+     * @return the value of this element cast to a DiffNode
+     * @throws ReviewedStingException if the value is not compound
+     */
+    @Ensures({"result != null"})
+    public DiffNode getValueAsNode() {
+        if ( getValue().isCompound() )
+            return (DiffNode)getValue();
+        else
+            throw new ReviewedStingException("Illegal request conversion of a DiffValue into a DiffNode: " + this);
+    }
+
+    /** @return one for this element plus the size reported by its value */
+    public int size() {
+        return 1 + getValue().size();
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java
new file mode 100644
index 000000000..89e20dad1
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java
@@ -0,0 +1,376 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.report.GATKReport; +import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; + +import java.io.File; +import java.io.PrintStream; +import java.util.*; + +/** + * Created by IntelliJ IDEA. 
+ * User: depristo + * Date: 7/4/11 + * Time: 12:51 PM + * A generic engine for comparing tree-structured objects + */ +public class DiffEngine { + final protected static Logger logger = Logger.getLogger(DiffEngine.class); + + private final Map readers = new HashMap(); + + public DiffEngine() { + loadDiffableReaders(); + } + + // -------------------------------------------------------------------------------- + // + // difference calculation + // + // -------------------------------------------------------------------------------- + + public List diff(DiffElement master, DiffElement test) { + DiffValue masterValue = master.getValue(); + DiffValue testValue = test.getValue(); + + if ( masterValue.isCompound() && masterValue.isCompound() ) { + return diff(master.getValueAsNode(), test.getValueAsNode()); + } else if ( masterValue.isAtomic() && testValue.isAtomic() ) { + return diff(masterValue, testValue); + } else { + // structural difference in types. one is node, other is leaf + return Arrays.asList(new Difference(master, test)); + } + } + + public List diff(DiffNode master, DiffNode test) { + Set allNames = new HashSet(master.getElementNames()); + allNames.addAll(test.getElementNames()); + List diffs = new ArrayList(); + + for ( String name : allNames ) { + DiffElement masterElt = master.getElement(name); + DiffElement testElt = test.getElement(name); + if ( masterElt == null && testElt == null ) { + throw new ReviewedStingException("BUG: unexceptedly got two null elements for field: " + name); + } else if ( masterElt == null || testElt == null ) { // if either is null, we are missing a value + // todo -- should one of these be a special MISSING item? 
+ diffs.add(new Difference(masterElt, testElt)); + } else { + diffs.addAll(diff(masterElt, testElt)); + } + } + + return diffs; + } + + public List diff(DiffValue master, DiffValue test) { + if ( master.getValue().equals(test.getValue()) ) { + return Collections.emptyList(); + } else { + return Arrays.asList(new Difference(master.getBinding(), test.getBinding())); + } + } + + // -------------------------------------------------------------------------------- + // + // Summarizing differences + // + // -------------------------------------------------------------------------------- + + /** + * Emits a summary of the diffs to out. Suppose you have the following three differences: + * + * A.X.Z:1!=2 + * A.Y.Z:3!=4 + * B.X.Z:5!=6 + * + * The above is the itemized list of the differences. The summary looks for common differences + * in the name hierarchy, counts those shared elements, and emits the differences that occur + * in order of decreasing counts. + * + * So, in the above example, what are the shared elements? + * + * A.X.Z and B.X.Z share X.Z, so there's a *.X.Z with count 2 + * A.X.Z, A.Y.Z, and B.X.Z all share *.*.Z, with count 3 + * Each of A.X.Z, A.Y.Z, and B.X.Z are individually unique, with count 1 + * + * So we would emit the following summary: + * + * *.*.Z: 3 + * *.X.Z: 2 + * A.X.Z: 1 [specific difference: 1!=2] + * A.Y.Z: 1 [specific difference: 3!=4] + * B.X.Z: 1 [specific difference: 5!=6] + * + * The algorithm to accomplish this calculation is relatively simple. Start with all of the + * concrete differences. 
For each pair of differences A1.A2....AN and B1.B2....BN: + * + * find the longest common subsequence Si.Si+1...SN where Ai = Bi = Si + * If i == 0, then there's no shared substructure + * If i > 0, then generate the summarized value X = *.*...Si.Si+1...SN + * if X is a known summary, increment it's count, otherwise set its count to 1 + * + * Not that only pairs of the same length are considered as potentially equivalent + * + * @param params determines how we display the items + * @param diffs the list of differences to summarize + */ + public void reportSummarizedDifferences(List diffs, SummaryReportParams params ) { + printSummaryReport(summarizeDifferences(diffs), params ); + } + + public List summarizeDifferences(List diffs) { + return summarizedDifferencesOfPaths(diffs); + } + + final protected static String[] diffNameToPath(String diffName) { + return diffName.split("\\."); + } + + protected List summarizedDifferencesOfPathsFromString(List singletonDiffs) { + List diffs = new ArrayList(); + + for ( String diff : singletonDiffs ) { + diffs.add(new Difference(diff)); + } + + return summarizedDifferencesOfPaths(diffs); + } + + protected List summarizedDifferencesOfPaths(List singletonDiffs) { + Map summaries = new HashMap(); + + // create the initial set of differences + for ( int i = 0; i < singletonDiffs.size(); i++ ) { + for ( int j = 0; j <= i; j++ ) { + Difference diffPath1 = singletonDiffs.get(i); + Difference diffPath2 = singletonDiffs.get(j); + if ( diffPath1.length() == diffPath2.length() ) { + int lcp = longestCommonPostfix(diffPath1.getParts(), diffPath2.getParts()); + String path = diffPath2.getPath(); + if ( lcp != 0 && lcp != diffPath1.length() ) + path = summarizedPath(diffPath2.getParts(), lcp); + Difference sumDiff = new Difference(path, diffPath2.getMaster(), diffPath2.getTest()); + sumDiff.setCount(0); + addSummaryIfMissing(summaries, sumDiff); + } + } + } + + // count differences + for ( Difference diffPath : singletonDiffs ) { + for ( 
Difference sumDiff : summaries.values() ) { + if ( sumDiff.matches(diffPath.getParts()) ) + sumDiff.incCount(); + } + } + + List sortedSummaries = new ArrayList(summaries.values()); + Collections.sort(sortedSummaries); + return sortedSummaries; + } + + protected void addSummaryIfMissing(Map summaries, Difference diff) { + if ( ! summaries.containsKey(diff.getPath()) ) { + summaries.put(diff.getPath(), diff); + } + } + + protected void printSummaryReport(List sortedSummaries, SummaryReportParams params ) { + List toShow = new ArrayList(); + int count = 0, count1 = 0; + for ( Difference diff : sortedSummaries ) { + if ( diff.getCount() < params.minSumDiffToShow ) + // in order, so break as soon as the count is too low + break; + + if ( params.maxItemsToDisplay != 0 && count++ > params.maxItemsToDisplay ) + break; + + if ( diff.getCount() == 1 ) { + count1++; + if ( params.maxCountOneItems != 0 && count1 > params.maxCountOneItems ) + break; + } + + toShow.add(diff); + } + + // if we want it in descending order, reverse the list + if ( ! 
params.descending ) { + Collections.reverse(toShow); + } + + // now that we have a specific list of values we want to show, display them + GATKReport report = new GATKReport(); + final String tableName = "diffences"; + report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", false); + GATKReportTable table = report.getTable(tableName); + table.addPrimaryKey("Difference", true); + table.addColumn("NumberOfOccurrences", 0); + table.addColumn("ExampleDifference", 0); + for ( Difference diff : toShow ) { + table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount()); + table.set(diff.getPath(), "ExampleDifference", diff.valueDiffString()); + } + table.write(params.out); + } + + protected static int longestCommonPostfix(String[] diffPath1, String[] diffPath2) { + int i = 0; + for ( ; i < diffPath1.length; i++ ) { + int j = diffPath1.length - i - 1; + if ( ! diffPath1[j].equals(diffPath2[j]) ) + break; + } + return i; + } + + /** + * parts is [A B C D] + * commonPostfixLength: how many parts are shared at the end, suppose its 2 + * We want to create a string *.*.C.D + * + * @param parts the separated path values [above without .] 
+ * @param commonPostfixLength + * @return + */ + protected static String summarizedPath(String[] parts, int commonPostfixLength) { + int stop = parts.length - commonPostfixLength; + if ( stop > 0 ) parts = parts.clone(); + for ( int i = 0; i < stop; i++ ) { + parts[i] = "*"; + } + return Utils.join(".", parts); + } + + // -------------------------------------------------------------------------------- + // + // plugin manager + // + // -------------------------------------------------------------------------------- + + public void loadDiffableReaders() { + List> drClasses = new PluginManager( DiffableReader.class ).getPlugins(); + + logger.info("Loading diffable modules:"); + for (Class drClass : drClasses ) { + logger.info("\t" + drClass.getSimpleName()); + + try { + DiffableReader dr = drClass.newInstance(); + readers.put(dr.getName(), dr); + } catch (InstantiationException e) { + throw new ReviewedStingException("Unable to instantiate module '" + drClass.getSimpleName() + "'"); + } catch (IllegalAccessException e) { + throw new ReviewedStingException("Illegal access error when trying to instantiate '" + drClass.getSimpleName() + "'"); + } + } + } + + protected Map getReaders() { + return readers; + } + + protected DiffableReader getReader(String name) { + return readers.get(name); + } + + /** + * Returns a reader appropriate for this file, or null if no such reader exists + * @param file + * @return + */ + public DiffableReader findReaderForFile(File file) { + for ( DiffableReader reader : readers.values() ) + if (reader.canRead(file) ) + return reader; + + return null; + } + + /** + * Returns true if reader appropriate for this file, or false if no such reader exists + * @param file + * @return + */ + public boolean canRead(File file) { + return findReaderForFile(file) != null; + } + + + public DiffElement createDiffableFromFile(File file) { + return createDiffableFromFile(file, -1); + } + + public DiffElement createDiffableFromFile(File file, int 
maxElementsToRead) { + DiffableReader reader = findReaderForFile(file); + if ( reader == null ) + throw new UserException("Unsupported file type: " + file); + else + return reader.readFromFile(file, maxElementsToRead); + } + + public static boolean simpleDiffFiles(File masterFile, File testFile, DiffEngine.SummaryReportParams params) { + DiffEngine diffEngine = new DiffEngine(); + + if ( diffEngine.canRead(masterFile) && diffEngine.canRead(testFile) ) { + DiffElement master = diffEngine.createDiffableFromFile(masterFile); + DiffElement test = diffEngine.createDiffableFromFile(testFile); + List diffs = diffEngine.diff(master, test); + diffEngine.reportSummarizedDifferences(diffs, params); + return true; + } else { + return false; + } + } + + public static class SummaryReportParams { + PrintStream out = System.out; + int maxItemsToDisplay = 0; + int maxCountOneItems = 0; + int minSumDiffToShow = 0; + boolean descending = true; + + public SummaryReportParams(PrintStream out, int maxItemsToDisplay, int maxCountOneItems, int minSumDiffToShow) { + this.out = out; + this.maxItemsToDisplay = maxItemsToDisplay; + this.maxCountOneItems = maxCountOneItems; + this.minSumDiffToShow = minSumDiffToShow; + } + + public void setDescending(boolean descending) { + this.descending = descending; + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java new file mode 100644 index 000000000..2f48de2d3 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, 
sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +import com.google.java.contract.Requires; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.*; + +/** + * Created by IntelliJ IDEA. 
+ * User: depristo + * Date: 7/4/11 + * Time: 12:55 PM + * + * An interface that must be implemented to allow us to calculate differences + * between structured objects + */ +public class DiffNode extends DiffValue { + private Map getElementMap() { + return (Map)super.getValue(); + } + private static Map emptyElements() { return new HashMap(); } + + private DiffNode(Map elements) { + super(elements); + } + + private DiffNode(DiffElement binding, Map elements) { + super(binding, elements); + } + + // --------------------------------------------------------------------------- + // + // constructors + // + // --------------------------------------------------------------------------- + + public static DiffNode rooted(String name) { + return empty(name, DiffElement.ROOT); + } + + public static DiffNode empty(String name, DiffElement parent) { + DiffNode df = new DiffNode(emptyElements()); + DiffElement elt = new DiffElement(name, parent, df); + df.setBinding(elt); + return df; + } + + public static DiffNode empty(String name, DiffValue parent) { + return empty(name, parent.getBinding()); + } + + // --------------------------------------------------------------------------- + // + // accessors + // + // --------------------------------------------------------------------------- + + @Override + public boolean isAtomic() { return false; } + + public Collection getElementNames() { + return getElementMap().keySet(); + } + + public Collection getElements() { + return getElementMap().values(); + } + + private Collection getElements(boolean atomicOnly) { + List elts = new ArrayList(); + for ( DiffElement elt : getElements() ) + if ( (atomicOnly && elt.getValue().isAtomic()) || (! 
atomicOnly && elt.getValue().isCompound())) + elts.add(elt); + return elts; + } + + public Collection getAtomicElements() { + return getElements(true); + } + + public Collection getCompoundElements() { + return getElements(false); + } + + /** + * Returns the element bound to name, or null if no such binding exists + * @param name + * @return + */ + public DiffElement getElement(String name) { + return getElementMap().get(name); + } + + /** + * Returns true if name is bound in this node + * @param name + * @return + */ + public boolean hasElement(String name) { + return getElement(name) != null; + } + + // --------------------------------------------------------------------------- + // + // add + // + // --------------------------------------------------------------------------- + + @Requires("elt != null") + public void add(DiffElement elt) { + if ( getElementMap().containsKey(elt.getName()) ) + throw new IllegalArgumentException("Attempting to rebind already existing binding: " + elt + " node=" + this); + getElementMap().put(elt.getName(), elt); + } + + @Requires("elt != null") + public void add(DiffValue elt) { + add(elt.getBinding()); + } + + @Requires("elts != null") + public void add(Collection elts) { + for ( DiffElement e : elts ) + add(e); + } + + public void add(String name, Object value) { + add(new DiffElement(name, this.getBinding(), new DiffValue(value))); + } + + public int size() { + int count = 0; + for ( DiffElement value : getElements() ) + count += value.size(); + return count; + } + + // --------------------------------------------------------------------------- + // + // toString + // + // --------------------------------------------------------------------------- + + @Override + public String toString() { + return toString(0); + } + + @Override + public String toString(int offset) { + String off = offset > 0 ? 
Utils.dupString(' ', offset) : ""; + StringBuilder b = new StringBuilder(); + + b.append("(").append("\n"); + Collection atomicElts = getAtomicElements(); + for ( DiffElement elt : atomicElts ) { + b.append(elt.toString(offset + 2)).append('\n'); + } + + for ( DiffElement elt : getCompoundElements() ) { + b.append(elt.toString(offset + 4)).append('\n'); + } + b.append(off).append(")").append("\n"); + + return b.toString(); + } + + @Override + public String toOneLineString() { + StringBuilder b = new StringBuilder(); + + b.append('('); + List parts = new ArrayList(); + for ( DiffElement elt : getElements() ) + parts.add(elt.toOneLineString()); + b.append(Utils.join(" ", parts)); + b.append(')'); + + return b.toString(); + } + + // -------------------------------------------------------------------------------- + // + // fromString and toOneLineString + // + // -------------------------------------------------------------------------------- + + public static DiffElement fromString(String tree) { + return fromString(tree, DiffElement.ROOT); + } + + /** + * Doesn't support full tree structure parsing + * @param tree + * @param parent + * @return + */ + private static DiffElement fromString(String tree, DiffElement parent) { + // X=(A=A B=B C=(D=D)) + String[] parts = tree.split("=", 2); + if ( parts.length != 2 ) + throw new ReviewedStingException("Unexpected tree structure: " + tree + " parts=" + parts); + String name = parts[0]; + String value = parts[1]; + + if ( value.length() == 0 ) + throw new ReviewedStingException("Illegal tree structure: " + value + " at " + tree); + + if ( value.charAt(0) == '(' ) { + if ( ! value.endsWith(")") ) + throw new ReviewedStingException("Illegal tree structure. 
Missing ): " + value + " at " + tree); + String subtree = value.substring(1, value.length()-1); + DiffNode rec = DiffNode.empty(name, parent); + String[] subParts = subtree.split(" "); + for ( String subPart : subParts ) { + rec.add(fromString(subPart, rec.getBinding())); + } + return rec.getBinding(); + } else { + return new DiffValue(name, parent, value).getBinding(); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java new file mode 100644 index 000000000..fba6549fb --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; + +import java.io.File; +import java.io.PrintStream; +import java.util.List; + +/** + * Compares two record-oriented files, itemizing specific difference between equivalent + * records in the two files. Reports both itemized and summarized differences. + * @author Mark DePristo + * @version 0.1 + */ +@Requires(value={}) +public class DiffObjectsWalker extends RodWalker { + @Output(doc="File to which results should be written",required=true) + protected PrintStream out; + + @Argument(fullName="maxObjectsToRead", shortName="motr", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false) + int MAX_OBJECTS_TO_READ = -1; + + @Argument(fullName="maxDiffs", shortName="M", doc="Max. number of diffs to process", required=false) + int MAX_DIFFS = 0; + + @Argument(fullName="maxCount1Diffs", shortName="M1", doc="Max. 
number of diffs occuring exactly once in the file to process", required=false) + int MAX_COUNT1_DIFFS = 0; + + @Argument(fullName="minCountForDiff", shortName="MCFD", doc="Min number of observations for a records to display", required=false) + int minCountForDiff = 1; + + @Argument(fullName="showItemizedDifferences", shortName="SID", doc="Should we enumerate all differences between the files?", required=false) + boolean showItemizedDifferences = false; + + @Argument(fullName="master", shortName="m", doc="Master file: expected results", required=true) + File masterFile; + + @Argument(fullName="test", shortName="t", doc="Test file: new results to compare to the master file", required=true) + File testFile; + + final DiffEngine diffEngine = new DiffEngine(); + + @Override + public void initialize() { + + } + + @Override + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + return 0; + } + + @Override + public Integer reduceInit() { + return 0; + } + + @Override + public Integer reduce(Integer counter, Integer sum) { + return counter + sum; + } + + @Override + public void onTraversalDone(Integer sum) { + out.printf("Reading master file %s%n", masterFile); + DiffElement master = diffEngine.createDiffableFromFile(masterFile, MAX_OBJECTS_TO_READ); + out.printf(" Read %d objects%n", master.size()); + out.printf("Reading test file %s%n", testFile); + DiffElement test = diffEngine.createDiffableFromFile(testFile, MAX_OBJECTS_TO_READ); + out.printf(" Read %d objects%n", test.size()); + +// out.printf("Master diff objects%n"); +// out.println(master.toString()); +// out.printf("Test diff objects%n"); +// out.println(test.toString()); + + List diffs = diffEngine.diff(master, test); + if ( showItemizedDifferences ) { + out.printf("Itemized results%n"); + for ( Difference diff : diffs ) + out.printf("DIFF: %s%n", diff.toString()); + } + + DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, 
MAX_COUNT1_DIFFS, minCountForDiff); + params.setDescending(false); + diffEngine.reportSummarizedDifferences(diffs, params); + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java new file mode 100644 index 000000000..963191446 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +/** + * Created by IntelliJ IDEA. 
+ * User: depristo + * Date: 7/4/11 + * Time: 12:55 PM + * + * An interface that must be implemented to allow us to calculate differences + * between structured objects + */ +public class DiffValue { + private DiffElement binding = null; + final private Object value; + + public DiffValue(Object value) { + this.value = value; + } + + public DiffValue(DiffElement binding, Object value) { + this.binding = binding; + this.value = value; + } + + public DiffValue(DiffValue parent, Object value) { + this(parent.getBinding(), value); + } + + public DiffValue(String name, DiffElement parent, Object value) { + this.binding = new DiffElement(name, parent, this); + this.value = value; + } + + public DiffValue(String name, DiffValue parent, Object value) { + this(name, parent.getBinding(), value); + } + + public DiffElement getBinding() { + return binding; + } + + protected void setBinding(DiffElement binding) { + this.binding = binding; + } + + public Object getValue() { + return value; + } + + public String toString() { + return getValue().toString(); + } + + public String toString(int offset) { + return toString(); + } + + public String toOneLineString() { + return getValue().toString(); + } + + public boolean isAtomic() { return true; } + public boolean isCompound() { return ! 
isAtomic(); } + public int size() { return 1; } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java new file mode 100644 index 000000000..a117206f1 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; + +import java.io.File; + +/** + * Created by IntelliJ IDEA. 
/**
 * Created by IntelliJ IDEA.
 * User: depristo
 * Date: 7/4/11
 * Time: 1:09 PM
 *
 * Interface for readers creating diffable objects from a file
 */
public interface DiffableReader {
    /**
     * Return the name of this DiffableReader type.  For example, the VCF reader returns 'VCF' and the
     * bam reader 'BAM'
     *
     * @return the name of this reader type; never null
     */
    @Ensures("result != null")
    public String getName();

    /**
     * Read up to maxElementsToRead DiffElements from file, and return them.
     *
     * @param file              the file to read; must not be null
     * @param maxElementsToRead the maximum number of elements to read
     *                          (NOTE(review): callers pass -1, presumably meaning no limit -- confirm in implementations)
     * @return the element read from file; never null
     */
    @Ensures("result != null")
    @Requires("file != null")
    public DiffElement readFromFile(File file, int maxElementsToRead);

    /**
     * Return true if the file can be read into DiffElement objects with this reader. This should
     * be uniquely true/false for all readers, as the system will use the first reader that can read the
     * file.  This routine should never throw an exception.  The VCF reader, for example, looks at the
     * first line of the file for the ##format=VCF4.1 header, and the BAM reader for the BAM_MAGIC value
     *
     * @param file the file to test; must not be null
     * @return true if this reader can read file
     */
    @Requires("file != null")
    public boolean canRead(File file);
}
+ * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +public class Difference implements Comparable { + final String path; // X.Y.Z + final String[] parts; + int count = 1; + DiffElement master = null , test = null; + + public Difference(String path) { + this.path = path; + this.parts = DiffEngine.diffNameToPath(path); + } + + public Difference(DiffElement master, DiffElement test) { + this(createPath(master, test), master, test); + } + + public Difference(String path, DiffElement master, DiffElement test) { + this(path); + this.master = master; + this.test = test; + } + + public String[] getParts() { + return parts; + } + + public void incCount() { count++; } + + public int getCount() { + return count; + } + + public void setCount(int count) { + this.count = count; + } + + /** + * The fully qualified path object A.B.C etc + * @return + */ + public String getPath() { + return path; + } + + /** + * @return the length of the parts of this summary + */ + public int length() { + return this.parts.length; + } + + /** + * Returns true if the string parts matches this summary. Matches are + * must be equal() everywhere where this summary isn't *. 
+ * @param otherParts + * @return + */ + public boolean matches(String[] otherParts) { + if ( otherParts.length != length() ) + return false; + + // TODO optimization: can start at right most non-star element + for ( int i = 0; i < length(); i++ ) { + String part = parts[i]; + if ( ! part.equals("*") && ! part.equals(otherParts[i]) ) + return false; + } + + return true; + } + + @Override + public String toString() { + return String.format("%s:%d:%s", getPath(), getCount(), valueDiffString()); + } + + @Override + public int compareTo(Difference other) { + // sort first highest to lowest count, then by lowest to highest path + int countCmp = Integer.valueOf(count).compareTo(other.count); + return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path); + } + + public String valueDiffString() { + if ( hasSpecificDifference() ) { + return String.format("%s!=%s", getOneLineString(master), getOneLineString(test)); + } else { + return "N/A"; + } + } + + private static String createPath(DiffElement master, DiffElement test) { + return (master == null ? test : master).fullyQualifiedName(); + } + + private static String getOneLineString(DiffElement elt) { + return elt == null ? 
"MISSING" : elt.getValue().toOneLineString(); + } + + public boolean hasSpecificDifference() { + return master != null || test != null; + } + + public DiffElement getMaster() { + return master; + } + + public DiffElement getTest() { + return test; + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java new file mode 100644 index 000000000..77a992ce0 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +import org.broad.tribble.readers.AsciiLineReader; +import org.broad.tribble.readers.LineReader; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.IOException; +import java.util.Map; + + +/** + * Created by IntelliJ IDEA. + * User: depristo + * Date: 7/4/11 + * Time: 1:09 PM + * + * Class implementing diffnode reader for VCF + */ +public class VCFDiffableReader implements DiffableReader { + @Override + public String getName() { return "VCF"; } + + @Override + public DiffElement readFromFile(File file, int maxElementsToRead) { + DiffNode root = DiffNode.rooted(file.getName()); + try { + // read the version line from the file + LineReader lineReader = new AsciiLineReader(new FileInputStream(file)); + final String version = lineReader.readLine(); + root.add("VERSION", version); + lineReader.close(); + + lineReader = new AsciiLineReader(new FileInputStream(file)); + VCFCodec vcfCodec = new VCFCodec(); + + // must be read as state is stored in reader itself + VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader); + for ( VCFHeaderLine headerLine : header.getMetaData() ) { + String key = headerLine.getKey(); + if ( headerLine instanceof VCFNamedHeaderLine ) + key += "_" + ((VCFNamedHeaderLine) headerLine).getName(); + root.add(key, headerLine.toString()); + } + + String line = lineReader.readLine(); + int count = 0, nRecordsAtPos = 1; + String prevName = ""; + while ( line != null ) { + if ( count++ > maxElementsToRead && maxElementsToRead != -1) + break; + + VariantContext vc = (VariantContext)vcfCodec.decode(line); + String name = vc.getChr() + ":" + vc.getStart(); + if ( name.equals(prevName) ) { + name += "_" + ++nRecordsAtPos; + } else { + prevName = name; + 
} + DiffNode vcRoot = DiffNode.empty(name, root); + + // add fields + vcRoot.add("CHROM", vc.getChr()); + vcRoot.add("POS", vc.getStart()); + vcRoot.add("ID", vc.hasID() ? vc.getID() : VCFConstants.MISSING_VALUE_v4); + vcRoot.add("REF", vc.getReference()); + vcRoot.add("ALT", vc.getAlternateAlleles()); + vcRoot.add("QUAL", vc.hasNegLog10PError() ? vc.getNegLog10PError() * 10 : VCFConstants.MISSING_VALUE_v4); + vcRoot.add("FILTER", vc.getFilters()); + + // add info fields + for (Map.Entry attribute : vc.getAttributes().entrySet()) { + if ( ! attribute.getKey().startsWith("_") && ! attribute.getKey().equals(VariantContext.ID_KEY)) + vcRoot.add(attribute.getKey(), attribute.getValue()); + } + + for (Genotype g : vc.getGenotypes().values() ) { + DiffNode gRoot = DiffNode.empty(g.getSampleName(), vcRoot); + gRoot.add("GT", g.getGenotypeString()); + gRoot.add("GQ", g.hasNegLog10PError() ? g.getNegLog10PError() * 10 : VCFConstants.MISSING_VALUE_v4 ); + + for (Map.Entry attribute : g.getAttributes().entrySet()) { + if ( ! 
attribute.getKey().startsWith("_") ) + gRoot.add(attribute.getKey(), attribute.getValue()); + } + + vcRoot.add(gRoot); + } + + root.add(vcRoot); + line = lineReader.readLine(); + } + + lineReader.close(); + } catch ( IOException e ) { + return null; + } + + return root.getBinding(); + } + + @Override + public boolean canRead(File file) { + try { + final String VCF4_HEADER = "##fileformat=VCFv4"; + char[] buff = new char[VCF4_HEADER.length()]; + new FileReader(file).read(buff, 0, VCF4_HEADER.length()); + String firstLine = new String(buff); + return firstLine.startsWith(VCF4_HEADER); + } catch ( IOException e ) { + return false; + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java index 828d39717..efc101618 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java @@ -25,13 +25,13 @@ package org.broadinstitute.sting.gatk.walkers.fasta; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Collection; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java index 6be2336c0..2dbfc76ff 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java @@ -25,16 +25,15 @@ package org.broadinstitute.sting.gatk.walkers.fasta; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.gatk.walkers.WalkerName; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java index b31526987..2c009f7f2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.filters; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; public class ClusteredSnps { private GenomeLocParser genomeLocParser; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java index c3849e240..ede19746a 100755 --- 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java @@ -25,8 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.filters; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; public class FiltrationContext { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContextWindow.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContextWindow.java index 225cdecc3..d7c0dd4d5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContextWindow.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContextWindow.java @@ -27,7 +27,9 @@ package org.broadinstitute.sting.gatk.walkers.filters; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.*; +import java.util.LinkedList; +import java.util.List; +import java.util.ListIterator; /** * A window of variants surrounding the current variant being investigated diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 884d0ac24..6c023573a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -25,21 +25,20 @@ package org.broadinstitute.sting.gatk.walkers.filters; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.commandline.Argument; +import 
org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index 47be7e6fe..83a8ce7d7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -26,14 +26,15 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.variantcontext.Allele; import 
org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.PrintStream; -import java.util.*; +import java.util.Map; +import java.util.Set; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java index 22c9dcf91..696a74de8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java @@ -4,8 +4,6 @@ import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; -import java.util.Arrays; - /** * Created by IntelliJ IDEA. * User: delangel diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java index ab075eaf2..2014801e4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java @@ -26,13 +26,14 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import net.sf.samtools.SAMUtils; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.pileup.FragmentPileup; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.pileup.PileupElement; import 
org.broadinstitute.sting.utils.genotype.DiploidGenotype; -import org.broadinstitute.sting.utils.sam.ReadUtils; +import org.broadinstitute.sting.utils.pileup.FragmentPileup; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import static java.lang.Math.log10; import static java.lang.Math.pow; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 89504b371..5c27bc943 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -26,17 +26,22 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.SimpleTimer; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.exceptions.UserException; import sun.reflect.generics.reflectiveObjects.NotImplementedException; -import java.util.*; import java.io.PrintStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index 3902a0b7f..8261cd588 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -30,11 +30,11 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java index c4e315f68..10b646d63 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java @@ -26,18 +26,18 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import 
org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; import java.io.PrintStream; +import java.util.*; public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 1f430548b..60ea601d5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -25,16 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; -import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.MathUtils; import 
org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.genotype.Haplotype; @@ -42,11 +40,10 @@ import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; @@ -394,7 +391,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood if (DEBUG) System.out.format("hsize: %d eventLength: %d refSize: %d, locStart: %d numpr: %d\n",hsize,eventLength, (int)ref.getWindow().size(), loc.getStart(), numPrefBases); - + //System.out.println(eventLength); haplotypeMap = Haplotype.makeHaplotypeListFromAlleles( alleleList, loc.getStart(), ref, hsize, numPrefBases); @@ -421,8 +418,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood - // which genotype likelihoods correspond to two most likely alleles? By convention, likelihood vector is lexically ordered, for example - // for 3 alleles it's 00 01 02 11 12 22 + // which genotype likelihoods correspond to two most likely alleles? 
By convention, likelihood vector is ordered as for example + // for 3 alleles it's 00 01 11 02 12 22 GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(), alleleList, genotypeLikelihoods, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java index 2cf149fd0..3652763de 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.ArrayList; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 9f4d4182f..3e3cd128b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -25,23 +25,25 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.BaseUtils; 
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java index cf1c57a05..22c3081a3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java @@ -25,8 +25,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.DownsampleType; @@ -36,8 +34,12 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import 
org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.HashSet; +import java.util.Set; +import java.util.TreeSet; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java index 1533e8777..68d8f9b54 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java @@ -24,19 +24,19 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 055eb0b97..2b25df4aa 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.commandline.Input; import java.io.File; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 7a765c602..2a0338bca 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -25,22 +25,25 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.gatk.contexts.*; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.ArgumentCollection; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.DownsampleType; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.filters.BadMateFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import 
org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; -import org.broadinstitute.sting.gatk.DownsampleType; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.commandline.*; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; -import java.util.*; import java.io.PrintStream; +import java.util.*; /** @@ -48,7 +51,7 @@ import java.io.PrintStream; * multi-sample data. The user can choose from several different incorporated calculation models. */ @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT) -@ReadFilters( {BadMateFilter.class} ) +@ReadFilters( {BadMateFilter.class, MappingQualityUnavailableReadFilter.class} ) @Reference(window=@Window(start=-200,stop=200)) @By(DataSource.REFERENCE) @Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250) @@ -158,7 +161,7 @@ public class UnifiedGenotyper extends LocusWalker getSupportedHeaderStrings() { + Set result = new HashSet(); + result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype")); + result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality")); + result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)")); + result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification")); + + return result; + } + /** * Compute at a given locus. 
* diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 6fc972b5d..a10897172 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -25,22 +25,24 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; +import com.google.java.contract.Requires; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.utils.pileup.*; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import com.google.java.contract.*; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; +import 
org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.PrintStream; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java index df1f4f908..adb7c4c38 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java @@ -1,11 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.indels; import net.sf.picard.sam.SamPairUtil; -import net.sf.samtools.*; +import net.sf.samtools.SAMFileWriter; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMRecordComparator; +import net.sf.samtools.SAMRecordCoordinateComparator; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java index 7617aa9de..e68aa31e0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java @@ -25,21 +25,16 @@ package org.broadinstitute.sting.gatk.walkers.indels; -import net.sf.samtools.AlignmentBlock; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.walkers.genotyper.ExactAFCalculationModel; import org.broadinstitute.sting.utils.MathUtils; import 
org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.genotype.Haplotype; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.ReadUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.Arrays; import java.util.HashMap; -import java.util.List; public class HaplotypeIndelErrorModel { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index a53665d64..61f21c488 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -25,39 +25,41 @@ package org.broadinstitute.sting.gatk.walkers.indels; +import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.*; import net.sf.samtools.util.RuntimeIOException; -import net.sf.samtools.util.StringUtil; import net.sf.samtools.util.SequenceUtil; -import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.commandline.*; +import net.sf.samtools.util.StringUtil; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; -import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; -import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterStub; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import 
org.broadinstitute.sting.utils.exceptions.StingException; -import org.broadinstitute.sting.utils.interval.IntervalMergingRule; -import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.walkers.BAQMode; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.gatk.walkers.BAQMode; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; +import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.interval.NwayIntervalMergingIterator; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.NWaySAMFileWriter; +import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.text.TextFormattingUtils; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import org.broadinstitute.sting.utils.sam.ReadUtils; -import 
org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.io.FileNotFoundException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java index 010e0cf6f..af8051334 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java @@ -25,8 +25,10 @@ package org.broadinstitute.sting.gatk.walkers.indels; -import net.sf.samtools.*; -import org.broadinstitute.sting.commandline.*; +import net.sf.samtools.Cigar; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index ab7ae4184..55450486b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -28,30 +28,25 @@ package org.broadinstitute.sting.gatk.walkers.indels; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -/*import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.Covariate; -import 
org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDataManager; -import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDatum; -import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalibrationArgumentCollection; -*/import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.collections.NestedHashMap; -import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.genotype.Haplotype; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.text.XReadLines; +import org.broadinstitute.sting.utils.variantcontext.Allele; import java.io.File; -import java.io.FileNotFoundException; -import java.util.*; -import java.util.regex.Pattern; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedHashMap; + +/*import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.Covariate; +import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDataManager; +import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDatum; +import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalibrationArgumentCollection; +*/ public class PairHMMIndelErrorModel { @@ -1047,8 +1042,8 @@ public class PairHMMIndelErrorModel { int k=0; double maxElement = Double.NEGATIVE_INFINITY; - for (int i=0; i < hSize; i++) { - for (int j=i; j < hSize; j++){ + for (int j=0; j < hSize; j++) { + for (int i=0; i <= j; 
i++){ genotypeLikelihoods[k++] = haplotypeLikehoodMatrix[i][j]; if (haplotypeLikehoodMatrix[i][j] > maxElement) maxElement = haplotypeLikehoodMatrix[i][j]; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignedReadCounter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignedReadCounter.java index fc196e712..2c89b907b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignedReadCounter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignedReadCounter.java @@ -25,19 +25,23 @@ package org.broadinstitute.sting.gatk.walkers.indels; -import net.sf.samtools.*; -import org.broadinstitute.sting.utils.interval.IntervalMergingRule; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.filters.BadMateFilter; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator; -import org.broadinstitute.sting.utils.sam.ReadUtils; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.filters.BadMateFilter; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.By; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; +import org.broadinstitute.sting.utils.sam.ReadUtils; import java.io.File; -import java.util.*; +import java.util.Iterator; @By(DataSource.READS) 
// walker to count realigned reads diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index 048dbd8cb..488e37f26 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -25,32 +25,32 @@ package org.broadinstitute.sting.gatk.walkers.indels; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.BadCigarFilter; -import org.broadinstitute.sting.gatk.filters.Platform454Filter; -import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; import org.broadinstitute.sting.gatk.filters.BadMateFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; +import org.broadinstitute.sting.gatk.filters.Platform454Filter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.ArrayList; import 
java.io.PrintStream; +import java.util.ArrayList; /** * Emits intervals for the Local Indel Realigner to target for cleaning. Ignores 454 reads, MQ0 reads, and reads with consecutive indel operators in the CIGAR string. */ -@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class, BadCigarFilter.class}) +@ReadFilters({Platform454Filter.class, MappingQualityZeroReadFilter.class, BadCigarFilter.class}) @Reference(window=@Window(start=-1,stop=50)) @Allows(value={DataSource.READS, DataSource.REFERENCE}) @By(DataSource.REFERENCE) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SAMRecordCoordinateComparatorWithUnmappedReads.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SAMRecordCoordinateComparatorWithUnmappedReads.java index 1fe3fdd04..3854a4a8c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SAMRecordCoordinateComparatorWithUnmappedReads.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SAMRecordCoordinateComparatorWithUnmappedReads.java @@ -23,7 +23,8 @@ */ package org.broadinstitute.sting.gatk.walkers.indels; -import net.sf.samtools.*; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMRecordCoordinateComparator; /** * Extends Picard's Comparator for sorting SAMRecords by coordinate. 
This one actually deals with unmapped reads diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java index c2953d1d7..443e6e9f2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java @@ -25,15 +25,21 @@ package org.broadinstitute.sting.gatk.walkers.indels; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import net.sf.samtools.*; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Tags; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; -import org.broadinstitute.sting.gatk.filters.*; -import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; +import org.broadinstitute.sting.gatk.filters.Platform454Filter; +import org.broadinstitute.sting.gatk.filters.PlatformUnitFilter; +import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.Transcript; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature; import 
org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; @@ -42,22 +48,22 @@ import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODItera import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.interval.IntervalUtils; -import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator; -import org.broadinstitute.sting.utils.interval.IntervalMergingRule; -import org.broadinstitute.sting.utils.interval.OverlappingIntervalIterator; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.exceptions.StingException; -import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.collections.CircularArray; import org.broadinstitute.sting.utils.collections.PrimitivePair; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.Hidden; - -import net.sf.samtools.*; +import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; +import org.broadinstitute.sting.utils.interval.IntervalUtils; +import org.broadinstitute.sting.utils.interval.OverlappingIntervalIterator; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import 
org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.*; import java.util.*; @@ -72,7 +78,7 @@ import java.util.*; * if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains * only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords. */ -@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class, PlatformUnitFilter.class}) +@ReadFilters({Platform454Filter.class, MappingQualityZeroReadFilter.class, PlatformUnitFilter.class}) public class SomaticIndelDetectorWalker extends ReadWalker { // @Output // PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AllelePair.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AllelePair.java index 869edf784..cb123c868 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AllelePair.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AllelePair.java @@ -23,9 +23,9 @@ */ package org.broadinstitute.sting.gatk.walkers.phasing; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java index 81d9b4ddb..9aa370d3f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java @@ -24,14 +24,10 @@ package org.broadinstitute.sting.gatk.walkers.phasing; -import 
org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature; @@ -41,9 +37,12 @@ import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.AminoAci import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java new file mode 100644 index 000000000..298d8d6c8 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java @@ -0,0 +1,110 @@ +package org.broadinstitute.sting.gatk.walkers.phasing; + +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import 
org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; + +import java.util.*; + +/** + * Merges read-back-phased and phase-by-transmission files. + */ +public class MergeAndMatchHaplotypes extends RodWalker { + @Output + protected VCFWriter vcfWriter = null; + + private Map pbtCache = new HashMap(); + private Map rbpCache = new HashMap(); + + private final String SOURCE_NAME = "MergeReadBackedAndTransmissionPhasedVariants"; + + public void initialize() { + ArrayList rodNames = new ArrayList(); + rodNames.add("pbt"); + + Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); + Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); + Set headerLines = new HashSet(); + headerLines.addAll(VCFUtils.getHeaderFields(this.getToolkit())); + + vcfWriter.writeHeader(new VCFHeader(headerLines, vcfSamples)); + } + + @Override + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if (tracker != null) { + Collection pbts = tracker.getVariantContexts(ref, "pbt", null, ref.getLocus(), true, true); + Collection rbps = tracker.getVariantContexts(ref, "rbp", null, ref.getLocus(), true, true); + + VariantContext pbt = pbts.iterator().hasNext() ? 
pbts.iterator().next() : null; + VariantContext rbp = rbps.iterator().hasNext() ? rbps.iterator().next() : null; + + if (pbt != null && rbp != null) { + Map genotypes = pbt.getGenotypes(); + + if (!rbp.isFiltered()) { + for (String sample : rbp.getSampleNames()) { + Genotype rbpg = rbp.getGenotype(sample); + Genotype pbtg = pbt.getGenotype(sample); + + // Propagate read-backed phasing information to genotypes unphased by transmission + //if (!pbtg.isPhased() && rbpCache.containsKey(sample)) { + if (!pbtg.isPhased() && rbpg.isPhased() && rbpCache.containsKey(sample)) { + boolean orientationMatches = rbpCache.get(sample).sameGenotype(pbtCache.get(sample), false); + + if (orientationMatches) { + pbtg = rbpg; + } else { + List fwdAlleles = rbpg.getAlleles(); + List revAlleles = new ArrayList(); + + for (int i = fwdAlleles.size() - 1; i >= 0; i--) { + revAlleles.add(fwdAlleles.get(i)); + } + + pbtg = new Genotype(sample, revAlleles, rbpg.getNegLog10PError(), rbpg.getFilters(), rbpg.getAttributes(), rbpg.isPhased()); + } + } + + genotypes.put(sample, pbtg); + + // Update the cache + if (/*rbpg.isPhased() &&*/ rbpg.isHet()) { + rbpCache.put(sample, rbpg); + pbtCache.put(sample, pbtg); + } else if (!rbpg.isPhased()) { + rbpCache.remove(sample); + pbtCache.remove(sample); + } + } + } + + VariantContext newvc = new VariantContext(SOURCE_NAME, pbt.getChr(), pbt.getStart(), pbt.getStart(), pbt.getAlleles(), genotypes, pbt.getNegLog10PError(), pbt.getFilters(), pbt.getAttributes()); + vcfWriter.add(newvc, ref.getBase()); + } + } + + return null; + } + + @Override + public Integer reduceInit() { + return null; + } + + @Override + public Integer reduce(Integer value, Integer sum) { + return null; + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java index 709bc44ce..5bd438605 100644 --- 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java @@ -24,10 +24,6 @@ package org.broadinstitute.sting.gatk.walkers.phasing; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -35,7 +31,11 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java index abced442e..b0491a281 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java @@ -24,20 +24,19 @@ package org.broadinstitute.sting.gatk.walkers.phasing; -import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.ReferenceSequenceFile; import org.apache.log4j.Logger; +import 
org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; import java.io.FileNotFoundException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java index ec6f5c648..be15d4541 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java @@ -24,20 +24,22 @@ package org.broadinstitute.sting.gatk.walkers.phasing; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; 
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java new file mode 100755 index 000000000..b24437c4a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -0,0 +1,320 @@ +package org.broadinstitute.sting.gatk.walkers.phasing; + +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import 
org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.broadinstitute.sting.utils.variantcontext.Genotype;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
+
+import java.util.*;
+
+/**
+ * Phases a trio VCF (child phased by transmission, implied phase carried over to parents).  Given genotypes for a trio,
+ * this walker modifies the genotypes (if necessary) to reflect the most likely configuration given the genotype
+ * likelihoods and inheritance constraints, phases child by transmission and carries over implied phase to the parents
+ * (their alleles in their genotypes are ordered as transmitted|untransmitted).  Computes probability that the
+ * determined phase is correct given that the genotype configuration is correct (useful if you want to use this to
+ * compare phasing accuracy, but want to break that comparison down by phasing confidence in the truth set).  Optionally
+ * filters out sites where the phasing is indeterminate (site has no-calls), ambiguous (everyone is heterozygous), or
+ * the genotypes exhibit a Mendelian violation.  This walker assumes there are only three samples in the VCF file to
+ * begin.
+ *
+ * Only the reference allele and the first alternate allele of each record are considered when enumerating
+ * candidate genotypes (see {@link #phaseTrioGenotypes(VariantContext)}).
+ */
+public class PhaseByTransmission extends RodWalker<Integer, Integer> {
+    @Argument(shortName="f", fullName="familyPattern", required=true, doc="Pattern for the family structure (usage: mom+dad=child)")
+    public String familyStr = null;
+
+    @Argument(shortName="nofilters", fullName="disableFilters", required=false, doc="Disable filters for sites where the phase can't be determined, where the parental origin of the alleles is ambiguous (i.e. everyone is heterozygous), or Mendelian violations")
+    public Boolean noFilters = false;
+
+    @Output
+    protected VCFWriter vcfWriter = null;
+
+    // Sample names parsed from familyStr in initialize().
+    private String SAMPLE_NAME_MOM;
+    private String SAMPLE_NAME_DAD;
+    private String SAMPLE_NAME_CHILD;
+
+    private final String ROD_NAME = "variant";
+    private final String AMBIGUOUS_ALLELE_ORIGIN_FILTER_NAME = "AmbiguousAlleleOrigin";
+    private final String INSUFFICIENT_DATA_FILTER_NAME = "InsufficientInformation";
+    private final String MENDELIAN_VIOLATION_FILTER_NAME = "MendelianViolation";
+    private final String TRANSMISSION_PROBABILITY_TAG_NAME = "TP";
+    private final String SOURCE_NAME = "PhaseByTransmission";
+
+    // Prior weight given to each genotype configuration that violates Mendelian inheritance.
+    private final Double MENDELIAN_VIOLATION_PRIOR = 1e-8;
+
+    /**
+     * Parse the familial relationship specification, and initialize VCF writer
+     *
+     * @throws UserException if the family pattern is malformed, if the input does not contain exactly three
+     *                       samples, or if any sample named in the pattern is missing from the input
+     */
+    public void initialize() {
+        String[] pieces = familyStr.split("[\\+\\=]");
+
+        // Without this check a malformed pattern (e.g. "mom+dad") surfaces as an ArrayIndexOutOfBoundsException.
+        if (pieces.length != 3 || pieces[0].isEmpty() || pieces[1].isEmpty() || pieces[2].isEmpty()) {
+            throw new UserException("The familyPattern argument '" + familyStr + "' is malformed; expected usage: mom+dad=child");
+        }
+
+        SAMPLE_NAME_MOM = pieces[0];
+        SAMPLE_NAME_DAD = pieces[1];
+        SAMPLE_NAME_CHILD = pieces[2];
+
+        ArrayList<String> rodNames = new ArrayList<String>();
+        rodNames.add(ROD_NAME);
+
+        Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
+        Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
+
+        if (vcfSamples.size() != 3) {
+            // The check rejects fewer than three samples as well as more, so the message reports the actual count.
+            throw new UserException("File to phase by transmission contains " + vcfSamples.size() + " samples. This walker only " +
+                                    "accepts VCFs with exactly three samples, so that the meaning of the applied filters is " +
+                                    "unambiguous.");
+        }
+
+        if (!vcfSamples.contains(SAMPLE_NAME_MOM) || !vcfSamples.contains(SAMPLE_NAME_DAD) || !vcfSamples.contains(SAMPLE_NAME_CHILD)) {
+            throw new UserException("One or more of the samples specified in the familyPattern argument is not present " +
+                                    "in this file.  Please supply a VCF file that contains only three samples: the " +
+                                    "mother, the father, and the child");
+        }
+
+        // TreeSet: the output header lists the three samples in sorted-name order.
+        Set<String> samples = new TreeSet<String>();
+        samples.add(SAMPLE_NAME_MOM);
+        samples.add(SAMPLE_NAME_DAD);
+        samples.add(SAMPLE_NAME_CHILD);
+
+        Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
+        headerLines.addAll(VCFUtils.getHeaderFields(this.getToolkit()));
+
+        if (!noFilters) {
+            headerLines.add(new VCFFilterHeaderLine(AMBIGUOUS_ALLELE_ORIGIN_FILTER_NAME, "The parental origin of each of the child's allele cannot be determined (ie everyone is heterozygous)"));
+            headerLines.add(new VCFFilterHeaderLine(INSUFFICIENT_DATA_FILTER_NAME, "The phase of the child's genotype cannot be determined (ie someone is a no-call)"));
+            headerLines.add(new VCFFilterHeaderLine(MENDELIAN_VIOLATION_FILTER_NAME, "No combination of the parents' alleles can yield the child's genotype (ie a possible Mendelian violation)"));
+        }
+
+        headerLines.add(new VCFInfoHeaderLine(TRANSMISSION_PROBABILITY_TAG_NAME, 1, VCFHeaderLineType.Float, "Probability that the phase is correct given that the genotypes are correct"));
+        vcfWriter.writeHeader(new VCFHeader(headerLines, samples));
+    }
+
+    /**
+     * Multiplies together, for each trio member, the normalized likelihood of that member's called genotype.
+     *
+     * The likelihood vector is indexed by {@code getType().ordinal() - 1}.
+     * NOTE(review): this presumes the genotype-type enum orders NO_CALL first, then hom-ref, het, hom-var, and
+     * that every genotype carries likelihoods -- confirm against Genotype; a missing likelihood would fail here.
+     *
+     * @param mom   candidate genotype for the mother
+     * @param dad   candidate genotype for the father
+     * @param child candidate genotype for the child
+     * @return product of the three normalized genotype likelihoods
+     */
+    private double computeTransmissionLikelihoodOfGenotypeConfiguration(Genotype mom, Genotype dad, Genotype child) {
+        double[] momLikelihoods = MathUtils.normalizeFromLog10(mom.getLikelihoods().getAsVector());
+        double[] dadLikelihoods = MathUtils.normalizeFromLog10(dad.getLikelihoods().getAsVector());
+        double[] childLikelihoods = MathUtils.normalizeFromLog10(child.getLikelihoods().getAsVector());
+
+        int momIndex = mom.getType().ordinal() - 1;
+        int dadIndex = dad.getType().ordinal() - 1;
+        int childIndex = child.getType().ordinal() - 1;
+
+        return momLikelihoods[momIndex]*dadLikelihoods[dadIndex]*childLikelihoods[childIndex];
+    }
+
+    /**
+     * Builds an unphased, unfiltered diploid genotype that reuses the sample name, error and attributes of a template.
+     */
+    private Genotype createUnphasedGenotype(Genotype template, Allele first, Allele second) {
+        List<Allele> alleles = new ArrayList<Allele>();
+        alleles.add(first);
+        alleles.add(second);
+
+        return new Genotype(template.getSampleName(), alleles, template.getNegLog10PError(), null, template.getAttributes(), false);
+    }
+
+    /**
+     * Enumerates the three diploid genotypes possible at a biallelic site for the sample described by g.
+     *
+     * @param refAllele the reference allele
+     * @param altAllele the alternate allele
+     * @param g         genotype whose sample name, error and attributes are copied onto each candidate
+     * @return the candidates in the order hom-ref, het, hom-var
+     */
+    private ArrayList<Genotype> createAllThreeGenotypes(Allele refAllele, Allele altAllele, Genotype g) {
+        ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
+        genotypes.add(createUnphasedGenotype(g, refAllele, refAllele));
+        genotypes.add(createUnphasedGenotype(g, refAllele, altAllele));
+        genotypes.add(createUnphasedGenotype(g, altAllele, altAllele));
+
+        return genotypes;
+    }
+
+    /**
+     * Counts how many alleles of a genotype are equal to the given allele.
+     *
+     * The previous version negated the comparison and so counted the alleles that did NOT match.  For the
+     * ref/alt diploid genotypes this class generates, that merely swapped the two halves of the (symmetric)
+     * Mendelian-violation test, so results there are unchanged; the helper now does what its name says.
+     *
+     * @param alleleToMatch the allele to look for
+     * @param g             the genotype to inspect
+     * @return number of alleles in g equal to alleleToMatch
+     */
+    private int getNumberOfMatchingAlleles(Allele alleleToMatch, Genotype g) {
+        List<Allele> alleles = g.getAlleles();
+        int matchingAlleles = 0;
+
+        for (Allele a : alleles) {
+            if (alleleToMatch.equals(a)) {
+                matchingAlleles++;
+            }
+        }
+
+        return matchingAlleles;
+    }
+
+    /**
+     * Tests whether the child's genotype is impossible given the parents' genotypes.
+     *
+     * Each parent can contribute at most one copy of an allele it carries, so the child may not hold more
+     * copies of the ref (or alt) allele than the number of parents carrying it.
+     *
+     * @return true if the configuration is a Mendelian violation
+     */
+    private boolean isMendelianViolation(Allele refAllele, Allele altAllele, Genotype mom, Genotype dad, Genotype child) {
+        // Per parent: 1 if the parent carries the allele at all, else 0.
+        int numMomRefAlleles = getNumberOfMatchingAlleles(refAllele, mom) > 0 ? 1 : 0;
+        int numMomAltAlleles = getNumberOfMatchingAlleles(altAllele, mom) > 0 ? 1 : 0;
+
+        int numDadRefAlleles = getNumberOfMatchingAlleles(refAllele, dad) > 0 ? 1 : 0;
+        int numDadAltAlleles = getNumberOfMatchingAlleles(altAllele, dad) > 0 ? 1 : 0;
+
+        int numChildRefAlleles = getNumberOfMatchingAlleles(refAllele, child);
+        int numChildAltAlleles = getNumberOfMatchingAlleles(altAllele, child);
+
+        return (numMomRefAlleles + numDadRefAlleles < numChildRefAlleles || numMomAltAlleles + numDadAltAlleles < numChildAltAlleles);
+    }
+
+    /**
+     * Returns the allele of a two-allele genotype that was not transmitted.  Uses equals() rather than
+     * reference identity so the result does not depend on Allele instances being shared.
+     */
+    private Allele getUntransmittedAllele(Genotype parent, Allele transmittedAllele) {
+        return parent.getAllele(0).equals(transmittedAllele) ? parent.getAllele(1) : parent.getAllele(0);
+    }
+
+    /**
+     * Phases the trio by transmission.
+     *
+     * Every (mom allele, dad allele) pair is formed into a phased child genotype ordered maternal|paternal.  The
+     * first such candidate that matches the child's genotype (ignoring phase) fixes the transmitted alleles;
+     * each parent is then rewritten as transmitted|untransmitted.
+     *
+     * @return genotypes in the order mom, dad, child; the inputs are returned unchanged if no candidate matches
+     */
+    private ArrayList<Genotype> getPhasedGenotypes(Genotype mom, Genotype dad, Genotype child) {
+        Set<Genotype> possiblePhasedChildGenotypes = new HashSet<Genotype>();
+
+        for (Allele momAllele : mom.getAlleles()) {
+            for (Allele dadAllele : dad.getAlleles()) {
+                ArrayList<Allele> possiblePhasedChildAlleles = new ArrayList<Allele>();
+                possiblePhasedChildAlleles.add(momAllele);
+                possiblePhasedChildAlleles.add(dadAllele);
+
+                Genotype possiblePhasedChildGenotype = new Genotype(child.getSampleName(), possiblePhasedChildAlleles, child.getNegLog10PError(), child.getFilters(), child.getAttributes(), true);
+
+                possiblePhasedChildGenotypes.add(possiblePhasedChildGenotype);
+            }
+        }
+
+        ArrayList<Genotype> finalGenotypes = new ArrayList<Genotype>();
+
+        for (Genotype phasedChildGenotype : possiblePhasedChildGenotypes) {
+            if (child.sameGenotype(phasedChildGenotype, true)) {
+                // Child alleles are ordered maternal|paternal by construction above.
+                Allele momTransmittedAllele = phasedChildGenotype.getAllele(0);
+                Allele momUntransmittedAllele = getUntransmittedAllele(mom, momTransmittedAllele);
+
+                ArrayList<Allele> phasedMomAlleles = new ArrayList<Allele>();
+                phasedMomAlleles.add(momTransmittedAllele);
+                phasedMomAlleles.add(momUntransmittedAllele);
+
+                Genotype phasedMomGenotype = new Genotype(mom.getSampleName(), phasedMomAlleles, mom.getNegLog10PError(), mom.getFilters(), mom.getAttributes(), true);
+
+                Allele dadTransmittedAllele = phasedChildGenotype.getAllele(1);
+                Allele dadUntransmittedAllele = getUntransmittedAllele(dad, dadTransmittedAllele);
+
+                ArrayList<Allele> phasedDadAlleles = new ArrayList<Allele>();
+                phasedDadAlleles.add(dadTransmittedAllele);
+                phasedDadAlleles.add(dadUntransmittedAllele);
+
+                Genotype phasedDadGenotype = new Genotype(dad.getSampleName(), phasedDadAlleles, dad.getNegLog10PError(), dad.getFilters(), dad.getAttributes(), true);
+
+                finalGenotypes.add(phasedMomGenotype);
+                finalGenotypes.add(phasedDadGenotype);
+                finalGenotypes.add(phasedChildGenotype);
+
+                return finalGenotypes;
+            }
+        }
+
+        // No parental allele pair reproduces the child's genotype: hand the inputs back unphased.
+        finalGenotypes.add(mom);
+        finalGenotypes.add(dad);
+        finalGenotypes.add(child);
+
+        return finalGenotypes;
+    }
+
+    /**
+     * Picks the most likely trio genotype configuration for a record, phases it if possible, and annotates it.
+     *
+     * All 27 (mom, dad, child) combinations of hom-ref/het/hom-var are scored as prior * likelihood; the 12
+     * combinations that violate Mendelian inheritance receive MENDELIAN_VIOLATION_PRIOR.  The best-scoring
+     * configuration is then either filtered (Mendelian violation, or all three heterozygous) or phased, in which
+     * case the TP attribute is set to its share of the total score.  Records with a no-call are filtered as
+     * having insufficient information.  TP defaults to 0.0 whenever no phasing is performed.
+     *
+     * @param vc the record to phase; only its reference and first alternate allele are considered
+     * @return a new record with (possibly) phased genotypes, the TP attribute, and any filters applied
+     */
+    private VariantContext phaseTrioGenotypes(VariantContext vc) {
+        Genotype mom = vc.getGenotype(SAMPLE_NAME_MOM);
+        Genotype dad = vc.getGenotype(SAMPLE_NAME_DAD);
+        Genotype child = vc.getGenotype(SAMPLE_NAME_CHILD);
+
+        Set<String> filters = new HashSet<String>();
+        filters.addAll(vc.getFilters());
+
+        Map<String, Object> attributes = new HashMap<String, Object>();
+        attributes.putAll(vc.getAttributes());
+        attributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, 0.0);
+
+        ArrayList<Genotype> finalGenotypes = new ArrayList<Genotype>();
+        finalGenotypes.add(mom);
+        finalGenotypes.add(dad);
+        finalGenotypes.add(child);
+
+        if (!mom.isCalled() || !dad.isCalled() || !child.isCalled()) {
+            filters.add(INSUFFICIENT_DATA_FILTER_NAME);
+        } else {
+            Allele refAllele = vc.getReference();
+            Allele altAllele = vc.getAlternateAllele(0);
+
+            ArrayList<Genotype> possibleMomGenotypes = createAllThreeGenotypes(refAllele, altAllele, mom);
+            ArrayList<Genotype> possibleDadGenotypes = createAllThreeGenotypes(refAllele, altAllele, dad);
+            ArrayList<Genotype> possibleChildGenotypes = createAllThreeGenotypes(refAllele, altAllele, child);
+
+            double bestConfigurationLikelihood = 0.0;
+            double bestPrior = 0.0;
+            Genotype bestMomGenotype = mom;
+            Genotype bestDadGenotype = dad;
+            Genotype bestChildGenotype = child;
+
+            // Sum of prior*likelihood over all configurations; normalizes the best score into a probability.
+            double norm = 0.0;
+
+            for (Genotype momGenotype : possibleMomGenotypes) {
+                for (Genotype dadGenotype : possibleDadGenotypes) {
+                    for (Genotype childGenotype : possibleChildGenotypes) {
+                        // 12 = number of the 27 configurations that isMendelianViolation() rejects.
+                        double prior = isMendelianViolation(refAllele, altAllele, momGenotype, dadGenotype, childGenotype) ? MENDELIAN_VIOLATION_PRIOR : 1.0 - 12*MENDELIAN_VIOLATION_PRIOR;
+                        double configurationLikelihood = computeTransmissionLikelihoodOfGenotypeConfiguration(momGenotype, dadGenotype, childGenotype);
+                        norm += prior*configurationLikelihood;
+
+                        if (prior*configurationLikelihood > bestPrior*bestConfigurationLikelihood) {
+                            bestConfigurationLikelihood = configurationLikelihood;
+                            bestPrior = prior;
+                            bestMomGenotype = momGenotype;
+                            bestDadGenotype = dadGenotype;
+                            bestChildGenotype = childGenotype;
+                        }
+                    }
+                }
+            }
+
+            if (isMendelianViolation(refAllele, altAllele, bestMomGenotype, bestDadGenotype, bestChildGenotype)) {
+                filters.add(MENDELIAN_VIOLATION_FILTER_NAME);
+            } else if (bestMomGenotype.isHet() && bestDadGenotype.isHet() && bestChildGenotype.isHet()) {
+                filters.add(AMBIGUOUS_ALLELE_ORIGIN_FILTER_NAME);
+            } else {
+                finalGenotypes = getPhasedGenotypes(bestMomGenotype, bestDadGenotype, bestChildGenotype);
+
+                // If every configuration scored zero, norm is zero; keep the 0.0 default instead of writing NaN.
+                if (norm > 0.0) {
+                    attributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, bestPrior*bestConfigurationLikelihood / norm);
+                }
+            }
+        }
+
+        // The stop coordinate must be the record's own end; reusing the start is wrong for any multi-base record.
+        return new VariantContext(SOURCE_NAME, vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), finalGenotypes, vc.getNegLog10PError(), noFilters ? vc.getFilters() : filters, attributes);
+    }
+
+    /**
+     * For each variant in the file, determine the phasing for the child and replace the child's genotype with the trio's genotype
+     *
+     * @param tracker  the reference meta-data tracker
+     * @param ref      the reference context
+     * @param context  the alignment context
+     * @return null
+     */
+    @Override
+    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        if (tracker != null) {
+            Collection<VariantContext> vcs = tracker.getVariantContexts(ref, ROD_NAME, null, context.getLocation(), true, true);
+
+            for (VariantContext vc : vcs) {
+                vcfWriter.add(phaseTrioGenotypes(vc), ref.getBase());
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Provide an initial value for reduce computations.
+     *
+     * @return Initial value of reduce (always null; this walker accumulates nothing).
+     */
+    @Override
+    public Integer reduceInit() {
+        return null;
+    }
+
+    /**
+     * Reduces a single map with the accumulator provided as the ReduceType.
+     *
+     * @param value  result of the map.
+     * @param sum    accumulator for the reduce.
+     * @return accumulator with result of the map taken into account (always null here).
+     */
+    @Override
+    public Integer reduce(Integer value, Integer sum) {
+        return null;
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index af24035c8..9702fd18c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -23,27 +23,28 @@ */ package org.broadinstitute.sting.gatk.walkers.phasing; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.DisjointSet; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.HasGenomeLocation; +import
org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.codecs.vcf.SortingVCFWriter; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.*; import java.util.*; @@ -58,7 +59,7 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr @Requires(value = {DataSource.READS, DataSource.REFERENCE}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class)) @By(DataSource.READS) -@ReadFilters({ZeroMappingQualityReadFilter.class}) +@ReadFilters({MappingQualityZeroReadFilter.class}) // Filter out all reads with zero mapping quality public class ReadBackedPhasingWalker extends RodWalker { @@ -242,7 +243,7 @@ public class ReadBackedPhasingWalker extends RodWalker KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet(Arrays.asList("PQ")); + private static final Set KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet(Arrays.asList(PQ_KEY)); private VariantContext reduceVCToSamples(VariantContext vc, List samplesToPhase) { // for ( String sample : samplesToPhase ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/SNPallelePair.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/SNPallelePair.java index db1f888a1..153c4a23f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/SNPallelePair.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/SNPallelePair.java @@ -23,10 +23,10 @@ */ package org.broadinstitute.sting.gatk.walkers.phasing; -import 
org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; public class SNPallelePair extends AllelePair { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java index f82e48abd..2851ace0d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java @@ -24,9 +24,9 @@ package org.broadinstitute.sting.gatk.walkers.phasing; import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; public class WriteVCF { public static void writeVCF(VariantContext vc, VCFWriter writer, Logger logger) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java index feb5f62af..2bdd4558f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java @@ -6,14 +6,12 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; import 
org.broadinstitute.sting.gatk.walkers.RefWalker; -import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; -import java.util.List; import java.io.PrintStream; +import java.util.List; /** * Counts the number of contiguous regions the walker traverses over. Slower than it needs to be, but diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java index ef6ff04f2..0d68c8493 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.walkers.qc; -import net.sf.samtools.SAMFileWriter; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java index cece04fcf..df89efe6d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java @@ -25,14 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.walkers.ReadPairWalker; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; -import org.broadinstitute.sting.commandline.Output; -import net.sf.samtools.SAMRecord; +import java.io.PrintStream; import java.util.Collection; import java.util.List; -import java.io.PrintStream; /** * Counts the number of read pairs encountered in a file sorted in diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java index 74f63aa2f..87c0409b9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.qc; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.walkers.Requires; /** * Walks over the input data set, calculating the number of reads seen for diagnostic purposes. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodByRefWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodByRefWalker.java index a4e80138f..d1545f159 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodByRefWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodByRefWalker.java @@ -25,13 +25,13 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.commandline.Argument; /** * Prints out counts of the number of reference ordered data 
objects are diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java index 92867e1a9..8a03dea44 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java @@ -27,23 +27,24 @@ package org.broadinstitute.sting.gatk.walkers.qc; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; +import java.io.PrintStream; import java.util.ArrayList; -import java.util.List; import java.util.Collection; import java.util.LinkedList; -import java.io.PrintStream; +import java.util.List; /** * Prints out counts of the number of reference ordered data objects are diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java index 5ae35416a..b5f5442cd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java @@ -25,22 +25,22 @@ package org.broadinstitute.sting.gatk.walkers.qc; -import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.collections.PrimitivePair; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.utils.collections.PrimitivePair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import java.util.*; import java.io.*; +import java.util.*; /** * Created by IntelliJ IDEA. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/PrintLocusContextWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/PrintLocusContextWalker.java index 39b69ba29..d3b992cb5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/PrintLocusContextWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/PrintLocusContextWalker.java @@ -1,16 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.TreeReducible; -import java.util.List; -import java.util.Arrays; import java.io.PrintStream; - -import net.sf.samtools.SAMRecord; +import java.util.Arrays; +import java.util.List; /** * At each locus in the input data set, prints the reference base, genomic location, and diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStatsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStatsWalker.java index 2f1773d01..908e389a8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStatsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStatsWalker.java @@ -22,21 +22,24 @@ package org.broadinstitute.sting.gatk.walkers.qc; -import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import 
org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; -import net.sf.samtools.*; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import java.util.*; -import java.io.*; +import java.io.PrintStream; +import java.util.Arrays; /** * User: depristo diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java index 6ad0340a4..fa1bb4d55 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java @@ -1,16 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import net.sf.samtools.SAMFileWriter; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; 
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMFileWriter; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import java.util.List; import java.util.ArrayList; +import java.util.List; /* diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java index 9cb715507..170630b77 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.walkers.qc; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -8,8 +7,11 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.gatk.walkers.Reference; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.*; import java.math.BigInteger; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java index bc68be592..e1e6c4b69 100755 --- 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java @@ -25,20 +25,20 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.features.sampileup.SAMPileupFeature; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import java.util.Arrays; import java.io.PrintStream; +import java.util.Arrays; /** * At every locus in the input set, compares the pileup data (reference base, aligned base from diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesGatherer.java index 568e1b638..fc6b3daee 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesGatherer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesGatherer.java @@ -7,7 +7,6 @@ import org.broadinstitute.sting.utils.text.XReadLines; import java.io.File; import java.io.FileNotFoundException; import java.io.PrintStream; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.regex.Pattern; diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index ee504b6e7..8c6539f8d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -27,23 +27,25 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import org.broad.tribble.bed.BEDCodec; import org.broad.tribble.dbsnp.DbSNPCodec; -import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Gather; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec; +import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.utils.collections.NestedHashMap; 
-import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -75,7 +77,7 @@ import java.util.Map; @BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN) @By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file -@ReadFilters( {ZeroMappingQualityReadFilter.class} ) // Filter out all reads with zero mapping quality +@ReadFilters( {MappingQualityZeroReadFilter.class, MappingQualityUnavailableReadFilter.class} ) // Filter out all reads with zero or unavailable mapping quality @Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta @PartitionBy(PartitionType.LOCUS) public class CountCovariatesWalker extends LocusWalker implements TreeReducible { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java index 64e0864c0..945d02837 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import net.sf.samtools.SAMRecord; - import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java index 0de6897d0..a7717161a 100755 --- 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java @@ -1,11 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; -import java.util.HashMap; - import net.sf.samtools.SAMRecord; - import org.broadinstitute.sting.utils.BaseUtils; +import java.util.HashMap; + /* * Copyright (c) 2009 The Broad Institute * diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index fdbeb6a31..e6d0b306c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -25,19 +25,21 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMUtils; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.collections.NestedHashMap; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import java.util.*; - -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMReadGroupRecord; -import net.sf.samtools.SAMUtils; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; /** * Created by IntelliJ IDEA. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java index 7ba441ccc..f04989fa5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.QualityUtils; -import java.util.*; +import java.util.List; /* * Copyright (c) 2010 The Broad Institute diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index 0eaa1245e..0277fda0d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -25,31 +25,33 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; -import java.io.File; -import java.io.FileNotFoundException; -import java.util.*; -import java.util.regex.Pattern; - import net.sf.samtools.*; import net.sf.samtools.util.SequenceUtil; - +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.classloader.PluginManager; -import 
org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.text.TextFormattingUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.collections.NestedHashMap; +import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.text.TextFormattingUtils; +import org.broadinstitute.sting.utils.text.XReadLines; + +import java.io.File; +import java.io.FileNotFoundException; +import java.util.ArrayList; +import java.util.List; +import java.util.MissingResourceException; +import java.util.ResourceBundle; +import java.util.regex.Pattern; /** * This walker is designed to work as the second pass in a two-pass processing step, doing a by-read traversal. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/CreateSequenomMask.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/CreateSequenomMask.java deleted file mode 100755 index c1c17bda5..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/CreateSequenomMask.java +++ /dev/null @@ -1,50 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.sequenom; - -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.utils.GenomeLoc; - -import java.io.PrintStream; - -/** - * Create a mask for use with the PickSequenomProbes walker. - */ -public class CreateSequenomMask extends RodWalker { - @Output - PrintStream out; - - public void initialize() {} - - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( tracker == null ) - return 0; - - int result = 0; - for ( VariantContext vc : tracker.getAllVariantContexts(ref) ) { - if ( vc.isSNP() ) { - GenomeLoc loc = context.getLocation(); - out.println(loc.getContig() + "\t" + (loc.getStart()-1) + "\t" + loc.getStop()); - result = 1; - break; - } - } - - return result; - } - - public Integer reduceInit() { - return 0; - } - - public Integer reduce(Integer value, Integer sum) { - return value + sum; - } - - public void onTraversalDone(Integer sum) { - logger.info("Found " + sum + " masking sites."); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java deleted file mode 100755 index fde233b5d..000000000 
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ - -package org.broadinstitute.sting.gatk.walkers.sequenom; - -import net.sf.samtools.util.CloseableIterator; -import org.broad.tribble.bed.BEDCodec; -import org.broad.tribble.dbsnp.DbSNPCodec; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; -import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; -import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; - -import java.io.File; -import java.util.*; -import java.io.PrintStream; - - -/** - * Generates Sequenom probe information given a single variant track. Emitted is the variant - * along with the 200 reference bases on each side of the variant. 
- */ -@WalkerName("PickSequenomProbes") -@Requires(value={DataSource.REFERENCE}) -@Reference(window=@Window(start=-200,stop=200)) -public class PickSequenomProbes extends RodWalker { - @Output - PrintStream out; - - @Argument(required=false, shortName="snp_mask", doc="positions to be masked with N's") - protected String SNP_MASK = null; - @Argument(required=false, shortName="project_id",doc="If specified, all probenames will be prepended with 'project_id|'") - String project_id = null; - @Argument(required = false, shortName="omitWindow", doc = "If specified, the window appender will be omitted from the design files (e.g. \"_chr:start-stop\")") - boolean omitWindow = false; - @Argument(required = false, fullName="usePlinkRODNamingConvention", shortName="nameConvention",doc="Use the naming convention defined in PLINKROD") - boolean useNamingConvention = false; - @Argument(required = false, fullName="noMaskWindow",shortName="nmw",doc="Do not mask bases within X bases of an event when designing probes") - int noMaskWindow = 0; - @Argument(required = false, shortName="counter", doc = "If specified, unique count id (ordinal number) is added to the end of each assay name") - boolean addCounter = false; - - private byte [] maskFlags = new byte[401]; - - private LocationAwareSeekableRODIterator snpMaskIterator=null; - - private GenomeLoc positionOfLastVariant = null; - - private int cnt = 0; - private int discarded = 0; - - VariantCollection VCs ; // will keep a set of distinct variants at a given site - private List processedVariantsInScope = new LinkedList(); - - public void initialize() { - if ( SNP_MASK != null ) { - logger.info("Loading SNP mask... 
"); - ReferenceOrderedData snp_mask; - //if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) { - RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),getToolkit().getArguments().unsafe); - RMDTrack track = builder.createInstanceOfTrack(BEDCodec.class, new File(SNP_MASK)); - snpMaskIterator = new SeekableRODIterator(track.getHeader(), - track.getSequenceDictionary(), - getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), - getToolkit().getGenomeLocParser(), - track.getIterator()); - //} else { - // // TODO: fix me when Plink is back - // throw new IllegalArgumentException("We currently do not support other snp_mask tracks (like Plink)"); - //} - - } - VCs = new VariantCollection(); - } - - - public String map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( tracker == null ) - return ""; - - logger.debug("Probing " + ref.getLocus() + " " + ref.getWindow()); - - VCs.clear(); - VCs.addAll( tracker.getAllVariantContexts(ref), ref.getLocus() ); - - discarded += VCs.discarded(); - - if ( VCs.size() == 0 ) { - logger.debug(" Context empty"); - return ""; - } - - if ( VCs.size() > 1 ) { - logger.debug(" "+VCs.size()+ " variants at the locus"); - } - -// System.out.print("At locus "+ref.getLocus()+": "); -// for ( VariantContext vc : VCs ) { -// System.out.println(vc.toString()); -// } - - // little optimization: since we may have few events at the current site on the reference, - // we are going to make sure we compute the mask and ref bases only once for each location and only if we need to - boolean haveMaskForWindow = false; - boolean haveBasesForWindow = false; - String leading_bases = null; - String trailing_bases = null; - - StringBuilder assaysForLocus = new StringBuilder(""); // all assays for current locus will be collected here (will be multi-line if multiple events are assayed) - - // get 
all variant contexts!!!! - for ( VariantContext vc : VCs ) { - - // we can only deal with biallelic sites for now - if ( !vc.isBiallelic() ) { - logger.debug(" Not biallelic; skipped"); - continue; - } - - // we don't want to see the same multi-base event (deletion, DNP etc) multiple times. - // All the vcs we are currently seeing are clearly on the same contig as the current reference - // poisiton (or we would not see them at all!). All we need to check is if the vc starts at the - // current reference position (i.e. it is the first time we see it) or not (i.e. we saw it already). - if ( ref.getLocus().getStart() != vc.getStart() ) - continue; - - if ( ! haveMaskForWindow ) { - String contig = context.getLocation().getContig(); - int offset = context.getLocation().getStart(); - int true_offset = offset - 200; - - // we have variant; let's load all the snps falling into the current window and prepare the mask array. - // we need to do it only once per window, regardless of how many vcs we may have at this location! - if ( snpMaskIterator != null ) { - // clear the mask - for ( int i = 0 ; i < 401; i++ ) - maskFlags[i] = 0; - - RODRecordList snpList = snpMaskIterator.seekForward(getToolkit().getGenomeLocParser().createGenomeLoc(contig,offset-200,offset+200)); - if ( snpList != null && snpList.size() != 0 ) { - Iterator snpsInWindow = snpList.iterator(); - int i = 0; - while ( snpsInWindow.hasNext() ) { - GenomeLoc snp = snpsInWindow.next().getLocation(); - // we don't really want to mask out multi-base indels - if ( snp.size() > 1 ) - continue; - logger.debug(" SNP at "+snp.getStart()); - int offsetInWindow = (int)(snp.getStart() - true_offset); - maskFlags[offsetInWindow] = 1; - } - } - } - haveMaskForWindow = true; // if we use masking, we will probably need to recompute the window... - } - - if ( ! 
haveBasesForWindow ) { - byte[] context_bases = ref.getBases(); - for (int i = 0; i < 401; i++) { - if ( maskFlags[i] == 1 && ( i < 200 - noMaskWindow || i > 200 + getNoMaskWindowRightEnd(vc,noMaskWindow) ) ) { - context_bases[i] = 'N'; - } - } - leading_bases = new String(Arrays.copyOfRange(context_bases, 0, 200)); - trailing_bases = new String(Arrays.copyOfRange(context_bases, 201, 401)); - // masked bases are not gonna change for the current window, unless we use windowed masking; - // in the latter case the bases (N's) will depend on the event we are currently looking at, - // so we better recompute.. - if ( noMaskWindow == 0 ) haveBasesForWindow = true; - } - - - // below, build single assay line for the current VC: - - String assay_sequence; - if ( vc.isSNP() ) - assay_sequence = leading_bases + "[" + (char)ref.getBase() + "/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases; - else if ( vc.isMNP() ) - assay_sequence = leading_bases + "[" + new String(vc.getReference().getBases()) + "/" + new String(vc.getAlternateAllele(0).getBases())+"]"+trailing_bases.substring(vc.getReference().length()-1); - else if ( vc.isInsertion() ) - assay_sequence = leading_bases + (char)ref.getBase() + "[-/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases; - else if ( vc.isDeletion() ) - assay_sequence = leading_bases + (char)ref.getBase() + "[" + new String(vc.getReference().getBases()) + "/-]" + trailing_bases.substring(vc.getReference().length()); - else - continue; - - StringBuilder assay_id = new StringBuilder(); - if ( project_id != null ) { - assay_id.append(project_id); - assay_id.append('|'); - } - if ( useNamingConvention ) { - assay_id.append('c'); - assay_id.append(context.getLocation().toString().replace(":","_p")); - } else { - assay_id.append(context.getLocation().toString().replace(':','_')); - } - if ( vc.isInsertion() ) assay_id.append("_gI"); - else if ( vc.isDeletion()) assay_id.append("_gD"); - - if ( ! 
omitWindow ) { - assay_id.append("_"); - assay_id.append(ref.getWindow().toString().replace(':', '_')); - } - ++cnt; - if ( addCounter ) assay_id.append("_"+cnt); - - assaysForLocus.append(assay_id); - assaysForLocus.append('\t'); - assaysForLocus.append(assay_sequence); - assaysForLocus.append('\n'); - } - return assaysForLocus.toString(); - } - - public String reduceInit() { - return ""; - } - - public String reduce(String data, String sum) { - out.print(data); - return ""; - } - - private int getNoMaskWindowRightEnd(VariantContext vc, int window) { - if ( window == 0 ) { - return 0; - } - - if ( vc.isInsertion() ) { - return window-1; - } - - int max = 0; - for (Allele a : vc.getAlleles() ) { - if ( vc.isInsertion() ) { - logger.debug("Getting length of allele "+a.toString()+" it is "+a.getBases().length+" (ref allele is "+vc.getReference().toString()+")"); - } - if ( a.getBases().length > max ) { - max = a.getBases().length; - } - } - return max+window-1; - } - - public void onTraversalDone(String sum) { - logger.info(cnt+" assay seqences generated"); - logger.info(discarded+" events were found to be duplicates and discarded (no redundant assays generated)"); - } - - static class EventComparator implements Comparator { - - public int compare(VariantContext o1, VariantContext o2) { - // if variants start at different positions, they are different. All we actually - // care about is detecting the variants that are strictly the same; the actual ordering of distinct variants - // (which one we deem less and which one greater) is utterly unimportant. We just need to be consistent. 
- if ( o1.getStart() < o2.getStart() ) return -1; - if ( o1.getStart() > o2.getStart() ) return 1; - - if ( o1.getType() != o2.getType() ) return o1.getType().compareTo(o2.getType()); - - int refComp = o1.getReference().compareTo(o2.getReference()); - if ( refComp != 0 ) return refComp; - - return o1.getAlternateAllele(0).compareTo(o2.getAlternateAllele(0)); - - } - } - - static class VariantCollection implements Iterable { - TreeSet variants = new TreeSet(new EventComparator()); - int discarded = 0; - - public void add(VariantContext vc, GenomeLoc current) { - if ( vc.getStart() != current.getStart() ) return; // we add only variants that start at current locus - // note that we do not check chr here, since the way this class is used, the mathod is always called with - // VCs coming from the same metadata tracker, so they simply can not be on different chrs! - if ( !vc.isBiallelic() ) { - logger.info(" Non-biallelic variant encountered; skipped"); - return; - } - if ( variants.add(vc) == false ) discarded++; - } - - public void addAll(Collection c, GenomeLoc current) { - for ( VariantContext vc : c ) add(vc,current); - } - - public void clear() { - variants.clear(); - discarded = 0; - } - - public int discarded() { return discarded; } - - public int size() { return variants.size(); } - - public Iterator iterator() { return variants.iterator(); } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java new file mode 100755 index 000000000..cb03d4c61 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java @@ -0,0 +1,414 @@ +package org.broadinstitute.sting.gatk.walkers.validation; + +import net.sf.picard.reference.ReferenceSequenceFileFactory; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMSequenceDictionary; +import 
org.broadinstitute.sting.alignment.Alignment; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.bwa.BWTFiles; +import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.RMD; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.io.File; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 6/13/11 + * Time: 2:12 PM + * To change this template use File | Settings | File Templates. 
+ */ +@Requires(value={DataSource.REFERENCE}, referenceMetaData={@RMD(name="ProbeIntervals",type=TableFeature.class), +@RMD(name="ValidateAlleles",type=VariantContext.class),@RMD(name="MaskAlleles",type=VariantContext.class)}) +public class ValidationAmplicons extends RodWalker { + + @Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false) + boolean lowerCaseSNPs = false; + + @Argument(doc="Size of the virtual primer to use for lower-casing regions with low specificity",fullName="virtualPrimerSize",required=false) + int virtualPrimerSize = 20; + + @Argument(doc="Monomorphic sites in the mask file will be treated as filtered",fullName="filterMonomorphic",required=false) + boolean filterMonomorphic = false; + + @Argument(doc="Do not use BWA, lower-case repeats only",fullName="doNotUseBWA",required=false) + boolean doNotUseBWA = false; + + GenomeLoc prevInterval; + GenomeLoc allelePos; + String probeName; + StringBuilder sequence; + StringBuilder rawSequence; + boolean sequenceInvalid; + List invReason; + int indelCounter; + + @Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " + + "generated by bwa index -d bwtsw. If unspecified, will default " + + "to the reference specified via the -R argument.",required=false) + private File targetReferenceFile = null; + + @Output + PrintStream out; + + BWACAligner aligner = null; + + private SAMFileHeader header = null; + + public void initialize() { + if ( ! 
doNotUseBWA ) { + if(targetReferenceFile == null) + targetReferenceFile = getToolkit().getArguments().referenceFile; + BWTFiles bwtFiles = new BWTFiles(targetReferenceFile.getAbsolutePath()); + BWAConfiguration configuration = new BWAConfiguration(); + aligner = new BWACAligner(bwtFiles,configuration); + header = new SAMFileHeader(); + SAMSequenceDictionary referenceDictionary = + ReferenceSequenceFileFactory.getReferenceSequenceFile(targetReferenceFile).getSequenceDictionary(); + header.setSequenceDictionary(referenceDictionary); + header.setSortOrder(SAMFileHeader.SortOrder.unsorted); + } + } + + public Integer reduceInit() { + prevInterval = null; + sequence = null; + rawSequence = null; + sequenceInvalid = false; + probeName = null; + invReason = null; + indelCounter = 0; + return 0; + } + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( tracker == null || ! tracker.hasROD("ProbeIntervals")) { return null; } + + GenomeLoc interval = ((TableFeature) tracker.getReferenceMetaData("ProbeIntervals",true).get(0)).getLocation(); + //logger.debug(interval); + if ( prevInterval == null || ! 
interval.equals(prevInterval) ) { + // we're in a new interval, we should: + // 1) print out previous data + // 2) reset internal data + // 3) instantiate traversal of this interval + + // step 1: + if ( prevInterval != null ) { + // there was a previous interval + validateSequence(); // ensure the sequence in the region is valid + // next line removed in favor of the one after + if ( doNotUseBWA ) { + lowerRepeats(); // change repeats in sequence to lower case + } else { + lowerNonUniqueSegments(); + } + print(); // print out the fasta sequence + } + + // step 2: + prevInterval = interval; + allelePos = null; + sequence = new StringBuilder(); + rawSequence = new StringBuilder(); + sequenceInvalid = false; + invReason = new LinkedList(); + logger.debug(Utils.join("\t",((TableFeature) tracker.getReferenceMetaData("ProbeIntervals",true).get(0)).getAllValues())); + probeName = ((TableFeature) tracker.getReferenceMetaData("ProbeIntervals",true).get(0)).getValue(1); + indelCounter = 0; + } + + // step 3 (or 1 if not new): + // build up the sequence + + VariantContext mask = tracker.getVariantContext(ref,"MaskAlleles",ref.getLocus()); + VariantContext validate = tracker.getVariantContext(ref,"ValidateAlleles",ref.getLocus()); + + if ( mask == null && validate == null ) { + if ( indelCounter > 0 ) { + sequence.append('N'); + indelCounter--; + } else { + sequence.append(Character.toUpperCase((char) ref.getBase())); + } + rawSequence.append(Character.toUpperCase((char) ref.getBase())); + } else if ( validate != null ) { + // doesn't matter if there's a mask here too -- this is what we want to validate + if ( validate.isFiltered() ) { + logger.warn("You are attempting to validate a filtered site. Why are you attempting to validate a filtered site? 
You should not be attempting to validate a filtered site."); + sequenceInvalid = true; + invReason.add("SITE_IS_FILTERED"); + } + if ( validate.isIndel() ) { + sequence.append(Character.toUpperCase((char)ref.getBase())); + rawSequence.append(Character.toUpperCase((char)ref.getBase())); + } + sequence.append('['); + sequence.append(validate.getAlternateAllele(0).toString()); + sequence.append('/'); + sequence.append(validate.getReference().toString()); + sequence.append(']'); + // do this to the raw sequence to -- the indeces will line up that way + rawSequence.append('['); + rawSequence.append(validate.getAlternateAllele(0).getBaseString()); + rawSequence.append('/'); + rawSequence.append(validate.getReference().getBaseString()); + rawSequence.append(']'); + allelePos = ref.getLocus(); + if ( indelCounter > 0 ) { + logger.warn("An indel event overlaps the event to be validated. This completely invalidates the probe."); + sequenceInvalid = true; + invReason.add("INDEL_OVERLAPS_VALIDATION_SITE"); + if ( validate.isSNP() ) { + indelCounter--; + } else { + indelCounter -= validate.getEnd()-validate.getStart(); + } + } + } else /* (mask != null && validate == null ) */ { + if ( ! mask.isSNP() && ! mask.isFiltered() && ( ! filterMonomorphic || ! mask.isMonomorphic() )) { + logger.warn("Mask Variant Context on the following warning line is not a SNP. Currently we can only mask out SNPs. This probe will not be designed."); + logger.warn(String.format("%s:%d-%d\t%s\t%s",mask.getChr(),mask.getStart(),mask.getEnd(),mask.isInsertion() ? "INS" : "DEL", Utils.join(",",mask.getAlleles()))); + sequenceInvalid = true; + invReason.add(mask.isInsertion() ? "INSERTION" : "DELETION"); + // note: indelCounter could be > 0 (could have small deletion within larger one). This always selects + // the larger event. + int indelCounterNew = mask.isInsertion() ? 
2 : mask.getEnd()-mask.getStart(); + if ( indelCounterNew > indelCounter ) { + indelCounter = indelCounterNew; + } + //sequence.append((char) ref.getBase()); + //sequence.append(mask.isInsertion() ? 'I' : 'D'); + sequence.append("N"); + indelCounter--; + rawSequence.append(Character.toUpperCase((char) ref.getBase())); + } else if ( indelCounter > 0 ) { + // previous section resets the indel counter. Doesn't matter if there's a SNP underlying this, we just want to append an 'N' and move on. + sequence.append('N'); + indelCounter--; + rawSequence.append(Character.toUpperCase((char)ref.getBase())); + } else if ( ! mask.isFiltered() && ( ! filterMonomorphic || ! mask.isMonomorphic() )){ + logger.debug("SNP in mask found at " + ref.getLocus().toString()); + + if ( lowerCaseSNPs ) { + sequence.append(Character.toLowerCase((char) ref.getBase())); + } else { + sequence.append((char) BaseUtils.N); + } + + rawSequence.append(Character.toUpperCase((char) ref.getBase())); + } else if ( mask.isSNP() ) { + logger.debug("SNP in mask found at "+ref.getLocus().toString()+" but was either filtered or monomorphic"); + sequence.append((Character.toUpperCase((char) ref.getBase()))); + rawSequence.append(Character.toUpperCase((char) ref.getBase())); + } + } + + return 1; + } + + public Integer reduce(Integer i, Integer j) { + return 0; + } + + public void onTraversalDone(Integer fin ) { + validateSequence(); + if ( doNotUseBWA ) { + lowerRepeats(); + } else { + lowerNonUniqueSegments(); + aligner.close(); + } + print(); + } + + public void validateSequence() { + // code for ensuring primer sequence is valid goes here + + // validate that there are no masked sites near to the variant site + String seq = sequence.toString(); + int start = seq.indexOf('[') - 4; + int end = seq.indexOf(']') + 5; + + if ( start < 50 ) { + logger.warn("There is not enough sequence before the start position of the probed allele for adequate probe design. 
This site will not be designed."); + sequenceInvalid = true; + invReason.add("START_TOO_CLOSE"); + } else if ( end > seq.length() - 50 ) { + logger.warn("There is not enough sequence after the end position of the probed allele fore adequate probe design. This site will not be desinged. "); + sequenceInvalid = true; + invReason.add("END_TOO_CLOSE"); + } else { + boolean maskNearVariantSite = false; + for ( int i = start; i < end; i++ ) { + maskNearVariantSite |= (seq.charAt(i) == 'N' || Character.isLowerCase(seq.charAt(i))); + } + + if ( maskNearVariantSite ) { + logger.warn("There is one (or more) mask variants within 4 basepair of the variant given for validation. This site will not be designed."); + sequenceInvalid = true; + invReason.add("VARIANT_TOO_NEAR_PROBE"); + } + } + + if ( seq.indexOf("[") != seq.lastIndexOf("[") ) { + logger.warn("Multiple probe variants were found within this interval. Please fix the definitions of the intervals so they do not overlap."); + sequenceInvalid = true; + invReason.add("MULTIPLE_PROBES"); + } + + if ( seq.indexOf("[") < 0 ) { + logger.warn("No variants in region were found. This site will not be designed."); + sequenceInvalid = true; + invReason.add("NO_VARIANTS_FOUND"); + } + } + + public void lowerNonUniqueSegments() { + if ( ! 
invReason.contains("MULTIPLE_PROBES") && !invReason.contains("NO_VARIANTS_FOUND") ) { + String leftFlank = rawSequence.toString().split("\\[")[0]; + String rightFlank = rawSequence.toString().split("\\]")[1]; + List badLeft = getBadIndeces(leftFlank); + List badRight = getBadIndeces(rightFlank); + // propagate lowercases into the printed sequence + for ( int idx = 0; idx < leftFlank.length(); idx++ ) { + while ( badLeft.size() > 0 && idx > badLeft.get(0) + virtualPrimerSize ) { + badLeft.remove(0); + } + + if ( badLeft.size() > 0 && badLeft.get(0) <= idx && idx <= badLeft.get(0) + virtualPrimerSize ) { + sequence.setCharAt(idx,Character.toLowerCase(sequence.charAt(idx))); + } + } + + int offset = 1 + rawSequence.indexOf("]"); + for ( int i= 0; i < rightFlank.length(); i++ ) { + int idx = i + offset; + while ( badRight.size() > 0 && i > badRight.get(0) + virtualPrimerSize ) { + //logger.debug("Removing "+badRight.get(0)+" because "+(badRight.get(0)+virtualPrimerSize)+" < "+i); + badRight.remove(0); + } + + if ( badRight.size() > 0 && badRight.get(0) <= i && i <= badRight.get(0) + virtualPrimerSize ) { + //logger.debug("Resetting character on right flank: "+idx+" "+i+" offset="+offset); + //logger.debug(sequence); + sequence.setCharAt(idx,Character.toLowerCase(sequence.charAt(idx))); + //logger.debug(sequence); + } + } + } + } + + private List getBadIndeces(String sequence) { + + List badLeftIndeces = new ArrayList(sequence.length()-virtualPrimerSize); + for ( int i = 0; i < sequence.length()-virtualPrimerSize ; i++ ) { + String toAlign = sequence.substring(i,i+virtualPrimerSize); + Iterable allAlignments = aligner.getAllAlignments(toAlign.getBytes()); + for ( Alignment[] alignments : allAlignments ) { + if ( alignments.length > 1 ) { + if ( alignments[0].getMappingQuality() == 0 ) { + // this region is bad -- multiple MQ alignments + badLeftIndeces.add(i); + } + } + } + } + + return badLeftIndeces; + } + + + /** + * Note- this is an old function - a proxy for 
identifying regions with low specificity to genome. Saved in case the alignment-based version + * turns out to be worse than just doing a simple repeat-lowering method. + */ + public void lowerRepeats() { + // convert to lower case low-complexity repeats, e.g. tandem k-mers + final int K_LIM = 8; + String seq = sequence.toString(); + StringBuilder newSequence = new StringBuilder(); + int start_pos = 0; + while( start_pos < seq.length() ) { + boolean broke = false; + for ( int length = K_LIM; length > 1; length -- ) { + //logger.debug(String.format("start1: %d end1: %d start2: %d end2: %d str: %d",start_pos,start_pos+length,start_pos+length,start_pos+2*length,seq.length())); + if ( start_pos + 2*length> seq.length() ) { + continue; + } + if ( equalsIgnoreNs(seq.substring(start_pos,start_pos+length),seq.substring(start_pos+length,start_pos+2*length)) ) { + newSequence.append(seq.substring(start_pos,start_pos+length).toLowerCase()); + newSequence.append(seq.substring(start_pos+length,start_pos+2*length).toLowerCase()); + start_pos += 2*length; + broke = true; + break; + } + } + + if ( ! 
broke ) { + newSequence.append(seq.substring(start_pos,start_pos+1)); + start_pos++; + } + + } + + if ( seq.indexOf("[") != seq.lastIndexOf("[") ) { + return; + } + + sequence = newSequence; + } + + public boolean equalsIgnoreNs(String one, String two) { + if ( one.length() != two.length() ) { return false; } + for ( int idx = 0; idx < one.length(); idx++ ) { + if ( Character.toUpperCase(one.charAt(idx)) != Character.toUpperCase(two.charAt(idx)) ) { + if ( Character.toUpperCase(one.charAt(idx)) != 'N' && Character.toUpperCase(two.charAt(idx)) != 'N' ) { + return false; + } + } + } + + //logger.debug(String.format("one: %s two: %s",one,two)); + + return true; + } + + public void print() { + String valid; + if ( sequenceInvalid ) { + valid = ""; + while ( invReason.size() > 0 ) { + String reason = invReason.get(0); + invReason.remove(reason); + int num = 1; + while ( invReason.contains(reason) ) { + num++; + invReason.remove(reason); + } + valid += String.format("%s=%d,",reason,num); + } + } else { + valid = "Valid"; + } + + String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D'); + out.printf(">%s %s %s%n%s%n", allelePos != null ? 
allelePos.toString() : "multiple", valid, probeName, seqIdentity); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 15d808ebe..fe3173506 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -1,14 +1,12 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; @@ -20,23 +18,21 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.*; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator; import 
org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import net.sf.picard.reference.FastaSequenceFile; -import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import net.sf.picard.reference.ReferenceSequence; -import java.io.FileNotFoundException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; +import java.io.FileNotFoundException; import java.io.PrintStream; import java.lang.reflect.Field; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompEvalGenotypes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompEvalGenotypes.java index 787dbe9af..925bff9c0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompEvalGenotypes.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompEvalGenotypes.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.variantcontext.Genotype; class NewCompEvalGenotypes { private GenomeLoc loc; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java index 76db330ed..255a54737 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java @@ -1,12 +1,12 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; /** * The Broad Institute diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java index c4277adc9..8c281b2f8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import 
org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; @Analysis(description = "Counts different classes of variants in the sample") public class CountVariants extends VariantEvaluator implements StandardEval { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java index 4b56cf130..bbd3f5f54 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java @@ -1,18 +1,18 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; +import 
org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java index 3d14dd0e5..a476a2680 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java @@ -1,20 +1,20 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.phasing.AllelePair; import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import 
org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationContext; import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.HashMap; import java.util.HashSet; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java index 5daf33a9f..77def0f30 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; /** * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java index eca6c5193..6e1b76acd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java @@ -1,14 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java index 48b06d532..d99196ecf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java @@ -1,15 +1,15 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import 
org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; import org.broadinstitute.sting.utils.IndelUtils; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java index 85e0b5889..a0cc393d9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java @@ -1,21 +1,15 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import 
org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -import java.util.Arrays; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; /** * Mendelian violation detection and counting diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java index 7d54d0df8..b209ee13d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java @@ -24,12 +24,12 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; @Analysis(name = 
"PrintMissingComp", description = "the overlap between eval and comp sites") public class PrintMissingComp extends VariantEvaluator { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SamplePreviousGenotypes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SamplePreviousGenotypes.java index 5f3e6b0fa..751f61a97 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SamplePreviousGenotypes.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SamplePreviousGenotypes.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.variantcontext.Genotype; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java index deed05508..d466645ea 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java @@ -1,18 +1,18 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.Degeneracy; import 
org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.Sample; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.ArrayList; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java index 89c67cfe9..ec43cbd55 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import 
org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java index 8811dc001..be957abd7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; @Analysis(description = "Ti/Tv Variant Evaluator") public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEval { diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java index 405f35635..9c331b577 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java @@ -1,17 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.Set; /** * The Broad Institute diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java index 6017ecca3..e29e7ed50 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java @@ -1,12 +1,12 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationContext; import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; public abstract class VariantEvaluator { public void initialize(VariantEvalWalker walker) {} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java index 4af14810b..b6ad55b18 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java @@ -25,16 +25,16 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import 
org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.ArrayList; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java index ff59c9e29..411493d4f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java @@ -1,12 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; -import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; @@ -43,9 +42,9 @@ public class AlleleCount extends VariantStratifier { if (eval != null) { int AC = -1; - if ( eval.hasAttribute("AC") ) + if ( eval.hasAttribute("AC") && eval.getAttribute("AC") instanceof Integer ) { AC = eval.getAttributeAsInt("AC"); - else if ( 
eval.isVariant() ) { + } else if ( eval.isVariant() ) { for (Allele allele : eval.getAlternateAlleles()) AC = Math.max(AC, eval.getChromosomeCount(allele)); } else diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java index 48b4ffa91..2ffc7716c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java index 9942ba8d6..c6975808f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import 
org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java index 1a9d31085..c14355035 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java index 15b1d41c1..3e8a6ed17 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import 
org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java index 65af6090c..155a66186 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java @@ -1,14 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.HashMap; -import java.util.Set; import java.util.HashSet; +import java.util.Set; public class Degeneracy extends VariantStratifier { private ArrayList states; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java index 7bd15a974..40f952fd2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import 
org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java index 8d8782ab7..3b7a419f2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java index fd724d6d1..c6c094f8e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import 
org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java index a7ccd3182..76efedbf4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java index 39d2e6b4d..a0973a088 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import 
org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Collection; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java index f909de4f3..a2a3eb3fb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java index 218cb23ca..2c4b8bc46 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java @@ -1,11 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import 
org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/tags/Analysis.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Analysis.java similarity index 80% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/tags/Analysis.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Analysis.java index 129d5a95d..2b37ce210 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/tags/Analysis.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Analysis.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval.tags; +package org.broadinstitute.sting.gatk.walkers.varianteval.util; import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java index c8d917040..db44e9e28 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java @@ -23,8 +23,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.lang.annotation.Annotation; diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/tags/DataPoint.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/DataPoint.java similarity index 77% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/tags/DataPoint.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/DataPoint.java index 3ba448049..396843252 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/tags/DataPoint.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/DataPoint.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval.tags; +package org.broadinstitute.sting.gatk.walkers.varianteval.util; import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java index 3208c26bb..8112ae97f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -9,6 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvalu import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; +import 
org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.HashMap; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java index 0281653af..38f7a7f40 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util; -import org.apache.commons.jexl2.*; +import org.apache.commons.jexl2.Expression; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; public class SortableJexlVCMatchExp extends VariantContextUtils.JexlVCMatchExp implements Comparable { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index b8e45e462..0a915db37 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -1,24 +1,21 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; -import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import 
org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.StandardEval; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.RequiredStratification; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.StandardStratification; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.lang.reflect.Field; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index 7957d35cd..403c67d3e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -25,23 +25,23 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import 
org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.io.FileNotFoundException; -import java.lang.Double; import java.util.*; /** @@ -55,7 +55,6 @@ import java.util.*; public class ApplyRecalibration extends RodWalker { - ///////////////////////////// // Inputs ///////////////////////////// @@ -86,6 +85,7 @@ public class ApplyRecalibration extends RodWalker { final private List tranches = new ArrayList(); final private Set inputNames = new HashSet(); final private NestedHashMap lodMap = new NestedHashMap(); + final private NestedHashMap annotationMap = new NestedHashMap(); final private Set ignoreInputFilterSet = new TreeSet(); //--------------------------------------------------------------------------------------------------------------- @@ -124,6 +124,7 @@ public class ApplyRecalibration extends RodWalker { final Set hInfo = new HashSet(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames)); hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.VQS_LOD_KEY, 1, VCFHeaderLineType.Float, "Log odds ratio of being a true variant versus being false under the trained gaussian mixture model")); + hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.CULPRIT_KEY, 1, VCFHeaderLineType.String, "The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out")); final TreeSet 
samples = new TreeSet(); samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames)); @@ -149,6 +150,7 @@ public class ApplyRecalibration extends RodWalker { for ( final String line : new XReadLines( RECAL_FILE ) ) { final String[] vals = line.split(","); lodMap.put( Double.parseDouble(vals[3]), vals[0], Integer.parseInt(vals[1]), Integer.parseInt(vals[2]) ); // value comes before the keys + annotationMap.put( vals[4], vals[0], Integer.parseInt(vals[1]), Integer.parseInt(vals[2]) ); // value comes before the keys } } catch ( FileNotFoundException e ) { throw new UserException.CouldNotReadInputFile(RECAL_FILE, e); @@ -174,11 +176,15 @@ public class ApplyRecalibration extends RodWalker { String filterString = null; final Map attrs = new HashMap(vc.getAttributes()); final Double lod = (Double) lodMap.get( vc.getChr(), vc.getStart(), vc.getEnd() ); + final String worstAnnotation = (String) annotationMap.get( vc.getChr(), vc.getStart(), vc.getEnd() ); if( lod == null ) { throw new UserException("Encountered input variant which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants. 
First seen at: " + vc ); } + // Annotate the new record with its VQSLOD and the worst performing annotation attrs.put(VariantRecalibrator.VQS_LOD_KEY, String.format("%.4f", lod)); + attrs.put(VariantRecalibrator.CULPRIT_KEY, worstAnnotation); + for( int i = tranches.size() - 1; i >= 0; i-- ) { final Tranche tranche = tranches.get(i); if( lod >= tranche.minVQSLod ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java index 41fea0896..17461de2f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import Jama.Matrix; +import cern.jet.random.Normal; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.MathUtils; @@ -214,6 +215,19 @@ public class GaussianMixtureModel { return MathUtils.log10sumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k)) } + public Double evaluateDatumInOneDimension( final VariantDatum datum, final int iii ) { + if(datum.isNull[iii]) { return null; } + + final Normal normal = new Normal(0.0, 1.0, null); + final double[] pVarInGaussianLog10 = new double[gaussians.size()]; + int gaussianIndex = 0; + for( final MultivariateGaussian gaussian : gaussians ) { + normal.setState( gaussian.mu[iii], gaussian.sigma.get(iii, iii) ); + pVarInGaussianLog10[gaussianIndex++] = gaussian.pMixtureLog10 + Math.log10( normal.pdf( datum.annotations[iii] ) ); + } + return MathUtils.log10sumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k)) + } + public double evaluateDatumMarginalized( final VariantDatum datum ) { int numSamples = 0; double sumPVarInGaussian = 0.0; diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java index 67132b133..6c1a7ddbc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java @@ -39,6 +39,7 @@ public class TrainingSet { public String name; public boolean isKnown = false; public boolean isTraining = false; + public boolean isAntiTraining = false; public boolean isTruth = false; public boolean isConsensus = false; public double prior = 0.0; @@ -47,17 +48,24 @@ public class TrainingSet { public TrainingSet( final String name, final Tags tags ) { this.name = name; + + // Parse the tags to decide which tracks have which properties if( tags != null ) { isKnown = tags.containsKey("known") && tags.getValue("known").equals("true"); isTraining = tags.containsKey("training") && tags.getValue("training").equals("true"); + isAntiTraining = tags.containsKey("bad") && tags.getValue("bad").equals("true"); isTruth = tags.containsKey("truth") && tags.getValue("truth").equals("true"); isConsensus = tags.containsKey("consensus") && tags.getValue("consensus").equals("true"); prior = ( tags.containsKey("prior") ? 
Double.parseDouble(tags.getValue("prior")) : prior ); } - if( !isConsensus ) { + + // Report back to the user which tracks were found and the properties that were detected + if( !isConsensus && !isAntiTraining ) { logger.info( String.format( "Found %s track: \tKnown = %s \tTraining = %s \tTruth = %s \tPrior = Q%.1f", this.name, isKnown, isTraining, isTruth, prior) ); - } else { + } else if( isConsensus ) { logger.info( String.format( "Found consensus track: %s", this.name) ); + } else { + logger.info( String.format( "Found bad sites training track: %s", this.name) ); } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java index 64fe36637..15424f0f7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java @@ -25,11 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; -import java.io.*; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.PrintStream; import java.util.*; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java index 5deb5d8c2..bc7252ec2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java @@ -26,10 +26,10 @@ 
package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.io.FileNotFoundException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index e1a076e76..67d54a408 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -34,9 +33,12 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.PrintStream; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; /** * Created by IntelliJ IDEA. 
@@ -82,19 +84,11 @@ public class VariantDataManager { } foundZeroVarianceAnnotation = foundZeroVarianceAnnotation || (theSTD < 1E-6); - if( annotationKeys.get(iii).toLowerCase().contains("ranksum") ) { // BUGBUG: to clean up - for( final VariantDatum datum : data ) { - if( datum.annotations[iii] > 0.0 ) { datum.annotations[iii] /= 3.0; } - } - } meanVector[iii] = theMean; varianceVector[iii] = theSTD; for( final VariantDatum datum : data ) { + // Transform each data point via: (x - mean) / standard deviation datum.annotations[iii] = ( datum.isNull[iii] ? GenomeAnalysisEngine.getRandomGenerator().nextGaussian() : ( datum.annotations[iii] - theMean ) / theSTD ); - // Each data point is now [ (x - mean) / standard deviation ] - if( annotationKeys.get(iii).toLowerCase().contains("ranksum") && datum.isNull[iii] && datum.annotations[iii] > 0.0 ) { - datum.annotations[iii] /= 3.0; - } } } if( foundZeroVarianceAnnotation ) { @@ -108,7 +102,6 @@ public class VariantDataManager { remove = remove || (Math.abs(val) > VRAC.STD_THRESHOLD); } datum.failingSTDThreshold = remove; - datum.usedForTraining = 0; } } @@ -142,38 +135,47 @@ public class VariantDataManager { for( final VariantDatum datum : data ) { if( datum.atTrainingSite && !datum.failingSTDThreshold && datum.originalQual > VRAC.QUAL_THRESHOLD ) { trainingData.add( datum ); - datum.usedForTraining = 1; } } logger.info( "Training with " + trainingData.size() + " variants after standard deviation thresholding." ); if( trainingData.size() < VRAC.MIN_NUM_BAD_VARIANTS ) { - logger.warn("WARNING: Training with very few variant sites! Please check the model reporting PDF to ensure the quality of the model is reliable."); + logger.warn( "WARNING: Training with very few variant sites! Please check the model reporting PDF to ensure the quality of the model is reliable." 
); } return trainingData; } public ExpandingArrayList selectWorstVariants( double bottomPercentage, final int minimumNumber ) { - Collections.sort( data ); + // The return value is the list of training variants final ExpandingArrayList trainingData = new ExpandingArrayList(); - final int numToAdd = Math.max( minimumNumber, Math.round((float)bottomPercentage * data.size()) ); - if( numToAdd > data.size() ) { - throw new UserException.BadInput("Error during negative model training. Minimum number of variants to use in training is larger than the whole call set. One can attempt to lower the --minNumBadVariants arugment but this is unsafe."); + + // First add to the training list all sites overlapping any bad sites training tracks + for( final VariantDatum datum : data ) { + if( datum.atAntiTrainingSite && !datum.failingSTDThreshold && !Double.isInfinite(datum.lod) ) { + trainingData.add( datum ); + } } - if( numToAdd == minimumNumber ) { - logger.warn("WARNING: Training with very few variant sites! Please check the model reporting PDF to ensure the quality of the model is reliable."); + final int numBadSitesAdded = trainingData.size(); + logger.info( "Found " + numBadSitesAdded + " variants overlapping bad sites training tracks." ); + + // Next sort the variants by the LOD coming from the positive model and add to the list the bottom X percent of variants + Collections.sort( data ); + final int numToAdd = Math.max( minimumNumber - trainingData.size(), Math.round((float)bottomPercentage * data.size()) ); + if( numToAdd > data.size() ) { + throw new UserException.BadInput( "Error during negative model training. Minimum number of variants to use in training is larger than the whole call set. One can attempt to lower the --minNumBadVariants arugment but this is unsafe." ); + } else if( numToAdd == minimumNumber - trainingData.size() ) { + logger.warn( "WARNING: Training with very few variant sites! 
Please check the model reporting PDF to ensure the quality of the model is reliable." ); bottomPercentage = ((float) numToAdd) / ((float) data.size()); } - int index = 0; - int numAdded = 0; + int index = 0, numAdded = 0; while( numAdded < numToAdd ) { final VariantDatum datum = data.get(index++); - if( !datum.failingSTDThreshold && !Double.isInfinite(datum.lod) ) { + if( !datum.atAntiTrainingSite && !datum.failingSTDThreshold && !Double.isInfinite(datum.lod) ) { + datum.atAntiTrainingSite = true; trainingData.add( datum ); - datum.usedForTraining = -1; numAdded++; } } - logger.info("Training with worst " + (float) bottomPercentage * 100.0f + "% of passing data --> " + trainingData.size() + " variants with LOD <= " + String.format("%.4f", data.get(index).lod) + "."); + logger.info( "Additionally training with worst " + String.format("%.3f", (float) bottomPercentage * 100.0f) + "% of passing data --> " + (trainingData.size() - numBadSitesAdded) + " variants with LOD <= " + String.format("%.4f", data.get(index).lod) + "." 
); return trainingData; } @@ -186,10 +188,11 @@ public class VariantDataManager { returnData.add(datum); } } - // add an extra 5% of points from bad training set, since that set is small but interesting + + // Add an extra 5% of points from bad training set, since that set is small but interesting for( int iii = 0; iii < Math.floor(0.05*numToAdd); iii++) { final VariantDatum datum = data.get(GenomeAnalysisEngine.getRandomGenerator().nextInt(data.size())); - if( datum.usedForTraining == -1 && !datum.failingSTDThreshold ) { returnData.add(datum); } + if( datum.atAntiTrainingSite && !datum.failingSTDThreshold ) { returnData.add(datum); } else { iii--; } } @@ -232,23 +235,15 @@ public class VariantDataManager { double value; try { - if( annotationKey.equalsIgnoreCase("QUAL") ) { - value = vc.getPhredScaledQual(); - } else if( annotationKey.equalsIgnoreCase("DP") ) { - value = Double.parseDouble( (String)vc.getAttribute( "DP" ) ) / Double.parseDouble( (String)vc.getAttribute( "AN" ) ); - } else { - value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) ); - if( Double.isInfinite(value) ) { value = Double.NaN; } - if( annotationKey.equalsIgnoreCase("InbreedingCoeff") && value > 0.05 ) { value = Double.NaN; } - if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM - value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble(); - } - if( annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } - if( annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.01) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } + value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) ); + if( Double.isInfinite(value) ) { value = Double.NaN; } + if( jitter && 
annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM + value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } - + if( jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } + if( jitter && annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } } catch( Exception e ) { - value = Double.NaN; // The VQSR works with missing data now by marginalizing over the missing dimension when evaluating Gaussians + value = Double.NaN; // The VQSR works with missing data by marginalizing over the missing dimension when evaluating the Gaussian mixture model } return value; @@ -258,8 +253,10 @@ public class VariantDataManager { datum.isKnown = false; datum.atTruthSite = false; datum.atTrainingSite = false; + datum.atAntiTrainingSite = false; datum.prior = 2.0; datum.consensusCount = 0; + for( final TrainingSet trainingSet : trainingSets ) { for( final VariantContext trainVC : tracker.getVariantContexts( ref, trainingSet.name, null, context.getLocation(), false, false ) ) { if( trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && @@ -272,13 +269,19 @@ public class VariantDataManager { datum.prior = Math.max( datum.prior, trainingSet.prior ); datum.consensusCount += ( trainingSet.isConsensus ? 
1 : 0 ); } + if( trainVC != null ) { + datum.atAntiTrainingSite = datum.atAntiTrainingSite || trainingSet.isAntiTraining; + } + } } } public void writeOutRecalibrationTable( final PrintStream RECAL_FILE ) { for( final VariantDatum datum : data ) { - RECAL_FILE.println(String.format("%s,%d,%d,%.4f", datum.contig, datum.start, datum.stop, datum.lod)); + RECAL_FILE.println(String.format("%s,%d,%d,%.4f,%s", + datum.contig, datum.start, datum.stop, datum.lod, + (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL"))); } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java index 8295ec205..eb9e98fcb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java @@ -39,16 +39,17 @@ public class VariantDatum implements Comparable { public double lod; public boolean atTruthSite; public boolean atTrainingSite; + public boolean atAntiTrainingSite; public boolean isTransition; public boolean isSNP; public boolean failingSTDThreshold; public double originalQual; public double prior; public int consensusCount; - public int usedForTraining; public String contig; public int start; public int stop; + public int worstAnnotation; public MultivariateGaussian assignment; // used in K-means implementation public int compareTo( final VariantDatum other ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index a77c5962c..8179463eb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -25,14 +25,12 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; @@ -42,6 +40,8 @@ import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; import java.io.FileNotFoundException; @@ -60,6 +60,7 @@ import java.util.*; public class VariantRecalibrator extends RodWalker, ExpandingArrayList> implements TreeReducible> { public static final String VQS_LOD_KEY = "VQSLOD"; + public static final String CULPRIT_KEY = "culprit"; @ArgumentCollection private VariantRecalibratorArgumentCollection VRAC = new VariantRecalibratorArgumentCollection(); @@ -175,7 +176,6 @@ public class VariantRecalibrator extends RodWalker data, final GaussianMixtureModel goodModel, final GaussianMixtureModel badModel ) { + for( final VariantDatum datum : data ) { + int worstAnnotation = -1; + double minProb = Double.MAX_VALUE; + for( int iii = 0; iii < 
datum.annotations.length; iii++ ) { + final Double goodProbLog10 = goodModel.evaluateDatumInOneDimension(datum, iii); + final Double badProbLog10 = badModel.evaluateDatumInOneDimension(datum, iii); + if( goodProbLog10 != null && badProbLog10 != null ) { + final double prob = goodProbLog10 - badProbLog10; + if(prob < minProb) { minProb = prob; worstAnnotation = iii; } + } + } + datum.worstAnnotation = worstAnnotation; + } + } + + ///////////////////////////// // Private Methods used for generating a GaussianMixtureModel ///////////////////////////// diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 597a54dc1..9c2a520ef 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -25,12 +25,13 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.apache.poi.hpsf.Variant; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.Requires; @@ -38,11 +39,10 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.SampleUtils; import 
org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; @@ -150,7 +150,7 @@ public class CombineVariants extends RodWalker { // get all of the vcf rods at this locus // Need to provide reference bases to simpleMerge starting at current locus - Collection vcs = tracker.getAllVariantContexts(ref, null,context.getLocation(), true, false); + Collection vcs = tracker.getAllVariantContexts(ref, null, context.getLocation(), true, false); if ( sitesOnlyVCF ) { vcs = VariantContextUtils.sitesOnlyVariantContexts(vcs); @@ -173,17 +173,25 @@ public class CombineVariants extends RodWalker { if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN)) return 0; - VariantContext mergedVC; + List mergedVCs = new ArrayList(); if ( master ) { - mergedVC = VariantContextUtils.masterMerge(vcs, "master"); + mergedVCs.add(VariantContextUtils.masterMerge(vcs, "master")); } else { - mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, filteredRecordsMergeType, - genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC); + Map> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs); + // iterate over the types so that it's deterministic + for ( VariantContext.Type type : VariantContext.Type.values() ) { + if ( VCsByType.containsKey(type) ) + mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type), + priority, filteredRecordsMergeType, 
genotypeMergeOption, true, printComplexMerges, + ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); + } } - //out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC); + for ( VariantContext mergedVC : mergedVCs ) { + // only operate at the start of events + if ( mergedVC == null ) + continue; - if ( mergedVC != null ) { // only operate at the start of events HashMap attributes = new HashMap(mergedVC.getAttributes()); // re-compute chromosome counts VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index ba6f5e513..b45ee1b67 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -24,18 +24,21 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import 
org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.Arrays; +import java.util.Collection; +import java.util.Map; +import java.util.Set; /** * Filters a lifted-over VCF file for ref bases that have been changed. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index d9dd85f0c..2ebd183f4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -28,21 +28,17 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.SortingVCFWriter; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import 
org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 7eda54387..4f05c8aac 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -24,27 +24,27 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; - -import java.io.File; -import java.util.*; - import net.sf.picard.PicardException; import net.sf.picard.liftover.LiftOver; import net.sf.picard.util.Interval; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileReader; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RMD; +import 
org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; + +import java.io.File; +import java.util.*; /** * Lifts a VCF file over from one build to another. Note that the resulting VCF could be mis-sorted. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java index 0c41a9728..f0756d884 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java @@ -24,11 +24,6 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; @@ -39,11 +34,15 @@ import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import 
org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; /** * Takes a VCF file, randomly splits variants into two different sets, and outputs 2 new VCFs with the results. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 963aa0ce5..ac6797609 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -24,27 +24,39 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; +import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import 
org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; +import java.io.FileNotFoundException; +import java.io.PrintStream; +import java.lang.annotation.AnnotationFormatError; import java.util.*; /** @@ -75,12 +87,31 @@ public class SelectVariants extends RodWalker { @Argument(fullName="excludeFiltered", shortName="ef", doc="Don't include filtered loci in the analysis.", required=false) private boolean EXCLUDE_FILTERED = false; + @Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't include filtered loci.", required=false) + private boolean KEEP_ORIGINAL_CHR_COUNTS = false; + @Argument(fullName="discordance", shortName = "disc", doc="Output variants that were not called on a ROD comparison track. Use -disc ROD_NAME", required=false) private String discordanceRodName = ""; @Argument(fullName="concordance", shortName = "conc", doc="Output variants that were also called on a ROD comparison track. 
Use -conc ROD_NAME", required=false) private String concordanceRodName = ""; + @Hidden + @Argument(fullName="inputAF", shortName = "inputAF", doc="", required=false) + private String inputAFRodName = ""; + + @Hidden + @Argument(fullName="keepAFSpectrum", shortName="keepAF", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false) + private boolean KEEP_AF_SPECTRUM = false; + + @Hidden + @Argument(fullName="afFile", shortName="afFile", doc="The output recal file used by ApplyRecalibration", required=false) + private File AF_FILE = new File(""); + + @Hidden + @Argument(fullName="family_structure_file", shortName="familyFile", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) + private File FAMILY_STRUCTURE_FILE = null; + @Argument(fullName="family_structure", shortName="family", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) private String FAMILY_STRUCTURE = ""; @@ -102,6 +133,9 @@ public class SelectVariants extends RodWalker { @Argument(fullName="selectIndels", shortName="indels", doc="Select only Indels.", required=false) private boolean SELECT_INDELS = false; + @Hidden + @Argument(fullName="outMVFile", shortName="outMVFile", doc="USE YAML FILE INSTEAD (-SM) !!! 
string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) + private String outMVFile = null; /* Private class used to store the intermediate variants in the integer random selection process */ private class RandomVariantStructure { @@ -129,7 +163,7 @@ public class SelectVariants extends RodWalker { private boolean DISCORDANCE_ONLY = false; private boolean CONCORDANCE_ONLY = false; - private MendelianViolation mv; + private Set mvSet = new HashSet(); /* default name for the variant dataset (VCF) */ private final String variantRodName = "variant"; @@ -144,8 +178,14 @@ public class SelectVariants extends RodWalker { private RandomVariantStructure [] variantArray; + /* Variables used for random selection with AF boosting */ + private ArrayList afBreakpoints = null; + private ArrayList afBoosts = null; + double bkDelta = 0.0; + private PrintStream outMVFileStream = null; + /** * Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher @@ -178,6 +218,12 @@ public class SelectVariants extends RodWalker { // Initialize VCF header Set headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger); headerLines.add(new VCFHeaderLine("source", "SelectVariants")); + + if (KEEP_ORIGINAL_CHR_COUNTS) { + headerLines.add(new VCFFormatHeaderLine("AC_Orig", 1, VCFHeaderLineType.Integer, "Original AC")); + headerLines.add(new VCFFormatHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF")); + headerLines.add(new VCFFormatHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN")); + } vcfWriter.writeHeader(new VCFHeader(headerLines, samples)); for (int i = 0; i < SELECT_EXPRESSIONS.size(); i++) { @@ -195,10 +241,29 @@ public class SelectVariants extends RodWalker { CONCORDANCE_ONLY = concordanceRodName.length() > 0; if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceRodName); - if (MENDELIAN_VIOLATIONS) - mv = new 
MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD); + if (MENDELIAN_VIOLATIONS) { + if ( FAMILY_STRUCTURE_FILE != null) { + try { + for ( final String line : new XReadLines( FAMILY_STRUCTURE_FILE ) ) { + MendelianViolation mv = new MendelianViolation(line, MENDELIAN_VIOLATION_QUAL_THRESHOLD); + if (samples.contains(mv.getSampleChild()) && samples.contains(mv.getSampleDad()) && samples.contains(mv.getSampleMom())) + mvSet.add(mv); + } + } catch ( FileNotFoundException e ) { + throw new UserException.CouldNotReadInputFile(AF_FILE, e); + } + if (outMVFile != null) + try { + outMVFileStream = new PrintStream(outMVFile); + } + catch (FileNotFoundException e) { + throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); } + } + else + mvSet.add(new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD)); + } else if (!FAMILY_STRUCTURE.isEmpty()) { - mv = new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD); + mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD)); MENDELIAN_VIOLATIONS = true; } @@ -210,6 +275,33 @@ public class SelectVariants extends RodWalker { SELECT_RANDOM_FRACTION = fractionRandom > 0; if (SELECT_RANDOM_FRACTION) logger.info("Selecting approximately " + fractionRandom + "% of the variants at random from the variant track"); + + + if (KEEP_AF_SPECTRUM) { + try { + afBreakpoints = new ArrayList(); + afBoosts = new ArrayList(); + logger.info("Reading in AF boost table..."); + boolean firstLine = false; + for ( final String line : new XReadLines( AF_FILE ) ) { + if (!firstLine) { + firstLine = true; + continue; + } + final String[] vals = line.split(" "); + + double bkp = Double.valueOf(vals[0]); + double afb = Double.valueOf(vals[1]); + afBreakpoints.add(bkp); + afBoosts.add(afb); + + } + bkDelta = afBreakpoints.get(0); + } catch ( FileNotFoundException e ) { + throw new UserException.CouldNotReadInputFile(AF_FILE, e); + } + + } } /** 
@@ -233,9 +325,24 @@ public class SelectVariants extends RodWalker { for (VariantContext vc : vcs) { if (MENDELIAN_VIOLATIONS) { - if (!mv.isViolation(vc)) { - break; + boolean foundMV = false; + for (MendelianViolation mv : mvSet) { + if (mv.isViolation(vc)) { + foundMV = true; + //System.out.println(vc.toString()); + if (outMVFile != null) + outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " + + "childG=%s childGL=%s\n",vc.getChr(), vc.getStart(), + vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getChromosomeCount(vc.getAlternateAllele(0)), + mv.getSampleMom(), mv.getSampleDad(), mv.getSampleChild(), + vc.getGenotype(mv.getSampleMom()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), + vc.getGenotype(mv.getSampleDad()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), + vc.getGenotype(mv.getSampleChild()).toBriefString(),vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString() ); + } } + + if (!foundMV) + break; } if (DISCORDANCE_ONLY) { Collection compVCs = tracker.getVariantContexts(ref, discordanceRodName, null, context.getLocation(), true, false); @@ -266,9 +373,61 @@ public class SelectVariants extends RodWalker { if (SELECT_RANDOM_NUMBER) { randomlyAddVariant(++variantNumber, sub, ref.getBase()); } - else if (!SELECT_RANDOM_FRACTION || GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom) { + else if (!SELECT_RANDOM_FRACTION || (!KEEP_AF_SPECTRUM && GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) { vcfWriter.add(sub, ref.getBase()); } + else { + if (SELECT_RANDOM_FRACTION && KEEP_AF_SPECTRUM ) { + // ok we have a comp VC and we need to match the AF spectrum of inputAFRodName. 
+ // We then pick a variant with probablity AF*desiredFraction + if ( sub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) { + String afo = sub.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY); + + double af; + double afBoost = 1.0; + if (afo.contains(",")) { + String[] afs = afo.split(","); + afs[0] = afs[0].substring(1,afs[0].length()); + afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1); + + double[] afd = new double[afs.length]; + + for (int k=0; k < afd.length; k++) + afd[k] = Double.valueOf(afs[k]); + + af = MathUtils.arrayMax(afd); + //af = Double.valueOf(afs[0]); + + } + else + af = Double.valueOf(afo); + + // now boost af by table read from file if desired + //double bkpt = 0.0; + int bkidx = 0; + if (!afBreakpoints.isEmpty()) { + for ( Double bkpt : afBreakpoints) { + if (af < bkpt + bkDelta) + break; + else bkidx++; + } + if (bkidx >=afBoosts.size()) + bkidx = afBoosts.size()-1; + afBoost = afBoosts.get(bkidx); + //System.out.formatPrin("af:%f bkidx:%d afboost:%f\n",af,bkidx,afBoost); + + + + } + + //System.out.format("%s .. 
%4.4f\n",afo.toString(), af); + if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom * afBoost * afBoost) + vcfWriter.add(sub, ref.getBase()); + } + + + } + } } } @@ -350,8 +509,8 @@ public class SelectVariants extends RodWalker { private boolean haveSameGenotypes(Genotype g1, Genotype g2) { if ((g1.isCalled() && g2.isFiltered()) || - (g2.isCalled() && g1.isFiltered()) || - (g1.isFiltered() && g2.isFiltered() && EXCLUDE_FILTERED)) + (g2.isCalled() && g1.isFiltered()) || + (g1.isFiltered() && g2.isFiltered() && EXCLUDE_FILTERED)) return false; List a1s = g1.getAlleles(); @@ -384,7 +543,7 @@ public class SelectVariants extends RodWalker { * @param vc the VariantContext record to subset * @param samples the samples to extract * @return the subsetted VariantContext - */ + */ private VariantContext subsetRecord(VariantContext vc, Set samples) { if ( samples == null || samples.isEmpty() ) return vc; @@ -394,7 +553,7 @@ public class SelectVariants extends RodWalker { if ( samples.contains(genotypePair.getKey()) ) genotypes.add(genotypePair.getValue()); } - + VariantContext sub = vc.subContextFromGenotypes(genotypes, vc.getAlleles()); HashMap attributes = new HashMap(sub.getAttributes()); @@ -404,7 +563,7 @@ public class SelectVariants extends RodWalker { Genotype g = sub.getGenotype(sample); if (g.isNotFiltered() && g.isCalled()) { - + String dp = (String) g.getAttribute("DP"); if (dp != null && ! dp.equals(VCFConstants.MISSING_DEPTH_v3) && ! 
dp.equals(VCFConstants.MISSING_VALUE_v4) ) { depth += Integer.valueOf(dp); @@ -413,6 +572,16 @@ public class SelectVariants extends RodWalker { } + if (KEEP_ORIGINAL_CHR_COUNTS) { + if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) ) + attributes.put("AC_Orig",attributes.get(VCFConstants.ALLELE_COUNT_KEY)); + if ( attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY) ) + attributes.put("AF_Orig",attributes.get(VCFConstants.ALLELE_FREQUENCY_KEY)); + if ( attributes.containsKey(VCFConstants.ALLELE_NUMBER_KEY) ) + attributes.put("AN_Orig",attributes.get(VCFConstants.ALLELE_NUMBER_KEY)); + + } + VariantContextUtils.calculateChromosomeCounts(sub,attributes,false); attributes.put("DP", depth); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 1bd73414c..0644c669b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -25,22 +25,25 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.TribbleException; +import org.broad.tribble.dbsnp.DbSNPFeature; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.*; -import 
org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; import java.io.File; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java index 482679593..86bb3b0e8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java @@ -25,20 +25,19 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import 
org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index 71dd5df3f..39358dad5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -24,18 +24,18 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.PrintStream; import java.util.*; @@ -75,17 +75,29 @@ public class VariantsToTable extends RodWalker { // #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT getters.put("CHROM", new Getter() { public String 
get(VariantContext vc) { return vc.getChr(); } }); getters.put("POS", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getStart()); } }); - getters.put("REF", new Getter() { public String get(VariantContext vc) { return vc.getReference().toString(); } }); + getters.put("REF", new Getter() { + public String get(VariantContext vc) { + String x = ""; + if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) { + Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)); + x=x+new String(new byte[]{refByte}); + } + return x+vc.getReference().getDisplayString(); + } + }); getters.put("ALT", new Getter() { public String get(VariantContext vc) { StringBuilder x = new StringBuilder(); int n = vc.getAlternateAlleles().size(); - if ( n == 0 ) return "."; + if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) { + Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)); + x.append(new String(new byte[]{refByte})); + } for ( int i = 0; i < n; i++ ) { if ( i != 0 ) x.append(","); - x.append(vc.getAlternateAllele(i).toString()); + x.append(vc.getAlternateAllele(i).getDisplayString()); } return x.toString(); } @@ -169,6 +181,31 @@ public class VariantsToTable extends RodWalker { throw new UserException(String.format("Missing field %s in vc %s at %s", field, vc.getSource(), vc)); } + if (field.equals("AF") || field.equals("AC")) { + String afo = val; + + double af=0; + if (afo.contains(",")) { + String[] afs = afo.split(","); + afs[0] = afs[0].substring(1,afs[0].length()); + afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1); + + double[] afd = new double[afs.length]; + + for (int k=0; k < afd.length; k++) + afd[k] = Double.valueOf(afs[k]); + + af = MathUtils.arrayMax(afd); + //af = Double.valueOf(afs[0]); + + } + else + if (!afo.equals("NA")) + af = Double.valueOf(afo); + + val = Double.toString(af); + + } vals.add(val); } diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 7eb49da34..aa0e5987f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -28,26 +28,26 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import net.sf.samtools.util.CloseableIterator; import org.broad.tribble.dbsnp.DbSNPCodec; import org.broad.tribble.dbsnp.DbSNPFeature; -import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.*; -import 
org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; @@ -199,8 +199,8 @@ public class VariantsToVCF extends RodWalker { // setup the header fields Set hInfo = new HashSet(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); - hInfo.add(new VCFHeaderLine("source", "VariantsToVCF")); - hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); + //hInfo.add(new VCFHeaderLine("source", "VariantsToVCF")); + //hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY); for ( VCFHeaderLine field : hInfo ) { diff --git a/public/java/src/org/broadinstitute/sting/jna/clibrary/LibC.java b/public/java/src/org/broadinstitute/sting/jna/clibrary/LibC.java index b01533ee6..b5efcc153 100644 --- a/public/java/src/org/broadinstitute/sting/jna/clibrary/LibC.java +++ b/public/java/src/org/broadinstitute/sting/jna/clibrary/LibC.java @@ -24,7 +24,10 @@ package org.broadinstitute.sting.jna.clibrary; -import com.sun.jna.*; +import com.sun.jna.LastErrorException; +import com.sun.jna.Native; +import com.sun.jna.NativeLong; +import com.sun.jna.Structure; import com.sun.jna.ptr.NativeLongByReference; /** diff --git a/public/java/src/org/broadinstitute/sting/jna/lsf/v7_0_6/LibLsf.java 
b/public/java/src/org/broadinstitute/sting/jna/lsf/v7_0_6/LibLsf.java index 0c0579d6f..c7b3de6cf 100644 --- a/public/java/src/org/broadinstitute/sting/jna/lsf/v7_0_6/LibLsf.java +++ b/public/java/src/org/broadinstitute/sting/jna/lsf/v7_0_6/LibLsf.java @@ -29,7 +29,7 @@ import com.sun.jna.ptr.FloatByReference; import com.sun.jna.ptr.IntByReference; import com.sun.jna.ptr.PointerByReference; import org.broadinstitute.sting.jna.clibrary.JNAUtils; -import org.broadinstitute.sting.jna.clibrary.LibC.*; +import org.broadinstitute.sting.jna.clibrary.LibC.timeval; /* NOTE: This library uses Pointer for some Struct.ByReference members going diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java index fe6758e76..c09c4037e 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java @@ -27,13 +27,16 @@ package org.broadinstitute.sting.queue.extensions.gatk; import net.sf.samtools.BAMIndex; import net.sf.samtools.SAMFileWriter; import org.broad.tribble.Tribble; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import java.io.File; import java.lang.annotation.Annotation; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; public abstract class ArgumentDefinitionField extends ArgumentField { diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java index 2da427d44..e90933504 100644 --- 
a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java @@ -35,7 +35,10 @@ import java.io.File; import java.io.InputStream; import java.io.OutputStream; import java.lang.annotation.Annotation; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; public abstract class ArgumentField { diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java index 21fb44733..5095bd6e5 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java @@ -27,17 +27,20 @@ package org.broadinstitute.sting.queue.extensions.gatk; import org.apache.commons.io.FileUtils; import org.apache.commons.lang.StringUtils; import org.apache.log4j.Logger; -import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; +import org.broadinstitute.sting.commandline.CommandLineProgram; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.ParsingEngine; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.filters.ReadFilter; -import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor; import 
org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; +import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.walkers.PartitionBy; import org.broadinstitute.sting.gatk.walkers.PartitionType; diff --git a/public/java/src/org/broadinstitute/sting/utils/DisjointSet.java b/public/java/src/org/broadinstitute/sting/utils/DisjointSet.java index 5f68d3414..52c18e6d6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/DisjointSet.java +++ b/public/java/src/org/broadinstitute/sting/utils/DisjointSet.java @@ -23,7 +23,9 @@ */ package org.broadinstitute.sting.utils; -import java.util.*; +import java.util.Collection; +import java.util.Set; +import java.util.TreeSet; public class DisjointSet { private ItemNode[] nodes; diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java index 1f8800542..b96923589 100644 --- a/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java +++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.Serializable; diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java index 18a1e7ffd..a5c6e0537 100644 --- a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java +++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java @@ -25,12 +25,14 @@ package org.broadinstitute.sting.utils; -import com.google.java.contract.*; +import com.google.java.contract.Ensures; +import com.google.java.contract.Invariant; +import 
com.google.java.contract.Requires; +import com.google.java.contract.ThrowEnsures; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; - import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/public/java/src/org/broadinstitute/sting/utils/IndelUtils.java b/public/java/src/org/broadinstitute/sting/utils/IndelUtils.java index 30e1a3f5b..af69ebca6 100755 --- a/public/java/src/org/broadinstitute/sting/utils/IndelUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/IndelUtils.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.utils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Arrays; diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 50aa0f707..36ed506aa 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -25,17 +25,14 @@ package org.broadinstitute.sting.utils; -import cern.jet.math.Arithmetic; - -import java.math.BigDecimal; -import java.util.*; - import com.google.java.contract.Requires; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.collections.PrimitivePair; import org.broadinstitute.sting.utils.exceptions.UserException; +import java.math.BigDecimal; +import java.util.*; + /** * MathUtils is a static class (no instantiation allowed!) with some useful math methods. 
* diff --git a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java index a8089ffe8..c6a07b5ce 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java +++ b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java @@ -1,12 +1,13 @@ package org.broadinstitute.sting.utils; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.Collection; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; diff --git a/public/java/src/org/broadinstitute/sting/utils/PathUtils.java b/public/java/src/org/broadinstitute/sting/utils/PathUtils.java index 47466be20..822d04dfd 100755 --- a/public/java/src/org/broadinstitute/sting/utils/PathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/PathUtils.java @@ -2,10 +2,10 @@ package org.broadinstitute.sting.utils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.List; -import java.util.ArrayList; import java.io.File; import java.io.FilenameFilter; +import java.util.ArrayList; +import java.util.List; /** * Created by IntelliJ IDEA. 
diff --git a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java index 23054e95f..fad2320fc 100755 --- a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java @@ -9,9 +9,13 @@ import net.sf.samtools.SAMUtils; * @author Kiran Garimella */ public class QualityUtils { + public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE; public final static double MIN_REASONABLE_ERROR = 0.0001; public final static byte MAX_REASONABLE_Q_SCORE = 40; + public final static byte MIN_USABLE_Q_SCORE = 6; + + public final static int MAPPING_QUALITY_UNAVAILABLE = 255; /** * Private constructor. No instantiating this class! diff --git a/public/java/src/org/broadinstitute/sting/utils/ReservoirDownsampler.java b/public/java/src/org/broadinstitute/sting/utils/ReservoirDownsampler.java index 30257d967..a758df431 100644 --- a/public/java/src/org/broadinstitute/sting/utils/ReservoirDownsampler.java +++ b/public/java/src/org/broadinstitute/sting/utils/ReservoirDownsampler.java @@ -3,7 +3,9 @@ package org.broadinstitute.sting.utils; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; /** * Randomly downsample from a stream of elements. 
This algorithm is a direct, diff --git a/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java b/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java index 4b7fa3e41..92d73a5ce 100755 --- a/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java +++ b/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java @@ -24,15 +24,14 @@ package org.broadinstitute.sting.utils; +import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; -import net.sf.samtools.Cigar; - -import java.util.*; - import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.StingException; +import java.util.*; + /** * Created by IntelliJ IDEA. * User: asivache diff --git a/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java b/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java index c0370064d..f9997bfd8 100755 --- a/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java @@ -27,12 +27,12 @@ package org.broadinstitute.sting.utils; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMReadGroupRecord; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; import java.io.FileNotFoundException; diff --git a/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java 
b/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java index a59c5134a..342087b41 100644 --- a/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java +++ b/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java @@ -1,7 +1,10 @@ package org.broadinstitute.sting.utils; +import com.google.java.contract.Ensures; +import com.google.java.contract.Invariant; +import com.google.java.contract.Requires; + import java.io.PrintStream; -import com.google.java.contract.*; /** * A useful simple system for timing code. This code is not thread safe! diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index 4c13d8b18..6a50badce 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -25,13 +25,12 @@ package org.broadinstitute.sting.utils; -import java.util.*; - import net.sf.samtools.util.StringUtil; - import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.collections.Pair; +import java.util.*; + /** * Created by IntelliJ IDEA. 
* User: depristo diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java index 456f0fa0f..ef7cf751e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java +++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.utils.baq; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.ReferenceSequence; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java index 24d4152a5..26356a4a4 100644 --- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java @@ -2,11 +2,10 @@ package org.broadinstitute.sting.utils.baq; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import net.sf.samtools.SAMRecord; import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.Iterator; diff --git a/public/java/src/org/broadinstitute/sting/utils/bed/BedParser.java b/public/java/src/org/broadinstitute/sting/utils/bed/BedParser.java index abcae066f..b95165841 100644 --- 
a/public/java/src/org/broadinstitute/sting/utils/bed/BedParser.java +++ b/public/java/src/org/broadinstitute/sting/utils/bed/BedParser.java @@ -1,11 +1,12 @@ package org.broadinstitute.sting.utils.bed; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.*; -import java.util.*; +import java.util.ArrayList; +import java.util.List; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java b/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java index 404bd80b6..e65b8f921 100755 --- a/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java @@ -29,9 +29,9 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.reflections.util.ClasspathHelper; -import java.lang.reflect.*; import java.io.File; import java.io.IOException; +import java.lang.reflect.*; import java.net.URL; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index 95f3e160c..5449906b2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -4,9 +4,6 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.walkers.ClipReadsWalker; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import 
org.broadinstitute.sting.utils.sam.ReadUtils; import java.util.Vector; diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index 031467ed9..988d297f6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.utils.clipreads; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.walkers.ClipReadsWalker; import java.util.ArrayList; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/completegenomics/CGVarCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/completegenomics/CGVarCodec.java index a286ce789..fef6c4ea0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/completegenomics/CGVarCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/completegenomics/CGVarCodec.java @@ -30,7 +30,6 @@ import org.broad.tribble.readers.LineReader; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.io.IOException; import java.util.HashMap; import java.util.HashSet; import java.util.regex.Matcher; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java index 618d9ce79..e169dbdfc 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java @@ -4,11 +4,11 @@ import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; import org.broad.tribble.NameAwareCodec; import org.broad.tribble.TribbleException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import 
org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.readers.LineReader; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 01344a117..710127f7a 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -7,6 +7,8 @@ import org.broad.tribble.NameAwareCodec; import org.broad.tribble.TribbleException; import org.broad.tribble.readers.LineReader; import org.broad.tribble.util.ParsingUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -96,6 +98,9 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, for ( String str : headerStrings ) { if ( !str.startsWith(VCFHeader.METADATA_INDICATOR) ) { String[] strings = str.substring(1).split(VCFConstants.FIELD_SEPARATOR); + if ( strings.length < VCFHeader.HEADER_FIELDS.values().length ) + throw new TribbleException.InvalidHeader("there are not enough columns present in the header line: " + str); + int arrayIndex = 0; for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) { try { @@ -159,12 +164,11 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, } private 
Feature reallyDecode(String line) { - try { // the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null; // our header cannot be null, we need the genotype sample names and counts - if (header == null) throw new IllegalStateException("VCF Header cannot be null when decoding a record"); + if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record"); if (parts == null) parts = new String[Math.min(header.getColumnCount(), NUM_STANDARD_FIELDS+1)]; @@ -174,17 +178,18 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, // if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data) if (( (header == null || (header != null && !header.hasGenotypingData())) && nParts != NUM_STANDARD_FIELDS) || (header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) ) - throw new IllegalArgumentException("There aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) + - " tokens, and saw " + nParts + " )"); + throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? 
NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) + + " tokens, and saw " + nParts + " )", lineNo); return parseVCFLine(parts); - } catch (TribbleException e) { - throw new TribbleException.InvalidDecodeLine(e.getMessage(), line); - } } protected void generateException(String message) { - throw new TribbleException.InvalidDecodeLine(message, lineNo); + throw new UserException.MalformedVCF(message, lineNo); + } + + private static void generateException(String message, int lineNo) { + throw new UserException.MalformedVCF(message, lineNo); } /** @@ -472,10 +477,6 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, return true; } - private static void generateException(String message, int lineNo) { - throw new TribbleException.InvalidDecodeLine(message, lineNo); - } - private static int computeForwardClipping(List unclippedAlleles, String ref) { boolean clipping = true; // Note that the computation of forward clipping here is meant only to see whether there is a common diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java index 31251c089..b7f4be39a 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java @@ -32,13 +32,14 @@ import org.broad.tribble.index.IndexFactory; import org.broad.tribble.util.LittleEndianOutputStream; import org.broad.tribble.util.ParsingUtils; import org.broad.tribble.util.PositionalStream; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.*; -import java.util.*; import 
java.lang.reflect.Array; +import java.util.*; /** * this class writes VCF files @@ -123,12 +124,10 @@ public class StandardVCFWriter implements VCFWriter { try { // the file format field needs to be written first - mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_0.getFormatString() + "=" + VCFHeaderVersion.VCF4_0.getVersionString() + "\n"); + mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_1.getFormatString() + "=" + VCFHeaderVersion.VCF4_1.getVersionString() + "\n"); for ( VCFHeaderLine line : mHeader.getMetaData() ) { - if ( line.getKey().equals(VCFHeaderVersion.VCF4_0.getFormatString()) || - line.getKey().equals(VCFHeaderVersion.VCF3_3.getFormatString()) || - line.getKey().equals(VCFHeaderVersion.VCF3_2.getFormatString()) ) + if ( VCFHeaderVersion.isFormatString(line.getKey()) ) continue; // are the records filtered (so we know what to put in the FILTER column of passing records) ? @@ -302,10 +301,7 @@ public class StandardVCFWriter implements VCFWriter { } else { List genotypeAttributeKeys = new ArrayList(); if ( vc.hasGenotypes() ) { - genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY); - for ( String key : calcVCFGenotypeKeys(vc) ) { - genotypeAttributeKeys.add(key); - } + genotypeAttributeKeys.addAll(calcVCFGenotypeKeys(vc)); } else if ( mHeader.hasGenotypingData() ) { // this needs to be done in case all samples are no-calls genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY); @@ -358,16 +354,8 @@ public class StandardVCFWriter implements VCFWriter { mWriter.write(key); if ( !entry.getValue().equals("") ) { - int numVals = 1; VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key); - if ( metaData != null ) - numVals = metaData.getCount(); - - // take care of unbounded encoding - if ( numVals == VCFInfoHeaderLine.UNBOUNDED ) - numVals = 1; - - if ( numVals > 0 ) { + if ( metaData == null || metaData.getCountType() != VCFHeaderLineCount.INTEGER || metaData.getCount() != 0 ) { mWriter.write("="); 
mWriter.write(entry.getValue()); } @@ -397,16 +385,22 @@ public class StandardVCFWriter implements VCFWriter { continue; } - writeAllele(g.getAllele(0), alleleMap); - for (int i = 1; i < g.getPloidy(); i++) { - mWriter.write(g.isPhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED); - writeAllele(g.getAllele(i), alleleMap); - } - List attrs = new ArrayList(genotypeFormatKeys.size()); for ( String key : genotypeFormatKeys ) { - if ( key.equals(VCFConstants.GENOTYPE_KEY) ) + + if ( key.equals(VCFConstants.GENOTYPE_KEY) ) { + if ( !g.isAvailable() ) { + throw new ReviewedStingException("GTs cannot be missing for some samples if they are available for others in the record"); + } + + writeAllele(g.getAllele(0), alleleMap); + for (int i = 1; i < g.getPloidy(); i++) { + mWriter.write(g.isPhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED); + writeAllele(g.getAllele(i), alleleMap); + } + continue; + } Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4; @@ -423,7 +417,7 @@ public class StandardVCFWriter implements VCFWriter { VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key); if ( metaData != null ) { - int numInFormatField = metaData.getCount(); + int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size()); if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) { // If we have a missing field but multiple values are expected, we need to construct a new string with all fields. // For example, if Number=2, the string has to be ".,." 
@@ -450,9 +444,10 @@ public class StandardVCFWriter implements VCFWriter { break; } - for (String s : attrs ) { - mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR); - mWriter.write(s); + for (int i = 0; i < attrs.size(); i++) { + if ( i > 0 || genotypeFormatKeys.contains(VCFConstants.GENOTYPE_KEY) ) + mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR); + mWriter.write(attrs.get(i)); } } } @@ -498,10 +493,13 @@ public class StandardVCFWriter implements VCFWriter { private static List calcVCFGenotypeKeys(VariantContext vc) { Set keys = new HashSet(); + boolean sawGoodGT = false; boolean sawGoodQual = false; boolean sawGenotypeFilter = false; for ( Genotype g : vc.getGenotypes().values() ) { keys.addAll(g.getAttributes().keySet()); + if ( g.isAvailable() ) + sawGoodGT = true; if ( g.hasNegLog10PError() ) sawGoodQual = true; if (g.isFiltered() && g.isCalled()) @@ -514,7 +512,17 @@ public class StandardVCFWriter implements VCFWriter { if (sawGenotypeFilter) keys.add(VCFConstants.GENOTYPE_FILTER_KEY); - return ParsingUtils.sortList(new ArrayList(keys)); + List sortedList = ParsingUtils.sortList(new ArrayList(keys)); + + // make sure the GT is first + if ( sawGoodGT ) { + List newList = new ArrayList(sortedList.size()+1); + newList.add(VCFConstants.GENOTYPE_KEY); + newList.addAll(sortedList); + sortedList = newList; + } + + return sortedList; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java index f3c99e963..c29f2ba8b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java @@ -141,8 +141,6 @@ public class VCF3Codec extends AbstractVCFCodec { boolean missing = i >= GTValueSplitSize; if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) { - if (i != 0) - generateException("Saw GT at position " + i + ", but it must be at the first position for genotypes"); 
genotypeAlleleLocation = i; } else if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) { GTQual = missing ? parseQual(VCFConstants.MISSING_VALUE_v4) : parseQual(GTValueArray[i]); @@ -156,12 +154,13 @@ public class VCF3Codec extends AbstractVCFCodec { } } - // check to make sure we found a gentoype field - if (genotypeAlleleLocation < 0) generateException("Unable to find required field GT for the record; we don't yet support a missing GT field"); + // check to make sure we found a genotype field + if ( genotypeAlleleLocation < 0 ) + generateException("Unable to find the GT field for the record; the GT field is required"); + if ( genotypeAlleleLocation > 0 ) + generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes"); - // todo -- assuming allele list length in the single digits is bad. Fix me. - // Check for > 1 for haploid genotypes - boolean phased = GTValueArray[genotypeAlleleLocation].length() > 1 && GTValueArray[genotypeAlleleLocation].charAt(1) == '|'; + boolean phased = GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1; // add it to the list try { diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAltHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAltHeaderLine.java new file mode 100644 index 000000000..a9de949d8 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAltHeaderLine.java @@ -0,0 +1,28 @@ +package org.broadinstitute.sting.utils.codecs.vcf; + +/** + * @author ebanks + * A class representing a key=value entry for ALT fields in the VCF header + */ +public class VCFAltHeaderLine extends VCFSimpleHeaderLine { + + /** + * create a VCF filter header line + * + * @param name the name for this header line + * @param description the description for this header line + */ + public VCFAltHeaderLine(String name, String description) { + super(name, description, 
SupportedHeaderLineType.ALT); + } + + /** + * create a VCF info header line + * + * @param line the header line + * @param version the vcf header version + */ + protected VCFAltHeaderLine(String line, VCFHeaderVersion version) { + super(line, version, SupportedHeaderLineType.ALT); + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java index 0fb2940bb..05fff5d9e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java @@ -145,8 +145,6 @@ public class VCFCodec extends AbstractVCFCodec { // todo -- all of these on the fly parsing of the missing value should be static constants if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) { - if (i != 0) - generateException("Saw GT at position " + i + ", but it must be at the first position for genotypes"); genotypeAlleleLocation = i; } else if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) { GTQual = missing ? parseQual(VCFConstants.MISSING_VALUE_v4) : parseQual(GTValueArray[i]); @@ -160,22 +158,24 @@ public class VCFCodec extends AbstractVCFCodec { } } - // check to make sure we found a gentoype field - // TODO -- This is no longer required in v4.1 - if (genotypeAlleleLocation < 0) generateException("Unable to find required field GT for the record; we don't yet support a missing GT field"); + // check to make sure we found a genotype field if we are a VCF4.0 file + if ( version == VCFHeaderVersion.VCF4_0 && genotypeAlleleLocation == -1 ) + generateException("Unable to find the GT field for the record; the GT field is required in VCF4.0"); + if ( genotypeAlleleLocation > 0 ) + generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present"); - // todo -- assuming allele list length in the single digits is bad. 
Fix me. - // Check for > 1 for haploid genotypes - boolean phased = GTValueArray[genotypeAlleleLocation].length() > 1 && GTValueArray[genotypeAlleleLocation].charAt(1) == '|'; + List GTalleles = (genotypeAlleleLocation == -1 ? null : parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap)); + boolean phased = genotypeAlleleLocation != -1 && GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1; // add it to the list try { - genotypes.put(sampleName, new Genotype(sampleName, - parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap), - GTQual, - genotypeFilters, - gtAttributes, - phased)); + genotypes.put(sampleName, + new Genotype(sampleName, + GTalleles, + GTQual, + genotypeFilters, + gtAttributes, + phased)); } catch (TribbleException e) { throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java index a799161ad..bb822f2ed 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java @@ -24,6 +24,8 @@ package org.broadinstitute.sting.utils.codecs.vcf; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + import java.util.Arrays; import java.util.LinkedHashMap; import java.util.Map; @@ -43,26 +45,43 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF // the field types private String name; - private int count; + private int count = -1; + private VCFHeaderLineCount countType; private String description; private VCFHeaderLineType type; // access methods public String getName() { return name; } - public int getCount() { return count; } public String getDescription() { return description; } public 
VCFHeaderLineType getType() { return type; } + public VCFHeaderLineCount getCountType() { return countType; } + public int getCount() { + if ( countType != VCFHeaderLineCount.INTEGER ) + throw new ReviewedStingException("Asking for header line count when type is not an integer"); + return count; + } - // - public void setNumberToUnbounded() { this.count = UNBOUNDED; } + // utility method + public int getCount(int numAltAlleles) { + int myCount; + switch ( countType ) { + case INTEGER: myCount = count; break; + case UNBOUNDED: myCount = -1; break; + case A: myCount = numAltAlleles; break; + case G: myCount = ((numAltAlleles + 1) * (numAltAlleles + 2) / 2); break; + default: throw new ReviewedStingException("Unknown count type: " + countType); + } + return myCount; + } + + public void setNumberToUnbounded() { + countType = VCFHeaderLineCount.UNBOUNDED; + count = -1; + } // our type of line, i.e. format, info, etc private final SupportedHeaderLineType lineType; - // line numerical values are allowed to be unbounded (or unknown), which is - // marked with a dot (.) 
- public static final int UNBOUNDED = -1; // the value we store internally for unbounded types - /** * create a VCF format header line * @@ -70,10 +89,12 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF * @param count the count for this header line * @param type the type for this header line * @param description the description for this header line + * @param lineType the header line type */ protected VCFCompoundHeaderLine(String name, int count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) { super(lineType.toString(), ""); this.name = name; + this.countType = VCFHeaderLineCount.INTEGER; this.count = count; this.type = type; this.description = description; @@ -81,20 +102,53 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF validate(); } + /** + * create a VCF format header line + * + * @param name the name for this header line + * @param count the count type for this header line + * @param type the type for this header line + * @param description the description for this header line + * @param lineType the header line type + */ + protected VCFCompoundHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) { + super(lineType.toString(), ""); + this.name = name; + this.countType = count; + this.type = type; + this.description = description; + this.lineType = lineType; + validate(); + } + /** * create a VCF format header line * * @param line the header line * @param version the VCF header version + * @param lineType the header line type * */ protected VCFCompoundHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) { super(lineType.toString(), ""); Map mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Number","Type","Description")); name = mapping.get("ID"); - count = (version == VCFHeaderVersion.VCF4_0 || version == 
VCFHeaderVersion.VCF4_1) ? - mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v4) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")) : - mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v3) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")); + count = -1; + final String numberStr = mapping.get("Number"); + if ( numberStr.equals(VCFConstants.PER_ALLELE_COUNT) ) { + countType = VCFHeaderLineCount.A; + } else if ( numberStr.equals(VCFConstants.PER_GENOTYPE_COUNT) ) { + countType = VCFHeaderLineCount.G; + } else if ( ((version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1) && + numberStr.equals(VCFConstants.UNBOUNDED_ENCODING_v4)) || + ((version == VCFHeaderVersion.VCF3_2 || version == VCFHeaderVersion.VCF3_3) && + numberStr.equals(VCFConstants.UNBOUNDED_ENCODING_v3)) ) { + countType = VCFHeaderLineCount.UNBOUNDED; + } else { + countType = VCFHeaderLineCount.INTEGER; + count = Integer.valueOf(numberStr); + + } type = VCFHeaderLineType.valueOf(mapping.get("Type")); if (type == VCFHeaderLineType.Flag && !allowFlagValues()) throw new IllegalArgumentException("Flag is an unsupported type for this kind of field"); @@ -121,7 +175,15 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF protected String toStringEncoding() { Map map = new LinkedHashMap(); map.put("ID", name); - map.put("Number", count == UNBOUNDED ? 
VCFConstants.UNBOUNDED_ENCODING_v4 : count); + Object number; + switch ( countType ) { + case A: number = VCFConstants.PER_ALLELE_COUNT; break; + case G: number = VCFConstants.PER_GENOTYPE_COUNT; break; + case UNBOUNDED: number = VCFConstants.UNBOUNDED_ENCODING_v4; break; + case INTEGER: + default: number = count; + } + map.put("Number", number); map.put("Type", type); map.put("Description", description); return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map); @@ -136,15 +198,13 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF if ( !(o instanceof VCFCompoundHeaderLine) ) return false; VCFCompoundHeaderLine other = (VCFCompoundHeaderLine)o; - return name.equals(other.name) && - count == other.count && - description.equals(other.description) && - type == other.type && - lineType == other.lineType; + return equalsExcludingDescription(other) && + description.equals(other.description); } public boolean equalsExcludingDescription(VCFCompoundHeaderLine other) { return count == other.count && + countType == other.countType && type == other.type && lineType == other.lineType && name.equals(other.name); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java index 695c46c27..91cf86c70 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java @@ -99,6 +99,8 @@ public final class VCFConstants { public static final String MISSING_DEPTH_v3 = "-1"; public static final String UNBOUNDED_ENCODING_v4 = "."; public static final String UNBOUNDED_ENCODING_v3 = "-1"; + public static final String PER_ALLELE_COUNT = "A"; + public static final String PER_GENOTYPE_COUNT = "G"; public static final String EMPTY_ALLELE = "."; public static final String EMPTY_GENOTYPE = "./."; public static final double MAX_GENOTYPE_QUAL = 
99.0; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java index 9176fc16e..418b80074 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java @@ -1,19 +1,10 @@ package org.broadinstitute.sting.utils.codecs.vcf; -import java.util.Arrays; -import java.util.LinkedHashMap; -import java.util.Map; - - /** * @author ebanks * A class representing a key=value entry for FILTER fields in the VCF header */ -public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine { - - private String name; - private String description; - +public class VCFFilterHeaderLine extends VCFSimpleHeaderLine { /** * create a VCF filter header line @@ -22,12 +13,7 @@ public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeader * @param description the description for this header line */ public VCFFilterHeaderLine(String name, String description) { - super("FILTER", ""); - this.name = name; - this.description = description; - - if ( name == null || description == null ) - throw new IllegalArgumentException(String.format("Invalid VCFCompoundHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description )); + super(name, description, SupportedHeaderLineType.FILTER); } /** @@ -37,34 +23,6 @@ public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeader * @param version the vcf header version */ protected VCFFilterHeaderLine(String line, VCFHeaderVersion version) { - super("FILTER", ""); - Map mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description")); - name = mapping.get("ID"); - description = mapping.get("Description"); - if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided - description 
= UNBOUND_DESCRIPTION; - } - - protected String toStringEncoding() { - Map map = new LinkedHashMap(); - map.put("ID", name); - map.put("Description", description); - return "FILTER=" + VCFHeaderLine.toStringEncoding(map); - } - - public boolean equals(Object o) { - if ( !(o instanceof VCFFilterHeaderLine) ) - return false; - VCFFilterHeaderLine other = (VCFFilterHeaderLine)o; - return name.equals(other.name) && - description.equals(other.description); - } - - public String getName() { - return name; - } - - public String getDescription() { - return description; + super(line, version, SupportedHeaderLineType.FILTER); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java index 352be3e97..474c8dd14 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java @@ -16,6 +16,10 @@ public class VCFFormatHeaderLine extends VCFCompoundHeaderLine { throw new IllegalArgumentException("Flag is an unsupported type for format fields"); } + public VCFFormatHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) { + super(name, count, type, description, SupportedHeaderLineType.FORMAT); + } + protected VCFFormatHeaderLine(String line, VCFHeaderVersion version) { super(line, version, SupportedHeaderLineType.FORMAT); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineCount.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineCount.java new file mode 100644 index 000000000..d615c7c78 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineCount.java @@ -0,0 +1,8 @@ +package org.broadinstitute.sting.utils.codecs.vcf; + +/** + * the count encodings we use for fields in VCF header 
lines + */ +public enum VCFHeaderLineCount { + INTEGER, A, G, UNBOUNDED; +} diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java index 135a5c1a1..9b20f38a1 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java @@ -13,6 +13,10 @@ public class VCFInfoHeaderLine extends VCFCompoundHeaderLine { super(name, count, type, description, SupportedHeaderLineType.INFO); } + public VCFInfoHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) { + super(name, count, type, description, SupportedHeaderLineType.INFO); + } + protected VCFInfoHeaderLine(String line, VCFHeaderVersion version) { super(line, version, SupportedHeaderLineType.INFO); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFParser.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFParser.java index dac996494..1dba351e2 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFParser.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFParser.java @@ -2,7 +2,9 @@ package org.broadinstitute.sting.utils.codecs.vcf; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; -import java.util.*; + +import java.util.List; +import java.util.Map; /** diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java new file mode 100644 index 000000000..152043f28 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java @@ -0,0 +1,81 @@ +package org.broadinstitute.sting.utils.codecs.vcf; + +import java.util.Arrays; +import 
java.util.LinkedHashMap; +import java.util.Map; + + +/** + * @author ebanks + * A class representing a key=value entry for simple VCF header types + */ +public abstract class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine { + + public enum SupportedHeaderLineType { + FILTER, ALT; + } + + private String name; + private String description; + + // our type of line, i.e. filter, alt, etc + private final SupportedHeaderLineType lineType; + + + /** + * create a VCF filter header line + * + * @param name the name for this header line + * @param description the description for this header line + * @param lineType the header line type + */ + public VCFSimpleHeaderLine(String name, String description, SupportedHeaderLineType lineType) { + super(lineType.toString(), ""); + this.lineType = lineType; + this.name = name; + this.description = description; + + if ( name == null || description == null ) + throw new IllegalArgumentException(String.format("Invalid VCFSimpleHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description )); + } + + /** + * create a VCF info header line + * + * @param line the header line + * @param version the vcf header version + * @param lineType the header line type + */ + protected VCFSimpleHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) { + super(lineType.toString(), ""); + this.lineType = lineType; + Map mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description")); + name = mapping.get("ID"); + description = mapping.get("Description"); + if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided + description = UNBOUND_DESCRIPTION; + } + + protected String toStringEncoding() { + Map map = new LinkedHashMap(); + map.put("ID", name); + map.put("Description", description); + return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map); + } + + public boolean equals(Object o) { + if ( 
!(o instanceof VCFSimpleHeaderLine) ) + return false; + VCFSimpleHeaderLine other = (VCFSimpleHeaderLine)o; + return name.equals(other.name) && + description.equals(other.description); + } + + public String getName() { + return name; + } + + public String getDescription() { + return description; + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java index ecede068e..f43891e77 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java @@ -25,10 +25,10 @@ package org.broadinstitute.sting.utils.codecs.vcf; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; @@ -180,19 +180,4 @@ public class VCFUtils { return new HashSet(map.values()); } - - /** - * return a set of supported format lines; what we currently support for output in the genotype fields of a VCF - * @return a set of VCF format lines - */ - public static Set getSupportedHeaderStrings() { - Set result = new HashSet(); - result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype")); - result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality")); - result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)")); - result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, -1, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where 
A=ref and B=alt; if site is not biallelic, number of likelihoods if n*(n+1)/2")); - - return result; - } - } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/collections/NestedHashMap.java b/public/java/src/org/broadinstitute/sting/utils/collections/NestedHashMap.java index a36ed9ac6..d280ac804 100755 --- a/public/java/src/org/broadinstitute/sting/utils/collections/NestedHashMap.java +++ b/public/java/src/org/broadinstitute/sting/utils/collections/NestedHashMap.java @@ -25,7 +25,8 @@ package org.broadinstitute.sting.utils.collections; -import java.util.*; +import java.util.HashMap; +import java.util.Map; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/utils/collections/RODMergingIterator.java b/public/java/src/org/broadinstitute/sting/utils/collections/RODMergingIterator.java index 3a731c2fd..6d6cb8272 100644 --- a/public/java/src/org/broadinstitute/sting/utils/collections/RODMergingIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/collections/RODMergingIterator.java @@ -27,10 +27,13 @@ package org.broadinstitute.sting.utils.collections; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.*; +import java.util.Collection; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.PriorityQueue; public class RODMergingIterator implements Iterator, Iterable { PriorityQueue queue = new PriorityQueue(); diff --git a/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java b/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java index 41de5ef96..bba47c76c 100644 --- 
a/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java @@ -26,15 +26,18 @@ package org.broadinstitute.sting.utils.duplicates; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; -import java.util.List; import java.util.Arrays; +import java.util.List; public class DupUtils { private static SAMRecord tmpCopyRead(SAMRecord read) { diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 0be4bec91..7eab6f6c9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -28,9 +28,8 @@ import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.util.Arrays; @@ -154,6 +153,16 @@ public class UserException extends ReviewedStingException { } } + public 
static class MalformedVCF extends UserException { + public MalformedVCF(String message, String line) { + super(String.format("The provided VCF file is malformed at line %s: %s", line, message)); + } + + public MalformedVCF(String message, int lineNo) { + super(String.format("The provided VCF file is malformed at line nmber %d: %s", lineNo, message)); + } + } + public static class ReadMissingReadGroup extends MalformedBAM { public ReadMissingReadGroup(SAMRecord read) { super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName())); diff --git a/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java b/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java index 0c5085cc7..43ef4aa74 100644 --- a/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java +++ b/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java @@ -25,15 +25,16 @@ package org.broadinstitute.sting.utils.fasta; import net.sf.picard.PicardException; -import net.sf.picard.reference.*; +import net.sf.picard.reference.FastaSequenceIndex; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.picard.reference.ReferenceSequence; import net.sf.samtools.SAMSequenceRecord; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; import java.io.FileNotFoundException; import java.util.Arrays; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - /** * A caching version of the IndexedFastaSequenceFile that avoids going to disk as often as the raw indexer. 
* diff --git a/public/java/src/org/broadinstitute/sting/utils/genotype/DiploidGenotype.java b/public/java/src/org/broadinstitute/sting/utils/genotype/DiploidGenotype.java index cbfba848c..1c2cfe2e1 100755 --- a/public/java/src/org/broadinstitute/sting/utils/genotype/DiploidGenotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/genotype/DiploidGenotype.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.utils.genotype; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.MathUtils; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java index 31791e805..a17e81461 100755 --- a/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java @@ -24,14 +24,14 @@ package org.broadinstitute.sting.utils.genotype; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.Allele; -import java.util.*; +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.List; public class Haplotype { protected byte[] bases = null; diff --git a/public/java/src/org/broadinstitute/sting/utils/help/ApplicationDetails.java b/public/java/src/org/broadinstitute/sting/utils/help/ApplicationDetails.java index d9e74640f..483c874dc 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/ApplicationDetails.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/ApplicationDetails.java @@ -25,13 +25,13 @@ package 
org.broadinstitute.sting.utils.help; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.commandline.CommandLineProgram; +import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.List; -import java.util.Collections; import java.io.IOException; +import java.util.Collections; +import java.util.List; /** * Contains details additional details that the program can diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DescriptionTaglet.java b/public/java/src/org/broadinstitute/sting/utils/help/DescriptionTaglet.java index 68633a2b3..65c332048 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/DescriptionTaglet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/DescriptionTaglet.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.utils.help; import com.sun.tools.doclets.Taglet; -import com.sun.javadoc.Tag; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DisplayNameTaglet.java b/public/java/src/org/broadinstitute/sting/utils/help/DisplayNameTaglet.java index be6f7f3eb..6c6dad736 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/DisplayNameTaglet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/DisplayNameTaglet.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.utils.help; import com.sun.tools.doclets.Taglet; -import com.sun.javadoc.Tag; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java b/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java index 493f26e76..a9d71ef98 100755 --- a/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java @@ -25,22 +25,16 @@ package 
org.broadinstitute.sting.utils.help; +import org.apache.log4j.Logger; import org.broadinstitute.sting.commandline.ArgumentDefinition; import org.broadinstitute.sting.commandline.ArgumentDefinitionGroup; import org.broadinstitute.sting.commandline.ArgumentDefinitions; -import org.broadinstitute.sting.utils.text.TextFormattingUtils; import org.broadinstitute.sting.utils.Utils; -import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.text.TextFormattingUtils; -import java.util.Formatter; -import java.util.List; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.Comparator; -import java.util.Collection; -import java.util.Collections; import java.text.DateFormat; import java.text.SimpleDateFormat; +import java.util.*; /** * Print out help for Sting command-line applications. */ diff --git a/public/java/src/org/broadinstitute/sting/utils/help/HelpTaglet.java b/public/java/src/org/broadinstitute/sting/utils/help/HelpTaglet.java index b962664eb..b350b1a29 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/HelpTaglet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/HelpTaglet.java @@ -1,9 +1,7 @@ package org.broadinstitute.sting.utils.help; -import com.sun.tools.doclets.Taglet; import com.sun.javadoc.Tag; - -import java.util.Map; +import com.sun.tools.doclets.Taglet; /** * Basic functionality for the help taglet. 
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/ResourceBundleExtractorDoclet.java b/public/java/src/org/broadinstitute/sting/utils/help/ResourceBundleExtractorDoclet.java index 4afac69c3..6ee12d42e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/ResourceBundleExtractorDoclet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/ResourceBundleExtractorDoclet.java @@ -26,14 +26,16 @@ package org.broadinstitute.sting.utils.help; import com.sun.javadoc.*; +import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.*; -import java.util.*; - -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.classloader.JVMUtils; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.gatk.walkers.Walker; +import java.util.HashSet; +import java.util.Properties; +import java.util.Scanner; +import java.util.Set; /** * Extracts certain types of javadoc (specifically package and class descriptions) and makes them available diff --git a/public/java/src/org/broadinstitute/sting/utils/help/SummaryTaglet.java b/public/java/src/org/broadinstitute/sting/utils/help/SummaryTaglet.java index 036bbec4f..db8b55940 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/SummaryTaglet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/SummaryTaglet.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.utils.help; import com.sun.tools.doclets.Taglet; -import com.sun.javadoc.Tag; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/utils/instrumentation/Sizeof.java b/public/java/src/org/broadinstitute/sting/utils/instrumentation/Sizeof.java index 3420c9876..73a29ba4f 100644 --- 
a/public/java/src/org/broadinstitute/sting/utils/instrumentation/Sizeof.java +++ b/public/java/src/org/broadinstitute/sting/utils/instrumentation/Sizeof.java @@ -27,9 +27,9 @@ package org.broadinstitute.sting.utils.instrumentation; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.lang.instrument.Instrumentation; +import java.lang.reflect.Array; import java.lang.reflect.Field; import java.lang.reflect.Modifier; -import java.lang.reflect.Array; import java.util.IdentityHashMap; /** diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java index e722ac196..988240ef9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java @@ -25,16 +25,16 @@ package org.broadinstitute.sting.utils.interval; +import org.broadinstitute.sting.gatk.iterators.PushbackIterator; +import org.broadinstitute.sting.gatk.refdata.utils.StringToGenomeLocIteratorAdapter; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.gatk.iterators.PushbackIterator; -import org.broadinstitute.sting.gatk.refdata.utils.StringToGenomeLocIteratorAdapter; -import java.util.Iterator; import java.io.File; import java.io.FileNotFoundException; +import java.util.Iterator; /** * Created by IntelliJ IDEA. 
diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java index 80dc35455..f551e1368 100644 --- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java @@ -5,17 +5,17 @@ import net.sf.picard.util.IntervalList; import net.sf.samtools.SAMFileHeader; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.bed.BedParser; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; +import java.io.File; import java.io.IOException; import java.util.*; -import java.io.File; /** * Parse text representations of interval strings that diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/NwayIntervalMergingIterator.java b/public/java/src/org/broadinstitute/sting/utils/interval/NwayIntervalMergingIterator.java index 31a2f41fb..7e87ce8b5 100644 --- a/public/java/src/org/broadinstitute/sting/utils/interval/NwayIntervalMergingIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/NwayIntervalMergingIterator.java @@ -26,11 +26,10 @@ package org.broadinstitute.sting.utils.interval; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import java.util.PriorityQueue; import java.util.Iterator; +import java.util.PriorityQueue; /** * Created by IntelliJ IDEA. 
diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/OverlappingIntervalIterator.java b/public/java/src/org/broadinstitute/sting/utils/interval/OverlappingIntervalIterator.java index 0b63d582e..29ffb13e4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/interval/OverlappingIntervalIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/OverlappingIntervalIterator.java @@ -25,9 +25,8 @@ package org.broadinstitute.sting.utils.interval; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; +import org.broadinstitute.sting.utils.GenomeLoc; import java.util.Iterator; diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java index 543302446..3821c9c8a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java @@ -24,19 +24,16 @@ package org.broadinstitute.sting.utils.pileup; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.utils.HasGenomeLocation; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.gatk.iterators.IterableIterator; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.StingException; import java.util.*; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.exceptions.StingException; - /** * A generic implementation 
of read-backed pileups. * diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java index b6f3e9f09..26e66014c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java @@ -1,8 +1,5 @@ package org.broadinstitute.sting.utils.pileup; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.BaseUtils; import net.sf.samtools.SAMRecord; import java.util.Arrays; diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java index 6c855c1c7..f7d237401 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java @@ -1,6 +1,9 @@ package org.broadinstitute.sting.utils.pileup; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; /** * An easy to access fragment-based pileup, which contains two separate pileups. 
The first diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/MergingPileupElementIterator.java b/public/java/src/org/broadinstitute/sting/utils/pileup/MergingPileupElementIterator.java index d8af2ea8f..7005cf869 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/MergingPileupElementIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/MergingPileupElementIterator.java @@ -27,9 +27,9 @@ package org.broadinstitute.sting.utils.pileup; import net.sf.picard.util.PeekableIterator; import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import java.util.PriorityQueue; import java.util.Comparator; import java.util.Iterator; +import java.util.PriorityQueue; /** * Merges multiple pileups broken down by sample. diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index 51e02bf74..66e1afecb 100755 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -1,9 +1,10 @@ package org.broadinstitute.sting.utils.pileup; -import org.broadinstitute.sting.utils.*; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.sam.ReadUtils; -import com.google.java.contract.*; /** * Created by IntelliJ IDEA. 
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileup.java index a32aa5645..8d43a368a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileup.java @@ -24,15 +24,13 @@ package org.broadinstitute.sting.utils.pileup; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; -import java.util.Iterator; -import java.util.List; import java.util.Collection; - -import net.sf.samtools.SAMRecord; +import java.util.List; /** * A clean interface for working with extended event pileups. diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java index a1a08c95f..31d29430a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java @@ -23,15 +23,14 @@ */ package org.broadinstitute.sting.utils.pileup; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.*; -import net.sf.samtools.SAMRecord; - public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup implements ReadBackedExtendedEventPileup { private int nInsertions; private int maxDeletionLength; // cached 
value of the length of the longest deletion observed at the site diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java index c52cc0b52..36b8a8c65 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java @@ -24,14 +24,13 @@ package org.broadinstitute.sting.utils.pileup; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.iterators.IterableIterator; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.HasGenomeLocation; -import java.util.List; import java.util.Collection; +import java.util.List; /** * A data retrieval interface for accessing parts of the pileup. diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java index 70eba577c..e5b054961 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java @@ -23,9 +23,9 @@ */ package org.broadinstitute.sting.utils.pileup; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.utils.GenomeLoc; -import net.sf.samtools.SAMRecord; import java.util.List; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index 4e4294b20..344eccb83 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -25,15 +25,16 @@ package 
org.broadinstitute.sting.utils.sam; -import net.sf.samtools.CigarOperator; -import net.sf.samtools.SAMRecord; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.pileup.*; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.util.ArrayList; import java.util.Arrays; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java index 5812c9aec..5f7db458a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java @@ -1,15 +1,13 @@ package org.broadinstitute.sting.utils.sam; -import org.broadinstitute.sting.gatk.traversals.TraversalEngine; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.apache.log4j.Logger; - -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import 
org.broadinstitute.sting.gatk.traversals.TraversalEngine; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.walkers.Walker; /* diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java index ce6ca570c..adf60b16b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java @@ -1,15 +1,17 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.*; - -import java.io.InputStream; -import java.io.ByteArrayInputStream; -import java.io.UnsupportedEncodingException; -import java.util.*; - -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; /** * User: hanna * Date: Jun 11, 2009 diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java index b683f5247..62e371bc0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java @@ -1,9 +1,8 @@ package org.broadinstitute.sting.utils.sam; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.ReadProperties; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import 
org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import java.util.Iterator; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java index 4f5dcca61..2b359fe6a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java @@ -1,13 +1,12 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMSequenceRecord; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.List; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - /* * Copyright (c) 2009 The Broad Institute diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ComparableSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/ComparableSAMRecord.java index 01f1dfe96..31deb7535 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ComparableSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ComparableSAMRecord.java @@ -26,8 +26,6 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; public class ComparableSAMRecord implements Comparable { diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java index 2b8ac387c..c7ffcab0c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.utils.sam; -import 
net.sf.samtools.*; +import net.sf.samtools.SAMReadGroupRecord; /** * @author ebanks diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index f3d9edab9..517f9f75d 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -1,12 +1,14 @@ package org.broadinstitute.sting.utils.sam; -import java.lang.reflect.Method; -import java.util.*; - import net.sf.samtools.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import java.lang.reflect.Method; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + /** * @author ebanks * GATKSAMRecord diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 080762039..6c15910b1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -25,13 +25,14 @@ package org.broadinstitute.sting.utils.sam; -import com.google.java.contract.*; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; import net.sf.samtools.*; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.*; import java.io.File; +import java.util.*; /** * A miscellaneous collection of utilities for working with SAM files, headers, etc. 
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/SAMFileReaderBuilder.java b/public/java/src/org/broadinstitute/sting/utils/sam/SAMFileReaderBuilder.java index f8a0ca6a7..bb9db5d98 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/SAMFileReaderBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/SAMFileReaderBuilder.java @@ -26,11 +26,10 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.SAMFileReader; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - /** * Allows the user to steadily accumulate information about what * components go into a SAM file writer, ultimately using this diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/SimplifyingSAMFileWriter.java b/public/java/src/org/broadinstitute/sting/utils/sam/SimplifyingSAMFileWriter.java index df2010e8b..60e1d9948 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/SimplifyingSAMFileWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/SimplifyingSAMFileWriter.java @@ -3,13 +3,6 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.util.Iterator; -import java.util.NoSuchElementException; /** * XXX diff --git a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java index 9beb7895b..f6aa882ad 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java @@ 
-26,7 +26,6 @@ package org.broadinstitute.sting.utils.text; import org.broadinstitute.sting.commandline.ParsingEngine; import org.broadinstitute.sting.commandline.Tags; -import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; diff --git a/public/java/src/org/broadinstitute/sting/utils/text/TextFormattingUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/TextFormattingUtils.java index 803d6ac0f..1d4251542 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/TextFormattingUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/TextFormattingUtils.java @@ -28,11 +28,11 @@ package org.broadinstitute.sting.utils.text; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.*; -import java.util.regex.Pattern; -import java.util.regex.Matcher; -import java.io.StringReader; import java.io.IOException; +import java.io.StringReader; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * Common utilities for dealing with text formatting. diff --git a/public/java/src/org/broadinstitute/sting/utils/text/XReadLines.java b/public/java/src/org/broadinstitute/sting/utils/text/XReadLines.java index 064256388..52b6f3b01 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/XReadLines.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/XReadLines.java @@ -25,10 +25,10 @@ package org.broadinstitute.sting.utils.text; -import java.util.Iterator; -import java.util.List; -import java.util.LinkedList; import java.io.*; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; /** * Support for Python-like xreadlines() function as a class. 
This is an iterator and iterable over diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/FileBackedGenomeLocProcessingTracker.java b/public/java/src/org/broadinstitute/sting/utils/threading/FileBackedGenomeLocProcessingTracker.java index cae099eeb..3763ec67d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/threading/FileBackedGenomeLocProcessingTracker.java +++ b/public/java/src/org/broadinstitute/sting/utils/threading/FileBackedGenomeLocProcessingTracker.java @@ -1,18 +1,14 @@ package org.broadinstitute.sting.utils.threading; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.*; -import java.nio.ByteBuffer; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.List; -import java.util.concurrent.locks.ReentrantLock; /** * Keeps a copy of the processing locks in a file diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/NoOpGenomeLocProcessingTracker.java b/public/java/src/org/broadinstitute/sting/utils/threading/NoOpGenomeLocProcessingTracker.java index 4e61ef9e1..ad2a6d31b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/threading/NoOpGenomeLocProcessingTracker.java +++ b/public/java/src/org/broadinstitute/sting/utils/threading/NoOpGenomeLocProcessingTracker.java @@ -1,9 +1,5 @@ package org.broadinstitute.sting.utils.threading; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileLock.java b/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileLock.java 
index 3eb2be96b..0f47da413 100644 --- a/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileLock.java +++ b/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileLock.java @@ -6,10 +6,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; -import java.io.FileNotFoundException; import java.io.IOException; -import java.io.RandomAccessFile; -import java.nio.channels.*; /** * User: depristo diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileThreadSafeLock.java b/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileThreadSafeLock.java index dec69f7c2..d70879a0a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileThreadSafeLock.java +++ b/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileThreadSafeLock.java @@ -2,13 +2,8 @@ package org.broadinstitute.sting.utils.threading; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.RandomAccessFile; -import java.nio.channels.*; /** * User: depristo diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java index a9ba46159..c3f437f11 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.utils.variantcontext; -import java.util.Arrays; -import java.util.List; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; +import java.util.List; /** * Immutable representation of an allele @@ -108,7 +108,7 @@ 
public class Allele implements Comparable { this.bases = bases; if ( ! acceptableAlleleBases(bases) ) - throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases)); + throw new IllegalArgumentException("Unexpected base in allele bases \'" + new String(bases)+"\'"); } private Allele(String bases, boolean isRef) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index 3a87f1196..0b5976c3c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.variantcontext; import org.broad.tribble.util.ParsingUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.*; @@ -19,12 +20,14 @@ public class Genotype { protected InferredGeneticContext commonInfo; public final static double NO_NEG_LOG_10PERROR = InferredGeneticContext.NO_NEG_LOG_10PERROR; protected List alleles = null; // new ArrayList(); + protected Type type = null; protected boolean isPhased = false; - private boolean filtersWereAppliedToContext; + protected boolean filtersWereAppliedToContext; public Genotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean isPhased) { - this.alleles = Collections.unmodifiableList(alleles); + if ( alleles != null ) + this.alleles = Collections.unmodifiableList(alleles); commonInfo = new InferredGeneticContext(sampleName, negLog10PError, filters, attributes); filtersWereAppliedToContext = filters != null; this.isPhased = isPhased; @@ -66,6 +69,9 @@ public class Genotype { } public List getAlleles(Allele allele) { + if ( getType() == Type.UNAVAILABLE ) + throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE 
genotype"); + List al = new ArrayList(); for ( Allele a : alleles ) if ( a.equals(allele) ) @@ -75,6 +81,8 @@ public class Genotype { } public Allele getAllele(int i) { + if ( getType() == Type.UNAVAILABLE ) + throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype"); return alleles.get(i); } @@ -89,10 +97,21 @@ public class Genotype { NO_CALL, HOM_REF, HET, - HOM_VAR + HOM_VAR, + UNAVAILABLE } public Type getType() { + if ( type == null ) { + type = determineType(); + } + return type; + } + + protected Type determineType() { + if ( alleles == null ) + return Type.UNAVAILABLE; + Allele firstAllele = alleles.get(0); if ( firstAllele.isNoCall() ) { @@ -122,7 +141,8 @@ public class Genotype { * @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF) */ public boolean isNoCall() { return getType() == Type.NO_CALL; } - public boolean isCalled() { return getType() != Type.NO_CALL; } + public boolean isCalled() { return getType() != Type.NO_CALL && getType() != Type.UNAVAILABLE; } + public boolean isAvailable() { return getType() != Type.UNAVAILABLE; } // // Useful methods for getting genotype likelihoods for a genotype object, if present @@ -157,8 +177,8 @@ public class Genotype { } public void validate() { - if ( alleles == null ) throw new IllegalArgumentException("BUG: alleles cannot be null in setAlleles"); - if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0 in setAlleles"); + if ( alleles == null ) return; + if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0"); int nNoCalls = 0; for ( Allele allele : alleles ) { @@ -175,6 +195,9 @@ public class Genotype { } public String getGenotypeString(boolean ignoreRefState) { + if ( alleles == null ) + return null; + // Notes: // 1. Make sure to use the appropriate separator depending on whether the genotype is phased // 2. 
If ignoreRefState is true, then we want just the bases of the Alleles (ignoring the '*' indicating a ref Allele) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java index 3feaf5e1c..dba16cf86 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java @@ -24,8 +24,8 @@ package org.broadinstitute.sting.utils.variantcontext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broad.tribble.TribbleException; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; public class GenotypeLikelihoods { public static final boolean CAP_PLS = false; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableVariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableVariantContext.java index 4efba8825..a191670a4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableVariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableVariantContext.java @@ -1,7 +1,10 @@ package org.broadinstitute.sting.utils.variantcontext; -import java.util.*; +import java.util.Collection; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; /** * Mutable version of VariantContext diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index da80a3431..eab392c4d 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -2,8 +2,8 @@ package org.broadinstitute.sting.utils.variantcontext; import org.broad.tribble.Feature; import 
org.broad.tribble.TribbleException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broad.tribble.util.ParsingUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFParser; import java.util.*; @@ -1206,9 +1206,11 @@ public class VariantContext implements Feature { // to enable tribble intergrati if ( ! name.equals(g.getSampleName()) ) throw new IllegalStateException("Bound sample name " + name + " does not equal the name of the genotype " + g.getSampleName()); - for ( Allele gAllele : g.getAlleles() ) { - if ( ! hasAllele(gAllele) && gAllele.isCalled() ) - throw new IllegalStateException("Allele in genotype " + gAllele + " not in the variant context " + alleles); + if ( g.isAvailable() ) { + for ( Allele gAllele : g.getAlleles() ) { + if ( ! hasAllele(gAllele) && gAllele.isCalled() ) + throw new IllegalStateException("Allele in genotype " + gAllele + " not in the variant context " + alleles); + } } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 5d58954aa..212600360 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -23,21 +23,26 @@ package org.broadinstitute.sting.utils.variantcontext; -import java.io.Serializable; -import java.util.*; - -import com.google.java.contract.*; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.util.StringUtil; -import org.apache.commons.jexl2.*; +import org.apache.commons.jexl2.Expression; +import org.apache.commons.jexl2.JexlEngine; import org.broad.tribble.util.popgen.HardyWeinbergCalculation; -import 
org.broadinstitute.sting.utils.codecs.vcf.AbstractVCFCodec; import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.codecs.vcf.AbstractVCFCodec; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import java.io.Serializable; +import java.util.*; + public class VariantContextUtils { final public static JexlEngine engine = new JexlEngine(); static { @@ -284,8 +289,8 @@ public class VariantContextUtils { /** * Returns a newly allocated VC that is the same as VC, but without genotypes - * @param vc - * @return + * @param vc variant context + * @return new VC without genotypes */ @Requires("vc != null") @Ensures("result != null") @@ -298,8 +303,8 @@ public class VariantContextUtils { /** * Returns a newly allocated list of VC, where each VC is the same as the input VCs, but without genotypes - * @param vcs - * @return + * @param vcs collection of VCs + * @return new VCs without genotypes */ @Requires("vcs != null") @Ensures("result != null") @@ -357,9 +362,9 @@ public class VariantContextUtils { * information per genotype. The master merge will add the PQ information from each genotype record, where * appropriate, to the master VC. 
* - * @param unsortedVCs - * @param masterName - * @return + * @param unsortedVCs collection of VCs + * @param masterName name of master VC + * @return master-merged VC */ public static VariantContext masterMerge(Collection unsortedVCs, String masterName) { VariantContext master = findMaster(unsortedVCs, masterName); @@ -430,11 +435,15 @@ public class VariantContextUtils { * If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with * the sample name * - * @param unsortedVCs - * @param priorityListOfVCs - * @param filteredRecordMergeType - * @param genotypeMergeOptions - * @return + * @param genomeLocParser loc parser + * @param unsortedVCs collection of unsorted VCs + * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs + * @param filteredRecordMergeType merge type for filtered records + * @param genotypeMergeOptions merge option for genotypes + * @param annotateOrigin should we annotate the set it came from? + * @param printMessages should we print messages? + * @param inputRefBase the ref base + * @return new VariantContext */ public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs, FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions, @@ -443,6 +452,24 @@ public class VariantContextUtils { return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false, false); } + /** + * Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided. 
+ * If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with + * the sample name + * + * @param genomeLocParser loc parser + * @param unsortedVCs collection of unsorted VCs + * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs + * @param filteredRecordMergeType merge type for filtered records + * @param genotypeMergeOptions merge option for genotypes + * @param annotateOrigin should we annotate the set it came from? + * @param printMessages should we print messages? + * @param inputRefBase the ref base + * @param setKey the key name of the set + * @param filteredAreUncalled are filtered records uncalled? + * @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count? + * @return new VariantContext + */ public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs, FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions, boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey, @@ -465,7 +492,7 @@ public class VariantContextUtils { if ( ! 
filteredAreUncalled || vc.isNotFiltered() ) VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc,inputRefBase,false)); } - if ( VCs.size() == 0 ) // everything is filtered out and we're filteredareUncalled + if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled return null; // establish the baseline info from the first VC @@ -610,6 +637,17 @@ public class VariantContextUtils { return merged; } + public static Map> separateVariantContextsByType(Collection VCs) { + HashMap> mappedVCs = new HashMap>(); + for ( VariantContext vc : VCs ) { + if ( !mappedVCs.containsKey(vc.getType()) ) + mappedVCs.put(vc.getType(), new ArrayList()); + mappedVCs.get(vc.getType()).add(vc); + } + + return mappedVCs; + } + private static class AlleleMapper { private VariantContext vc = null; private Map map = null; @@ -829,6 +867,7 @@ public class VariantContextUtils { /** * create a genome location, given a variant context + * @param genomeLocParser parser * @param vc the variant context * @return the genomeLoc */ diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java index c5a3b6f2a..a59ed7abe 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java @@ -29,7 +29,10 @@ import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.UserException; -import java.util.*; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; /** * diff --git a/public/java/src/org/broadinstitute/sting/utils/wiggle/WiggleWriter.java b/public/java/src/org/broadinstitute/sting/utils/wiggle/WiggleWriter.java index ab91d0e2e..5782c2704 100755 --- 
a/public/java/src/org/broadinstitute/sting/utils/wiggle/WiggleWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/wiggle/WiggleWriter.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.utils.wiggle; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.*; diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index 61bb8b34b..ef46d4bff 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -4,6 +4,7 @@ import org.apache.commons.io.FileUtils; import org.apache.log4j.*; import org.apache.log4j.spi.LoggingEvent; import org.broadinstitute.sting.commandline.CommandLineUtils; +import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.testng.Assert; @@ -12,6 +13,7 @@ import java.io.*; import java.math.BigInteger; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.*; /** * @@ -78,11 +80,6 @@ public abstract class BaseTest { public static final String networkTempDir = "/broad/shptmp/"; public static final File networkTempDirFile = new File(networkTempDir); - /** - * Subdirectory under the ant build directory where we store integration test md5 results - */ - public static final String MD5_FILE_DB_SUBDIR = "integrationtests"; - public static final String testDir = "public/testdata/"; /** before the class starts up */ @@ -107,6 +104,57 @@ public abstract class BaseTest { } } + /** + * Simple generic utility class to creating TestNG data providers: + * + * 1: inherit this class, as in + * + * private class SummarizeDifferenceTest extends TestDataProvider 
{ + * public SummarizeDifferenceTest() { + * super(SummarizeDifferenceTest.class); + * } + * ... + * } + * + * Provide a reference to your class to the TestDataProvider constructor. + * + * 2: Create instances of your subclass. Return from it the call to getTests, providing + * the class type of your test + * + * @DataProvider(name = "summaries" + * public Object[][] createSummaries() { + * new SummarizeDifferenceTest().addDiff("A", "A").addSummary("A:2"); + * new SummarizeDifferenceTest().addDiff("A", "B").addSummary("A:1", "B:1"); + * return SummarizeDifferenceTest.getTests(SummarizeDifferenceTest.class); + * } + * + * This class magically tracks created objects of this + */ + public static class TestDataProvider { + private static final Map> tests = new HashMap>(); + + /** + * Create a new TestDataProvider instance bound to the class variable C + * @param c + */ + public TestDataProvider(Class c) { + if ( ! tests.containsKey(c) ) + tests.put(c, new ArrayList()); + tests.get(c).add(this); + } + + /** + * Return all of the data providers in the form expected by TestNG of type class C + * @param c + * @return + */ + public static Object[][] getTests(Class c) { + List params2 = new ArrayList(); + for ( Object x : tests.get(c) ) params2.add(new Object[]{x}); + return params2.toArray(new Object[][]{}); + } + } + /** * test if the file exists * @@ -150,192 +198,6 @@ public abstract class BaseTest { } } - /** - * a little utility function for all tests to md5sum a file - * Shameless taken from: - * - * http://www.javalobby.org/java/forums/t84420.html - * - * @param file the file - * @return a string - */ - public static String md5SumFile(File file) { - MessageDigest digest; - try { - digest = MessageDigest.getInstance("MD5"); - } catch (NoSuchAlgorithmException e) { - throw new ReviewedStingException("Unable to find MD5 digest"); - } - InputStream is; - try { - is = new FileInputStream(file); - } catch (FileNotFoundException e) { - throw new 
ReviewedStingException("Unable to open file " + file); - } - byte[] buffer = new byte[8192]; - int read; - try { - while ((read = is.read(buffer)) > 0) { - digest.update(buffer, 0, read); - } - byte[] md5sum = digest.digest(); - BigInteger bigInt = new BigInteger(1, md5sum); - return bigInt.toString(16); - - } - catch (IOException e) { - throw new ReviewedStingException("Unable to process file for MD5", e); - } - finally { - try { - is.close(); - } - catch (IOException e) { - throw new ReviewedStingException("Unable to close input stream for MD5 calculation", e); - } - } - } - - protected static void ensureMd5DbDirectory() { - // todo -- make path - File dir = new File(MD5_FILE_DB_SUBDIR); - if ( ! dir.exists() ) { - System.out.printf("##### Creating MD5 db %s%n", MD5_FILE_DB_SUBDIR); - if ( ! dir.mkdir() ) { - throw new ReviewedStingException("Infrastructure failure: failed to create md5 directory " + MD5_FILE_DB_SUBDIR); - } - } - } - - protected static File getFileForMD5(final String md5) { - final String basename = String.format("%s.integrationtest", md5); - return new File(MD5_FILE_DB_SUBDIR + "/" + basename); - } - - private static void updateMD5Db(final String md5, final File resultsFile) { - // todo -- copy results file to DB dir if needed under filename for md5 - final File dbFile = getFileForMD5(md5); - if ( ! dbFile.exists() ) { - // the file isn't already in the db, copy it over - System.out.printf("##### Updating MD5 file: %s%n", dbFile.getPath()); - try { - FileUtils.copyFile(resultsFile, dbFile); - } catch ( IOException e ) { - throw new ReviewedStingException(e.getMessage()); - } - } else { - System.out.printf("##### MD5 file is up to date: %s%n", dbFile.getPath()); - - } - } - - private static String getMD5Path(final String md5, final String valueIfNotFound) { - // todo -- look up the result in the directory and return the path if it exists - final File dbFile = getFileForMD5(md5); - return dbFile.exists() ? 
dbFile.getPath() : valueIfNotFound; - } - - public static byte[] getBytesFromFile(File file) throws IOException { - InputStream is = new FileInputStream(file); - - // Get the size of the file - long length = file.length(); - - if (length > Integer.MAX_VALUE) { - // File is too large - } - - // Create the byte array to hold the data - byte[] bytes = new byte[(int) length]; - - // Read in the bytes - int offset = 0; - int numRead = 0; - while (offset < bytes.length - && (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) { - offset += numRead; - } - - // Ensure all the bytes have been read in - if (offset < bytes.length) { - throw new IOException("Could not completely read file " + file.getName()); - } - - // Close the input stream and return bytes - is.close(); - return bytes; - } - - /** - * Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL throw an exception if the MD5s are different. - * @param name Name of the test. - * @param resultsFile File to MD5. - * @param expectedMD5 Expected MD5 value. - * @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text. - * @return The calculated MD5. - */ - public static String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) { - String filemd5sum = testFileMD5(name, resultsFile, expectedMD5, parameterize); - - if (parameterize || expectedMD5.equals("")) { - // Don't assert - } else { - Assert.assertEquals(filemd5sum, expectedMD5, name + " Mismatching MD5s"); - System.out.println(String.format(" => %s PASSED", name)); - } - - return filemd5sum; - } - - - /** - * Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL NOT throw an exception if the MD5s are different. - * @param name Name of the test. - * @param resultsFile File to MD5. - * @param expectedMD5 Expected MD5 value. 
- * @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text. - * @return The calculated MD5. - */ - public static String testFileMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) { - try { - byte[] bytesOfMessage = getBytesFromFile(resultsFile); - byte[] thedigest = MessageDigest.getInstance("MD5").digest(bytesOfMessage); - BigInteger bigInt = new BigInteger(1, thedigest); - String filemd5sum = bigInt.toString(16); - while (filemd5sum.length() < 32) filemd5sum = "0" + filemd5sum; // pad to length 32 - - // - // copy md5 to integrationtests - // - updateMD5Db(filemd5sum, resultsFile); - - if (parameterize || expectedMD5.equals("")) { - System.out.println(String.format("PARAMETERIZATION[%s]: file %s has md5 = %s, stated expectation is %s, equal? = %b", - name, resultsFile, filemd5sum, expectedMD5, filemd5sum.equals(expectedMD5))); - } else { - System.out.println(String.format("Checking MD5 for %s [calculated=%s, expected=%s]", resultsFile, filemd5sum, expectedMD5)); - System.out.flush(); - - if ( ! expectedMD5.equals(filemd5sum) ) { - // we are going to fail for real in assertEquals (so we are counted by the testing framework). 
- // prepare ourselves for the comparison - System.out.printf("##### Test %s is going fail #####%n", name); - String pathToExpectedMD5File = getMD5Path(expectedMD5, "[No DB file found]"); - String pathToFileMD5File = getMD5Path(filemd5sum, "[No DB file found]"); - System.out.printf("##### Path to expected file (MD5=%s): %s%n", expectedMD5, pathToExpectedMD5File); - System.out.printf("##### Path to calculated file (MD5=%s): %s%n", filemd5sum, pathToFileMD5File); - System.out.printf("##### Diff command: diff %s %s%n", pathToExpectedMD5File, pathToFileMD5File); - - // todo -- add support for simple inline display of the first N differences for text file - } - } - - return filemd5sum; - } catch (Exception e) { - throw new RuntimeException("Failed to read bytes from calls file: " + resultsFile, e); - } - } - /** * Creates a temp file that will be deleted on exit after tests are complete. * @param name Prefix of the file. diff --git a/public/java/test/org/broadinstitute/sting/MD5DB.java b/public/java/test/org/broadinstitute/sting/MD5DB.java new file mode 100644 index 000000000..bea9eaec5 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/MD5DB.java @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting; + +import org.apache.commons.io.FileUtils; +import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.testng.Assert; + +import java.io.*; +import java.math.BigInteger; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Arrays; + +/** + * Created by IntelliJ IDEA. + * User: depristo + * Date: 7/18/11 + * Time: 9:10 AM + * + * Utilities for manipulating the MD5 database of previous results + */ +public class MD5DB { + /** + * Subdirectory under the ant build directory where we store integration test md5 results + */ + public static final String LOCAL_MD5_DB_DIR = "integrationtests"; + public static final String GLOBAL_MD5_DB_DIR = "/humgen/gsa-hpprojects/GATK/data/integrationtests"; + + // ---------------------------------------------------------------------- + // + // MD5 DB stuff + // + // ---------------------------------------------------------------------- + + /** + * Create the MD5 file directories if necessary + */ + protected static void ensureMd5DbDirectory() { + File dir = new File(LOCAL_MD5_DB_DIR); + if ( ! dir.exists() ) { + System.out.printf("##### Creating MD5 db %s%n", LOCAL_MD5_DB_DIR); + if ( ! 
dir.mkdir() ) { + throw new ReviewedStingException("Infrastructure failure: failed to create md5 directory " + LOCAL_MD5_DB_DIR); + } + } + } + + /** + * Returns the path to an already existing file with the md5 contents, or valueIfNotFound + * if no such file exists in the db. + * + * @param md5 + * @param valueIfNotFound + * @return + */ + public static String getMD5FilePath(final String md5, final String valueIfNotFound) { + // we prefer the local db to the global DB, so match it first + for ( String dir : Arrays.asList(LOCAL_MD5_DB_DIR, GLOBAL_MD5_DB_DIR)) { + File f = getFileForMD5(md5, dir); + if ( f.exists() && f.canRead() ) + return f.getPath(); + } + + return valueIfNotFound; + } + + /** + * Utility function that given a file's md5 value and the path to the md5 db, + * returns the canonical name of the file. For example, if md5 is XXX and db is YYY, + * this will return YYY/XXX.integrationtest + * + * @param md5 + * @param dbPath + * @return + */ + private static File getFileForMD5(final String md5, final String dbPath) { + final String basename = String.format("%s.integrationtest", md5); + return new File(dbPath + "/" + basename); + } + + /** + * Copies the results file with md5 value to its canonical file name and db places + * + * @param md5 + * @param resultsFile + */ + private static void updateMD5Db(final String md5, final File resultsFile) { + copyFileToDB(getFileForMD5(md5, LOCAL_MD5_DB_DIR), resultsFile); + copyFileToDB(getFileForMD5(md5, GLOBAL_MD5_DB_DIR), resultsFile); + } + + /** + * Low-level utility routine that copies resultsFile to dbFile + * @param dbFile + * @param resultsFile + */ + private static void copyFileToDB(File dbFile, final File resultsFile) { + if ( ! 
dbFile.exists() ) { + // the file isn't already in the db, copy it over + System.out.printf("##### Updating MD5 file: %s%n", dbFile.getPath()); + try { + FileUtils.copyFile(resultsFile, dbFile); + } catch ( IOException e ) { + System.out.printf("##### Skipping update, cannot write file %s%n", dbFile); + } + } else { + System.out.printf("##### MD5 file is up to date: %s%n", dbFile.getPath()); + } + } + + /** + * Returns the byte[] of the entire contents of file, for md5 calculations + * @param file + * @return + * @throws IOException + */ + private static byte[] getBytesFromFile(File file) throws IOException { + InputStream is = new FileInputStream(file); + + // Get the size of the file + long length = file.length(); + + if (length > Integer.MAX_VALUE) { + // File is too large + } + + // Create the byte array to hold the data + byte[] bytes = new byte[(int) length]; + + // Read in the bytes + int offset = 0; + int numRead = 0; + while (offset < bytes.length + && (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) { + offset += numRead; + } + + // Ensure all the bytes have been read in + if (offset < bytes.length) { + throw new IOException("Could not completely read file " + file.getName()); + } + + // Close the input stream and return bytes + is.close(); + return bytes; + } + + /** + * Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL throw an exception if the MD5s are different. + * @param name Name of the test. + * @param resultsFile File to MD5. + * @param expectedMD5 Expected MD5 value. + * @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text. + * @return The calculated MD5. 
+ */ + public static String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) { + String filemd5sum = testFileMD5(name, resultsFile, expectedMD5, parameterize); + + if (parameterize || expectedMD5.equals("")) { + // Don't assert + } else if ( filemd5sum.equals(expectedMD5) ) { + System.out.println(String.format(" => %s PASSED", name)); + } else { + Assert.fail(String.format("%s has mismatching MD5s: expected=%s observed=%s", name, expectedMD5, filemd5sum)); + } + + return filemd5sum; + } + + + /** + * Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL NOT throw an exception if the MD5s are different. + * @param name Name of the test. + * @param resultsFile File to MD5. + * @param expectedMD5 Expected MD5 value. + * @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text. + * @return The calculated MD5. + */ + public static String testFileMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) { + try { + byte[] bytesOfMessage = getBytesFromFile(resultsFile); + byte[] thedigest = MessageDigest.getInstance("MD5").digest(bytesOfMessage); + BigInteger bigInt = new BigInteger(1, thedigest); + String filemd5sum = bigInt.toString(16); + while (filemd5sum.length() < 32) filemd5sum = "0" + filemd5sum; // pad to length 32 + + // + // copy md5 to integrationtests + // + updateMD5Db(filemd5sum, resultsFile); + + if (parameterize || expectedMD5.equals("")) { + System.out.println(String.format("PARAMETERIZATION[%s]: file %s has md5 = %s, stated expectation is %s, equal? = %b", + name, resultsFile, filemd5sum, expectedMD5, filemd5sum.equals(expectedMD5))); + } else { + System.out.println(String.format("Checking MD5 for %s [calculated=%s, expected=%s]", resultsFile, filemd5sum, expectedMD5)); + System.out.flush(); + + if ( ! 
expectedMD5.equals(filemd5sum) ) { + // we are going to fail for real in assertEquals (so we are counted by the testing framework). + // prepare ourselves for the comparison + System.out.printf("##### Test %s is going fail #####%n", name); + String pathToExpectedMD5File = getMD5FilePath(expectedMD5, "[No DB file found]"); + String pathToFileMD5File = getMD5FilePath(filemd5sum, "[No DB file found]"); + System.out.printf("##### Path to expected file (MD5=%s): %s%n", expectedMD5, pathToExpectedMD5File); + System.out.printf("##### Path to calculated file (MD5=%s): %s%n", filemd5sum, pathToFileMD5File); + System.out.printf("##### Diff command: diff %s %s%n", pathToExpectedMD5File, pathToFileMD5File); + + // inline differences + DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(System.out, 20, 10, 0); + boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), params); + if ( success ) + System.out.printf("Note that the above list is not comprehensive. At most 20 lines of output, and 10 specific differences will be listed. 
Please use -T DiffObjects -R public/testdata/exampleFASTA.fasta -m %s -t %s to explore the differences more freely%n", + pathToExpectedMD5File, pathToFileMD5File); + } + } + + return filemd5sum; + } catch (Exception e) { + throw new RuntimeException("Failed to read bytes from calls file: " + resultsFile, e); + } + } +} diff --git a/public/java/test/org/broadinstitute/sting/WalkerTest.java b/public/java/test/org/broadinstitute/sting/WalkerTest.java index dacaf2738..386c17659 100755 --- a/public/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/public/java/test/org/broadinstitute/sting/WalkerTest.java @@ -26,7 +26,9 @@ package org.broadinstitute.sting; import org.apache.commons.lang.StringUtils; +import org.broad.tribble.FeatureCodec; import org.broad.tribble.Tribble; +import org.broad.tribble.index.Index; import org.broad.tribble.index.IndexFactory; import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.gatk.CommandLineExecutable; @@ -51,7 +53,7 @@ public class WalkerTest extends BaseTest { } public String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5) { - return assertMatchingMD5(name, resultsFile, expectedMD5, parameterize()); + return MD5DB.assertMatchingMD5(name, resultsFile, expectedMD5, parameterize()); } public void maybeValidateSupplementaryFile(final String name, final File resultFile) { @@ -63,8 +65,20 @@ public class WalkerTest extends BaseTest { throw new StingException("Found an index created for file " + resultFile + " but we can only validate VCF files. 
Extend this code!"); } - System.out.println("Verifying on-the-fly index " + indexFile + " for test " + name + " using file " + resultFile); - Assert.assertTrue(IndexFactory.onDiskIndexEqualToNewlyCreatedIndex(resultFile, indexFile, new VCFCodec()), "Index on disk from indexing on the fly not equal to the index created after the run completed"); + assertOnDiskIndexEqualToNewlyCreatedIndex(indexFile, name, resultFile); + } + } + + + public static void assertOnDiskIndexEqualToNewlyCreatedIndex(final File indexFile, final String name, final File resultFile) { + System.out.println("Verifying on-the-fly index " + indexFile + " for test " + name + " using file " + resultFile); + Index indexFromOutputFile = IndexFactory.createIndex(resultFile, new VCFCodec()); + Index dynamicIndex = IndexFactory.loadIndex(indexFile.getAbsolutePath()); + + if ( ! indexFromOutputFile.equalsIgnoreTimestamp(dynamicIndex) ) { + Assert.fail(String.format("Index on disk from indexing on the fly not equal to the index created after the run completed. FileIndex %s vs. 
on-the-fly %s%n", + indexFromOutputFile.getProperties(), + dynamicIndex.getProperties())); } } @@ -177,7 +191,7 @@ public class WalkerTest extends BaseTest { } protected Pair, List> executeTest(final String name, WalkerTestSpec spec) { - ensureMd5DbDirectory(); // ensure the md5 directory exists + MD5DB.ensureMd5DbDirectory(); // ensure the md5 directory exists List tmpFiles = new ArrayList(); for (int i = 0; i < spec.nOutputFiles; i++) { diff --git a/public/java/test/org/broadinstitute/sting/alignment/AlignerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/alignment/AlignerIntegrationTest.java new file mode 100644 index 000000000..a6af034cb --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/alignment/AlignerIntegrationTest.java @@ -0,0 +1,27 @@ +package org.broadinstitute.sting.alignment; + +import org.testng.annotations.Test; +import org.broadinstitute.sting.WalkerTest; + +import java.util.Arrays; + +/** + * Integration tests for the aligner. + * + * @author mhanna + * @version 0.1 + */ +public class AlignerIntegrationTest extends WalkerTest { + @Test + public void testBasicAlignment() { + String md5 = "a2bdf907b18114a86ca47f9fc23791bf"; + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + GATKDataLocation + "bwa/human_b36_both.fasta" + + " -T Align" + + " -I " + validationDataLocation + "NA12878_Pilot1_20.trimmed.unmapped.bam" + + " -o %s", + 1, // just one output file + Arrays.asList(md5)); + executeTest("testBasicAlignment", spec); + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 6ba6926c6..e6300e6c9 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -15,7 
+15,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsNotAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("4cc077eb3d343e6b7ba12bff86ebe347")); + Arrays.asList("8a105fa5eebdfffe7326bc5b3d8ffd1c")); executeTest("test file has annotations, not asking for annotations, #1", spec); } @@ -23,7 +23,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsNotAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("1de8e943fbf55246ebd19efa32f22a58")); + Arrays.asList("964f1016ec9a3c55333f62dd834c14d6")); executeTest("test file has annotations, not asking for annotations, #2", spec); } @@ -31,7 +31,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("93c110e45fd4aedb044a8a5501e23336")); + Arrays.asList("8e7de435105499cd71ffc099e268a83e")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -39,7 +39,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - 
Arrays.asList("f5cb45910ed719f46159f9f71acaecf4")); + Arrays.asList("64b6804cb1e27826e3a47089349be581")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -47,7 +47,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsNotAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("4b48e7d095ef73e3151542ea976ecd89")); + Arrays.asList("42ccee09fa9f8c58f4a0d4f1139c094f")); executeTest("test file doesn't have annotations, not asking for annotations, #1", spec); } @@ -55,7 +55,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsNotAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("28dfbfd178aca071b948cd3dc2365357")); + Arrays.asList("f2ddfa8105c290b1f34b7a261a02a1ac")); executeTest("test file doesn't have annotations, not asking for annotations, #2", spec); } @@ -63,7 +63,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("a330a5bc3ee72a51dbeb7e6c97a0db99")); + Arrays.asList("fd1ffb669800c2e07df1e2719aa38e49")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -71,7 +71,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking2() { WalkerTestSpec spec = new 
WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("3a31d1ef471acfb881a2dec7963fe3f4")); + Arrays.asList("09f8e840770a9411ff77508e0ed0837f")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } @@ -79,7 +79,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testOverwritingHeader() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1, - Arrays.asList("a63fd8ff7bafbd46b7f009144a7c2ad1")); + Arrays.asList("78d2c19f8107d865970dbaf3e12edd92")); executeTest("test overwriting header", spec); } @@ -87,7 +87,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoReads() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1, - Arrays.asList("36378f1245bb99d902fbfe147605bc42")); + Arrays.asList("16e3a1403fc376320d7c69492cad9345")); executeTest("not passing it any reads", spec); } @@ -95,7 +95,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testDBTagWithDbsnp() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -D " + GATKDataLocation + "dbsnp_129_b36.rod -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1, - Arrays.asList("0257a1cc3c703535b2d3c5046bf88ab7")); + Arrays.asList("3da8ca2b6bdaf6e92d94a8c77a71313d")); executeTest("getting DB tag with dbSNP", spec); } @@ -103,7 +103,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testDBTagWithHapMap() { WalkerTestSpec spec = new 
WalkerTestSpec( baseTestString() + " -B:compH3,VCF " + validationDataLocation + "fakeHM3.vcf -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1, - Arrays.asList("2d7c73489dcf0db433bebdf79a068764")); + Arrays.asList("1bc01c5b3bd0b7aef75230310c3ce688")); executeTest("getting DB tag with HM3", spec); } @@ -111,13 +111,13 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testUsingExpression() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:foo,VCF " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variant", 1, - Arrays.asList("2f6efd08d818faa1eb0631844437c64a")); + Arrays.asList("e9c0d832dc6b4ed06c955060f830c140")); executeTest("using expression", spec); } @Test public void testTabixAnnotations() { - final String MD5 = "6c7a6a1c0027bf82656542a9b2671a35"; + final String MD5 = "13269d5a2e16f06fd755cc0fb9271acf"; for ( String file : Arrays.asList("CEU.exon.2010_03.sites.vcf", "CEU.exon.2010_03.sites.vcf.gz")) { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -A HomopolymerRun -B:variant,VCF " + validationDataLocation + "/" + file + " -BTI variant -NO_HEADER", 1, diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java index c4f6d5ebc..c75a5b2dc 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java @@ -29,7 +29,7 @@ public class GenomicAnnotatorIntegrationTest extends WalkerTest { */ - String[] md5WithDashSArg = {"3d3b61a83c1189108eabb2df04218099"}; + String[] 
md5WithDashSArg = {"efba4ce1641cfa2ef88a64395f2ebce8"}; WalkerTestSpec specWithSArg = new WalkerTestSpec( "-T GenomicAnnotator -R " + b36KGReference + " -B:variant,vcf3 /humgen/gsa-hpprojects/GATK/data/Annotations/examples/CEU_hapmap_nogt_23_subset.vcf" + @@ -58,7 +58,7 @@ public class GenomicAnnotatorIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("caa562160733aa638e1ba413ede209ae") + Arrays.asList("772fc3f43b70770ec6c6acbb8bbbd4c0") ); executeTest("testGenomicAnnotatorOnIndels", testOnIndels); } @@ -76,7 +76,7 @@ public class GenomicAnnotatorIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("a4cf76f08fa90284b6988a464b6e0c17") + Arrays.asList("081ade7f3d2d3c5f19cb1e8651a626f3") ); executeTest("testGenomicAnnotatorOnSNPsAndIndels", testOnSNPsAndIndels); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java index 70c34e729..fef1b6e64 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java @@ -41,7 +41,7 @@ public class BeagleIntegrationTest extends WalkerTest { "-B:beagleR2,BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " + "-B:beagleProbs,BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " + "-B:beaglePhased,BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " + - "-o %s -NO_HEADER", 1, Arrays.asList("6bccee48ad2f06ba5a8c774fed444478")); + "-o %s -NO_HEADER", 1, Arrays.asList("3531451e84208264104040993889aaf4")); executeTest("test BeagleOutputToVCF", spec); } @@ -60,7 +60,7 @@ public class BeagleIntegrationTest extends WalkerTest { "-T ProduceBeagleInput -B:variant,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_HSQ_chr22_14-16m.vcf "+ "-B:validation,VCF 
/humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_OMNI_chr22_14-16m.vcf "+ "-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta -NO_HEADER ",2, - Arrays.asList("660986891b30cdc937e0f2a3a5743faa","223fb977e8db567dcaf632c6ee51f294")); + Arrays.asList("660986891b30cdc937e0f2a3a5743faa","e96ddd51da9f4a797b2aa8c20e404166")); executeTest("test BeagleInputWithBootstrap",spec); } @@ -72,7 +72,7 @@ public class BeagleIntegrationTest extends WalkerTest { "-B:beagleR2,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+ "-B:beagleProbs,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+ "-B:beaglePhased,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+ - "-L 20:1-70000 -o %s -NO_HEADER ",1,Arrays.asList("24b88ef8cdf6e347daab491f0256be5a")); + "-L 20:1-70000 -o %s -NO_HEADER ",1,Arrays.asList("8dd6ec53994fb46c5c22af8535d22965")); executeTest("testBeagleChangesSitesToRef",spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java new file mode 100644 index 000000000..2ae19264e --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall 
be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.gatk.walkers.diffengine; + + +// the imports for unit testing. + +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.*; + +/** + * Basic unit test for DifferableReaders in reduced reads + */ +public class DiffEngineUnitTest extends BaseTest { + DiffEngine engine; + + @BeforeClass(enabled = true) + public void createDiffEngine() { + engine = new DiffEngine(); + } + + // -------------------------------------------------------------------------------- + // + // Difference testing routines + // + // -------------------------------------------------------------------------------- + + private class DifferenceTest extends TestDataProvider { + public DiffElement tree1, tree2; + public List differences; + + private DifferenceTest(String tree1, String tree2) { + this(tree1, tree2, Collections.emptyList()); + } + + private DifferenceTest(String tree1, String tree2, String difference) { + this(tree1, tree2, Arrays.asList(difference)); + } + + private DifferenceTest(String tree1, String tree2, List differences) { + super(DifferenceTest.class); + this.tree1 = DiffNode.fromString(tree1); + this.tree2 = DiffNode.fromString(tree2); + this.differences = differences; + } + + public String toString() { + return 
String.format("tree1=%s tree2=%s diff=%s", + tree1.toOneLineString(), tree2.toOneLineString(), differences); + } + } + + @DataProvider(name = "trees") + public Object[][] createTrees() { + new DifferenceTest("A=X", "A=X"); + new DifferenceTest("A=X", "A=Y", "A:X!=Y"); + new DifferenceTest("A=X", "B=X", Arrays.asList("A:X!=MISSING", "B:MISSING!=X")); + new DifferenceTest("A=(X=1)", "B=(X=1)", Arrays.asList("A:(X=1)!=MISSING", "B:MISSING!=(X=1)")); + new DifferenceTest("A=(X=1)", "A=(X=1)"); + new DifferenceTest("A=(X=1 Y=2)", "A=(X=1 Y=2)"); + new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2 B=(Z=3))"); + new DifferenceTest("A=(X=1)", "A=(X=2)", "A.X:1!=2"); + new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2 B=(Z=4))", "A.B.Z:3!=4"); + new DifferenceTest("A=(X=1)", "A=(X=1 Y=2)", "A.Y:MISSING!=2"); + new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2)", "A.B:(Z=3)!=MISSING"); + return DifferenceTest.getTests(DifferenceTest.class); + } + + @Test(enabled = true, dataProvider = "trees") + public void testDiffs(DifferenceTest test) { + logger.warn("Test tree1: " + test.tree1.toOneLineString()); + logger.warn("Test tree2: " + test.tree2.toOneLineString()); + + List diffs = engine.diff(test.tree1, test.tree2); + logger.warn("Test expected diff : " + test.differences); + logger.warn("Observed diffs : " + diffs); + } + + // -------------------------------------------------------------------------------- + // + // Low-level routines for summarizing differences + // + // -------------------------------------------------------------------------------- + + @Test(enabled = true) + public void testLongestCommonPostfix() { + testLongestCommonPostfixHelper("A", "A", 1); + testLongestCommonPostfixHelper("A", "B", 0); + testLongestCommonPostfixHelper("A.B", "A.B", 2); + testLongestCommonPostfixHelper("A.B.C", "A.B.C", 3); + testLongestCommonPostfixHelper("A.B.C", "X.B.C", 2); + testLongestCommonPostfixHelper("A.B.C", "X.Y.C", 1); + testLongestCommonPostfixHelper("A.B.C", 
"X.Y.Z", 0); + testLongestCommonPostfixHelper("A.B.C", "A.X.C", 1); + testLongestCommonPostfixHelper("A.B.C", "A.X.Z", 0); + testLongestCommonPostfixHelper("A.B.C", "A.B.Z", 0); + } + + public void testLongestCommonPostfixHelper(String p1, String p2, int expected) { + String[] parts1 = p1.split("\\."); + String[] parts2 = p2.split("\\."); + int obs = DiffEngine.longestCommonPostfix(parts1, parts2); + Assert.assertEquals(obs, expected, "p1=" + p1 + " p2=" + p2 + " failed"); + } + + @Test(enabled = true, dependsOnMethods = "testLongestCommonPostfix") + public void testSummarizePath() { + testSummarizePathHelper("A", "A", "A"); + testSummarizePathHelper("A", "B", "*"); + testSummarizePathHelper("A.B", "A.B", "A.B"); + testSummarizePathHelper("A.B", "X.B", "*.B"); + testSummarizePathHelper("A.B", "X.Y", "*.*"); + testSummarizePathHelper("A.B.C", "A.B.C", "A.B.C"); + testSummarizePathHelper("A.B.C", "X.B.C", "*.B.C"); + testSummarizePathHelper("A.B.C", "X.Y.C", "*.*.C"); + testSummarizePathHelper("A.B.C", "X.Y.Z", "*.*.*"); + testSummarizePathHelper("A.B.C", "A.X.C", "*.*.C"); + testSummarizePathHelper("A.B.C", "A.X.Z", "*.*.*"); + testSummarizePathHelper("A.B.C", "A.B.Z", "*.*.*"); + } + + public void testSummarizePathHelper(String p1, String p2, String expected) { + String[] parts1 = DiffEngine.diffNameToPath(p1); + String[] parts2 = DiffEngine.diffNameToPath(p2); + int obs = DiffEngine.longestCommonPostfix(parts1, parts2); + String path = DiffEngine.summarizedPath(parts2, obs); + Assert.assertEquals(path, expected, "p1=" + p1 + " p2=" + p2 + " failed"); + } + + // -------------------------------------------------------------------------------- + // + // High-level difference summary + // + // -------------------------------------------------------------------------------- + + private class SummarizeDifferenceTest extends TestDataProvider { + List diffs = new ArrayList(); + List expecteds = new ArrayList(); + + public SummarizeDifferenceTest() { 
super(SummarizeDifferenceTest.class); } + + public SummarizeDifferenceTest addDiff(String... diffsToAdd) { + diffs.addAll(Arrays.asList(diffsToAdd)); + return this; + } + + public SummarizeDifferenceTest addSummary(String... expectedSummary) { + expecteds.addAll(Arrays.asList(expectedSummary)); + return this; + } + + public String toString() { + return String.format("diffs=%s => expected=%s", diffs, expecteds); + } + + public void test() { + List diffPaths = new ArrayList(diffs.size()); + for ( String diff : diffs ) { diffPaths.add(DiffEngine.diffNameToPath(diff)); } + + List sumDiffs = engine.summarizedDifferencesOfPathsFromString(diffs); + + Assert.assertEquals(sumDiffs.size(), expecteds.size(), "Unexpected number of summarized differences: " + sumDiffs); + + for ( int i = 0; i < sumDiffs.size(); i++ ) { + Difference sumDiff = sumDiffs.get(i); + String expected = expecteds.get(i); + String[] pathCount = expected.split(":"); + String path = pathCount[0]; + int count = Integer.valueOf(pathCount[1]); + Assert.assertEquals(sumDiff.getPath(), path, "Unexpected path at: " + expected + " obs=" + sumDiff + " all=" + sumDiffs); + Assert.assertEquals(sumDiff.getCount(), count, "Unexpected counts at: " + expected + " obs=" + sumDiff + " all=" + sumDiffs); + } + } + } + + @DataProvider(name = "summaries") + public Object[][] createSummaries() { + new SummarizeDifferenceTest().addDiff("A", "A").addSummary("A:2"); + new SummarizeDifferenceTest().addDiff("A", "B").addSummary("A:1", "B:1"); + new SummarizeDifferenceTest().addDiff("A", "A", "A").addSummary("A:3"); + new SummarizeDifferenceTest().addDiff("A", "A", "A", "B").addSummary("A:3", "B:1"); + new SummarizeDifferenceTest().addDiff("A", "A", "A", "B", "B").addSummary("A:3", "B:2"); + new SummarizeDifferenceTest().addDiff("A", "A", "A", "B", "B", "C").addSummary("A:3", "B:2", "C:1"); + new SummarizeDifferenceTest().addDiff("A.X", "A.X").addSummary("A.X:2"); + new SummarizeDifferenceTest().addDiff("A.X", "A.X", 
"B.X").addSummary("*.X:3", "A.X:2", "B.X:1"); + new SummarizeDifferenceTest().addDiff("A.X", "A.X", "B.X", "B.X").addSummary("*.X:4", "A.X:2", "B.X:2"); + new SummarizeDifferenceTest().addDiff("A.B.C", "X.B.C").addSummary("*.B.C:2", "A.B.C:1", "X.B.C:1"); + new SummarizeDifferenceTest().addDiff("A.B.C", "X.Y.C", "X.Y.C").addSummary("*.*.C:3", "X.Y.C:2", "A.B.C:1"); + new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "X.Y.C").addSummary("*.*.C:3", "A.B.C:1", "A.X.C:1", "X.Y.C:1"); + new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "B.X.C").addSummary("*.*.C:3", "*.X.C:2", "A.B.C:1", "A.X.C:1", "B.X.C:1"); + new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "B.X.C", "B.X.C").addSummary("*.*.C:4", "*.X.C:3", "B.X.C:2", "A.B.C:1", "A.X.C:1"); + + return SummarizeDifferenceTest.getTests(SummarizeDifferenceTest.class); + } + + + @Test(enabled = true, dependsOnMethods = "testSummarizePath", dataProvider = "summaries") + public void testSummarizeDifferences(SummarizeDifferenceTest test) { + test.test(); + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNodeUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNodeUnitTest.java new file mode 100644 index 000000000..534416d29 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNodeUnitTest.java @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this 
permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.gatk.walkers.diffengine; + + +// the imports for unit testing. + + +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.*; + +/** + * Basic unit test for DifferableReaders in reduced reads + */ +public class DiffNodeUnitTest extends BaseTest { + // Data is: + // MY_ROOT + // fields: A=A, B=B + // nodes: C, D + // C: fields: E=E, nodes: none + // D: fields: F=F, G=G, nodes: none + static DiffNode MY_ROOT = DiffNode.rooted("MY_ROOT"); + static DiffValue Value_A = new DiffValue("A", MY_ROOT, "A"); + static DiffValue Value_B = new DiffValue("B", MY_ROOT, "B"); + static DiffNode NODE_C = DiffNode.empty("C", MY_ROOT); + static DiffNode NODE_D = DiffNode.empty("D", MY_ROOT); + static DiffValue Value_E = new DiffValue("E", NODE_C, "E"); + static DiffValue Value_F = new DiffValue("F", NODE_D, "F"); + static DiffValue Value_G = new DiffValue("G", NODE_D, "G"); + + static { + MY_ROOT.add(Value_A); + MY_ROOT.add(Value_B); + MY_ROOT.add(NODE_C); + MY_ROOT.add(NODE_D); + NODE_C.add(Value_E); + NODE_D.add(Value_F); + NODE_D.add(Value_G); + } + + + // -------------------------------------------------------------------------------- + // + // Element testing routines + // + // 
-------------------------------------------------------------------------------- + + private class ElementTest extends TestDataProvider { + public DiffElement elt; + public String name; + public String fullName; + public DiffElement parent; + + private ElementTest(DiffValue elt, DiffValue parent, String name, String fullName) { + this(elt.getBinding(), parent.getBinding(), name, fullName); + } + + private ElementTest(DiffElement elt, DiffElement parent, String name, String fullName) { + super(ElementTest.class); + this.elt = elt; + this.name = name; + this.fullName = fullName; + this.parent = parent; + } + + public String toString() { + return String.format("ElementTest elt=%s name=%s fullName=%s parent=%s", + elt.toOneLineString(), name, fullName, parent.getName()); + } + } + + @DataProvider(name = "elementdata") + public Object[][] createElementData() { + new ElementTest(MY_ROOT.getBinding(), DiffElement.ROOT, "MY_ROOT", "MY_ROOT"); + new ElementTest(NODE_C, MY_ROOT, "C", "MY_ROOT.C"); + new ElementTest(NODE_D, MY_ROOT, "D", "MY_ROOT.D"); + new ElementTest(Value_A, MY_ROOT, "A", "MY_ROOT.A"); + new ElementTest(Value_B, MY_ROOT, "B", "MY_ROOT.B"); + new ElementTest(Value_E, NODE_C, "E", "MY_ROOT.C.E"); + new ElementTest(Value_F, NODE_D, "F", "MY_ROOT.D.F"); + new ElementTest(Value_G, NODE_D, "G", "MY_ROOT.D.G"); + return TestDataProvider.getTests(ElementTest.class); + } + + @Test(enabled = true, dataProvider = "elementdata") + public void testElementMethods(ElementTest test) { + Assert.assertNotNull(test.elt.getName()); + Assert.assertNotNull(test.elt.getParent()); + Assert.assertEquals(test.elt.getName(), test.name); + Assert.assertEquals(test.elt.getParent(), test.parent); + Assert.assertEquals(test.elt.fullyQualifiedName(), test.fullName); + } + + // -------------------------------------------------------------------------------- + // + // DiffValue testing routines + // + // -------------------------------------------------------------------------------- + + 
private class LeafTest extends TestDataProvider { + public DiffValue diffvalue; + public Object value; + + private LeafTest(DiffValue diffvalue, Object value) { + super(LeafTest.class); + this.diffvalue = diffvalue; + this.value = value; + } + + public String toString() { + return String.format("LeafTest diffvalue=%s value=%s", diffvalue.toOneLineString(), value); + } + } + + @DataProvider(name = "leafdata") + public Object[][] createLeafData() { + new LeafTest(Value_A, "A"); + new LeafTest(Value_B, "B"); + new LeafTest(Value_E, "E"); + new LeafTest(Value_F, "F"); + new LeafTest(Value_G, "G"); + return TestDataProvider.getTests(LeafTest.class); + } + + @Test(enabled = true, dataProvider = "leafdata") + public void testLeafMethods(LeafTest test) { + Assert.assertNotNull(test.diffvalue.getValue()); + Assert.assertEquals(test.diffvalue.getValue(), test.value); + } + + // -------------------------------------------------------------------------------- + // + // Node testing routines + // + // -------------------------------------------------------------------------------- + + private class NodeTest extends TestDataProvider { + public DiffNode node; + public Set fields; + public Set subnodes; + public Set allNames; + + private NodeTest(DiffNode node, List fields, List subnodes) { + super(NodeTest.class); + this.node = node; + this.fields = new HashSet(fields); + this.subnodes = new HashSet(subnodes); + this.allNames = new HashSet(fields); + allNames.addAll(subnodes); + } + + public String toString() { + return String.format("NodeTest node=%s fields=%s subnodes=%s", + node.toOneLineString(), fields, subnodes); + } + } + + @DataProvider(name = "nodedata") + public Object[][] createData1() { + new NodeTest(MY_ROOT, Arrays.asList("A", "B"), Arrays.asList("C", "D")); + new NodeTest(NODE_C, Arrays.asList("E"), Collections.emptyList()); + new NodeTest(NODE_D, Arrays.asList("F", "G"), Collections.emptyList()); + return TestDataProvider.getTests(NodeTest.class); + } + + 
@Test(enabled = true, dataProvider = "nodedata") + public void testNodeAccessors(NodeTest test) { + Assert.assertNotNull(test.node.getElements()); + + for ( String name : test.allNames ) { + DiffElement elt = test.node.getElement(name); + Assert.assertNotNull(elt, "Failed to find field " + elt + " in " + test.node); + Assert.assertEquals(elt.getName(), name); + Assert.assertEquals(elt.getValue().isAtomic(), test.fields.contains(name), "Failed atomic/compound expectation: " + test.node); + } + } + + // NOTE: add routines are being implicitly tested by the creation of the data structures + + @Test(enabled = true, dataProvider = "nodedata") + public void testCounts(NodeTest test) { + Assert.assertEquals(test.node.getElements().size(), test.allNames.size()); + Assert.assertEquals(test.node.getElementNames(), test.allNames); + } + + // -------------------------------------------------------------------------------- + // + // fromString testing routines + // + // -------------------------------------------------------------------------------- + + private class FromStringTest extends TestDataProvider { + public String string; + public DiffElement expected; + + private FromStringTest(String string, DiffElement expected) { + super(FromStringTest.class); + this.string = string; + this.expected = expected; + } + + public String toString() { + return String.format("FromStringTest string=%s expected=%s", string, expected.toOneLineString()); + } + } + + @DataProvider(name = "fromstringdata") + public Object[][] createFromData() { + new FromStringTest("A=A", Value_A.getBinding()); + new FromStringTest("B=B", Value_B.getBinding()); + new FromStringTest("C=(E=E)", NODE_C.getBinding()); + new FromStringTest("D=(F=F G=G)", NODE_D.getBinding()); + return TestDataProvider.getTests(FromStringTest.class); + } + + @Test(enabled = true, dataProvider = "fromstringdata") + public void parseFromString(FromStringTest test) { + logger.warn("Testing from string: " + test.string); + DiffElement 
elt = DiffNode.fromString(test.string); + Assert.assertEquals(elt.toOneLineString(), test.expected.toOneLineString()); + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java new file mode 100644 index 000000000..77159d9c2 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class DiffObjectsIntegrationTest extends WalkerTest { + private class TestParams extends TestDataProvider { + public File master, test; + public String MD5; + + private TestParams(String master, String test, String MD5) { + super(TestParams.class); + this.master = new File(master); + this.test = new File(test); + this.MD5 = MD5; + } + + public String toString() { + return String.format("master=%s,test=%s,md5=%s", master, test, MD5); + } + } + + @DataProvider(name = "data") + public Object[][] createData() { + new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "4d9f4636de05b93c354d05011264546e"); + new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "37e6efd833b5cd6d860a9df3df9713fc"); + return TestParams.getTests(TestParams.class); + } + + @Test(enabled = true, dataProvider = "data") + public void testDiffs(TestParams params) { + WalkerTestSpec spec = new WalkerTestSpec( + "-T DiffObjects -R public/testdata/exampleFASTA.fasta " + + " -m " + params.master + + " -t " + params.test + + " -o %s", + Arrays.asList(params.MD5)); + executeTest("testDiffObjects:"+params, spec).getFirst(); + } +} + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java new file mode 100644 index 000000000..dee7bbd88 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this 
software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.gatk.walkers.diffengine; + + +// the imports for unit testing. 
+ + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.io.File; +import java.util.*; + +/** + * Basic unit test for DifferableReaders in reduced reads + */ +public class DiffableReaderUnitTest extends BaseTest { + DiffEngine engine; + + File vcfFile = new File(testDir + "diffTestMaster.vcf"); + File bamFile = new File(testDir + "exampleBAM.bam"); + + @BeforeClass(enabled = true) + public void createDiffEngine() { + engine = new DiffEngine(); + } + + @Test(enabled = true) + public void testPluggableDiffableReaders() { + logger.warn("testPluggableDiffableReaders"); + Map readers = engine.getReaders(); + Assert.assertNotNull(readers); + Assert.assertTrue(readers.size() > 0); + Assert.assertNotNull(readers.get("VCF")); + for ( Map.Entry e : engine.getReaders().entrySet() ) { + logger.warn("Found diffable reader: " + e.getKey()); + Assert.assertEquals(e.getValue().getName(), e.getKey()); + Assert.assertEquals(e.getValue(), engine.getReader(e.getKey())); + } + } + + private static void testLeaf(DiffNode rec, String field, Object expected) { + DiffElement value = rec.getElement(field); + Assert.assertNotNull(value, "Expected to see leaf named " + field + " in rec " + rec); + Assert.assertEquals(value.getValue().getValue(), expected, "Expected to leaf named " + field + " to have value " + expected + " in rec " + rec); + } + + @Test(enabled = true, dependsOnMethods = "testPluggableDiffableReaders") + public void testVCF1() { + logger.warn("testVCF1"); + DiffableReader vcfReader = engine.getReader("VCF"); + Assert.assertTrue(vcfReader.canRead(vcfFile)); + Assert.assertFalse(vcfReader.canRead(bamFile)); + + DiffElement diff = vcfReader.readFromFile(vcfFile, -1); + Assert.assertNotNull(diff); + + Assert.assertEquals(diff.getName(), vcfFile.getName()); + 
Assert.assertSame(diff.getParent(), DiffElement.ROOT); + + DiffNode node = diff.getValueAsNode(); + Assert.assertEquals(node.getElements().size(), 11); + + // chr1 2646 rs62635284 G A 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:53,75:3:-12.40,-0.90,-0.00:9.03 + DiffNode rec1 = node.getElement("chr1:2646").getValueAsNode(); + testLeaf(rec1, "CHROM", "chr1"); + testLeaf(rec1, "POS", 2646); + testLeaf(rec1, "ID", "rs62635284"); + testLeaf(rec1, "REF", Allele.create("G", true)); + testLeaf(rec1, "ALT", new HashSet(Arrays.asList(Allele.create("A")))); + testLeaf(rec1, "QUAL", 0.15); + testLeaf(rec1, "FILTER", Collections.emptySet()); + testLeaf(rec1, "AC", "2"); + testLeaf(rec1, "AF", "1.00"); + testLeaf(rec1, "AN", "2"); + } + + @Test(enabled = true, dependsOnMethods = "testPluggableDiffableReaders") + public void testBAM() { + logger.warn("testBAM"); + DiffableReader bamReader = engine.getReader("BAM"); + Assert.assertTrue(bamReader.canRead(bamFile)); + Assert.assertFalse(bamReader.canRead(vcfFile)); + + DiffElement diff = bamReader.readFromFile(bamFile, -1); + Assert.assertNotNull(diff); + + Assert.assertEquals(diff.getName(), bamFile.getName()); + Assert.assertSame(diff.getParent(), DiffElement.ROOT); + + DiffNode node = diff.getValueAsNode(); + Assert.assertEquals(node.getElements().size(), 33); + + // 30PPJAAXX090125:1:42:512:1817#0 99 chr1 200 0 76M = + // 255 -130 ACCCTAACCCTAACCCTAACCCTAACCATAACCCTAAGACTAACCCTAAACCTAACCCTCATAATCGAAATACAAC + // BBBBC@C?AABCBB<63>=B@>+B9-9+)2B8,+@327B5A>90((>-+''3?(/'''A)(''19('7.,**%)3: + // PG:Z:0 RG:Z:exampleBAM.bam SM:Z:exampleBAM.bam + + DiffNode rec1 = node.getElement("30PPJAAXX090125:1:42:512:1817#0_1").getValueAsNode(); + testLeaf(rec1, "NAME", "30PPJAAXX090125:1:42:512:1817#0"); + testLeaf(rec1, "FLAGS", 99); + testLeaf(rec1, "RNAME", "chr1"); + testLeaf(rec1, "POS", 200); + testLeaf(rec1, "MAPQ", 0); + testLeaf(rec1, "CIGAR", "76M"); + testLeaf(rec1, "RNEXT", "chr1"); + testLeaf(rec1, "PNEXT", 255); + testLeaf(rec1, 
"TLEN", -130); + testLeaf(rec1, "SEQ", "ACCCTAACCCTAACCCTAACCCTAACCATAACCCTAAGACTAACCCTAAACCTAACCCTCATAATCGAAATACAAC"); + testLeaf(rec1, "QUAL", "BBBBC@C?AABCBB<63>=B@>+B9-9+)2B8,+@327B5A>90((>-+''3?(/'''A)(''19('7.,**%)3:"); + testLeaf(rec1, "PG", "0"); + testLeaf(rec1, "RG", "exampleBAM.bam"); + testLeaf(rec1, "SM", "exampleBAM.bam"); + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java new file mode 100644 index 000000000..4e4080bc7 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +// our package +package org.broadinstitute.sting.gatk.walkers.diffengine; + + +// the imports for unit testing. + + +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * Basic unit test for DifferableReaders in reduced reads + */ +public class DifferenceUnitTest extends BaseTest { + // -------------------------------------------------------------------------------- + // + // testing routines + // + // -------------------------------------------------------------------------------- + + private class DifferenceTest extends TestDataProvider { + public DiffElement tree1, tree2; + public String difference; + + private DifferenceTest(String tree1, String tree2, String difference) { + this(DiffNode.fromString(tree1), DiffNode.fromString(tree2), difference); + } + + private DifferenceTest(DiffElement tree1, DiffElement tree2, String difference) { + super(DifferenceTest.class); + this.tree1 = tree1; + this.tree2 = tree2; + this.difference = difference; + } + + public String toString() { + return String.format("tree1=%s tree2=%s diff=%s", + tree1 == null ? "null" : tree1.toOneLineString(), + tree2 == null ? "null" : tree2.toOneLineString(), + difference); + } + } + + @DataProvider(name = "data") + public Object[][] createTrees() { + new DifferenceTest("A=X", "A=Y", "A:1:X!=Y"); + new DifferenceTest("A=Y", "A=X", "A:1:Y!=X"); + new DifferenceTest(DiffNode.fromString("A=X"), null, "A:1:X!=MISSING"); + new DifferenceTest(null, DiffNode.fromString("A=X"), "A:1:MISSING!=X"); + return DifferenceTest.getTests(DifferenceTest.class); + } + + @Test(enabled = true, dataProvider = "data") + public void testDiffToString(DifferenceTest test) { + logger.warn("Test tree1: " + (test.tree1 == null ? 
"null" : test.tree1.toOneLineString())); + logger.warn("Test tree2: " + (test.tree2 == null ? "null" : test.tree2.toOneLineString())); + logger.warn("Test expected diff : " + test.difference); + Difference diff = new Difference(test.tree1, test.tree2); + logger.warn("Observed diffs : " + diff); + Assert.assertEquals(diff.toString(), test.difference, "Observed diff string " + diff + " not equal to expected difference string " + test.difference ); + + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index 3d75fdc44..7bec67d2e 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -16,7 +16,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testNoAction() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("4cc077eb3d343e6b7ba12bff86ebe347")); + Arrays.asList("8a105fa5eebdfffe7326bc5b3d8ffd1c")); executeTest("test no action", spec); } @@ -24,7 +24,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testClusteredSnps() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -window 10 -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("ada5540bb3d9b6eb8f1337ba01e90a94")); + Arrays.asList("27b13f179bb4920615dff3a32730d845")); executeTest("test clustered SNPs", spec); } @@ -32,17 +32,17 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testMasks() { WalkerTestSpec spec1 = new WalkerTestSpec( baseTestString() + " -mask foo 
-B:mask,VCF3 " + validationDataLocation + "vcfexample2.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("b0fcac4af3526e3b2a37602ab4c0e6ae")); + Arrays.asList("578f9e774784c25871678e6464fd212b")); executeTest("test mask all", spec1); WalkerTestSpec spec2 = new WalkerTestSpec( baseTestString() + " -mask foo -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("b64baabe905a5d197cc1ab594147d3d5")); + Arrays.asList("bfa86a674aefca1b13d341cb14ab3c4f")); executeTest("test mask some", spec2); WalkerTestSpec spec3 = new WalkerTestSpec( baseTestString() + " -mask foo -maskExtend 10 -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("0eff92fe72024d535c44b98e1e9e1993")); + Arrays.asList("5939f80d14b32d88587373532d7b90e5")); executeTest("test mask extend", spec3); } @@ -50,7 +50,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testFilter1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("7a40795147cbfa92941489d7239aad92")); + Arrays.asList("45219dbcfb6f81bba2ea0c35f5bfd368")); executeTest("test filter #1", spec); } @@ -58,7 +58,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testFilter2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("e9dd4991b1e325847c77d053dfe8ee54")); + Arrays.asList("c95845e817da7352b9b72bc9794f18fb")); executeTest("test 
filter #2", spec); } @@ -66,7 +66,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testFilterWithSeparateNames() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("9ded2cce63b8d97550079047051d80a3")); + Arrays.asList("b8cdd7f44ff1a395e0a9b06a87e1e530")); executeTest("test filter with separate names #2", spec); } @@ -74,12 +74,12 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testGenotypeFilters() { WalkerTestSpec spec1 = new WalkerTestSpec( baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("6696e3f65a62ce912230d47cdb0c129b")); + Arrays.asList("96b61e4543a73fe725e433f007260039")); executeTest("test genotype filter #1", spec1); WalkerTestSpec spec2 = new WalkerTestSpec( baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("26e5b4ee954c9e0b5eb044afd4b88ee9")); + Arrays.asList("6c8112ab17ce39c8022c891ae73bf38e")); executeTest("test genotype filter #2", spec2); } @@ -87,7 +87,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testDeletions() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo -B:variant,VCF " + validationDataLocation + "twoDeletions.vcf", 1, - Arrays.asList("e63b58be33c9126ad6cc55489aac539b")); + Arrays.asList("569546fd798afa0e65c5b61b440d07ac")); executeTest("test deletions", spec); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java 
b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 20fa7719f..1f23d262e 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("258e1954e6ae55c89abc6a716e19cbe0")); + Arrays.asList("c97829259463d04b0159591bb6fb44af")); executeTest("test MultiSample Pilot1", spec); } @@ -54,12 +54,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithAllelesPassedIn() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("edeb1db288a24baff59575ceedd94243")); + Arrays.asList("2b69667f4770e8c0c894066b7f27e440")); executeTest("test MultiSample Pilot2 with alleles passed in", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("581990130d90071b084024f4cd7caf91")); + Arrays.asList("b77fe007c2a97fcd59dfd5eef94d8b95")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } @@ -67,7 +67,7 @@ public class UnifiedGenotyperIntegrationTest 
extends WalkerTest { public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("d120db27d694a6da32367cc4fb5770fa")); + Arrays.asList("ee8a5e63ddd470726a749e69c0c20f60")); executeTest("test SingleSample Pilot2", spec); } @@ -77,7 +77,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "75e5c430ed39f79f24e375037a388dc4"; + private final static String COMPRESSED_OUTPUT_MD5 = "ef31654a2b85b9b2d3bba4f4a75a17b6"; @Test public void testCompressedOutput() { @@ -107,7 +107,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // Note that we need to turn off any randomization for this to work, so no downsampling and no annotations - String md5 = "a29615dd37222a11b8dadd341b53e43c"; + String md5 = "46868a9c4134651c54535fb46b408aee"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, @@ -138,9 +138,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testCallingParameters() { HashMap e = new HashMap(); - e.put( "--min_base_quality_score 26", "93e6269e38db9bc1732555e9969e3648" ); - e.put( "--min_mapping_quality_score 26", "64be99183c100caed4aa5f8bad64c7e9" ); - e.put( "--p_nonref_model GRID_SEARCH", "0592fe33f705ad8e2f13619fcf157805" ); + e.put( "--min_base_quality_score 26", "5043c9a101e691602eb7a3f9704bdf20" ); + e.put( "--min_mapping_quality_score 26", "71a833eb8fd93ee62ae0d5a430f27940" ); + e.put( "--p_nonref_model GRID_SEARCH", "ddf443e9dcadef367476b26b4d52c134" ); for ( Map.Entry entry : 
e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -153,9 +153,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testOutputParameter() { HashMap e = new HashMap(); - e.put( "-sites_only", "1483e637dc0279935a7f90d136d147bb" ); - e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "adcd91bc7dae8020df8caf1a30060e98" ); - e.put( "--output_mode EMIT_ALL_SITES", "b708acc2fa40f336bcd2d0c70091e07e" ); + e.put( "-sites_only", "eaad6ceb71ab94290650a70bea5ab951" ); + e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "05bf7db8a3d19ef4a3d14772c90b732f" ); + e.put( "--output_mode EMIT_ALL_SITES", "e4b86740468d7369f0156550855586c7" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -169,12 +169,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, - Arrays.asList("64be99183c100caed4aa5f8bad64c7e9")); + Arrays.asList("71a833eb8fd93ee62ae0d5a430f27940")); executeTest("test confidence 1", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1, - Arrays.asList("e76ca54232d02f0d92730e1affeb804e")); + Arrays.asList("79968844dc3ddecb97748c1acf2984c7")); executeTest("test confidence 2", spec2); } @@ -186,8 +186,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testHeterozyosity() { HashMap e = new HashMap(); - e.put( 0.01, "18d37f7f107853b5e32c757b4e143205" ); - e.put( 1.0 / 1850, "2bcb90ce2f7542bf590f7612018fae8e" ); + e.put( 0.01, "4e878664f61d2d800146d3762303fde1" ); + e.put( 1.0 / 1850, 
"9204caec095ff5e63ca21a10b6fab453" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -211,7 +211,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("825f05b31b5bb7e82231a15c7e4e2b0d")); + Arrays.asList("1a58ec52df545f946f80cc16c5736a91")); executeTest(String.format("test multiple technologies"), spec); } @@ -230,7 +230,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("0919ab7e513c377610e23a67d33608fa")); + Arrays.asList("62d0f6d9de344ce68ce121c13b1e78b1")); executeTest(String.format("test calling with BAQ"), spec); } @@ -244,7 +244,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq OFF", 1, - Arrays.asList("825f05b31b5bb7e82231a15c7e4e2b0d")); + Arrays.asList("1a58ec52df545f946f80cc16c5736a91")); executeTest(String.format("test calling with BAQ OFF"), spec); } @@ -263,7 +263,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("cb37348c41b8181be829912730f747e1")); + Arrays.asList("631ae1f1eb6bc4c1a4136b8495250536")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -278,7 +278,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("ca5b6a5fb53ae401b146cc3044f454f2")); + Arrays.asList("fd556585c79e2b892a5976668f45aa43")); executeTest(String.format("test indel caller in SLX witn low min allele count"), spec); } @@ -291,7 +291,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("ca4343a4ab6d3cce94ce61d7d1910f81")); + Arrays.asList("9cd56feedd2787919e571383889fde70")); executeTest(String.format("test indel 
calling, multiple technologies"), spec); } @@ -301,14 +301,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("3f555b53e9dd14cf7cdf96c24e322364")); + Arrays.asList("315e1b78d7a403d7fcbcf0caa8c496b8")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("1b9764b783acf7822edc58e6822eef5b")); + Arrays.asList("cf89e0c54f14482a23c105b73a333d8a")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java new file mode 100644 index 000000000..21435dd7d --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java @@ -0,0 +1,28 @@ +package org.broadinstitute.sting.gatk.walkers.phasing; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import java.util.Arrays; + +public class MergeAndMatchHaplotypesIntegrationTest extends WalkerTest { + private static String mergeAndMatchHaplotypesTestDataRoot = validationDataLocation + "/MergeAndMatchHaplotypes"; + private static String fundamentalTestPBTVCF = 
mergeAndMatchHaplotypesTestDataRoot + "/" + "FundamentalsTest.pbt.vcf"; + private static String fundamentalTestRBPVCF = mergeAndMatchHaplotypesTestDataRoot + "/" + "FundamentalsTest.pbt.rbp.vcf"; + + @Test + public void testBasicFunctionality() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T MergeAndMatchHaplotypes", + "-R " + b37KGReference, + "-B:pbt,VCF " + fundamentalTestPBTVCF, + "-B:rbp,VCF " + fundamentalTestRBPVCF, + "-o %s" + ), + 1, + Arrays.asList("") + ); + executeTest("testBasicMergeAndMatchHaplotypesFunctionality", spec); + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsIntegrationTest.java index 3f87fc1a2..c88eac149 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsIntegrationTest.java @@ -23,7 +23,7 @@ public class MergeMNPsIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 1) + " -L chr20:556259-756570", 1, - Arrays.asList("e312b7d3854d5b2834a370659514a813")); + Arrays.asList("7f11f7f75d1526077f0173c7ed1fc6c4")); executeTest("Merge MNP sites within genomic distance of 1 [TEST ONE]", spec); } @@ -33,7 +33,7 @@ public class MergeMNPsIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 10) + " -L chr20:556259-756570", 1, - Arrays.asList("681f50e45f1d697370d2c355df2e18bc")); + Arrays.asList("53dd312468296826bdd3c22387390c88")); executeTest("Merge MNP sites within genomic distance of 10 [TEST TWO]", spec); } @@ -43,7 +43,7 @@ public class MergeMNPsIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 100) + " -L chr20:556259-756570", 1, - Arrays.asList("0bccb0ef928a108418246bec01098083")); + 
Arrays.asList("e26f92d2fb9f4eaeac7f9d8ee27410ee")); executeTest("Merge MNP sites within genomic distance of 100 [TEST THREE]", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesIntegrationTest.java index 009048c10..f855c1dd3 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesIntegrationTest.java @@ -23,7 +23,7 @@ public class MergeSegregatingAlternateAllelesIntegrationTest extends WalkerTest baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 1) + " -L chr20:556259-756570", 1, - Arrays.asList("e16f957d888054ae0518e25660295241")); + Arrays.asList("af5e1370822551c0c6f50f23447dc627")); executeTest("Merge sites within genomic distance of 1 [TEST ONE]", spec); } @@ -33,7 +33,7 @@ public class MergeSegregatingAlternateAllelesIntegrationTest extends WalkerTest baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 10) + " -L chr20:556259-756570", 1, - Arrays.asList("122a482090677c7619c2105d44e00d11")); + Arrays.asList("dd8c44ae1ef059a7fe85399467e102eb")); executeTest("Merge sites within genomic distance of 10 [TEST TWO]", spec); } @@ -43,7 +43,7 @@ public class MergeSegregatingAlternateAllelesIntegrationTest extends WalkerTest baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 100) + " -L chr20:556259-756570", 1, - Arrays.asList("bc6a8c8a42bb2601db98e88e9ad74748")); + Arrays.asList("f81fd72ecaa57b3215406fcea860bcc5")); executeTest("Merge sites within genomic distance of 100 [TEST THREE]", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java 
b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java new file mode 100644 index 000000000..9f59adeb6 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java @@ -0,0 +1,44 @@ +package org.broadinstitute.sting.gatk.walkers.phasing; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import java.util.Arrays; + +public class PhaseByTransmissionIntegrationTest extends WalkerTest { + private static String phaseByTransmissionTestDataRoot = validationDataLocation + "/PhaseByTransmission"; + private static String fundamentalTestVCF = phaseByTransmissionTestDataRoot + "/" + "FundamentalsTest.unfiltered.vcf"; + + @Test + public void testBasicFunctionalityWithoutFilters() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T PhaseByTransmission", + "-R " + b37KGReference, + "-B:variant,VCF " + fundamentalTestVCF, + "-f NA12892+NA12891=NA12878", + "-nofilters", + "-o %s" + ), + 1, + Arrays.asList("416a483e87358cdcb0b09a496e3254c0") + ); + executeTest("testBasicFunctionalityWithoutFilters", spec); + } + + @Test + public void testBasicFunctionalityWithFilters() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T PhaseByTransmission", + "-R " + b37KGReference, + "-B:variant,VCF " + fundamentalTestVCF, + "-f NA12892+NA12891=NA12878", + "-o %s" + ), + 1, + Arrays.asList("8c5db343567e90e97993912c7e541d0d") + ); + executeTest("testBasicFunctionalityWithFilters", spec); + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java index 0ed16967a..1bf3e579f 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java +++ 
b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java @@ -26,7 +26,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) + " -L chr20:332341-382503", 1, - Arrays.asList("6020a68bbec97fcd87819c10cd4e2470")); + Arrays.asList("9568ba0b6624b97ac55a59bdee2d9150")); executeTest("MAX 10 het sites [TEST ONE]; require PQ >= 10", spec); } @@ -36,7 +36,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) + " -L chr20:1232503-1332503", 1, - Arrays.asList("712c2145df4756c9a15758865d8007b5")); + Arrays.asList("ce65194c24fe83b0ec90faa6c8e6109a")); executeTest("MAX 10 het sites [TEST TWO]; require PQ >= 10", spec); } @@ -46,7 +46,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 2, 30) + " -L chr20:332341-382503", 1, - Arrays.asList("297e0896e4761529d979f40f5ad694db")); + Arrays.asList("02d134fd544613b1e5dd7f7197fc3753")); executeTest("MAX 2 het sites [TEST THREE]; require PQ >= 30", spec); } @@ -56,7 +56,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 5, 100) + " -L chr20:332341-382503", 1, - Arrays.asList("52a17f14692d726d3b726cf0ae7f2a09")); + Arrays.asList("2f7ec9904fc054c2ba1a7db05eb29334")); executeTest("MAX 5 het sites [TEST FOUR]; require PQ >= 100", spec); } @@ -66,7 +66,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 1000, 7, 
10) + " -L chr20:332341-482503", 1, - Arrays.asList("af768f7958b8f4599c2374f1cc2fc613")); + Arrays.asList("da7a31725f229d1782dd3049848730aa")); executeTest("MAX 7 het sites [TEST FIVE]; require PQ >= 10; cacheWindow = 1000", spec); } @@ -76,7 +76,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) + " -L chr20:652810-681757", 1, - Arrays.asList("3dd886672f59a47908b94136d0427bb0")); + Arrays.asList("e9d35cb88089fb0e8ae6678bfaeeac8c")); executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index b0f76229b..129161da3 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -19,9 +19,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { public void testCountCovariates1() { HashMap e = new HashMap(); e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "7b5832d4b2a23b8ef2bb639eb59bfa88" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f4f8a49bb5764d2a8f61e055f64dcce4"); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "9c006f8e9fb5752b1c139f5a8cc7ea88"); e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "e6f7b4ab9aa291022e0ba8b7dbe4c77e" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "570506533f079d738d70934dfe1c02cd" ); + e.put( validationDataLocation + 
"NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "e6b98af01c5a08e4954b79ec42db6fc3" ); for ( String parallelism : Arrays.asList("", " -nt 4")) { for ( Map.Entry entry : e.entrySet() ) { @@ -53,9 +53,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { public void testTableRecalibrator1() { HashMap e = new HashMap(); e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0278cce4cfdab869dc0c11d6852a984b" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "344d4252143df8c2cce6b568747553a5"); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "6797d7ffa4ef6c48413719ba32696ccf"); e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "2bb3374dde131791d7638031ae3b3e10" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "064c4a7bdd23974c3a9c5f924540df76" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "1f9d8944b73169b367cb83b0d22e5432" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -107,7 +107,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorMaxQ70() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "344d4252143df8c2cce6b568747553a5" ); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0278cce4cfdab869dc0c11d6852a984b" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -133,12 +133,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { } } - - @Test public void testCountCovariatesSolidIndelsRemoveRefBias() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "0a6cdb9611e5880ea6611205080aa267" ); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "c9ea5f995e1e2b7a5688533e678dcedc" ); for ( Map.Entry entry : 
e.entrySet() ) { String bam = entry.getKey(); @@ -164,7 +162,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorSolidIndelsRemoveRefBias() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "9bc7e1ad223ba759fe5e8ddb4c07369c" ); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "993fae4270e7e1e15986f270acf247af" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -189,13 +187,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { } } - - - @Test public void testCountCovariatesVCF() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "3700eaf567e4937f442fc777a226d6ad"); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "170f0c3cc4b8d72c539136effeec9a16"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -219,7 +214,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesBED() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "6803891a3398821fc8a37e19ea8e5a00"); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "b460478d9683e827784e42bc352db8bb"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -243,7 +238,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesVCFPlusDBsnp() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f224c42fbc4026db973ccc91265ab5c7"); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "a3d892bd60d8f679affda3c1e3af96c1"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -268,69 +263,10 @@ public class RecalibrationWalkersIntegrationTest 
extends WalkerTest { } } - @Test - public void testCountCovariatesNoReadGroups() { - HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "c024e03f019aeceaf364fa58c8295ad8" ); - - for ( Map.Entry entry : e.entrySet() ) { - String bam = entry.getKey(); - String md5 = entry.getValue(); - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + - " -T CountCovariates" + - " -I " + bam + - " -L 1:10,000,000-10,200,000" + - " -cov ReadGroupCovariate" + - " -cov QualityScoreCovariate" + - " -cov CycleCovariate" + - " -cov DinucCovariate" + - " --default_read_group DefaultReadGroup" + - " --default_platform illumina" + - " --solid_recal_mode SET_Q_ZERO" + - " -recalFile %s", - 1, // just one output file - Arrays.asList(md5)); - List result = executeTest("testCountCovariatesNoReadGroups", spec).getFirst(); - paramsFilesNoReadGroupTest.put(bam, result.get(0).getAbsolutePath()); - } - } - - @Test - public void testTableRecalibratorNoReadGroups() { - HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "1eefbe7ac0376fc1ed1392d85242171e" ); - - for ( Map.Entry entry : e.entrySet() ) { - String bam = entry.getKey(); - String md5 = entry.getValue(); - String paramsFile = paramsFilesNoReadGroupTest.get(bam); - System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile); - if ( paramsFile != null ) { - WalkerTestSpec spec = new WalkerTestSpec( - "-R " + b36KGReference + - " -T TableRecalibration" + - " -I " + bam + - " -L 1:10,100,000-10,300,000" + - " -o %s" + - " --no_pg_tag" + - " --solid_recal_mode SET_Q_ZERO" + - " --default_read_group DefaultReadGroup" + - " --default_platform illumina" + - " -recalFile " + paramsFile, - 1, // just one output file - Arrays.asList(md5)); - executeTest("testTableRecalibratorNoReadGroups", spec); - } - } - } - @Test public void 
testCountCovariatesNoIndex() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "cfc31bb6f51436d1c3b34f62bb801dc8" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "284ccac1f8fe485e52c86333cac7c2d4" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -356,7 +292,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorNoIndex() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "83b848a16034c2fb423d1bb0f5be7784" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "c167799c2d9cab815d7c9b23337f162e" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -380,11 +316,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { } } - @Test public void testCountCovariatesFailWithoutDBSNP() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", ""); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", ""); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java deleted file mode 100755 index 850a3113e..000000000 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java +++ /dev/null @@ -1,34 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.sequenom; - -import org.broadinstitute.sting.WalkerTest; -import org.testng.annotations.Test; - -import java.util.Arrays; - -public class PickSequenomProbesIntegrationTest extends WalkerTest { - @Test - public void testProbes() { - String 
testVCF = validationDataLocation + "complexExample.vcf4"; - String testArgs = "-R " + b36KGReference + " -T PickSequenomProbes -L 1:10,000,000-11,000,000 -B:input,VCF "+testVCF+" -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("6b5409cc78960f1be855536ed89ea9dd")); - executeTest("Test probes", spec); - } - - @Test - public void testProbesUsingDbSNPMask() { - - String md5 = "46d53491af1d3aa0ee1f1e13d68b732d"; - String testVCF = validationDataLocation + "pickSeqIntegrationTest.vcf"; - - String testArgs = "-snp_mask " + validationDataLocation + "pickSeqIntegrationTest.bed -R " - + b36KGReference + " -omitWindow -nameConvention " - + "-project_id 1kgp3_s4_lf -T PickSequenomProbes -B:input,VCF "+testVCF+" -o %s"; - WalkerTestSpec spec1 = new WalkerTestSpec(testArgs, 1, Arrays.asList(md5)); - executeTest("Test probes", spec1); - - testArgs += " -nmw 1"; - WalkerTestSpec spec2 = new WalkerTestSpec(testArgs, 1, Arrays.asList(md5)); - executeTest("Test probes", spec2); - } -} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java new file mode 100755 index 000000000..95f4ac0ae --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java @@ -0,0 +1,56 @@ +package org.broadinstitute.sting.gatk.walkers.validation; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import java.util.Arrays; + +/** + * Created by IntelliJ IDEA. + * User: Ghost + * Date: 7/19/11 + * Time: 7:39 PM + * To change this template use File | Settings | File Templates. 
+ */ +public class ValidationAmpliconsIntegrationTest extends WalkerTest { + + @Test(enabled=true) + public void testWikiExample() { + String siteVCF = validationDataLocation + "sites_to_validate.vcf"; + String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf"; + String intervalTable = validationDataLocation + "amplicon_interval_table1.table"; + String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons -B:ValidateAlleles,VCF "+siteVCF+" -o %s"; + testArgs += " -B:ProbeIntervals,table "+intervalTable+" -BTI ProbeIntervals -B:MaskAlleles,VCF "+maskVCF; + testArgs += " --virtualPrimerSize 30"; + WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, + Arrays.asList("27f9450afa132888a8994167f0035fd7")); + executeTest("Test probes", spec); + } + + @Test(enabled=true) + public void testWikiExampleNoBWA() { + String siteVCF = validationDataLocation + "sites_to_validate.vcf"; + String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf"; + String intervalTable = validationDataLocation + "amplicon_interval_table1.table"; + String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons -B:ValidateAlleles,VCF "+siteVCF+" -o %s"; + testArgs += " -B:ProbeIntervals,table "+intervalTable+" -BTI ProbeIntervals -B:MaskAlleles,VCF "+maskVCF; + testArgs += " --virtualPrimerSize 30 --doNotUseBWA"; + WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, + Arrays.asList("f2611ff1d9cd5bedaad003251fed8bc1")); + executeTest("Test probes", spec); + } + + @Test(enabled=true) + public void testWikiExampleMonoFilter() { + String siteVCF = validationDataLocation + "sites_to_validate.vcf"; + String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf"; + String intervalTable = validationDataLocation + "amplicon_interval_table1.table"; + String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons -B:ValidateAlleles,VCF "+siteVCF+" -o %s"; + testArgs += " -B:ProbeIntervals,table "+intervalTable+" -BTI ProbeIntervals -B:MaskAlleles,VCF "+maskVCF; + 
testArgs += " --virtualPrimerSize 30 --filterMonomorphic"; + WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, + Arrays.asList("77b3f30e38fedad812125bdf6cf3255f")); + executeTest("Test probes", spec); + } + +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index eb6a1a4c6..057053a1c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; +import org.broadinstitute.sting.MD5DB; import org.broadinstitute.sting.WalkerTest; import org.testng.annotations.Test; import org.testng.annotations.DataProvider; @@ -26,8 +27,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf", "d33212a84368e821cbedecd4f59756d6", // tranches - "a35cd067f378442eee8cd5edeea92be0", // recal file - "126d52843f4a57199ee97750ffc16a07"); // cut VCF + "4652dca41222bebdf9d9fda343b2a835", // recal file + "243a397a33a935fcaccd5deb6d16f0c0"); // cut VCF @DataProvider(name = "VRTest") public Object[][] createData1() { @@ -65,8 +66,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { " -NO_HEADER" + " -B:input,VCF " + params.inVCF + " -o %s" + - " -tranchesFile " + getFileForMD5(params.tranchesMD5) + - " -recalFile " + getFileForMD5(params.recalMD5), + " -tranchesFile " + MD5DB.getMD5FilePath(params.tranchesMD5, null) + + " -recalFile " + MD5DB.getMD5FilePath(params.recalMD5, null), Arrays.asList(params.cutVCFMD5)); executeTest("testApplyRecalibration-"+params.inVCF, spec); } 
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 33a20f7b5..904a5b29b 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -34,76 +34,75 @@ import java.util.Arrays; * Tests CombineVariants */ public class CombineVariantsIntegrationTest extends WalkerTest { -// public static String baseTestString(String args) { -// return "-T CombineVariants -NO_HEADER -L 1:1-50,000,000 -o %s -R " + b36KGReference + args; -// } -// -// public void test1InOut(String file, String md5, boolean vcf3) { -// test1InOut(file, md5, "", vcf3); -// } -// -// public void test1InOut(String file, String md5, String args, boolean vcf3) { -// WalkerTestSpec spec = new WalkerTestSpec( -// baseTestString(" -priority v1 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file + args), -// 1, -// Arrays.asList(md5)); -// executeTest("testInOut1--" + file, spec); -// } -// -// public void combine2(String file1, String file2, String args, String md5, boolean vcf3) { -// WalkerTestSpec spec = new WalkerTestSpec( -// baseTestString(" -priority v1,v2 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file1 + " -B:v2,VCF" + (vcf3 ? 
"3 " : " ") + validationDataLocation + file2 + args), -// 1, -// Arrays.asList(md5)); -// executeTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec); -// } -// -// public void combineSites(String args, String md5) { -// String file1 = "1000G_omni2.5.b37.sites.vcf"; -// String file2 = "hapmap_3.3.b37.sites.vcf"; -// WalkerTestSpec spec = new WalkerTestSpec( -// "-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference -// + " -L 1:1-10,000,000 -B:omni,VCF " + validationDataLocation + file1 -// + " -B:hm3,VCF " + validationDataLocation + file2 + args, -// 1, -// Arrays.asList(md5)); -// executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); -// } -// -// -// @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "2117fff6e0d182cd20be508e9661829c", true); } -// @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "2cfaf7af3dd119df08b8a9c1f72e2f93", " -setKey foo", true); } -// @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "1474ac0fde2ce42a3c24f1c97eab333e", " -setKey null", true); } -// @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "7fc66df048a0ab08cf507906e1d4a308", false); } // official project VCF files in tabix format -// -// @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "ec9715f53dbf4531570557c212822f12", false); } -// @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f1072be5f5c6ee810276d9ca6537224d", false); } -// -// @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "b77a1eec725201d9d8e74ee0c45638d3", false); } // official project VCF files in tabix format -// @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", 
"YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "802977fdfd2f4905b501bb06800f60af", false); } // official project VCF files in tabix format -// @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "a67157287dd2b24b5cdf7ebf8fcbbe9a", false); } -// -// @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e1f4718a179f1196538a33863da04f53", false); } -// -// @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "b3783384b7c8e877b971033e90beba48", true); } -// -// @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "902e541c87caa72134db6293fc46f0ad"); } -// @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "f339ad4bb5863b58b9c919ce7d040bb9"); } -// -// @Test public void threeWayWithRefs() { -// WalkerTestSpec spec = new WalkerTestSpec( -// baseTestString(" -B:NA19240_BGI,VCF "+validationDataLocation+"NA19240.BGI.RG.vcf" + -// " -B:NA19240_ILLUMINA,VCF "+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" + -// " -B:NA19240_WUGSC,VCF "+validationDataLocation+"NA19240.WUGSC.RG.vcf" + -// " -B:denovoInfo,VCF "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" + -// " -setKey centerSet" + -// " -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" + -// " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + -// " -genotypeMergeOptions UNIQUIFY -L 1"), -// 1, -// Arrays.asList("a07995587b855f3214fb71940bf23c0f")); -// executeTest("threeWayWithRefs", spec); -// } + public static String baseTestString(String args) { + return "-T CombineVariants -NO_HEADER -L 1:1-50,000,000 -o %s -R " + b36KGReference + args; + } + + public void 
test1InOut(String file, String md5, boolean vcf3) { + test1InOut(file, md5, "", vcf3); + } + + public void test1InOut(String file, String md5, String args, boolean vcf3) { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString(" -priority v1 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file + args), + 1, + Arrays.asList(md5)); + executeTest("testInOut1--" + file, spec); + } + + public void combine2(String file1, String file2, String args, String md5, boolean vcf3) { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString(" -priority v1,v2 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file1 + " -B:v2,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file2 + args), + 1, + Arrays.asList(md5)); + executeTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec); + } + + public void combineSites(String args, String md5) { + String file1 = "1000G_omni2.5.b37.sites.vcf"; + String file2 = "hapmap_3.3.b37.sites.vcf"; + WalkerTestSpec spec = new WalkerTestSpec( + "-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference + + " -L 1:1-10,000,000 -B:omni,VCF " + validationDataLocation + file1 + + " -B:hm3,VCF " + validationDataLocation + file2 + args, + 1, + Arrays.asList(md5)); + executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); + } + + @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c608b9fc1e36dba6cebb4f259883f9f0", true); } + @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "20caad94411d6ab48153b214de916df8", " -setKey foo", true); } + @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "004f3065cb1bc2ce2f9afd695caf0b48", " -setKey null", true); } + @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "c9c901ff9ef2a982624b203a8086dff0", false); } // official project VCF files in tabix format + + @Test public 
void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "7593be578d4274d672fc22fced38012b", false); } + @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "1cd467863c4e948fadd970681552d57e", false); } + + @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format + @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1cf095c2fe9641b7ca1f8ee2c46fd4a", false); } + + @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083", false); } + + @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c", true); } + + @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "4836086891f6cbdd40eebef3076d215a"); } + @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "6a34b5d743efda8b2f3b639f3a2f5de8"); } + + @Test public void threeWayWithRefs() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString(" -B:NA19240_BGI,VCF "+validationDataLocation+"NA19240.BGI.RG.vcf" + + " -B:NA19240_ILLUMINA,VCF "+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" + + " -B:NA19240_WUGSC,VCF "+validationDataLocation+"NA19240.WUGSC.RG.vcf" + + " -B:denovoInfo,VCF 
"+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" + + " -setKey centerSet" + + " -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" + + " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + + " -genotypeMergeOptions UNIQUIFY -L 1"), + 1, + Arrays.asList("1de95f91ca15d2a8856de35dee0ce33e")); + executeTest("threeWayWithRefs", spec); + } // complex examples with filtering, indels, and multiple alleles @@ -119,8 +118,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest { executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); } - @Test public void complexTestFull() { combineComplexSites("", "64b991fd3850f83614518f7d71f0532f"); } - @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "0db9ef50fe54b60426474273d7c7fa99"); } - @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "d20acb3d53ba0a02ce92d540ebeda2a9"); } - @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "8d1b3d120515f8b56b5a0d10bc5da713"); } + @Test public void complexTestFull() { combineComplexSites("", "b5a53ee92bdaacd2bb3327e9004ae058"); } + @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); } + @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f72a178137e25dbe0b931934cdc0079d"); } + @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java index d32ab6282..82c894c6f 100755 --- 
a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java @@ -40,7 +40,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T LiftoverVariants -o %s -R " + b36KGReference + " -B:variant,vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", 1, - Arrays.asList("37e23efd7d6471fc0f807b31ccafe0eb")); + Arrays.asList("70aeaca5b74cc7ba8e2da7b71ff0fbfd")); executeTest("test b36 to hg19", spec); } @@ -49,7 +49,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T LiftoverVariants -o %s -R " + b36KGReference + " -B:variant,vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", 1, - Arrays.asList("b6ef4a2f026fd3843aeb9ed764a66921")); + Arrays.asList("3fd7ec2dc4064ef410786276b0dc9d08")); executeTest("test b36 to hg19, unsorted samples", spec); } @@ -58,7 +58,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T LiftoverVariants -o %s -R " + hg18Reference + " -B:variant,vcf " + validationDataLocation + "liftover_test.vcf -chain " + validationDataLocation + "hg18ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", 1, - Arrays.asList("3275373b3c44ad14a270b50664b3f8a3")); + Arrays.asList("ab2c6254225d7e2ecf52eee604d5673b")); executeTest("test hg18 to hg19, unsorted", spec); } } 
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index e18287a21..b5f41542e 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -18,7 +18,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' -B:variant,VCF3 " + testfile + " -NO_HEADER"), 1, - Arrays.asList("1b9d551298dc048c7d36b60440ff4d50") + Arrays.asList("d18516c1963802e92cb9e425c0b75fd6") ); executeTest("testComplexSelection--" + testfile, spec); @@ -31,7 +31,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(" -sn A -sn B -sn C -B:variant,VCF3 " + testfile + " -NO_HEADER"), 1, - Arrays.asList("5ba7536a0819421b330350a160e4261a") + Arrays.asList("b74038779fe6485dbb8734ae48178356") ); executeTest("testRepeatedLineSelection--" + testfile, spec); @@ -44,7 +44,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -disc myvar -L 20:1012700-1020000 -B:variant,VCF " + b37hapmapGenotypes + " -B:myvar,VCF " + testFile + " -o %s -NO_HEADER", 1, - Arrays.asList("97621ae8f29955eedfc4e0be3515fcb9") + Arrays.asList("78e6842325f1f1bc9ab30d5e7737ee6e") ); executeTest("testDiscordance--" + testFile, spec); @@ -57,7 +57,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -conc hapmap -L 20:1012700-1020000 -B:hapmap,VCF " + b37hapmapGenotypes + " 
-B:variant,VCF " + testFile + " -o %s -NO_HEADER", 1, - Arrays.asList("a0ae016fdffcbe7bfb99fd3dbc311407") + Arrays.asList("d2ba3ea30a810f6f0fbfb1b643292b6a") ); executeTest("testConcordance--" + testFile, spec); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java index cf0673ee6..d7efe4212 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java @@ -60,7 +60,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest { " --NO_HEADER" + " -o %s", 1, - Arrays.asList("debbbf3e661b6857cc8d99ff7635bb1d") + Arrays.asList("658f580f7a294fd334bd897102616fed") ); executeTest("testSimpleVCFStreaming", spec); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java old mode 100644 new mode 100755 index 72647c8e1..1db712353 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java @@ -44,7 +44,7 @@ public class VariantsToTableIntegrationTest extends WalkerTest { @Test(enabled = true) public void testComplexVariantsToTable() { WalkerTestSpec spec = new WalkerTestSpec(variantsToTableCmd(" -AMD"), - Arrays.asList("b2a3712c1bfad8f1383ffada8b5017ba")); + Arrays.asList("e8f771995127b727fb433da91dd4ee98")); executeTest("testComplexVariantsToTable", spec).getFirst(); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java 
b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java index 64d0db14b..8c96c1e11 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java @@ -20,7 +20,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { @Test public void testVariantsToVCFUsingGeliInput() { List md5 = new ArrayList(); - md5.add("bd15d98adc76b5798e3bbeff3f936feb"); + md5.add("4accae035d271b35ee2ec58f403c68c6"); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + @@ -38,7 +38,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { @Test public void testGenotypesToVCFUsingGeliInput() { List md5 = new ArrayList(); - md5.add("acd15d3f85bff5b545bc353e0e23cc6e"); + md5.add("71e8c98d7c3a73b6287ecc339086fe03"); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + @@ -56,7 +56,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { @Test public void testGenotypesToVCFUsingHapMapInput() { List md5 = new ArrayList(); - md5.add("6f34528569f8cf5941cb365fa77288c1"); + md5.add("f343085305e80c7a2493422e4eaad983"); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + @@ -73,7 +73,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { @Test public void testGenotypesToVCFUsingVCFInput() { List md5 = new ArrayList(); - md5.add("d8316fc1b9d8e954a58940354119a32e"); + md5.add("86f02e2e764ba35854cff2aa05a1fdd8"); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + diff --git a/public/java/test/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBatIntegrationTest.java b/public/java/test/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBatIntegrationTest.java index aa6303a6f..77db34cbc 100644 --- 
a/public/java/test/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBatIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBatIntegrationTest.java @@ -34,7 +34,6 @@ import org.testng.annotations.Test; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.jna.lsf.v7_0_6.LibBat.*; -import javax.jws.soap.SOAPBinding; import java.io.File; /** @@ -55,25 +54,25 @@ public class LibBatIntegrationTest extends BaseTest { @Test public void testReadConfEnv() { - LibLsf.config_param[] unitsParam = (LibLsf.config_param[]) new LibLsf.config_param().toArray(4); + LibLsf.config_param[] configParams = (LibLsf.config_param[]) new LibLsf.config_param().toArray(4); - unitsParam[0].paramName = "LSF_UNIT_FOR_LIMITS"; - unitsParam[1].paramName = "LSF_CONFDIR"; - unitsParam[2].paramName = "MADE_UP_PARAMETER"; + configParams[0].paramName = "LSF_UNIT_FOR_LIMITS"; + configParams[1].paramName = "LSF_CONFDIR"; + configParams[2].paramName = "MADE_UP_PARAMETER"; - Structure.autoWrite(unitsParam); + Structure.autoWrite(configParams); - if (LibLsf.ls_readconfenv(unitsParam[0], null) != 0) { + if (LibLsf.ls_readconfenv(configParams[0], null) != 0) { Assert.fail(LibLsf.ls_sysmsg()); } - Structure.autoRead(unitsParam); + Structure.autoRead(configParams); - System.out.println("LSF_UNIT_FOR_LIMITS: " + unitsParam[0].paramValue); - Assert.assertNotNull(unitsParam[1].paramValue); - Assert.assertNull(unitsParam[2].paramValue); - Assert.assertNull(unitsParam[3].paramName); - Assert.assertNull(unitsParam[3].paramValue); + System.out.println("LSF_UNIT_FOR_LIMITS: " + configParams[0].paramValue); + Assert.assertNotNull(configParams[1].paramValue); + Assert.assertNull(configParams[2].paramValue); + Assert.assertNull(configParams[3].paramName); + Assert.assertNull(configParams[3].paramValue); } @Test diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java 
b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java index 2f6b589f4..68a2ecf8d 100755 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java @@ -4,6 +4,7 @@ import org.broad.tribble.Tribble; import org.broad.tribble.index.*; import org.broad.tribble.iterators.CloseableTribbleIterator; import org.broad.tribble.source.BasicFeatureSource; +import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.testng.Assert; import org.testng.annotations.Test; @@ -75,7 +76,7 @@ public class IndexFactoryUnitTest { // test that the input index is the same as the one created from the identical input file // test that the dynamic index is the same as the output index, which is equal to the input index - Assert.assertTrue(IndexFactory.onDiskIndexEqualToNewlyCreatedIndex(outputFile, outputFileIndex, new VCFCodec())); + WalkerTest.assertOnDiskIndexEqualToNewlyCreatedIndex(outputFileIndex, "unittest", outputFile); } } } diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java new file mode 100644 index 000000000..32ff25c7b --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -0,0 +1,28 @@ +package org.broadinstitute.sting.utils.codecs.vcf; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import java.io.File; +import java.util.Arrays; +import java.util.List; + +public class VCFIntegrationTest extends WalkerTest { + + @Test + public void testReadingAndWritingWitHNoChanges() { + + String md5ofInputVCF = "a990ba187a69ca44cb9bc2bb44d00447"; + String testVCF = validationDataLocation + "vcf4.1.example.vcf"; + + String baseCommand = "-R " + b37KGReference + " 
-NO_HEADER -o %s "; + + String test1 = baseCommand + "-T VariantAnnotator -BTI variant -B:variant,vcf " + testVCF; + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList(md5ofInputVCF)); + List result = executeTest("Test Variant Annotator with no changes", spec1).getFirst(); + + String test2 = baseCommand + "-T VariantsToVCF -B:variant,vcf " + result.get(0).getAbsolutePath(); + WalkerTestSpec spec2 = new WalkerTestSpec(test2, 1, Arrays.asList(md5ofInputVCF)); + executeTest("Test Variants To VCF from new output", spec2); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java index c4ca6a551..14e63191d 100644 --- a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java @@ -2,15 +2,16 @@ package org.broadinstitute.sting.utils.genotype.vcf; import org.broad.tribble.readers.AsciiLineReader; import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.testng.annotations.Test; -import java.io.File; -import java.io.IOException; -import java.io.PrintWriter; -import java.io.StringBufferInputStream; +import java.io.*; +import java.math.BigInteger; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; /** * Created by IntelliJ IDEA. 
@@ -40,6 +41,52 @@ public class VCFHeaderUnitTest extends BaseTest { checkMD5ofHeaderFile(header, "ad8c4cf85e868b0261ab49ee2c613088"); } + /** + * a little utility function for all tests to md5sum a file + * Shamelessly taken from: + * + * http://www.javalobby.org/java/forums/t84420.html + * + * @param file the file + * @return a string + */ + private static String md5SumFile(File file) { + MessageDigest digest; + try { + digest = MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + throw new ReviewedStingException("Unable to find MD5 digest"); + } + InputStream is; + try { + is = new FileInputStream(file); + } catch (FileNotFoundException e) { + throw new ReviewedStingException("Unable to open file " + file); + } + byte[] buffer = new byte[8192]; + int read; + try { + while ((read = is.read(buffer)) > 0) { + digest.update(buffer, 0, read); + } + byte[] md5sum = digest.digest(); + BigInteger bigInt = new BigInteger(1, md5sum); + return String.format("%032x", bigInt); /* zero-padded to 32 hex digits; BigInteger.toString(16) drops leading zeros */ + + } + catch (IOException e) { + throw new ReviewedStingException("Unable to process file for MD5", e); + } + finally { + try { + is.close(); + } + catch (IOException e) { + throw new ReviewedStingException("Unable to close input stream for MD5 calculation", e); + } + } + } + private void checkMD5ofHeaderFile(VCFHeader header, String md5sum) { File myTempFile = null; PrintWriter pw = null; diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java index 5d42f8d0c..a344817a0 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java @@ -49,7 +49,7 @@ public class VariantContextIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -NO_HEADER -B:vcf,VCF3
" + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -L 1:1-1000000 -o %s --outputVCF %s", 2, // just one output file - Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "e6673737acbb6bfabfcd92c4b2268241")); + Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "ff91731213fd0bbdc200ab6fd1c93e63")); executeTest("testToVCF", spec); } diff --git a/public/packages/PicardPrivate.xml b/public/packages/PicardPrivate.xml index 110b41d3f..581c47979 100644 --- a/public/packages/PicardPrivate.xml +++ b/public/packages/PicardPrivate.xml @@ -7,6 +7,8 @@ + + diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index b64ba8952..1f4f79993 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -201,9 +201,6 @@ class DataProcessingPipeline extends QScript { } - - - /**************************************************************************** * Main script ****************************************************************************/ diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index 150d78019..934cf2a3c 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -15,8 +15,8 @@ class GATKResourcesBundle extends QScript { @Argument(doc="liftOverPerl", required=false) var liftOverPerl: File = new File("./perl/liftOverVCF.pl") - @Argument(shortName = "svn", doc="The SVN version of this release", required=true) - var SVN_VERSION: String = _ + @Argument(shortName = "ver", doc="The SVN 
version of this release", required=true) + var VERSION: String = _ @Argument(shortName = "bundleDir", doc="Path to root where resource files will be placed", required=false) val BUNDLE_ROOT = new File("/humgen/gsa-hpprojects/GATK/bundle") @@ -32,8 +32,8 @@ class GATKResourcesBundle extends QScript { val SITES_EXT: String = "sites" - def BUNDLE_DIR: File = BUNDLE_ROOT + "/" + SVN_VERSION - def DOWNLOAD_DIR: File = DOWNLOAD_ROOT + "/" + SVN_VERSION + def BUNDLE_DIR: File = BUNDLE_ROOT + "/" + VERSION + def DOWNLOAD_DIR: File = DOWNLOAD_ROOT + "/" + VERSION // REFERENCES class Reference( val name: String, val file: File ) { } @@ -113,6 +113,12 @@ class GATKResourcesBundle extends QScript { addResource(new Resource(hg19.file, "", hg19, false)) addResource(new Resource(hg18.file, "", hg18, false)) + // + // The b37_decoy reference + // + addResource(new Resource("/humgen/1kg/reference/human_g1k_v37_decoy.fasta", + "IGNORE", b37, false, false)) + // // standard VCF files. Will be lifted to each reference // diff --git a/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala b/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala index 71970a36b..05c1a1775 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala @@ -45,7 +45,7 @@ class QSettings { var jobPriority: Option[Int] = None @Argument(fullName="default_memory_limit", shortName="memLimit", doc="Default memory limit for jobs, in gigabytes.", required=false) - var memoryLimit: Option[Int] = None + var memoryLimit: Option[Double] = None @Argument(fullName="run_directory", shortName="runDir", doc="Root directory to run functions from.", required=false) var runDirectory = new File(".") diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/CommandLineJobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/CommandLineJobRunner.scala index 2fbfab5ec..2e3108136 100755 --- 
a/public/scala/src/org/broadinstitute/sting/queue/engine/CommandLineJobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/CommandLineJobRunner.scala @@ -33,12 +33,29 @@ import org.broadinstitute.sting.queue.util.{Logging, IOUtils} */ trait CommandLineJobRunner extends JobRunner[CommandLineFunction] with Logging { + /** The string representation of the identifier of the running job. */ + def jobIdString: String = null + /** A generated exec shell script. */ protected var jobScript: File = _ /** Which directory to use for the job status files. */ protected def jobStatusDir = function.jobTempDir + /** Amount of time a job can go without status before giving up. */ + private val unknownStatusMaxSeconds = 5 * 60 + + /** Last known status */ + protected var lastStatus: RunnerStatus.Value = _ + + /** The last time the status was updated */ + protected var lastStatusUpdate: Long = _ + + final override def status = this.lastStatus + + def residentRequestMB: Option[Double] = function.memoryLimit.map(_ * 1024) + def residentLimitMB: Option[Double] = residentRequestMB.map( _ * 1.2 ) + override def init() { super.init() var exec = new StringBuilder @@ -53,7 +70,21 @@ trait CommandLineJobRunner extends JobRunner[CommandLineFunction] with Logging { } exec.append(function.commandLine) - this.jobScript = IOUtils.writeTempFile(exec.toString, ".exec", "", jobStatusDir) + this.jobScript = IOUtils.writeTempFile(exec.toString(), ".exec", "", jobStatusDir) + } + + protected def updateStatus(updatedStatus: RunnerStatus.Value) { + this.lastStatus = updatedStatus + this.lastStatusUpdate = System.currentTimeMillis + } + + override def checkUnknownStatus() { + val unknownStatusMillis = (System.currentTimeMillis - lastStatusUpdate) + if (unknownStatusMillis > (unknownStatusMaxSeconds * 1000L)) { + // Unknown status has been returned for a while now. 
+ updateStatus(RunnerStatus.FAILED) + logger.error("Unable to read status for %.2f minutes: job id %s: %s".format(unknownStatusMillis/(60 * 1000D), jobIdString, function.description)) /* jobIdString is a String (null by default), hence %s rather than %d */ + } } override def cleanup() { diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/JobManager.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/JobManager.scala index d2be4939a..30187f7e2 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/JobManager.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/JobManager.scala @@ -44,9 +44,9 @@ trait JobManager[TFunction <: QFunction, TRunner <: JobRunner[TFunction]] { /** * Updates the status on a list of functions. * @param runners Runners to update. + * @return runners which were updated. */ - def updateStatus(runners: Set[TRunner]) { - } + def updateStatus(runners: Set[TRunner]): Set[TRunner] = Set.empty /** * Stops a list of functions. diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunner.scala index 4b4d44988..de5fbde05 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunner.scala @@ -52,6 +52,11 @@ trait JobRunner[TFunction <: QFunction] { */ def status: RunnerStatus.Value + /** + * Checks if the status has been unknown for an extended period of time. + */ + def checkUnknownStatus() {} + /** * Returns the function to be run.
*/ diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala index bfcc4d48c..a52e9c561 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala @@ -138,30 +138,32 @@ class QGraph extends Logging { validate() if (running && numMissingValues == 0) { - logger.info("Generating scatter gather jobs.") val scatterGathers = jobGraph.edgeSet.filter(edge => scatterGatherable(edge)) + if (!scatterGathers.isEmpty) { + logger.info("Generating scatter gather jobs.") - var addedFunctions = List.empty[QFunction] - for (scatterGather <- scatterGathers) { - val functions = scatterGather.asInstanceOf[FunctionEdge] - .function.asInstanceOf[ScatterGatherableFunction] - .generateFunctions() - addedFunctions ++= functions + var addedFunctions = List.empty[QFunction] + for (scatterGather <- scatterGathers) { + val functions = scatterGather.asInstanceOf[FunctionEdge] + .function.asInstanceOf[ScatterGatherableFunction] + .generateFunctions() + addedFunctions ++= functions + } + + logger.info("Removing original jobs.") + this.jobGraph.removeAllEdges(scatterGathers) + prune() + + logger.info("Adding scatter gather jobs.") + addedFunctions.foreach(function => if (running) this.add(function)) + + logger.info("Regenerating graph.") + fill + val scatterGatherDotFile = if (settings.expandedDotFile != null) settings.expandedDotFile else settings.dotFile + if (scatterGatherDotFile != null) + renderToDot(scatterGatherDotFile) + validate() } - - logger.info("Removing original jobs.") - this.jobGraph.removeAllEdges(scatterGathers) - prune() - - logger.info("Adding scatter gather jobs.") - addedFunctions.foreach(function => if (running) this.add(function)) - - logger.info("Regenerating graph.") - fill - val scatterGatherDotFile = if (settings.expandedDotFile != null) settings.expandedDotFile else settings.dotFile - 
if (scatterGatherDotFile != null) - renderToDot(scatterGatherDotFile) - validate() } } @@ -1003,7 +1005,10 @@ class QGraph extends Logging { .asInstanceOf[Set[JobRunner[QFunction]]] if (managerRunners.size > 0) try { - manager.updateStatus(managerRunners) + val updatedRunners = manager.updateStatus(managerRunners) + for (runner <- managerRunners.diff(updatedRunners)) { + runner.checkUnknownStatus() + } } catch { case e => /* ignore */ } diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala index 82edf6221..8c639b5bb 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala @@ -40,12 +40,7 @@ class GridEngineJobRunner(val function: CommandLineFunction) extends CommandLine /** Job Id of the currently executing job. 
*/ private var jobId: String = _ - - /** Last known status */ - private var lastStatus: RunnerStatus.Value = _ - - /** The last time the status was updated */ - protected var lastStatusUpdate: Long = _ + override def jobIdString = jobId def start() { GridEngineJobRunner.gridEngineSession.synchronized { @@ -82,11 +77,14 @@ class GridEngineJobRunner(val function: CommandLineFunction) extends CommandLine nativeSpecString += " -q " + function.jobQueue } - // If the memory limit is set (GB) specify the memory limit - if (function.memoryLimit.isDefined) { - val memAvl: String = function.memoryLimit.get + "G" - val memMax: String = (function.memoryLimit.get * 1.2 * 1024).ceil.toInt + "M" - nativeSpecString += " -l mem_free=" + memAvl + ",h_rss=" + memMax + // If the resident set size is requested pass on the memory request + if (residentRequestMB.isDefined) { + nativeSpecString += " -l mem_free=%dM".format(residentRequestMB.get.ceil.toInt) + } + + // If the resident set size limit is defined specify the memory limit + if (residentLimitMB.isDefined) { + nativeSpecString += " -l h_rss=%dM".format(residentLimitMB.get.ceil.toInt) } // If the priority is set (user specified Int) specify the priority @@ -121,21 +119,11 @@ class GridEngineJobRunner(val function: CommandLineFunction) extends CommandLine logger.info("Submitted Grid Engine job id: " + jobId) } } - - def status = this.lastStatus - - private def updateStatus(updatedStatus: RunnerStatus.Value) { - this.lastStatus = updatedStatus - this.lastStatusUpdate = System.currentTimeMillis - } } object GridEngineJobRunner extends Logging { private val gridEngineSession = SessionFactory.getFactory.getSession - /** Amount of time a job can go without status before giving up. */ - private val unknownStatusMaxSeconds = 5 * 60 - initGridEngine() /** @@ -156,16 +144,14 @@ object GridEngineJobRunner extends Logging { /** * Updates the status of a list of jobs. * @param runners Runners to update. + * @return runners which were updated. 
*/ - def updateStatus(runners: Set[GridEngineJobRunner]) { + def updateStatus(runners: Set[GridEngineJobRunner]) = { var updatedRunners = Set.empty[GridEngineJobRunner] gridEngineSession.synchronized { runners.foreach(runner => if (updateRunnerStatus(runner)) {updatedRunners += runner}) } - - for (runner <- runners.diff(updatedRunners)) { - checkUnknownStatus(runner) - } + updatedRunners } /** @@ -219,20 +205,11 @@ object GridEngineJobRunner extends Logging { logger.warn("Unable to determine status of Grid Engine job id " + runner.jobId, de) } - Option(returnStatus) match { - case Some(returnStatus) => - runner.updateStatus(returnStatus) - return true - case None => return false - } - } - - private def checkUnknownStatus(runner: GridEngineJobRunner) { - val unknownStatusSeconds = (System.currentTimeMillis - runner.lastStatusUpdate) - if (unknownStatusSeconds > (unknownStatusMaxSeconds * 1000L)) { - // Unknown status has been returned for a while now. - runner.updateStatus(RunnerStatus.FAILED) - logger.error("Unable to read Grid Engine status for %d minutes: job id %d: %s".format(unknownStatusSeconds/60, runner.jobId, runner.function.description)) + if (returnStatus != null) { + runner.updateStatus(returnStatus) + true + } else { + false } } diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobManager.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobManager.scala index c0fff9125..23ddab619 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobManager.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobManager.scala @@ -34,6 +34,6 @@ class Lsf706JobManager extends CommandLineJobManager[Lsf706JobRunner] { def runnerType = classOf[Lsf706JobRunner] def create(function: CommandLineFunction) = new Lsf706JobRunner(function) - override def updateStatus(runners: Set[Lsf706JobRunner]) { Lsf706JobRunner.updateStatus(runners) } + override def updateStatus(runners: 
Set[Lsf706JobRunner]) = { Lsf706JobRunner.updateStatus(runners) } override def tryStop(runners: Set[Lsf706JobRunner]) { Lsf706JobRunner.tryStop(runners) } } diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala index 57d133dfe..46dd08332 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala @@ -32,8 +32,8 @@ import org.broadinstitute.sting.utils.Utils import org.broadinstitute.sting.jna.clibrary.LibC import org.broadinstitute.sting.jna.lsf.v7_0_6.LibBat.{submitReply, submit} import com.sun.jna.ptr.IntByReference -import com.sun.jna.{StringArray, NativeLong} import org.broadinstitute.sting.queue.engine.{RunnerStatus, CommandLineJobRunner} +import com.sun.jna.{Structure, StringArray, NativeLong} /** * Runs jobs on an LSF compute cluster. @@ -45,12 +45,7 @@ class Lsf706JobRunner(val function: CommandLineFunction) extends CommandLineJobR /** Job Id of the currently executing job. */ private var jobId = -1L - - /** Last known status */ - private var lastStatus: RunnerStatus.Value = _ - - /** The last time the status was updated */ - protected var lastStatusUpdate: Long = _ + override def jobIdString = jobId.toString /** * Dispatches the function on the LSF cluster. 
@@ -85,12 +80,19 @@ class Lsf706JobRunner(val function: CommandLineFunction) extends CommandLineJobR request.options |= LibBat.SUB_QUEUE } - // If the memory limit is set (GB) specify the memory limit - if (function.memoryLimit.isDefined) { - request.resReq = "rusage[mem=" + function.memoryLimit.get + "]" + // If the resident set size is requested pass on the memory request + if (residentRequestMB.isDefined) { + val memInUnits = Lsf706JobRunner.convertUnits(residentRequestMB.get) + request.resReq = "select[mem>%1$d] rusage[mem=%1$d]".format(memInUnits) request.options |= LibBat.SUB_RES_REQ } + // If the resident set size limit is defined specify the memory limit + if (residentLimitMB.isDefined) { + val memInUnits = Lsf706JobRunner.convertUnits(residentLimitMB.get) + request.rLimits(LibLsf.LSF_RLIMIT_RSS) = memInUnits + } + // If the priority is set (user specified Int) specify the priority if (function.jobPriority.isDefined) { request.userPriority = function.jobPriority.get @@ -122,11 +124,13 @@ class Lsf706JobRunner(val function: CommandLineFunction) extends CommandLineJobR } } - def status = this.lastStatus - - private def updateStatus(updatedStatus: RunnerStatus.Value) { - this.lastStatus = updatedStatus - this.lastStatusUpdate = System.currentTimeMillis + override def checkUnknownStatus() { + // TODO: Need a second pass through either of the two archive logs using lsb_geteventrecbyline() for disappeared jobs. + // Can also tell if we wake up and the last time we saw status was greater than lsb_parameterinfo().cleanPeriod + // LSB_SHAREDIR/cluster_name/logdir/lsb.acct (man bacct) + // LSB_SHAREDIR/cluster_name/logdir/lsb.events (man bhist) + logger.debug("Job Id %s status / exitStatus / exitInfo: ??? / ??? / ???".format(jobId)) + super.checkUnknownStatus() } } @@ -137,17 +141,8 @@ object Lsf706JobRunner extends Logging { /** Number of seconds for a non-normal exit status before we give up on expecting LSF to retry the function. 
*/ private val retryExpiredSeconds = 5 * 60 - /** Amount of time a job can go without status before giving up. */ - private val unknownStatusMaxSeconds = 5 * 60 - initLsf() - /** The name of the default queue. */ - private var defaultQueue: String = _ - - /** The run limits for each queue. */ - private var queueRlimitRun = Map.empty[String,Int] - /** * Initialize the Lsf library. */ @@ -161,8 +156,9 @@ object Lsf706JobRunner extends Logging { /** * Bulk updates job statuses. * @param runners Runners to update. + * @return runners which were updated. */ - def updateStatus(runners: Set[Lsf706JobRunner]) { + def updateStatus(runners: Set[Lsf706JobRunner]) = { var updatedRunners = Set.empty[Lsf706JobRunner] Lsf706JobRunner.lsfLibLock.synchronized { @@ -192,70 +188,7 @@ object Lsf706JobRunner extends Logging { } } - for (runner <- runners.diff(updatedRunners)) { - checkUnknownStatus(runner) - } - } - - /** - * Tries to stop any running jobs. - * @param runners Runners to stop. - */ - def tryStop(runners: Set[Lsf706JobRunner]) { - lsfLibLock.synchronized { - // lsb_killbulkjobs does not seem to forward SIGTERM, - // only SIGKILL, so send the Ctrl-C (SIGTERM) one by one. - for (runner <- runners.filterNot(_.jobId < 0)) { - try { - if (LibBat.lsb_signaljob(runner.jobId, SIGTERM) < 0) - logger.error(LibBat.lsb_sperror("Unable to kill job " + runner.jobId)) - } catch { - case e => - logger.error("Unable to kill job " + runner.jobId, e) - } - } - } - } - - - /** - * Returns the run limit in seconds for the queue. - * If the queue name is null returns the length of the default queue. - * @param queue Name of the queue or null for the default queue. - * @return the run limit in seconds for the queue. - */ - private def getRlimitRun(queue: String) = { - lsfLibLock.synchronized { - if (queue == null) { - if (defaultQueue != null) { - queueRlimitRun(defaultQueue) - } else { - // Get the info on the default queue. 
- val numQueues = new IntByReference(1) - val queueInfo = LibBat.lsb_queueinfo(null, numQueues, null, null, 0) - if (queueInfo == null) - throw new QException(LibBat.lsb_sperror("Unable to get LSF queue info for the default queue")) - defaultQueue = queueInfo.queue - val limit = queueInfo.rLimits(LibLsf.LSF_RLIMIT_RUN) - queueRlimitRun += defaultQueue -> limit - limit - } - } else { - queueRlimitRun.get(queue) match { - case Some(limit) => limit - case None => - // Cache miss. Go get the run limits from LSF. - val queues = new StringArray(Array[String](queue)) - val numQueues = new IntByReference(1) - val queueInfo = LibBat.lsb_queueinfo(queues, numQueues, null, null, 0) - if (queueInfo == null) - throw new QException(LibBat.lsb_sperror("Unable to get LSF queue info for queue: " + queue)) - val limit = queueInfo.rLimits(LibLsf.LSF_RLIMIT_RUN) - queueRlimitRun += queue -> limit - limit - } - } - } + updatedRunners } private def updateRunnerStatus(runner: Lsf706JobRunner, jobInfo: LibBat.jobInfoEnt) { @@ -280,20 +213,6 @@ object Lsf706JobRunner extends Logging { ) } - private def checkUnknownStatus(runner: Lsf706JobRunner) { - // TODO: Need a second pass through either of the two archive logs using lsb_geteventrecbyline() for disappeared jobs. - // Can also tell if we wake up and the last time we saw status was greater than lsb_parameterinfo().cleanPeriod - // LSB_SHAREDIR/cluster_name/logdir/lsb.acct (man bacct) - // LSB_SHAREDIR/cluster_name/logdir/lsb.events (man bhist) - logger.debug("Job Id %s status / exitStatus / exitInfo: ??? / ??? / ???".format(runner.jobId)) - val unknownStatusSeconds = (System.currentTimeMillis - runner.lastStatusUpdate) - if (unknownStatusSeconds > (unknownStatusMaxSeconds * 1000L)) { - // Unknown status has been returned for a while now. 
- runner.updateStatus(RunnerStatus.FAILED) - logger.error("Unable to read LSF status for %d minutes: job id %d: %s".format(unknownStatusSeconds/60, runner.jobId, runner.function.description)) - } - } - /** * Returns true if LSF is expected to retry running the function. * @param exitInfo The reason the job exited. @@ -309,4 +228,86 @@ object Lsf706JobRunner extends Logging { } } } + + /** + * Tries to stop any running jobs. + * @param runners Runners to stop. + */ + def tryStop(runners: Set[Lsf706JobRunner]) { + lsfLibLock.synchronized { + // lsb_killbulkjobs does not seem to forward SIGTERM, + // only SIGKILL, so send the Ctrl-C (SIGTERM) one by one. + for (runner <- runners.filterNot(_.jobId < 0)) { + try { + if (LibBat.lsb_signaljob(runner.jobId, SIGTERM) < 0) + logger.error(LibBat.lsb_sperror("Unable to kill job " + runner.jobId)) + } catch { + case e => + logger.error("Unable to kill job " + runner.jobId, e) + } + } + } + } + + /** The name of the default queue. */ + private lazy val defaultQueue: String = { + lsfLibLock.synchronized { + val numQueues = new IntByReference(1) + val queueInfo = LibBat.lsb_queueinfo(null, numQueues, null, null, 0) + if (queueInfo == null) + throw new QException(LibBat.lsb_sperror("Unable to get LSF queue info for the default queue")) + queueInfo.queue + } + } + + /** The run limits for each queue. */ + private var queueRlimitRun = Map.empty[String,Int] + + /** + * Returns the run limit in seconds for the queue. + * If the queue name is null returns the length of the default queue. + * @param queue Name of the queue or null for the default queue. + * @return the run limit in seconds for the queue. + */ + private def getRlimitRun(queueName: String) = { + lsfLibLock.synchronized { + val queue = if (queueName == null) defaultQueue else queueName + queueRlimitRun.get(queue) match { + case Some(limit) => limit + case None => + // Cache miss. Go get the run limits from LSF. 
+ val queues = new StringArray(Array(queue)) + val numQueues = new IntByReference(1) + val queueInfo = LibBat.lsb_queueinfo(queues, numQueues, null, null, 0) + if (queueInfo == null) + throw new QException(LibBat.lsb_sperror("Unable to get LSF queue info for queue: " + queue)) + val limit = queueInfo.rLimits(LibLsf.LSF_RLIMIT_RUN) + queueRlimitRun += queue -> limit + limit + } + } + } + + private lazy val unitDivisor: Double = { + lsfLibLock.synchronized { + val unitsParam: Array[LibLsf.config_param] = new LibLsf.config_param().toArray(2).asInstanceOf[Array[LibLsf.config_param]] + unitsParam(0).paramName = "LSF_UNIT_FOR_LIMITS" + + Structure.autoWrite(unitsParam.asInstanceOf[Array[Structure]]) + if (LibLsf.ls_readconfenv(unitsParam(0), null) != 0) + throw new QException(LibBat.lsb_sperror("ls_readconfenv() failed")) + Structure.autoRead(unitsParam.asInstanceOf[Array[Structure]]) + + unitsParam(0).paramValue match { + case "MB" => 1D + case "GB" => 1024D + case "TB" => 1024D * 1024 + case "PB" => 1024D * 1024 * 1024 + case "EB" => 1024D * 1024 * 1024 * 1024 + case null => 1D + } + } + } + + private def convertUnits(mb: Double) = (mb / unitDivisor).ceil.toInt } diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala index 603511a30..128d8773c 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala @@ -50,10 +50,10 @@ class ShellJobRunner(val function: CommandLineFunction) extends CommandLineJobRu // Allow advanced users to update the job. 
+ updateJobRun(job) - runStatus = RunnerStatus.RUNNING + updateStatus(RunnerStatus.RUNNING) job.run() - runStatus = RunnerStatus.DONE + updateStatus(RunnerStatus.DONE) /* reached only after job.run() returns; same DONE status the replaced line set */ } - def status = runStatus + override def checkUnknownStatus() {} } diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala index 2b1abb2d0..c62fdcd7c 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala @@ -9,7 +9,7 @@ trait CommandLineFunction extends QFunction with Logging { def commandLine: String /** Upper memory limit */ - var memoryLimit: Option[Int] = None + var memoryLimit: Option[Double] = None /** Job project to run the command */ var jobProject: String = _ @@ -56,7 +56,7 @@ trait CommandLineFunction extends QFunction with Logging { if (memoryLimit.isEmpty) memoryLimit = qSettings.memoryLimit - super.freezeFieldValues + super.freezeFieldValues() } /** diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala index 72445442e..e8279f62b 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala @@ -47,7 +47,7 @@ trait JavaCommandLineFunction extends CommandLineFunction { /** * Memory limit for the java executable, or if None will use the default memoryLimit. */ - var javaMemoryLimit: Option[Int] = None + var javaMemoryLimit: Option[Double] = None /** * Returns the java executable to run.
@@ -61,8 +61,8 @@ trait JavaCommandLineFunction extends CommandLineFunction { null } - override def freezeFieldValues = { - super.freezeFieldValues + /* Runs the parent freeze first; javaMemoryLimit then falls back to memoryLimit when it is still empty and memoryLimit is defined. */ override def freezeFieldValues() { + super.freezeFieldValues() if (javaMemoryLimit.isEmpty && memoryLimit.isDefined) javaMemoryLimit = memoryLimit @@ -72,7 +72,7 @@ trait JavaCommandLineFunction extends CommandLineFunction { } def javaOpts = "%s -Djava.io.tmpdir=%s" - .format(optional(" -Xmx", javaMemoryLimit, "g"), jobTempDir) + .format(optional(" -Xmx", javaMemoryLimit.map(gb => (gb * 1024).ceil.toInt), "m"), jobTempDir) /* javaMemoryLimit is scaled by 1024 and rounded up so -Xmx is emitted as a whole number with the m suffix (presumably GB to MB, going by the lambda parameter name gb -- confirm). */ def commandLine = "java%s %s" .format(javaOpts, javaExecutable) diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala index dc3cfd9d4..c2c956118 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala @@ -31,6 +31,7 @@ import org.broadinstitute.sting.commandline.CommandLineProgram import java.util.Date import java.text.SimpleDateFormat import org.broadinstitute.sting.BaseTest +import org.broadinstitute.sting.MD5DB import org.broadinstitute.sting.queue.QCommandLine import org.broadinstitute.sting.queue.util.{Logging, ProcessController} import java.io.{FileNotFoundException, File} @@ -105,7 +106,7 @@ object PipelineTest extends BaseTest with Logging { private def assertMatchingMD5s(name: String, fileMD5s: Traversable[(File, String)], parameterize: Boolean) { var failed = 0 for ((file, expectedMD5) <- fileMD5s) { - val calculatedMD5 = BaseTest.testFileMD5(name, file, expectedMD5, parameterize) + val calculatedMD5 = MD5DB.testFileMD5(name, file, expectedMD5, parameterize) if (!parameterize && expectedMD5 != "" && expectedMD5 != calculatedMD5) failed += 1 } diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala 
b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala index 0871e769b..7c76823da 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala @@ -29,7 +29,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} class HelloWorldPipelineTest { @Test - def testHelloWorld { + def testHelloWorld() { val spec = new PipelineTestSpec spec.name = "HelloWorld" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala" @@ -37,15 +37,23 @@ class HelloWorldPipelineTest { } @Test - def testHelloWorldWithPrefix { + def testHelloWorldWithPrefix() { val spec = new PipelineTestSpec spec.name = "HelloWorldWithPrefix" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala -jobPrefix HelloWorld" PipelineTest.executeTest(spec) } + @Test + def testHelloWorldWithMemoryLimit() { + val spec = new PipelineTestSpec + spec.name = "HelloWorldWithMemoryLimit" /* own name, mirroring the method name as the sibling tests do, instead of reusing the prefix test's name */ + spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala -memLimit 1.25" + PipelineTest.executeTest(spec) + } + @Test(enabled=false) - def testHelloWorldWithPriority { + def testHelloWorldWithPriority() { val spec = new PipelineTestSpec spec.name = "HelloWorldWithPriority" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala -jobPriority 100" diff --git a/public/testdata/diffTestMaster.vcf b/public/testdata/diffTestMaster.vcf new file mode 100644 index 000000000..549f54345 --- /dev/null +++ b/public/testdata/diffTestMaster.vcf @@ -0,0 +1,11 @@ +##fileformat=VCFv4.0 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 +chr1 2646 rs62635284 G A 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:53,75:3:-12.40,-0.90,-0.00:9.03 +chr1 2979 
rs62635286 T G 83.67 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:31,32:9:-33.61,-2.71,-0.00:27.09 +chr1 2981 rs62028691 A G 14.69 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:31,33:9:-32.12,-2.71,-0.00:27.08 +chr1 4536 rs11582131 G C 0.18 PASS AC=1;AF=0.50;AN=2 GT:AD:DP:GL:GQ 0/1:42,33:16:-41.67,-4.82,-26.29:99 +chr1 4562 rs11490464 C G 0.14 PASS AC=1;AF=0.50;AN=2 GT:AD:DP:GL:GQ 0/1:26,30:9:-19.64,-2.72,-14.87:99 +chr1 4770 rs6682375 A G 0.32 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:9,111:84:-306.27,-28.58,-3.46:99 +chr1 4793 rs6682385 A G 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:4,115:109:-350.74,-32.88,-0.10:99 +chr1 5074 rs11586607 T G 0.01 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:29,97:39:-130.41,-11.75,-3.82:79.31 +chr1 5137 rs62636497 A T 140.49 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:0,74:39:-148.99,-11.75,-0.01:99 diff --git a/public/testdata/diffTestTest.vcf b/public/testdata/diffTestTest.vcf new file mode 100644 index 000000000..8699ab253 --- /dev/null +++ b/public/testdata/diffTestTest.vcf @@ -0,0 +1,11 @@ +##fileformat=VCFv4.0 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 +chr1 2646 rs62635284 G A 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:53,75:3:-12.40,-0.90,-0.00:9.03 +chr1 2979 rs62635286 T G 83.67 CHANGED_FILTER AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:31,32:9:-33.61,-2.71,-0.00:27.09 +chr1 2981 rs62028691 A G 14.69 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:31,33:9:-32.12,-2.71,-0.00:27.08 +chr1 4536 rs11582131 G C 0.18 PASS AC=2;AF=0.50;AN=2 GT:AD:DP:GL:GQ 0/1:42,33:16:-41.67,-4.82,-26.29:99 +chr1 4562 rs11490464 C G 0.14 PASS AC=1;AF=0.50;AN=2 GT:AD:DP:GL:GQ 1/1:26,30:9:-19.64,-2.72,-14.87:99 +chr1 4770 rs6682375 A G 0.32 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 0/1:9,111:84:-306.27,-28.58,-3.46:99 +chr1 4793 rs6682385 A G 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:4,114:109:-350.74,-32.88,-0.10:99 +chr1 5074 rs11586607 T G 0.01 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:29,97:39:-130.41,-11.74,-3.82:79.31 +chr1 5137 rs62636497 A T 
140.49 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:0,74:39:-148.99,-11.75,-0.01:9 diff --git a/public/testdata/exampleBAM.simple.bai b/public/testdata/exampleBAM.simple.bai new file mode 100644 index 000000000..2d8268b1d Binary files /dev/null and b/public/testdata/exampleBAM.simple.bai differ diff --git a/public/testdata/exampleBAM.simple.bam b/public/testdata/exampleBAM.simple.bam new file mode 100644 index 000000000..c3eb7ae7b Binary files /dev/null and b/public/testdata/exampleBAM.simple.bam differ diff --git a/settings/ivysettings.xml b/settings/ivysettings.xml index b77414df9..2b4a081d4 100644 --- a/settings/ivysettings.xml +++ b/settings/ivysettings.xml @@ -25,6 +25,7 @@ + diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1941.jar b/settings/repository/edu.mit.broad/picard-private-parts-1941.jar deleted file mode 100644 index 760db5cb8..000000000 Binary files a/settings/repository/edu.mit.broad/picard-private-parts-1941.jar and /dev/null differ diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1959.jar b/settings/repository/edu.mit.broad/picard-private-parts-1959.jar new file mode 100644 index 000000000..ae11e636b Binary files /dev/null and b/settings/repository/edu.mit.broad/picard-private-parts-1959.jar differ diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1941.xml b/settings/repository/edu.mit.broad/picard-private-parts-1959.xml similarity index 58% rename from settings/repository/edu.mit.broad/picard-private-parts-1941.xml rename to settings/repository/edu.mit.broad/picard-private-parts-1959.xml index 07d51ae53..e7c7e3a21 100644 --- a/settings/repository/edu.mit.broad/picard-private-parts-1941.xml +++ b/settings/repository/edu.mit.broad/picard-private-parts-1959.xml @@ -1,3 +1,3 @@ - + diff --git a/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2-sources.jar b/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2-sources.jar new file mode 100644 index 000000000..dc77c7d33 Binary files 
/dev/null and b/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2-sources.jar differ diff --git a/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2.jar b/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2.jar new file mode 100644 index 000000000..f267be4b5 Binary files /dev/null and b/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2.jar differ diff --git a/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2.xml b/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2.xml new file mode 100644 index 000000000..c6a8da052 --- /dev/null +++ b/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/net.sf/picard-1.47.869.xml b/settings/repository/net.sf/picard-1.47.869.xml deleted file mode 100644 index 86d07d4fa..000000000 --- a/settings/repository/net.sf/picard-1.47.869.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/picard-1.47.869.jar b/settings/repository/net.sf/picard-1.49.895.jar similarity index 87% rename from settings/repository/net.sf/picard-1.47.869.jar rename to settings/repository/net.sf/picard-1.49.895.jar index d277fd217..3ee1f2090 100644 Binary files a/settings/repository/net.sf/picard-1.47.869.jar and b/settings/repository/net.sf/picard-1.49.895.jar differ diff --git a/settings/repository/net.sf/picard-1.49.895.xml b/settings/repository/net.sf/picard-1.49.895.xml new file mode 100644 index 000000000..52d4900c5 --- /dev/null +++ b/settings/repository/net.sf/picard-1.49.895.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/net.sf/sam-1.47.869.xml b/settings/repository/net.sf/sam-1.47.869.xml deleted file mode 100644 index 1b76fe2f9..000000000 --- a/settings/repository/net.sf/sam-1.47.869.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/sam-1.47.869.jar b/settings/repository/net.sf/sam-1.49.895.jar similarity index 91% rename from settings/repository/net.sf/sam-1.47.869.jar rename to 
settings/repository/net.sf/sam-1.49.895.jar index 933b9cfd6..c55ab0b72 100644 Binary files a/settings/repository/net.sf/sam-1.47.869.jar and b/settings/repository/net.sf/sam-1.49.895.jar differ diff --git a/settings/repository/net.sf/sam-1.49.895.xml b/settings/repository/net.sf/sam-1.49.895.xml new file mode 100644 index 000000000..0436ce881 --- /dev/null +++ b/settings/repository/net.sf/sam-1.49.895.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/org.broad/tribble-3.jar b/settings/repository/org.broad/tribble-16.jar similarity index 67% rename from settings/repository/org.broad/tribble-3.jar rename to settings/repository/org.broad/tribble-16.jar index f0ab44a05..331f28ec3 100644 Binary files a/settings/repository/org.broad/tribble-3.jar and b/settings/repository/org.broad/tribble-16.jar differ diff --git a/settings/repository/org.broad/tribble-3.xml b/settings/repository/org.broad/tribble-16.xml similarity index 57% rename from settings/repository/org.broad/tribble-3.xml rename to settings/repository/org.broad/tribble-16.xml index c35358331..e23eec339 100644 --- a/settings/repository/org.broad/tribble-3.xml +++ b/settings/repository/org.broad/tribble-16.xml @@ -1,4 +1,4 @@ -