From 3939971d78102bdfeaac3d063d4cff32f1eda77b Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Wed, 2 Apr 2014 03:55:21 +0800 Subject: [PATCH] After renaming the packages, instead of updating the JNI library used for testing bwa, moving the classes to the archive. NOTE: The migrated READEME.md has been added that will allow others to possibly ressurect this code as needed. --- .../ValidationAmpliconsIntegrationTest.java | 102 --- public/c/bwa/Makefile | 22 - public/c/bwa/README.md | 68 -- public/c/bwa/build_linux.sh | 8 - public/c/bwa/build_mac.sh | 8 - public/c/bwa/bwa_gateway.cpp | 279 --------- public/c/bwa/bwa_gateway.h | 85 --- ...atk_engine_alignment_bwa_c_BWACAligner.cpp | 473 -------------- ..._gatk_engine_alignment_bwa_c_BWACAligner.h | 61 -- .../gatk/engine/alignment/CheckAlignment.java | 167 ----- .../engine/alignment/bwa/c/BWACAligner.java | 284 --------- .../gatk/engine/alignment/bwa/c/BWAPath.java | 101 --- .../validation/ValidationAmplicons.java | 579 ------------------ 13 files changed, 2237 deletions(-) delete mode 100644 protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/validation/ValidationAmpliconsIntegrationTest.java delete mode 100644 public/c/bwa/Makefile delete mode 100644 public/c/bwa/README.md delete mode 100755 public/c/bwa/build_linux.sh delete mode 100755 public/c/bwa/build_mac.sh delete mode 100644 public/c/bwa/bwa_gateway.cpp delete mode 100644 public/c/bwa/bwa_gateway.h delete mode 100644 public/c/bwa/org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner.cpp delete mode 100644 public/c/bwa/org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner.h delete mode 100644 public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/alignment/CheckAlignment.java delete mode 100644 public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/alignment/bwa/c/BWACAligner.java delete mode 100644 public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/alignment/bwa/c/BWAPath.java delete mode 100644 public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/ValidationAmplicons.java diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/validation/ValidationAmpliconsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/validation/ValidationAmpliconsIntegrationTest.java deleted file mode 100644 index 2e9ddd18e..000000000 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/validation/ValidationAmpliconsIntegrationTest.java +++ /dev/null @@ -1,102 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012 Broad Institute, Inc. -* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 4. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 5. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 6. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 7. MISCELLANEOUS -* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.tools.walkers.validation; - -import org.broadinstitute.gatk.engine.walkers.WalkerTest; -import org.testng.annotations.Test; - -import java.util.Arrays; - -/** - * Created by IntelliJ IDEA. - * User: Ghost - * Date: 7/19/11 - * Time: 7:39 PM - * To change this template use File | Settings | File Templates. - */ -public class ValidationAmpliconsIntegrationTest extends WalkerTest { - - @Test(enabled=true) - public void testWikiExample() { - String siteVCF = validationDataLocation + "sites_to_validate.vcf"; - String maskVCF = privateTestDir + "amplicon_mask_sites.vcf"; - String intervalTable = privateTestDir + "amplicon_interval_table1.table"; - String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons --ValidateAlleles:VCF "+siteVCF+" -o %s"; - testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF; - testArgs += " --virtualPrimerSize 30"; - WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("240d99b58f73985fb114abe9044c0271")); - executeTest("Test probes", spec); - } - - @Test(enabled=true) - public void testWikiExampleNoBWA() { - String siteVCF = privateTestDir + "sites_to_validate.vcf"; - String maskVCF = privateTestDir + "amplicon_mask_sites.vcf"; - String intervalTable = privateTestDir + "amplicon_interval_table1.table"; - String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons --ValidateAlleles:VCF "+siteVCF+" -o %s"; - testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF; - testArgs += " --virtualPrimerSize 30 --doNotUseBWA"; - WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("6e7789445e29d91979a21e78d3d53295")); - executeTest("Test probes", spec); - } - - @Test(enabled=true) - public void testWikiExampleMonoFilter() { - String siteVCF = privateTestDir + "sites_to_validate.vcf"; - String maskVCF = privateTestDir + "amplicon_mask_sites.vcf"; - String intervalTable = privateTestDir + "amplicon_interval_table1.table"; - String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons --ValidateAlleles:VCF "+siteVCF+" -o %s"; - testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF; - testArgs += " --virtualPrimerSize 30 --filterMonomorphic"; - WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("18d7236208db603e143b40db06ef2aca")); - executeTest("Test probes", spec); - } - -} diff --git a/public/c/bwa/Makefile b/public/c/bwa/Makefile deleted file mode 100644 index bf171cc78..000000000 --- a/public/c/bwa/Makefile +++ /dev/null @@ -1,22 +0,0 @@ -CXX=g++ -CXXFLAGS=-g -Wall -O2 -m64 -fPIC - -.cpp.o: - $(CXX) -c $(CXXFLAGS) -I$(BWA_HOME) -I$(JAVA_INCLUDE) -I$(JAVA_PLATFORM_INCLUDE) $< -o $@ - -all: init lib - -init: - @echo Please make sure the following platforms are set correctly on your machine. - @echo BWA_HOME=$(BWA_HOME) - @echo JAVA_INCLUDE=$(JAVA_INCLUDE) - @echo JAVA_PLATFORM_INCLUDE=$(JAVA_PLATFORM_INCLUDE) - @echo TARGET_LIB=$(TARGET_LIB) - @echo EXTRA_LIBS=$(EXTRA_LIBS) - @echo LIBTOOL_COMMAND=$(LIBTOOL_COMMAND) - -lib: org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner.o bwa_gateway.o - $(LIBTOOL_COMMAND) $? -o $(TARGET_LIB) -L$(BWA_HOME) -lbwacore $(EXTRA_LIBS) - -clean: - rm *.o libbwa.* diff --git a/public/c/bwa/README.md b/public/c/bwa/README.md deleted file mode 100644 index ba2d9e14d..000000000 --- a/public/c/bwa/README.md +++ /dev/null @@ -1,68 +0,0 @@ -BWA C Aligner JNI Library -========================= - - -Basic Instructions ------------------- - -- Download BWA svn revision 47 -- Run autoconf and build bwa to create libbwacore.a -- Build mac or linux specific libbwa -- Update the libary path to point to the directory with libbwa - - -Obtaining compatible libbwacore -------------------------------- - -libbwa only works with a very specify range of versions from bwa's svn archive. r25-r47 are known to compile a -libbwacore.a compatible with building libbwa.so or libbwa.dylib. - -From the bwa directory, download bwa revision 47 from svn by running the command: - - svn co http://svn.code.sf.net/p/bio-bwa/code/trunk/bwa@47 bwasvn47 - - -Compiling libbwacore.a ----------------------- - -Once bwa has been downloaded from svn to the directory bwasvn47, using autoconf and make, one can compile libbwacore.a. - -- Install autoconf on Mac using MacPorts - `port install autoconf` - -- Or add autoconf on Linux via the dotkit - `reuse .autoconf-2.69` - -- Change directory to bwasvn47 - `cd bwasvn47` - -- Change permissions to make autogen.sh script executable - `chmod +x autogen.sh` - -- Run configure - `./configure` - -- Build - `make` - -- Return to the bwa directory - `cd ..` - - -Compiling libbwa.so / libbwa.dylib ----------------------------------- - -After successfully compiling libbwacore.a from the bwa svn source, the GATK specific libbwa.so / libbwa.dylib may be -compiled using libbwacore. There are two shell scripts that include libbwacore.a by using paths pointing to bwasvn47. - -- On Mac run `./build_mac.sh` -- On Linux run `./build_linux.sh` - - -Running the GATK with libbwa.so / libbwa.dylib ----------------------------------------------- - -The GATK loads libbwa.so or libbwa.dylib from a directory specified appended to the java.library.path. There are two -ways to specify this directory containing libbwa. Either add the directory to the LD_LIBRARY_PATH environment variable -`export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:`, or set java.library.path java variable on the command line -`-Djava.library.path=`. diff --git a/public/c/bwa/build_linux.sh b/public/c/bwa/build_linux.sh deleted file mode 100755 index e20377992..000000000 --- a/public/c/bwa/build_linux.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh -export BWA_HOME="${PWD}/bwasvn47" -export JAVA_INCLUDE="${JAVA_HOME}/include" -export JAVA_PLATFORM_INCLUDE="${JAVA_HOME}/include/linux" -export TARGET_LIB="libbwa.so" -export EXTRA_LIBS="-lc -lz -lstdc++ -lpthread" -export LIBTOOL_COMMAND="g++ -shared -Wl,-soname,libbwa.so" -make diff --git a/public/c/bwa/build_mac.sh b/public/c/bwa/build_mac.sh deleted file mode 100755 index 6879e1442..000000000 --- a/public/c/bwa/build_mac.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh -export BWA_HOME="${PWD}/bwasvn47" -export JAVA_INCLUDE="${JAVA_HOME}/include" -export JAVA_PLATFORM_INCLUDE="${JAVA_HOME}/include/darwin" -export TARGET_LIB="libbwa.dylib" -export EXTRA_LIBS="-lc -lz -lstdc++" -export LIBTOOL_COMMAND="libtool -dynamic" -make diff --git a/public/c/bwa/bwa_gateway.cpp b/public/c/bwa/bwa_gateway.cpp deleted file mode 100644 index 088ee43bf..000000000 --- a/public/c/bwa/bwa_gateway.cpp +++ /dev/null @@ -1,279 +0,0 @@ -#include -#include -#include - -#include "bwase.h" -#include "bwa_gateway.h" - -BWA::BWA(const char* ann_filename, - const char* amb_filename, - const char* pac_filename, - const char* forward_bwt_filename, - const char* forward_sa_filename, - const char* reverse_bwt_filename, - const char* reverse_sa_filename) -{ - // Load the bns (?) and reference - bns = bns_restore_core(ann_filename,amb_filename,pac_filename); - reference = new ubyte_t[bns->l_pac/4+1]; - rewind(bns->fp_pac); - fread(reference, 1, bns->l_pac/4+1, bns->fp_pac); - fclose(bns->fp_pac); - bns->fp_pac = NULL; - - // Load the BWTs (both directions) and suffix arrays (both directions) - bwts[0] = bwt_restore_bwt(forward_bwt_filename); - bwt_restore_sa(forward_sa_filename, bwts[0]); - bwts[1] = bwt_restore_bwt(reverse_bwt_filename); - bwt_restore_sa(reverse_sa_filename, bwts[1]); - load_default_options(); - - // Always reinitialize the random seed whenever a new set of files are loaded. - initialize_random_seed(); - - // initialize the bwase subsystem - bwase_initialize(); -} - -BWA::~BWA() { - delete[] reference; - bns_destroy(bns); - bwt_destroy(bwts[0]); - bwt_destroy(bwts[1]); -} - -void BWA::find_paths(const char* bases, const unsigned read_length, bwt_aln1_t*& paths, unsigned& num_paths, unsigned& best_path_count, unsigned& second_best_path_count) -{ - bwa_seq_t* sequence = create_sequence(bases, read_length); - - // Calculate the suffix array interval for each sequence, storing the result in sequence->aln (and sequence->n_aln). - // This method will destroy the contents of seq and rseq. - bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options); - - paths = new bwt_aln1_t[sequence->n_aln]; - memcpy(paths,sequence->aln,sequence->n_aln*sizeof(bwt_aln1_t)); - num_paths = sequence->n_aln; - - // Call aln2seq to initialize the type of match present. - bwa_aln2seq(sequence->n_aln,sequence->aln,sequence); - best_path_count = sequence->c1; - second_best_path_count = sequence->c2; - - bwa_free_read_seq(1,sequence); -} - -Alignment* BWA::generate_single_alignment(const char* bases, const unsigned read_length) { - bwa_seq_t* sequence = create_sequence(bases,read_length); - - // Calculate paths. - bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options); - - // Check for no alignments found and return null. - if(sequence->n_aln == 0) { - bwa_free_read_seq(1,sequence); - return NULL; - } - - // bwa_cal_sa_reg_gap destroys the bases / read length. Copy them back in. - copy_bases_into_sequence(sequence,bases,read_length); - - // Pick best alignment and propagate its information into the sequence. - bwa_aln2seq(sequence->n_aln,sequence->aln,sequence); - - // Generate the best alignment from the sequence. - Alignment* alignment = new Alignment; - *alignment = generate_final_alignment_from_sequence(sequence); - - bwa_free_read_seq(1,sequence); - - return alignment; -} - -void BWA::generate_alignments_from_paths(const char* bases, - const unsigned read_length, - bwt_aln1_t* paths, - const unsigned num_paths, - const unsigned best_count, - const unsigned second_best_count, - Alignment*& alignments, - unsigned& num_alignments) -{ - bwa_seq_t* sequence = create_sequence(bases,read_length); - - sequence->aln = paths; - sequence->n_aln = num_paths; - - // (Ab)use bwa_aln2seq to propagate values stored in the path out into the sequence itself. - bwa_aln2seq(sequence->n_aln,sequence->aln,sequence); - - // But overwrite key parts of the sequence in case the user passed back only a smaller subset - // of the paths. - sequence->c1 = best_count; - sequence->c2 = second_best_count; - sequence->type = sequence->c1 > 1 ? BWA_TYPE_REPEAT : BWA_TYPE_UNIQUE; - - num_alignments = 0; - for(unsigned i = 0; i < (unsigned)sequence->n_aln; i++) - num_alignments += (sequence->aln + i)->l - (sequence->aln + i)->k + 1; - - alignments = new Alignment[num_alignments]; - unsigned alignment_idx = 0; - - for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) { - // Stub in a 'working' path, so that only the desired alignment is local-aligned. - const bwt_aln1_t* path = paths + path_idx; - bwt_aln1_t working_path = *path; - - // Loop through all alignments, aligning each one individually. - for(unsigned sa_idx = path->k; sa_idx <= path->l; sa_idx++) { - working_path.k = working_path.l = sa_idx; - sequence->aln = &working_path; - sequence->n_aln = 1; - - sequence->sa = sa_idx; - sequence->strand = path->a; - sequence->score = path->score; - - // Each time through bwa_refine_gapped, seq gets reversed. Revert the reverse. - // TODO: Fix the interface to bwa_refine_gapped so its easier to work with. - if(alignment_idx > 0) - seq_reverse(sequence->len, sequence->seq, 0); - - // Copy the local alignment data into the alignment object. - *(alignments + alignment_idx) = generate_final_alignment_from_sequence(sequence); - - alignment_idx++; - } - } - - sequence->aln = NULL; - sequence->n_aln = 0; - - bwa_free_read_seq(1,sequence); -} - -Alignment BWA::generate_final_alignment_from_sequence(bwa_seq_t* sequence) { - // Calculate the local coordinate and local alignment. - bwa_cal_pac_pos_core(bwts[0],bwts[1],sequence,options.max_diff,options.fnr); - bwa_refine_gapped(bns, 1, sequence, reference, NULL); - - // Copy the local alignment data into the alignment object. - Alignment alignment; - - // Populate basic path info - alignment.edit_distance = sequence->nm; - alignment.num_mismatches = sequence->n_mm; - alignment.num_gap_opens = sequence->n_gapo; - alignment.num_gap_extensions = sequence->n_gape; - alignment.num_best = sequence->c1; - alignment.num_second_best = sequence->c2; - - // Final alignment position. - alignment.type = sequence->type; - bns_coor_pac2real(bns, sequence->pos, pos_end(sequence) - sequence->pos, &alignment.contig); - alignment.pos = sequence->pos - bns->anns[alignment.contig].offset + 1; - alignment.negative_strand = sequence->strand; - alignment.mapping_quality = sequence->mapQ; - - // Cigar step. - alignment.cigar = NULL; - if(sequence->cigar) { - alignment.cigar = new uint16_t[sequence->n_cigar]; - memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t)); - } - alignment.n_cigar = sequence->n_cigar; - - // MD tag with a better breakdown of differences in the cigar - alignment.md = strdup(sequence->md); - delete[] sequence->md; - sequence->md = NULL; - - return alignment; -} - -void BWA::load_default_options() -{ - options.s_mm = 3; - options.s_gapo = 11; - options.s_gape = 4; - options.mode = 3; - options.indel_end_skip = 5; - options.max_del_occ = 10; - options.max_entries = 2000000; - options.fnr = 0.04; - options.max_diff = -1; - options.max_gapo = 1; - options.max_gape = 6; - options.max_seed_diff = 2; - options.seed_len = 2147483647; - options.n_threads = 1; - options.max_top2 = 30; - options.trim_qual = 0; -} - -void BWA::initialize_random_seed() -{ - srand48(bns->seed); -} - -void BWA::set_max_edit_distance(float edit_distance) { - if(edit_distance > 0 && edit_distance < 1) { - options.fnr = edit_distance; - options.max_diff = -1; - } - else { - options.fnr = -1.0; - options.max_diff = (int)edit_distance; - } -} - -void BWA::set_max_gap_opens(int max_gap_opens) { options.max_gapo = max_gap_opens; } -void BWA::set_max_gap_extensions(int max_gap_extensions) { options.max_gape = max_gap_extensions; } -void BWA::set_disallow_indel_within_range(int indel_range) { options.indel_end_skip = indel_range; } -void BWA::set_mismatch_penalty(int penalty) { options.s_mm = penalty; } -void BWA::set_gap_open_penalty(int penalty) { options.s_gapo = penalty; } -void BWA::set_gap_extension_penalty(int penalty) { options.s_gape = penalty; } -void BWA::set_mode_nonstop() { options.mode |= BWA_MODE_NONSTOP; options.max_top2 = 0x7fffffff; } -void BWA::set_max_entries_in_queue(int max_entries) { options.max_entries = max_entries; } - -/** - * Create a sequence with a set of reasonable initial defaults. - * Will leave seq and rseq empty. - */ -bwa_seq_t* BWA::create_sequence(const char* bases, const unsigned read_length) -{ - bwa_seq_t* sequence = new bwa_seq_t; - - sequence->tid = -1; - - sequence->name = 0; - - copy_bases_into_sequence(sequence, bases, read_length); - - sequence->qual = 0; - sequence->aln = 0; - sequence->md = 0; - - sequence->cigar = NULL; - sequence->n_cigar = 0; - - sequence->multi = NULL; - sequence->n_multi = 0; - - return sequence; -} - -void BWA::copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length) -{ - // seq, rseq will ultimately be freed by bwa_cal_sa_reg_gap - sequence->seq = new ubyte_t[read_length]; - sequence->rseq = new ubyte_t[read_length]; - for(unsigned i = 0; i < read_length; i++) sequence->seq[i] = nst_nt4_table[(unsigned)bases[i]]; - memcpy(sequence->rseq,sequence->seq,read_length); - - // BWA expects the read bases to arrive reversed. - seq_reverse(read_length,sequence->seq,0); - seq_reverse(read_length,sequence->rseq,1); - - sequence->full_len = sequence->len = read_length; -} diff --git a/public/c/bwa/bwa_gateway.h b/public/c/bwa/bwa_gateway.h deleted file mode 100644 index 62756ec2a..000000000 --- a/public/c/bwa/bwa_gateway.h +++ /dev/null @@ -1,85 +0,0 @@ -#ifndef BWA_GATEWAY -#define BWA_GATEWAY - -#include - -#include "bntseq.h" -#include "bwt.h" -#include "bwtaln.h" - -class Alignment { - public: - uint32_t type; - int contig; - bwtint_t pos; - bool negative_strand; - uint32_t mapping_quality; - - uint16_t *cigar; - int n_cigar; - - uint8_t num_mismatches; - uint8_t num_gap_opens; - uint8_t num_gap_extensions; - uint16_t edit_distance; - - uint32_t num_best; - uint32_t num_second_best; - - char* md; -}; - -class BWA { - private: - bntseq_t *bns; - ubyte_t* reference; - bwt_t* bwts[2]; - gap_opt_t options; - - void load_default_options(); - void initialize_random_seed(); - bwa_seq_t* create_sequence(const char* bases, const unsigned read_length); - void copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length); - Alignment generate_final_alignment_from_sequence(bwa_seq_t* sequence); - - public: - BWA(const char* ann_filename, - const char* amb_filename, - const char* pac_filename, - const char* forward_bwt_filename, - const char* forward_sa_filename, - const char* reverse_bwt_filename, - const char* reverse_sa_filename); - ~BWA(); - - // Parameterize the aligner. - void set_max_edit_distance(float edit_distance); - void set_max_gap_opens(int max_gap_opens); - void set_max_gap_extensions(int max_gap_extensions); - void set_disallow_indel_within_range(int indel_range); - void set_mismatch_penalty(int penalty); - void set_gap_open_penalty(int penalty); - void set_gap_extension_penalty(int penalty); - void set_mode_nonstop(); - void set_max_entries_in_queue(int max_entries); - - // Perform the alignment - Alignment* generate_single_alignment(const char* bases, - const unsigned read_length); - void find_paths(const char* bases, - const unsigned read_length, - bwt_aln1_t*& paths, - unsigned& num_paths, - unsigned& best_path_count, - unsigned& second_best_path_count); - void generate_alignments_from_paths(const char* bases, - const unsigned read_length, - bwt_aln1_t* paths, - const unsigned num_paths, - const unsigned best_count, - const unsigned second_best_count, - Alignment*& alignments, - unsigned& num_alignments); -}; - -#endif // BWA_GATEWAY diff --git a/public/c/bwa/org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner.cpp b/public/c/bwa/org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner.cpp deleted file mode 100644 index a01e2d8cd..000000000 --- a/public/c/bwa/org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner.cpp +++ /dev/null @@ -1,473 +0,0 @@ -#include -#include -#include - -#include "bntseq.h" -#include "bwt.h" -#include "bwtaln.h" -#include "bwa_gateway.h" -#include "org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner.h" - -typedef void (BWA::*boolean_setter)(); -typedef void (BWA::*int_setter)(int value); -typedef void (BWA::*float_setter)(float value); - -static jobject convert_to_java_alignment(JNIEnv* env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment); -static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name); -static void set_boolean_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, boolean_setter setter); -static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter); -static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter); -static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message); - -JNIEXPORT jlong JNICALL Java_org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner_create(JNIEnv* env, jobject instance, jobject bwtFiles, jobject configuration) -{ - jstring java_ann = get_configuration_file(env,bwtFiles,"annFile"); - if(java_ann == NULL) return 0L; - jstring java_amb = get_configuration_file(env,bwtFiles,"ambFile"); - if(java_amb == NULL) return 0L; - jstring java_pac = get_configuration_file(env,bwtFiles,"pacFile"); - if(java_pac == NULL) return 0L; - jstring java_forward_bwt = get_configuration_file(env,bwtFiles,"forwardBWTFile"); - if(java_forward_bwt == NULL) return 0L; - jstring java_forward_sa = get_configuration_file(env,bwtFiles,"forwardSAFile"); - if(java_forward_sa == NULL) return 0L; - jstring java_reverse_bwt = get_configuration_file(env,bwtFiles,"reverseBWTFile"); - if(java_reverse_bwt == NULL) return 0L; - jstring java_reverse_sa = get_configuration_file(env,bwtFiles,"reverseSAFile"); - if(java_reverse_sa == NULL) return 0L; - - const char* ann_filename = env->GetStringUTFChars(java_ann,JNI_FALSE); - if(env->ExceptionCheck()) return 0L; - const char* amb_filename = env->GetStringUTFChars(java_amb,JNI_FALSE); - if(env->ExceptionCheck()) return 0L; - const char* pac_filename = env->GetStringUTFChars(java_pac,JNI_FALSE); - if(env->ExceptionCheck()) return 0L; - const char* forward_bwt_filename = env->GetStringUTFChars(java_forward_bwt,JNI_FALSE); - if(env->ExceptionCheck()) return 0L; - const char* forward_sa_filename = env->GetStringUTFChars(java_forward_sa,JNI_FALSE); - if(env->ExceptionCheck()) return 0L; - const char* reverse_bwt_filename = env->GetStringUTFChars(java_reverse_bwt,JNI_FALSE); - if(env->ExceptionCheck()) return 0L; - const char* reverse_sa_filename = env->GetStringUTFChars(java_reverse_sa,JNI_FALSE); - if(env->ExceptionCheck()) return 0L; - - BWA* bwa = new BWA(ann_filename, - amb_filename, - pac_filename, - forward_bwt_filename, - forward_sa_filename, - reverse_bwt_filename, - reverse_sa_filename); - - Java_org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner_updateConfiguration(env,instance,(jlong)bwa,configuration); - if(env->ExceptionCheck()) return 0L; - - env->ReleaseStringUTFChars(java_ann,ann_filename); - if(env->ExceptionCheck()) return 0L; - env->ReleaseStringUTFChars(java_amb,amb_filename); - if(env->ExceptionCheck()) return 0L; - env->ReleaseStringUTFChars(java_pac,pac_filename); - if(env->ExceptionCheck()) return 0L; - env->ReleaseStringUTFChars(java_forward_bwt,forward_bwt_filename); - if(env->ExceptionCheck()) return 0L; - env->ReleaseStringUTFChars(java_forward_sa,forward_sa_filename); - if(env->ExceptionCheck()) return 0L; - env->ReleaseStringUTFChars(java_reverse_bwt,reverse_bwt_filename); - if(env->ExceptionCheck()) return 0L; - env->ReleaseStringUTFChars(java_reverse_sa,reverse_sa_filename); - if(env->ExceptionCheck()) return 0L; - - return (jlong)bwa; -} - -JNIEXPORT void JNICALL Java_org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner_destroy(JNIEnv* env, jobject instance, jlong java_bwa) -{ - BWA* bwa = (BWA*)java_bwa; - delete bwa; -} - -JNIEXPORT void JNICALL Java_org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner_updateConfiguration(JNIEnv *env, jobject instance, jlong java_bwa, jobject configuration) { - BWA* bwa = (BWA*)java_bwa; - set_float_configuration_param(env, configuration, "maximumEditDistance", bwa, &BWA::set_max_edit_distance); - if(env->ExceptionCheck()) return; - set_int_configuration_param(env, configuration, "maximumGapOpens", bwa, &BWA::set_max_gap_opens); - if(env->ExceptionCheck()) return; - set_int_configuration_param(env, configuration, "maximumGapExtensions", bwa, &BWA::set_max_gap_extensions); - if(env->ExceptionCheck()) return; - set_int_configuration_param(env, configuration, "disallowIndelWithinRange", bwa, &BWA::set_disallow_indel_within_range); - if(env->ExceptionCheck()) return; - set_int_configuration_param(env, configuration, "mismatchPenalty", bwa, &BWA::set_mismatch_penalty); - if(env->ExceptionCheck()) return; - set_int_configuration_param(env, configuration, "gapOpenPenalty", bwa, &BWA::set_gap_open_penalty); - if(env->ExceptionCheck()) return; - set_int_configuration_param(env, configuration, "gapExtensionPenalty", bwa, &BWA::set_gap_extension_penalty); - if(env->ExceptionCheck()) return; - set_boolean_configuration_param(env, configuration, "nonStopMode", bwa, &BWA::set_mode_nonstop); - if(env->ExceptionCheck()) return; - set_int_configuration_param(env, configuration, "maxEntriesInQueue", bwa, &BWA::set_max_entries_in_queue); - if(env->ExceptionCheck()) return; -} - -JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner_getPaths(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases) -{ - BWA* bwa = (BWA*)java_bwa; - - const jsize read_length = env->GetArrayLength(java_bases); - if(env->ExceptionCheck()) return NULL; - - jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE); - if(read_bases == NULL) return NULL; - - bwt_aln1_t* paths = NULL; - unsigned num_paths = 0; - - unsigned best_path_count, second_best_path_count; - bwa->find_paths((const char*)read_bases,read_length,paths,num_paths,best_path_count,second_best_path_count); - - jobjectArray java_paths = env->NewObjectArray(num_paths, env->FindClass("org/broadinstitute/gatk/engine/alignment/bwa/c/BWAPath"), NULL); - if(java_paths == NULL) return NULL; - - for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) { - bwt_aln1_t& path = *(paths + path_idx); - - jclass java_path_class = env->FindClass("org/broadinstitute/gatk/engine/alignment/bwa/c/BWAPath"); - if(java_path_class == NULL) return NULL; - - jmethodID java_path_constructor = env->GetMethodID(java_path_class, "", "(IIIZJJIII)V"); - if(java_path_constructor == NULL) return NULL; - - // Note that k/l are being cast to long. Bad things will happen if JNI assumes that they're ints. - jobject java_path = env->NewObject(java_path_class, - java_path_constructor, - path.n_mm, - path.n_gapo, - path.n_gape, - path.a, - (jlong)path.k, - (jlong)path.l, - path.score, - best_path_count, - second_best_path_count); - if(java_path == NULL) return NULL; - - env->SetObjectArrayElement(java_paths,path_idx,java_path); - if(env->ExceptionCheck()) return NULL; - - env->DeleteLocalRef(java_path_class); - if(env->ExceptionCheck()) return NULL; - } - - delete[] paths; - - env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE); - - return env->ExceptionCheck() ? NULL : java_paths; -} - -JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner_convertPathsToAlignments(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases, jobjectArray java_paths) -{ - BWA* bwa = (BWA*)java_bwa; - - const jsize read_length = env->GetArrayLength(java_bases); - if(env->ExceptionCheck()) return NULL; - - jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE); - if(read_bases == NULL) return NULL; - - const jsize num_paths = env->GetArrayLength(java_paths); - bwt_aln1_t* paths = new bwt_aln1_t[num_paths]; - unsigned best_count = 0, second_best_count = 0; - - for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) { - jobject java_path = env->GetObjectArrayElement(java_paths,path_idx); - jclass java_path_class = env->GetObjectClass(java_path); - if(java_path_class == NULL) return NULL; - - bwt_aln1_t& path = *(paths + path_idx); - - jfieldID mismatches_field = env->GetFieldID(java_path_class, "numMismatches", "I"); - if(mismatches_field == NULL) return NULL; - path.n_mm = env->GetIntField(java_path,mismatches_field); - if(env->ExceptionCheck()) return NULL; - - jfieldID gap_opens_field = env->GetFieldID(java_path_class, "numGapOpens", "I"); - if(gap_opens_field == NULL) return NULL; - path.n_gapo = env->GetIntField(java_path,gap_opens_field); - if(env->ExceptionCheck()) return NULL; - - jfieldID gap_extensions_field = env->GetFieldID(java_path_class, "numGapExtensions", "I"); - if(gap_extensions_field == NULL) return NULL; - path.n_gape = env->GetIntField(java_path,gap_extensions_field); - if(env->ExceptionCheck()) return NULL; - - jfieldID negative_strand_field = env->GetFieldID(java_path_class, "negativeStrand", "Z"); - if(negative_strand_field == NULL) return NULL; - path.a = env->GetBooleanField(java_path,negative_strand_field); - if(env->ExceptionCheck()) return NULL; - - jfieldID k_field = env->GetFieldID(java_path_class, "k", "J"); - if(k_field == NULL) return NULL; - path.k = env->GetLongField(java_path,k_field); - if(env->ExceptionCheck()) return NULL; - - jfieldID l_field = env->GetFieldID(java_path_class, "l", "J"); - if(l_field == NULL) return NULL; - path.l = env->GetLongField(java_path,l_field); - if(env->ExceptionCheck()) return NULL; - - jfieldID score_field = env->GetFieldID(java_path_class, "score", "I"); - if(score_field == NULL) return NULL; - path.score = env->GetIntField(java_path,score_field); - if(env->ExceptionCheck()) return NULL; - - jfieldID best_count_field = env->GetFieldID(java_path_class, "bestCount", "I"); - if(best_count_field == NULL) return NULL; - best_count = env->GetIntField(java_path,best_count_field); - if(env->ExceptionCheck()) return NULL; - - jfieldID second_best_count_field = env->GetFieldID(java_path_class, "secondBestCount", "I"); - if(second_best_count_field == NULL) return NULL; - second_best_count = env->GetIntField(java_path,second_best_count_field); - if(env->ExceptionCheck()) return NULL; - } - - Alignment* alignments = NULL; - unsigned num_alignments = 0; - bwa->generate_alignments_from_paths((const char*)read_bases,read_length,paths,num_paths,best_count,second_best_count,alignments,num_alignments); - - jobjectArray java_alignments = env->NewObjectArray(num_alignments, env->FindClass("org/broadinstitute/gatk/engine/alignment/Alignment"), NULL); - if(java_alignments == NULL) return NULL; - - for(unsigned alignment_idx = 0; alignment_idx < (unsigned)num_alignments; alignment_idx++) { - Alignment& alignment = *(alignments + alignment_idx); - jobject java_alignment = convert_to_java_alignment(env,read_bases,read_length,alignment); - if(java_alignment == NULL) return NULL; - env->SetObjectArrayElement(java_alignments,alignment_idx,java_alignment); - if(env->ExceptionCheck()) return NULL; - } - - delete[] alignments; - delete[] paths; - - env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE); - - return env->ExceptionCheck() ? NULL : java_alignments; -} - -JNIEXPORT jobject JNICALL Java_org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner_getBestAlignment(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases) { - BWA* bwa = (BWA*)java_bwa; - - const jsize read_length = env->GetArrayLength(java_bases); - if(env->ExceptionCheck()) return NULL; - - jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE); - if(read_bases == NULL) return NULL; - - Alignment* best_alignment = bwa->generate_single_alignment((const char*)read_bases,read_length); - jobject java_best_alignment = (best_alignment != NULL) ? convert_to_java_alignment(env,read_bases,read_length,*best_alignment) : NULL; - delete best_alignment; - - env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE); - - return java_best_alignment; -} - -static jobject convert_to_java_alignment(JNIEnv *env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment) { - unsigned cigar_length; - if(alignment.type == BWA_TYPE_NO_MATCH) cigar_length = 0; - else if(!alignment.cigar) cigar_length = 1; - else cigar_length = alignment.n_cigar; - - jcharArray java_cigar_operators = env->NewCharArray(cigar_length); - if(java_cigar_operators == NULL) return NULL; - jintArray java_cigar_lengths = env->NewIntArray(cigar_length); - if(java_cigar_lengths == NULL) return NULL; - - if(alignment.cigar) { - for(unsigned cigar_idx = 0; cigar_idx < (unsigned)alignment.n_cigar; ++cigar_idx) { - jchar cigar_operator = "MIDS"[alignment.cigar[cigar_idx]>>14]; - jint cigar_length = alignment.cigar[cigar_idx]&0x3fff; - - env->SetCharArrayRegion(java_cigar_operators,cigar_idx,1,&cigar_operator); - if(env->ExceptionCheck()) return NULL; - env->SetIntArrayRegion(java_cigar_lengths,cigar_idx,1,&cigar_length); - if(env->ExceptionCheck()) return NULL; - } - } - else { - if(alignment.type != BWA_TYPE_NO_MATCH) { - jchar cigar_operator = 'M'; - env->SetCharArrayRegion(java_cigar_operators,0,1,&cigar_operator); - if(env->ExceptionCheck()) return NULL; - env->SetIntArrayRegion(java_cigar_lengths,0,1,&read_length); - if(env->ExceptionCheck()) return NULL; - } - } - delete[] alignment.cigar; - - jclass java_alignment_class = env->FindClass("org/broadinstitute/gatk/engine/alignment/Alignment"); - if(java_alignment_class == NULL) return NULL; - - jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "", "(IIZI[C[IILjava/lang/String;IIIII)V"); - if(java_alignment_constructor == NULL) return NULL; - - jstring java_md = env->NewStringUTF(alignment.md); - if(java_md == NULL) return NULL; - delete[] alignment.md; - - jobject java_alignment = env->NewObject(java_alignment_class, - java_alignment_constructor, - alignment.contig, - alignment.pos, - alignment.negative_strand, - alignment.mapping_quality, - java_cigar_operators, - java_cigar_lengths, - alignment.edit_distance, - java_md, - alignment.num_mismatches, - alignment.num_gap_opens, - alignment.num_gap_extensions, - alignment.num_best, - alignment.num_second_best); - if(java_alignment == NULL) return NULL; - - env->DeleteLocalRef(java_alignment_class); - if(env->ExceptionCheck()) return NULL; - - return java_alignment; -} - -static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name) { - jclass configuration_class = env->GetObjectClass(configuration); - if(configuration_class == NULL) return NULL; - - jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/io/File;"); - if(configuration_field == NULL) return NULL; - - jobject configuration_file = (jobject)env->GetObjectField(configuration,configuration_field); - - jclass file_class = env->FindClass("java/io/File"); - if(file_class == NULL) return NULL; - - jmethodID path_extractor = env->GetMethodID(file_class,"getAbsolutePath", "()Ljava/lang/String;"); - if(path_extractor == NULL) return NULL; - - jstring path = (jstring)env->CallObjectMethod(configuration_file,path_extractor); - if(path == NULL) return NULL; - - env->DeleteLocalRef(configuration_class); - env->DeleteLocalRef(file_class); - env->DeleteLocalRef(configuration_file); - - return path; -} - -static void set_boolean_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, boolean_setter setter) { - jclass configuration_class = env->GetObjectClass(configuration); - if(configuration_class == NULL) return; - - jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/Boolean;"); - if(configuration_field == NULL) return; - - jobject boxed_value = env->GetObjectField(configuration,configuration_field); - if(env->ExceptionCheck()) return; - - if(boxed_value != NULL) { - jclass boolean_box_class = env->FindClass("java/lang/Boolean"); - if(boolean_box_class == NULL) return; - - jmethodID boolean_extractor = env->GetMethodID(boolean_box_class,"booleanValue", "()Z"); - if(boolean_extractor == NULL) return; - - jboolean value = env->CallBooleanMethod(boxed_value,boolean_extractor); - if(env->ExceptionCheck()) return; - - if(value) - (bwa->*setter)(); - - env->DeleteLocalRef(boolean_box_class); - } - - env->DeleteLocalRef(boxed_value); - env->DeleteLocalRef(configuration_class); -} - -static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter) { - jclass configuration_class = env->GetObjectClass(configuration); - if(configuration_class == NULL) return; - - jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/Integer;"); - if(configuration_field == NULL) return; - - jobject boxed_value = env->GetObjectField(configuration,configuration_field); - if(env->ExceptionCheck()) return; - - if(boxed_value != NULL) { - jclass int_box_class = env->FindClass("java/lang/Integer"); - if(int_box_class == NULL) return; - - jmethodID int_extractor = env->GetMethodID(int_box_class,"intValue", "()I"); - if(int_extractor == NULL) return; - - jint value = env->CallIntMethod(boxed_value,int_extractor); - if(env->ExceptionCheck()) return; - - if(value < 0) - { - throw_config_value_exception(env,field_name,"cannot be set to a negative value"); - return; - } - - (bwa->*setter)(value); - - env->DeleteLocalRef(int_box_class); - } - - env->DeleteLocalRef(boxed_value); - env->DeleteLocalRef(configuration_class); -} - -static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter) -{ - jclass configuration_class = env->GetObjectClass(configuration); - if(configuration_class == NULL) return; - - jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/Float;"); - if(configuration_field == NULL) return; - - jobject boxed_value = env->GetObjectField(configuration,configuration_field); - if(boxed_value != NULL) { - jclass float_box_class = env->FindClass("java/lang/Float"); - if(float_box_class == NULL) return; - - jmethodID float_extractor = env->GetMethodID(float_box_class,"floatValue", "()F"); - if(float_extractor == NULL) return; - - jfloat value = env->CallFloatMethod(boxed_value,float_extractor); - if(env->ExceptionCheck()) return; - - if(value < 0) - { - throw_config_value_exception(env,field_name,"cannot be set to a negative value"); - return; - } - - (bwa->*setter)(value); - - env->DeleteLocalRef(float_box_class); - } - - env->DeleteLocalRef(boxed_value); - env->DeleteLocalRef(configuration_class); -} - -static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message) -{ - char* buffer = new char[strlen(field_name)+1+strlen(message)+1]; - sprintf(buffer,"%s %s",field_name,message); - jclass gatk_exception_class = env->FindClass("org/broadinstitute/gatk/utils/exceptions/GATKException"); - if(gatk_exception_class == NULL) return; - env->ThrowNew(gatk_exception_class, buffer); - delete[] buffer; -} diff --git a/public/c/bwa/org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner.h b/public/c/bwa/org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner.h deleted file mode 100644 index 6334472dc..000000000 --- a/public/c/bwa/org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner.h +++ /dev/null @@ -1,61 +0,0 @@ -/* DO NOT EDIT THIS FILE - it is machine generated */ -#include -/* Header for class org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner */ - -#ifndef _Included_org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner -#define _Included_org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner -#ifdef __cplusplus -extern "C" { -#endif -/* - * Class: org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner - * Method: create - * Signature: (Lorg/broadinstitute/gatk/engine/alignment/bwa/BWTFiles;Lorg/broadinstitute/gatk/engine/alignment/bwa/BWAConfiguration;)J - */ -JNIEXPORT jlong JNICALL Java_org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner_create - (JNIEnv *, jobject, jobject, jobject); - -/* - * Class: org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner - * Method: updateConfiguration - * Signature: (JLorg/broadinstitute/gatk/engine/alignment/bwa/BWAConfiguration;)V - */ -JNIEXPORT void JNICALL Java_org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner_updateConfiguration - (JNIEnv *, jobject, jlong, jobject); - -/* - * Class: org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner - * Method: destroy - * Signature: (J)V - */ -JNIEXPORT void JNICALL Java_org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner_destroy - (JNIEnv *, jobject, jlong); - -/* - * Class: org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner - * Method: getPaths - * Signature: (J[B)[Lorg/broadinstitute/gatk/engine/alignment/bwa/c/BWAPath; - */ -JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner_getPaths - (JNIEnv *, jobject, jlong, jbyteArray); - -/* - * Class: org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner - * Method: convertPathsToAlignments - * Signature: (J[B[Lorg/broadinstitute/gatk/engine/alignment/bwa/c/BWAPath;)[Lorg/broadinstitute/gatk/engine/alignment/Alignment; - */ -JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner_convertPathsToAlignments - (JNIEnv *, jobject, jlong, jbyteArray, jobjectArray); - -/* - * Class: org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner - * Method: getBestAlignment - * Signature: (J[B)Lorg/broadinstitute/gatk/engine/alignment/Alignment; - */ -JNIEXPORT jobject JNICALL Java_org_broadinstitute_gatk_engine_alignment_bwa_c_BWACAligner_getBestAlignment - (JNIEnv *, jobject, jlong, jbyteArray); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/alignment/CheckAlignment.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/alignment/CheckAlignment.java deleted file mode 100644 index 5996418ac..000000000 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/alignment/CheckAlignment.java +++ /dev/null @@ -1,167 +0,0 @@ -/* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -package org.broadinstitute.gatk.engine.alignment; - -import org.broadinstitute.gatk.engine.alignment.bwa.BWAConfiguration; -import org.broadinstitute.gatk.engine.alignment.bwa.BWTFiles; -import org.broadinstitute.gatk.engine.alignment.bwa.c.BWACAligner; -import org.broadinstitute.gatk.utils.commandline.Argument; -import org.broadinstitute.gatk.engine.CommandLineGATK; -import org.broadinstitute.gatk.engine.contexts.ReferenceContext; -import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker; -import org.broadinstitute.gatk.engine.walkers.ReadWalker; -import org.broadinstitute.gatk.utils.BaseUtils; -import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; -import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; -import org.broadinstitute.gatk.utils.help.HelpConstants; -import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; - -import java.util.Iterator; - -/** - * Validates consistency of the aligner interface - * - *

Validates consistency of the aligner interface by taking reads already aligned by BWA in a BAM file, stripping them - * of their alignment data, realigning them, and making sure one of the best resulting realignments matches the original - * alignment from the input file.

- * - *

Caveat

- *

This tool requires that BWA be available on the java path.

- * - * @author mhanna - * @version 0.1 - */ -@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) -public class CheckAlignment extends ReadWalker { - /** - * The supporting BWT index generated using BWT. - */ - @Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false) - private String prefix = null; - - /** - * The instance used to generate alignments. - */ - private BWACAligner aligner = null; - - /** - * Create an aligner object. The aligner object will load and hold the BWT until close() is called. - */ - @Override - public void initialize() { - if(prefix == null) - prefix = getToolkit().getArguments().referenceFile.getAbsolutePath(); - BWTFiles bwtFiles = new BWTFiles(prefix); - BWAConfiguration configuration = new BWAConfiguration(); - aligner = new BWACAligner(bwtFiles,configuration); - } - - /** - * Aligns a read to the given reference. - * - * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. - * @param read Read to align. - * @return Number of reads aligned by this map (aka 1). - */ - @Override - public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) { - //logger.info(String.format("examining read %s", read.getReadName())); - - byte[] bases = read.getReadBases(); - if(read.getReadNegativeStrandFlag()) bases = BaseUtils.simpleReverseComplement(bases); - - boolean matches = true; - Iterable alignments = aligner.getAllAlignments(bases); - Iterator alignmentIterator = alignments.iterator(); - - if(!alignmentIterator.hasNext()) { - matches = read.getReadUnmappedFlag(); - } - else { - Alignment[] alignmentsOfBestQuality = alignmentIterator.next(); - for(Alignment alignment: alignmentsOfBestQuality) { - matches = (alignment.getContigIndex() == read.getReferenceIndex()); - matches &= (alignment.getAlignmentStart() == read.getAlignmentStart()); - matches &= (alignment.isNegativeStrand() == read.getReadNegativeStrandFlag()); - matches &= (alignment.getCigar().equals(read.getCigar())); - matches &= (alignment.getMappingQuality() == read.getMappingQuality()); - if(matches) break; - } - } - - if(!matches) { - logger.error("Found mismatch!"); - logger.error(String.format("Read %s:",read.getReadName())); - logger.error(String.format(" Contig index: %d",read.getReferenceIndex())); - logger.error(String.format(" Alignment start: %d", read.getAlignmentStart())); - logger.error(String.format(" Negative strand: %b", read.getReadNegativeStrandFlag())); - logger.error(String.format(" Cigar: %s%n", read.getCigarString())); - logger.error(String.format(" Mapping quality: %s%n", read.getMappingQuality())); - for(Alignment[] alignmentsByScore: alignments) { - for(int i = 0; i < alignmentsByScore.length; i++) { - logger.error(String.format("Alignment %d:",i)); - logger.error(String.format(" Contig index: %d",alignmentsByScore[i].getContigIndex())); - logger.error(String.format(" Alignment start: %d", alignmentsByScore[i].getAlignmentStart())); - logger.error(String.format(" Negative strand: %b", alignmentsByScore[i].isNegativeStrand())); - logger.error(String.format(" Cigar: %s", alignmentsByScore[i].getCigarString())); - logger.error(String.format(" Mapping quality: %s%n", alignmentsByScore[i].getMappingQuality())); - } - } - throw new ReviewedGATKException(String.format("Read %s mismatches!", read.getReadName())); - } - - return 1; - } - - /** - * Initial value for reduce. In this case, validated reads will be counted. - * @return 0, indicating no reads yet validated. - */ - @Override - public Integer reduceInit() { return 0; } - - /** - * Calculates the number of reads processed. - * @param value Number of reads processed by this map. - * @param sum Number of reads processed before this map. - * @return Number of reads processed up to and including this map. - */ - @Override - public Integer reduce(Integer value, Integer sum) { - return value + sum; - } - - /** - * Cleanup. - * @param result Number of reads processed. - */ - @Override - public void onTraversalDone(Integer result) { - aligner.close(); - super.onTraversalDone(result); - } - -} diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/alignment/bwa/c/BWACAligner.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/alignment/bwa/c/BWACAligner.java deleted file mode 100644 index 892dbebcc..000000000 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/alignment/bwa/c/BWACAligner.java +++ /dev/null @@ -1,284 +0,0 @@ -/* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -package org.broadinstitute.gatk.engine.alignment.bwa.c; - -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMRecord; -import org.broadinstitute.gatk.engine.alignment.Alignment; -import org.broadinstitute.gatk.engine.alignment.bwa.BWAAligner; -import org.broadinstitute.gatk.engine.alignment.bwa.BWAConfiguration; -import org.broadinstitute.gatk.engine.alignment.bwa.BWTFiles; -import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; - -import java.util.Arrays; -import java.util.Iterator; - -/** - * An aligner using the BWA/C implementation. - * - * @author mhanna - * @version 0.1 - */ -public class BWACAligner extends BWAAligner { - static { - System.loadLibrary("bwa"); - } - - /** - * A pointer to the C++ object representing the BWA engine. - */ - private long thunkPointer = 0; - - public BWACAligner(BWTFiles bwtFiles, BWAConfiguration configuration) { - super(bwtFiles,configuration); - if(thunkPointer != 0) - throw new ReviewedGATKException("BWA/C attempting to reinitialize."); - - if(!bwtFiles.annFile.exists()) throw new ReviewedGATKException("ANN file is missing; please rerun 'bwa aln' to regenerate it."); - if(!bwtFiles.ambFile.exists()) throw new ReviewedGATKException("AMB file is missing; please rerun 'bwa aln' to regenerate it."); - if(!bwtFiles.pacFile.exists()) throw new ReviewedGATKException("PAC file is missing; please rerun 'bwa aln' to regenerate it."); - if(!bwtFiles.forwardBWTFile.exists()) throw new ReviewedGATKException("Forward BWT file is missing; please rerun 'bwa aln' to regenerate it."); - if(!bwtFiles.forwardSAFile.exists()) throw new ReviewedGATKException("Forward SA file is missing; please rerun 'bwa aln' to regenerate it."); - if(!bwtFiles.reverseBWTFile.exists()) throw new ReviewedGATKException("Reverse BWT file is missing; please rerun 'bwa aln' to regenerate it."); - if(!bwtFiles.reverseSAFile.exists()) throw new ReviewedGATKException("Reverse SA file is missing; please rerun 'bwa aln' to regenerate it."); - - thunkPointer = create(bwtFiles,configuration); - } - - /** - * Create an aligner object using an array of bytes as a reference. - * @param referenceSequence Reference sequence to encode ad-hoc. - * @param configuration Configuration for the given aligner. - */ - public BWACAligner(byte[] referenceSequence, BWAConfiguration configuration) { - this(BWTFiles.createFromReferenceSequence(referenceSequence),configuration); - // Now that the temporary files are created, the temporary files can be destroyed. - bwtFiles.close(); - } - - /** - * Update the configuration passed to the BWA aligner. - * @param configuration New configuration to set. - */ - @Override - public void updateConfiguration(BWAConfiguration configuration) { - if(thunkPointer == 0) - throw new ReviewedGATKException("BWA/C: attempting to update configuration of uninitialized aligner."); - updateConfiguration(thunkPointer,configuration); - } - - /** - * Close this instance of the BWA pointer and delete its resources. - */ - @Override - public void close() { - if(thunkPointer == 0) - throw new ReviewedGATKException("BWA/C close attempted, but BWA/C is not properly initialized."); - destroy(thunkPointer); - } - - /** - * Allow the aligner to choose one alignment randomly from the pile of best alignments. - * @param bases Bases to align. - * @return An align - */ - @Override - public Alignment getBestAlignment(final byte[] bases) { - if(thunkPointer == 0) - throw new ReviewedGATKException("BWA/C getBestAlignment attempted, but BWA/C is not properly initialized."); - return getBestAlignment(thunkPointer,bases); - } - - /** - * Get the best aligned read, chosen randomly from the pile of best alignments. - * @param read Read to align. - * @param newHeader New header to apply to this SAM file. Can be null, but if so, read header must be valid. - * @return Read with injected alignment data. - */ - @Override - public SAMRecord align(final SAMRecord read, final SAMFileHeader newHeader) { - if(bwtFiles.autogenerated) - throw new UnsupportedOperationException("Cannot create target alignment; source contig was generated ad-hoc and is not reliable"); - return Alignment.convertToRead(getBestAlignment(read.getReadBases()),read,newHeader); - } - - /** - * Get a iterator of alignments, batched by mapping quality. - * @param bases List of bases. - * @return Iterator to alignments. - */ - @Override - public Iterable getAllAlignments(final byte[] bases) { - final BWAPath[] paths = getPaths(bases); - return new Iterable() { - public Iterator iterator() { - return new Iterator() { - /** - * The last position accessed. - */ - private int position = 0; - - /** - * Whether all alignments have been seen based on the current position. - * @return True if any more alignments are pending. False otherwise. - */ - public boolean hasNext() { return position < paths.length; } - - /** - * Return the next cross-section of alignments, based on mapping quality. - * @return Array of the next set of alignments of a given mapping quality. - */ - public Alignment[] next() { - if(position >= paths.length) - throw new UnsupportedOperationException("Out of alignments to return."); - int score = paths[position].score; - int startingPosition = position; - while(position < paths.length && paths[position].score == score) position++; - return convertPathsToAlignments(bases,Arrays.copyOfRange(paths,startingPosition,position)); - } - - /** - * Unsupported. - */ - public void remove() { throw new UnsupportedOperationException("Cannot remove from an alignment iterator"); } - }; - } - }; - } - - /** - * Get a iterator of aligned reads, batched by mapping quality. - * @param read Read to align. - * @param newHeader Optional new header to use when aligning the read. If present, it must be null. - * @return Iterator to alignments. - */ - @Override - public Iterable alignAll(final SAMRecord read, final SAMFileHeader newHeader) { - if(bwtFiles.autogenerated) - throw new UnsupportedOperationException("Cannot create target alignment; source contig was generated ad-hoc and is not reliable"); - final Iterable alignments = getAllAlignments(read.getReadBases()); - return new Iterable() { - public Iterator iterator() { - final Iterator alignmentIterator = alignments.iterator(); - return new Iterator() { - /** - * Whether all alignments have been seen based on the current position. - * @return True if any more alignments are pending. False otherwise. - */ - public boolean hasNext() { return alignmentIterator.hasNext(); } - - /** - * Return the next cross-section of alignments, based on mapping quality. - * @return Array of the next set of alignments of a given mapping quality. - */ - public SAMRecord[] next() { - Alignment[] alignmentsOfQuality = alignmentIterator.next(); - SAMRecord[] reads = new SAMRecord[alignmentsOfQuality.length]; - for(int i = 0; i < alignmentsOfQuality.length; i++) { - reads[i] = Alignment.convertToRead(alignmentsOfQuality[i],read,newHeader); - } - return reads; - } - - /** - * Unsupported. - */ - public void remove() { throw new UnsupportedOperationException("Cannot remove from an alignment iterator"); } - }; - } - }; - } - - /** - * Get the paths associated with the given base string. - * @param bases List of bases. - * @return A set of paths through the BWA. - */ - public BWAPath[] getPaths(byte[] bases) { - if(thunkPointer == 0) - throw new ReviewedGATKException("BWA/C getPaths attempted, but BWA/C is not properly initialized."); - return getPaths(thunkPointer,bases); - } - - /** - * Create a pointer to the BWA/C thunk. - * @param files BWT source files. - * @param configuration Configuration of the aligner. - * @return Pointer to the BWA/C thunk. - */ - protected native long create(BWTFiles files, BWAConfiguration configuration); - - /** - * Update the configuration passed to the BWA aligner. For internal use only. - * @param thunkPointer pointer to BWA object. - * @param configuration New configuration to set. - */ - protected native void updateConfiguration(long thunkPointer, BWAConfiguration configuration); - - /** - * Destroy the BWA/C thunk. - * @param thunkPointer Pointer to the allocated thunk. - */ - protected native void destroy(long thunkPointer); - - /** - * Do the extra steps involved in converting a local alignment to a global alignment. - * @param bases ASCII representation of byte array. - * @param paths Paths through the current BWT. - * @return A list of alignments. - */ - protected Alignment[] convertPathsToAlignments(byte[] bases, BWAPath[] paths) { - if(thunkPointer == 0) - throw new ReviewedGATKException("BWA/C convertPathsToAlignments attempted, but BWA/C is not properly initialized."); - return convertPathsToAlignments(thunkPointer,bases,paths); - } - - /** - * Caller to the path generation functionality within BWA/C. Call this method's getPaths() wrapper (above) instead. - * @param thunkPointer pointer to the C++ object managing BWA/C. - * @param bases ASCII representation of byte array. - * @return A list of paths through the specified BWT. - */ - protected native BWAPath[] getPaths(long thunkPointer, byte[] bases); - - /** - * Do the extra steps involved in converting a local alignment to a global alignment. - * Call this method's convertPathsToAlignments() wrapper (above) instead. - * @param thunkPointer pointer to the C++ object managing BWA/C. - * @param bases ASCII representation of byte array. - * @param paths Paths through the current BWT. - * @return A list of alignments. - */ - protected native Alignment[] convertPathsToAlignments(long thunkPointer, byte[] bases, BWAPath[] paths); - - /** - * Gets the best alignment from BWA/C, randomly selected from all best-aligned reads. - * @param thunkPointer Pointer to BWA thunk. - * @param bases bases to align. - * @return The best alignment from BWA/C. - */ - protected native Alignment getBestAlignment(long thunkPointer, byte[] bases); -} diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/alignment/bwa/c/BWAPath.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/alignment/bwa/c/BWAPath.java deleted file mode 100644 index 77e0711b5..000000000 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/alignment/bwa/c/BWAPath.java +++ /dev/null @@ -1,101 +0,0 @@ -/* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -package org.broadinstitute.gatk.engine.alignment.bwa.c; - -/** - * Models a BWA path. - * - * @author mhanna - * @version 0.1 - */ -public class BWAPath { - /** - * Number of mismatches encountered along this path. - */ - public final int numMismatches; - - /** - * Number of gap opens encountered along this path. - */ - public final int numGapOpens; - - /** - * Number of gap extensions along this path. - */ - public final int numGapExtensions; - - /** - * Whether this alignment was found on the positive or negative strand. - */ - public final boolean negativeStrand; - - /** - * Starting coordinate in the BWT. - */ - public final long k; - - /** - * Ending coordinate in the BWT. - */ - public final long l; - - /** - * The score of this path. - */ - public final int score; - - /** - * The number of best alignments seen along this path. - */ - public final int bestCount; - - /** - * The number of second best alignments seen along this path. - */ - public final int secondBestCount; - - /** - * Create a new path with the given attributes. - * @param numMismatches Number of mismatches along path. - * @param numGapOpens Number of gap opens along path. - * @param numGapExtensions Number of gap extensions along path. - * @param k Index to first coordinate within BWT. - * @param l Index to last coordinate within BWT. - * @param score Score of this alignment. Not the mapping quality. - */ - public BWAPath(int numMismatches, int numGapOpens, int numGapExtensions, boolean negativeStrand, long k, long l, int score, int bestCount, int secondBestCount) { - this.numMismatches = numMismatches; - this.numGapOpens = numGapOpens; - this.numGapExtensions = numGapExtensions; - this.negativeStrand = negativeStrand; - this.k = k; - this.l = l; - this.score = score; - this.bestCount = bestCount; - this.secondBestCount = secondBestCount; - } - -} diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/ValidationAmplicons.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/ValidationAmplicons.java deleted file mode 100644 index 45e408ffa..000000000 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/ValidationAmplicons.java +++ /dev/null @@ -1,579 +0,0 @@ -/* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -package org.broadinstitute.gatk.tools.walkers.validation; - -import htsjdk.samtools.reference.ReferenceSequenceFileFactory; -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMSequenceDictionary; -import org.broadinstitute.gatk.engine.alignment.Alignment; -import org.broadinstitute.gatk.engine.alignment.bwa.BWAConfiguration; -import org.broadinstitute.gatk.engine.alignment.bwa.BWTFiles; -import org.broadinstitute.gatk.engine.alignment.bwa.c.BWACAligner; -import org.broadinstitute.gatk.utils.commandline.*; -import org.broadinstitute.gatk.engine.CommandLineGATK; -import org.broadinstitute.gatk.engine.contexts.AlignmentContext; -import org.broadinstitute.gatk.engine.contexts.ReferenceContext; -import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker; -import org.broadinstitute.gatk.utils.codecs.table.TableFeature; -import org.broadinstitute.gatk.engine.walkers.DataSource; -import org.broadinstitute.gatk.engine.walkers.Requires; -import org.broadinstitute.gatk.engine.walkers.RodWalker; -import org.broadinstitute.gatk.utils.BaseUtils; -import org.broadinstitute.gatk.utils.GenomeLoc; -import org.broadinstitute.gatk.utils.Utils; -import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; -import org.broadinstitute.gatk.utils.help.HelpConstants; -import htsjdk.variant.variantcontext.VariantContext; - -import java.io.File; -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.LinkedList; -import java.util.List; - -/** - * Creates FASTA sequences for use in Seqenom or PCR utilities for site amplification and subsequent validation - * - *

- * ValidationAmplicons consumes a VCF and an Interval list and produces FASTA sequences from which PCR primers or probe - * sequences can be designed. In addition, ValidationAmplicons uses BWA to check for specificity of tracts of bases within - * the output amplicon, lower-casing non-specific tracts, allows for users to provide sites to mask out, and specifies - * reasons why the site may fail validation (nearby variation, for example). - *

- * - *

Input

- *

- * Requires a VCF containing alleles to design amplicons towards, a VCF of variants to mask out of the amplicons, and an - * interval list defining the size of the amplicons around the sites to be validated - *

- * - *

Output

- *

- * Output is a FASTA-formatted file with some modifications at probe sites. For instance: - *

- * >20:207414 INSERTION=1,VARIANT_TOO_NEAR_PROBE=1, 20_207414
- * CCAACGTTAAGAAAGAGACATGCGACTGGGTgcggtggctcatgcctggaaccccagcactttgggaggccaaggtgggc[A/G*]gNNcacttgaggtcaggagtttgagaccagcctggccaacatggtgaaaccccgtctctactgaaaatacaaaagttagC
- * >20:792122 Valid 20_792122
- * TTTTTTTTTagatggagtctcgctcttatcgcccaggcNggagtgggtggtgtgatcttggctNactgcaacttctgcct[-/CCC*]cccaggttcaagtgattNtcctgcctcagccacctgagtagctgggattacaggcatccgccaccatgcctggctaatTT
- * >20:994145 Valid 20_994145
- * TCCATGGCCTCCCCCTGGCCCACGAAGTCCTCAGCCACCTCCTTCCTGGAGGGCTCAGCCAAAATCAGACTGAGGAAGAAG[AAG/-*]TGGTGGGCACCCACCTTCTGGCCTTCCTCAGCCCCTTATTCCTAGGACCAGTCCCCATCTAGGGGTCCTCACTGCCTCCC
- * >20:1074230 SITE_IS_FILTERED=1, 20_1074230
- * ACCTGATTACCATCAATCAGAACTCATTTCTGTTCCTATCTTCCACCCACAATTGTAATGCCTTTTCCATTTTAACCAAG[T/C*]ACTTATTATAtactatggccataacttttgcagtttgaggtatgacagcaaaaTTAGCATACATTTCATTTTCCTTCTTC
- * >20:1084330 DELETION=1, 20_1084330
- * CACGTTCGGcttgtgcagagcctcaaggtcatccagaggtgatAGTTTAGGGCCCTCTCAAGTCTTTCCNGTGCGCATGG[GT/AC*]CAGCCCTGGGCACCTGTNNNNNNNNNNNNNTGCTCATGGCCTTCTAGATTCCCAGGAAATGTCAGAGCTTTTCAAAGCCC
- *
- * are amplicon sequences resulting from running the tool. The flags (preceding the sequence itself) can be: - *
- * Valid                     // amplicon is valid
- * SITE_IS_FILTERED=1        // validation site is not marked 'PASS' or '.' in its filter field ("you are trying to validate a filtered variant")
- * VARIANT_TOO_NEAR_PROBE=1  // there is a variant too near to the variant to be validated, potentially shifting the mass-spec peak
- * MULTIPLE_PROBES=1,        // multiple variants to be validated found inside the same amplicon
- * DELETION=6,INSERTION=5,   // 6 deletions and 5 insertions found inside the amplicon region (from the "mask" VCF), will be potentially difficult to validate
- * DELETION=1,               // deletion found inside the amplicon region, could shift mass-spec peak
- * START_TOO_CLOSE,          // variant is too close to the start of the amplicon region to give sequenom a good chance to find a suitable primer
- * END_TOO_CLOSE,            // variant is too close to the end of the amplicon region to give sequenom a good chance to find a suitable primer
- * NO_VARIANTS_FOUND,        // no variants found within the amplicon region
- * INDEL_OVERLAPS_VALIDATION_SITE, // an insertion or deletion interferes directly with the site to be validated (i.e. insertion directly preceding or postceding, or a deletion that spans the site itself)
- * 

- * - *

Examples

- *
- *    java
- *      -jar GenomeAnalysisTK.jar
- *      -T ValidationAmplicons
- *      -R /humgen/1kg/reference/human_g1k_v37.fasta
- *      -L:table interval_table.table
- *      -ProbeIntervals:table interval_table.table
- *      -ValidateAlleles:vcf sites_to_validate.vcf
- *      -MaskAlleles:vcf mask_sites.vcf
- *      --virtualPrimerSize 30
- *      -o probes.fasta
- * 
- * - * @author chartl - * @since July 2011 - */ -@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VALIDATION, extraDocs = {CommandLineGATK.class} ) -@Requires(value={DataSource.REFERENCE}) -public class ValidationAmplicons extends RodWalker { - /** - * A Table-formatted file listing amplicon contig, start, stop, and a name for the amplicon (or probe) - */ - @Input(fullName = "ProbeIntervals", doc="A collection of intervals in table format with optional names that represent the "+ - "intervals surrounding the probe sites amplicons should be designed for", required=true) - RodBinding probeIntervals; - /** - * A VCF file containing the bi-allelic sites for validation. Filtered records will prompt a warning, and will be flagged as filtered in the output fastq. - */ - @Input(fullName = "ValidateAlleles", doc="A VCF containing the sites and alleles you want to validate. Restricted to *BI-Allelic* sites", required=true) - RodBinding validateAlleles; - /** - * A VCF file containing variants to be masked. A mask variant overlapping a validation site will be ignored at the validation site. - */ - @Input(fullName = "MaskAlleles", doc="A VCF containing the sites you want to MASK from the designed amplicon (e.g. by Ns or lower-cased bases)", required=true) - RodBinding maskAlleles; - - @Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false) - boolean lowerCaseSNPs = false; - - /** - * If onlyOutputValidAmplicons is true, the output fasta file will contain only valid sequences. - * Useful for producing delivery-ready files. - */ - @Argument(doc="Only output valid sequences.",fullName="onlyOutputValidAmplicons",required=false) - boolean onlyOutputValidAmplicons = false; - - /** - * If ignoreComplexEvents is true, the output fasta file will contain only sequences coming from SNPs and Indels. - * Complex substitutions will be ignored. - */ - @Argument(doc="Ignore complex genomic records.",fullName="ignoreComplexEvents",required=false) - boolean ignoreComplexEvents = false; - - /** - * BWA single-end alignment is used as a primer specificity proxy. Low-complexity regions (that don't align back to themselves as a best hit) are lowercased. - * This changes the size of the k-mer used for alignment. - */ - @Argument(doc="Size of the virtual primer to use for lower-casing regions with low specificity",fullName="virtualPrimerSize",required=false) - int virtualPrimerSize = 20; - - @Argument(doc="Monomorphic sites in the mask file will be treated as filtered",fullName="filterMonomorphic",required=false) - boolean filterMonomorphic = false; - - @Argument(doc="Do not use BWA, lower-case repeats only",fullName="doNotUseBWA",required=false) - boolean doNotUseBWA = false; - - @Hidden - @Argument(doc="Use Sequenom output format instead of regular FASTA",fullName="sqnm",required=false) - boolean sequenomOutput = false; - - @Hidden - @Argument(doc="Use ILMN output format instead of regular FASTA",fullName="ilmn",required=false) - boolean ilmnOutput = false; - - - GenomeLoc prevInterval; - GenomeLoc allelePos; - String probeName; - StringBuilder sequence; - StringBuilder rawSequence; - boolean sequenceInvalid; - boolean isSiteSNP; - boolean isSiteIndel; - List invReason; - int indelCounter; - - @Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " + - "generated by bwa index -d bwtsw. If unspecified, will default " + - "to the reference specified via the -R argument.",required=false) - private File targetReferenceFile = null; - - @Output - PrintStream out; - - BWACAligner aligner = null; - - private SAMFileHeader header = null; - - public void initialize() { - if ( ! doNotUseBWA ) { - if(targetReferenceFile == null) - targetReferenceFile = getToolkit().getArguments().referenceFile; - BWTFiles bwtFiles = new BWTFiles(targetReferenceFile.getAbsolutePath()); - BWAConfiguration configuration = new BWAConfiguration(); - aligner = new BWACAligner(bwtFiles,configuration); - header = new SAMFileHeader(); - SAMSequenceDictionary referenceDictionary = - ReferenceSequenceFileFactory.getReferenceSequenceFile(targetReferenceFile).getSequenceDictionary(); - header.setSequenceDictionary(referenceDictionary); - header.setSortOrder(SAMFileHeader.SortOrder.unsorted); - } - - if (ilmnOutput) - out.println("Locus_Name,Target_Type,Sequence,Chromosome,Coordinate,Genome_Build_Version,Source,Source_Version,Sequence_Orientation,Plus_Minus,Force_Infinium_I"); - } - - public Integer reduceInit() { - prevInterval = null; - sequence = null; - rawSequence = null; - sequenceInvalid = false; - probeName = null; - invReason = null; - indelCounter = 0; - return 0; - } - - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( tracker == null || ! tracker.hasValues(probeIntervals)) { return null; } - - TableFeature feature = tracker.getFirstValue(probeIntervals); - GenomeLoc interval = feature.getLocation(); - //logger.debug(interval); - if ( prevInterval == null || ! interval.equals(prevInterval) ) { - // we're in a new interval, we should: - // 1) print out previous data - // 2) reset internal data - // 3) instantiate traversal of this interval - - // step 1: - if ( prevInterval != null ) { - // there was a previous interval - validateSequence(); // ensure the sequence in the region is valid - // next line removed in favor of the one after - if ( doNotUseBWA ) { - lowerRepeats(); // change repeats in sequence to lower case - } else { - lowerNonUniqueSegments(); - } - print(); // print out the fasta sequence - } - - // step 2: - prevInterval = interval; - allelePos = null; - sequence = new StringBuilder(); - rawSequence = new StringBuilder(); - sequenceInvalid = false; - invReason = new LinkedList(); - logger.debug(Utils.join("\t",feature.getAllValues())); - probeName = feature.getValue(1); - indelCounter = 0; - } - - // step 3 (or 1 if not new): - // build up the sequence - - VariantContext mask = tracker.getFirstValue(maskAlleles, ref.getLocus()); - VariantContext validate = tracker.getFirstValue(validateAlleles,ref.getLocus()); - - if ( mask == null && validate == null ) { - if ( indelCounter > 0 ) { - sequence.append('N'); - indelCounter--; - } else { - sequence.append(Character.toUpperCase((char) ref.getBase())); - } - rawSequence.append(Character.toUpperCase((char) ref.getBase())); - } else if ( validate != null ) { - // record variant type in case it's needed in output format - isSiteSNP = (validate.isSNP()); - isSiteIndel = (validate.isIndel()); - // doesn't matter if there's a mask here too -- this is what we want to validate - if ( validate.isFiltered() ) { - logger.warn("You are attempting to validate a filtered site. Why are you attempting to validate a filtered site? You should not be attempting to validate a filtered site."); - sequenceInvalid = true; - invReason.add("SITE_IS_FILTERED"); - } - - String refString = validate.getReference().getDisplayString(); - String altString = validate.getAlternateAllele(0).getDisplayString(); - - if ( validate.isIndel() ) { - sequence.append(Character.toUpperCase((char)ref.getBase())); - rawSequence.append(Character.toUpperCase((char)ref.getBase())); - final byte[] refAllele = validate.getReference().getBases(); - refString = new String(Arrays.copyOfRange(refAllele, 1, refAllele.length)); - if ( refString.isEmpty() ) - refString = "-"; - final byte[] altAllele = validate.getAlternateAllele(0).getBases(); - altString = new String(Arrays.copyOfRange(altAllele, 1, altAllele.length)); - if ( altString.isEmpty() ) - altString = "-"; - } - - sequence.append('['); - sequence.append(altString); - sequence.append('/'); - sequence.append(refString); - sequence.append(']'); - // do this to the raw sequence to -- the indeces will line up that way - rawSequence.append('['); - rawSequence.append(altString); - rawSequence.append('/'); - rawSequence.append(refString); - rawSequence.append(']'); - allelePos = ref.getLocus(); - if ( indelCounter > 0 ) { - logger.warn("An indel event overlaps the event to be validated. This completely invalidates the probe."); - sequenceInvalid = true; - invReason.add("INDEL_OVERLAPS_VALIDATION_SITE"); - if ( validate.isSNP() ) { - indelCounter--; - } else { - indelCounter -= validate.getEnd()-validate.getStart(); - } - } - } else /* (mask != null && validate == null ) */ { - if ( ! mask.isSNP() && ! mask.isFiltered() && ( ! filterMonomorphic || ! mask.isMonomorphicInSamples() )) { - logger.warn("Mask Variant Context on the following warning line is not a SNP. Currently we can only mask out SNPs. This probe will not be designed."); - logger.warn(String.format("%s:%d-%d\t%s\t%s",mask.getChr(),mask.getStart(),mask.getEnd(),mask.isSimpleInsertion() ? "INS" : "DEL", Utils.join(",",mask.getAlleles()))); - sequenceInvalid = true; - invReason.add(mask.isSimpleInsertion() ? "INSERTION" : "DELETION"); - // note: indelCounter could be > 0 (could have small deletion within larger one). This always selects - // the larger event. - int indelCounterNew = mask.isSimpleInsertion() ? 2 : mask.getEnd()-mask.getStart(); - if ( indelCounterNew > indelCounter ) { - indelCounter = indelCounterNew; - } - //sequence.append((char) ref.getBase()); - //sequence.append(mask.isSimpleInsertion() ? 'I' : 'D'); - sequence.append("N"); - indelCounter--; - rawSequence.append(Character.toUpperCase((char) ref.getBase())); - } else if ( indelCounter > 0 ) { - // previous section resets the indel counter. Doesn't matter if there's a SNP underlying this, we just want to append an 'N' and move on. - sequence.append('N'); - indelCounter--; - rawSequence.append(Character.toUpperCase((char)ref.getBase())); - } else if ( ! mask.isFiltered() && ( ! filterMonomorphic || ! mask.isMonomorphicInSamples() )){ - logger.debug("SNP in mask found at " + ref.getLocus().toString()); - - if ( lowerCaseSNPs ) { - sequence.append(Character.toLowerCase((char) ref.getBase())); - } else { - sequence.append((char) BaseUtils.Base.N.base); - } - - rawSequence.append(Character.toUpperCase((char) ref.getBase())); - } else if ( mask.isSNP() ) { - logger.debug("SNP in mask found at "+ref.getLocus().toString()+" but was either filtered or monomorphic"); - sequence.append((Character.toUpperCase((char) ref.getBase()))); - rawSequence.append(Character.toUpperCase((char) ref.getBase())); - } - } - - return 1; - } - - public Integer reduce(Integer i, Integer j) { - return 0; - } - - public void onTraversalDone(Integer fin ) { - validateSequence(); - if ( doNotUseBWA ) { - lowerRepeats(); - } else { - lowerNonUniqueSegments(); - aligner.close(); - } - print(); - } - - public void validateSequence() { - // code for ensuring primer sequence is valid goes here - - // validate that there are no masked sites near to the variant site - String seq = sequence.toString(); - int start = seq.indexOf('[') - 4; - int end = seq.indexOf(']') + 5; - - if ( start < 50 ) { - logger.warn("There is not enough sequence before the start position of the probed allele for adequate probe design. This site will not be designed."); - sequenceInvalid = true; - invReason.add("START_TOO_CLOSE"); - } else if ( end > seq.length() - 50 ) { - logger.warn("There is not enough sequence after the end position of the probed allele fore adequate probe design. This site will not be desinged. "); - sequenceInvalid = true; - invReason.add("END_TOO_CLOSE"); - } else { - boolean maskNearVariantSite = false; - for ( int i = start; i < end; i++ ) { - maskNearVariantSite |= (seq.charAt(i) == 'N' || Character.isLowerCase(seq.charAt(i))); - } - - if ( maskNearVariantSite ) { - logger.warn("There is one (or more) mask variants within 4 basepair of the variant given for validation. This site will not be designed."); - sequenceInvalid = true; - invReason.add("VARIANT_TOO_NEAR_PROBE"); - } - } - - if ( seq.indexOf("[") != seq.lastIndexOf("[") ) { - logger.warn("Multiple probe variants were found within this interval. Please fix the definitions of the intervals so they do not overlap."); - sequenceInvalid = true; - invReason.add("MULTIPLE_PROBES"); - } - - if ( seq.indexOf("[") < 0 ) { - logger.warn("No variants in region were found. This site will not be designed."); - sequenceInvalid = true; - invReason.add("NO_VARIANTS_FOUND"); - } - } - - public void lowerNonUniqueSegments() { - if ( ! invReason.contains("MULTIPLE_PROBES") && !invReason.contains("NO_VARIANTS_FOUND") ) { - String leftFlank = rawSequence.toString().split("\\[")[0]; - String rightFlank = rawSequence.toString().split("\\]")[1]; - List badLeft = getBadIndeces(leftFlank); - List badRight = getBadIndeces(rightFlank); - // propagate lowercases into the printed sequence - for ( int idx = 0; idx < leftFlank.length(); idx++ ) { - while ( badLeft.size() > 0 && idx > badLeft.get(0) + virtualPrimerSize ) { - badLeft.remove(0); - } - - if ( badLeft.size() > 0 && badLeft.get(0) <= idx && idx <= badLeft.get(0) + virtualPrimerSize ) { - sequence.setCharAt(idx,Character.toLowerCase(sequence.charAt(idx))); - } - } - - int offset = 1 + rawSequence.indexOf("]"); - for ( int i= 0; i < rightFlank.length(); i++ ) { - int idx = i + offset; - while ( badRight.size() > 0 && i > badRight.get(0) + virtualPrimerSize ) { - //logger.debug("Removing "+badRight.get(0)+" because "+(badRight.get(0)+virtualPrimerSize)+" < "+i); - badRight.remove(0); - } - - if ( badRight.size() > 0 && badRight.get(0) <= i && i <= badRight.get(0) + virtualPrimerSize ) { - //logger.debug("Resetting character on right flank: "+idx+" "+i+" offset="+offset); - //logger.debug(sequence); - sequence.setCharAt(idx,Character.toLowerCase(sequence.charAt(idx))); - //logger.debug(sequence); - } - } - } - } - - private List getBadIndeces(String sequence) { - - List badLeftIndeces = new ArrayList(sequence.length()-virtualPrimerSize); - for ( int i = 0; i < sequence.length()-virtualPrimerSize ; i++ ) { - String toAlign = sequence.substring(i,i+virtualPrimerSize); - Iterable allAlignments = aligner.getAllAlignments(toAlign.getBytes()); - for ( Alignment[] alignments : allAlignments ) { - if ( alignments.length > 1 ) { - if ( alignments[0].getMappingQuality() == 0 ) { - // this region is bad -- multiple MQ alignments - badLeftIndeces.add(i); - } - } - } - } - - return badLeftIndeces; - } - - - /** - * Note- this is an old function - a proxy for identifying regions with low specificity to genome. Saved in case the alignment-based version - * turns out to be worse than just doing a simple repeat-lowering method. - */ - public void lowerRepeats() { - // convert to lower case low-complexity repeats, e.g. tandem k-mers - final int K_LIM = 8; - String seq = sequence.toString(); - StringBuilder newSequence = new StringBuilder(); - int start_pos = 0; - while( start_pos < seq.length() ) { - boolean broke = false; - for ( int length = K_LIM; length > 1; length -- ) { - //logger.debug(String.format("start1: %d end1: %d start2: %d end2: %d str: %d",start_pos,start_pos+length,start_pos+length,start_pos+2*length,seq.length())); - if ( start_pos + 2*length> seq.length() ) { - continue; - } - if ( equalsIgnoreNs(seq.substring(start_pos,start_pos+length),seq.substring(start_pos+length,start_pos+2*length)) ) { - newSequence.append(seq.substring(start_pos,start_pos+length).toLowerCase()); - newSequence.append(seq.substring(start_pos+length,start_pos+2*length).toLowerCase()); - start_pos += 2*length; - broke = true; - break; - } - } - - if ( ! broke ) { - newSequence.append(seq.substring(start_pos,start_pos+1)); - start_pos++; - } - - } - - if ( seq.indexOf("[") != seq.lastIndexOf("[") ) { - return; - } - - sequence = newSequence; - } - - public boolean equalsIgnoreNs(String one, String two) { - if ( one.length() != two.length() ) { return false; } - for ( int idx = 0; idx < one.length(); idx++ ) { - if ( Character.toUpperCase(one.charAt(idx)) != Character.toUpperCase(two.charAt(idx)) ) { - if ( Character.toUpperCase(one.charAt(idx)) != 'N' && Character.toUpperCase(two.charAt(idx)) != 'N' ) { - return false; - } - } - } - - //logger.debug(String.format("one: %s two: %s",one,two)); - - return true; - } - - public void print() { - String valid; - if ( sequenceInvalid ) { - valid = ""; - while ( invReason.size() > 0 ) { - String reason = invReason.get(0); - invReason.remove(reason); - int num = 1; - while ( invReason.contains(reason) ) { - num++; - invReason.remove(reason); - } - valid += String.format("%s=%d,",reason,num); - } - } else { - valid = "Valid"; - } - - - if (ignoreComplexEvents && !isSiteIndel && !isSiteSNP) - return; - - if (!onlyOutputValidAmplicons || !sequenceInvalid) { - String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D'); - if (sequenomOutput) { - seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record - probeName = probeName.replace("amplicon_","a"); - out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity); - } - else if (ilmnOutput) { - String type = isSiteSNP?"SNP":(isSiteIndel?"INDEL":"OTHER"); - seqIdentity = seqIdentity.replace("*",""); // no * in ref allele - out.printf("%s,%s,%s,%s,%d,37,1000G,ExomePhase1,Forward,Plus,FALSE%n",probeName,type,seqIdentity,allelePos.getContig(),allelePos.getStart()); - } - else{ - out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity); - } - } - } -}