diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeResolver.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeResolver.java index ddbd5dc76..abb1277d1 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeResolver.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeResolver.java @@ -339,18 +339,18 @@ public class HaplotypeResolver extends RodWalker { return overlap; } - private static final double SW_MATCH = 4.0; - private static final double SW_MISMATCH = -10.0; - private static final double SW_GAP = -25.0; - private static final double SW_GAP_EXTEND = -1.3; - private static final double SW_EPSILON = 0.00001; + private static final int SW_MATCH = 40; + private static final int SW_MISMATCH = -100; + private static final int SW_GAP = -250; + private static final int SW_GAP_EXTEND = -13; + private void resolveByHaplotype(final ReferenceContext refContext) { final byte[] source1Haplotype = generateHaplotype(sourceVCs1, refContext); final byte[] source2Haplotype = generateHaplotype(sourceVCs2, refContext); - final SWPairwiseAlignment swConsensus1 = new SWPairwiseAlignment( refContext.getBases(), source1Haplotype, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND, SW_EPSILON ); - final SWPairwiseAlignment swConsensus2 = new SWPairwiseAlignment( refContext.getBases(), source2Haplotype, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND, SW_EPSILON ); + final SWPairwiseAlignment swConsensus1 = new SWPairwiseAlignment( refContext.getBases(), source1Haplotype, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND ); + final SWPairwiseAlignment swConsensus2 = new SWPairwiseAlignment( refContext.getBases(), source2Haplotype, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND ); // protect against SW failures if( swConsensus1.getCigar().toString().contains("S") || swConsensus1.getCigar().getReferenceLength() < 20 || diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealigner.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealigner.java index dbd216a03..578057b9b 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealigner.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealigner.java @@ -329,7 +329,7 @@ public class IndelRealigner extends ReadWalker { // fraction of mismatches that need to no longer mismatch for a column to be considered cleaned private static final double MISMATCH_COLUMN_CLEANED_FRACTION = 0.75; - private final static Parameters swParameters = new Parameters(30.0, -10.0, -10.0, -2.0,0.0001); + private final static Parameters swParameters = new Parameters(30, -10, -10, -2); // reference base padding size // TODO -- make this a command-line argument if the need arises diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngineUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngineUnitTest.java index 8601a6879..2fd7a64f7 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngineUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngineUnitTest.java @@ -203,7 +203,7 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest { } public Map calcAlignment() { - final SWPairwiseAlignment alignment = new SWPairwiseAlignment(ref, hap, new Parameters(1.0,-1.0/3.0,-1 - 1./3., -1./3.,0.00001)); + final SWPairwiseAlignment alignment = new SWPairwiseAlignment(ref, hap, new Parameters(3,-1,-4, -1)); final Haplotype h = new Haplotype(hap, false, alignment.getAlignmentStart2wrt1(), alignment.getCigar()); return HaplotypeCallerGenotypingEngine.generateVCsFromAlignment(h, ref, genomeLocParser.createGenomeLoc("4", 1, 1 + ref.length), "name"); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/smithwaterman/SWPairwiseAlignmentUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/smithwaterman/SWPairwiseAlignmentUnitTest.java index c8da24ee7..f39be109c 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/smithwaterman/SWPairwiseAlignmentUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/smithwaterman/SWPairwiseAlignmentUnitTest.java @@ -46,6 +46,10 @@ package org.broadinstitute.gatk.utils.smithwaterman; +import htsjdk.samtools.Cigar; +import htsjdk.samtools.CigarElement; +import htsjdk.samtools.CigarOperator; +import org.broadinstitute.gatk.utils.sam.CigarUtils; import org.broadinstitute.gatk.utils.BaseTest; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -78,16 +82,16 @@ public class SWPairwiseAlignmentUnitTest extends BaseTest { final String ref1 = "AAAGACTACTG"; final String read1 = "AACGGACACTG"; - tests.add(new Object[]{ref1, read1, 5.0, -10.0, -22.0, -1.2, 0.0001, 1, "2M2I3M1D4M"}); - tests.add(new Object[]{ref1, read1, 20.0, -5.0, -30.0, -2.2, 0.0001, 0, "11M"}); + tests.add(new Object[]{ref1, read1, 50, -100, -220, -12, 1, "2M2I3M1D4M"}); + tests.add(new Object[]{ref1, read1, 200, -50, -300, -22, 0, "11M"}); return tests.toArray(new Object[][]{}); } @Test(dataProvider = "OddNoAlignment", enabled = true) - public void testOddNoAlignment(final String reference, final String read, final double match, final double mismatch, final double gap, final double gap_extend, final double epsilon, + public void testOddNoAlignment(final String reference, final String read, final int match, final int mismatch, final int gap, final int gap_extend, final int expectedStart, final String expectedCigar) { - final SWPairwiseAlignment sw = new SWPairwiseAlignment(reference.getBytes(), read.getBytes(), match, mismatch, gap, gap_extend, epsilon); + final SWPairwiseAlignment sw = new SWPairwiseAlignment(reference.getBytes(), read.getBytes(), match, mismatch, gap, gap_extend); Assert.assertEquals(sw.getAlignmentStart2wrt1(), expectedStart); Assert.assertEquals(sw.getCigar().toString(), expectedCigar); } @@ -117,4 +121,46 @@ public class SWPairwiseAlignmentUnitTest extends BaseTest { Assert.assertEquals(sw.getAlignmentStart2wrt1(), expectedStart); Assert.assertEquals(sw.getCigar().toString(), expectedCigar); } + + @Test(enabled = true) + public void testForIdenticalAlignmentsWithDifferingFlankLengths() { + //This test is designed to ensure that the indels are correctly placed + //if the region flanking these indels is extended by a varying amount. + //It checks for problems caused by floating point rounding leading to different + //paths being selected. + + //Create two versions of the same sequence with different flanking regions. + byte[] paddedRef="GCGTCGCAGTCTTAAGGCCCCGCCTTTTCAGACAGCTTCCGCTGGGCCTGGGCCGCTGCGGGGCGGTCACGGCCCCTTTAAGCCTGAGCCCCGCCCCCTGGCTCCCCGCCCCCTCTTCTCCCCTCCCCCAAGCCAGCACCTGGTGCCCCGGCGGGTCGTGCGGCGCGGCGCTCCGCGGTGAGCGCCTGACCCCGAGGGGGCCCGGGGCCGCGTCCCTGGGCCCTCCCCACCCTTGCGGTGGCCTCGCGGGTCCCAGGGGCGGGGCTGGAGCGGCAGCAGGGCCGGGGAGATGGGCGGTGGGGAGCGCGGGAGGGACCGGGCCGAGCCGGGGGAAGGGCTCCGGTGACT".getBytes(); + byte[] paddedHap="GCGTCGCAGTCTTAAGGCCCCGCCTTTTCAGACAGCTTCCGCTGGGCCTGGGCCGCTGCGGGGCGGTCACGGCCCCTTTAAGCCTGAGCCCCGCCCCCTGGCTCCCCGCCCCCTCTTCTCCCCTCCCCCAAGCCAGCACCTGGTGCCCCGGCGGGTCGTGCGGCGCGGCGCTCCGCGGTGAGCGCCTGACCCCGA--GGGCC---------------GGGCCCTCCCCACCCTTGCGGTGGCCTCGCGGGTCCCAGGGGCGGGGCTGGAGCGGCAGCAGGGCCGGGGAGATGGGCGGTGGGGAGCGCGGGAGGGACCGGGCCGAGCCGGGGGAAGGGCTCCGGTGACT".replace("-","").getBytes(); + byte[] notPaddedRef= "CTTTAAGCCTGAGCCCCGCCCCCTGGCTCCCCGCCCCCTCTTCTCCCCTCCCCCAAGCCAGCACCTGGTGCCCCGGCGGGTCGTGCGGCGCGGCGCTCCGCGGTGAGCGCCTGACCCCGAGGGGGCCCGGGGCCGCGTCCCTGGGCCCTCCCCACCCTTGCGGTGGCCTCGCGGGTCCCAGGGGCGGGGCTGGAGCGGCAGCAGGGCCGGGGAGATGGGCGGTGGGGAGCGCGGGAGGGA".getBytes(); + byte[] notPaddedHap= "CTTTAAGCCTGAGCCCCGCCCCCTGGCTCCCCGCCCCCTCTTCTCCCCTCCCCCAAGCCAGCACCTGGTGCCCCGGCGGGTCGTGCGGCGCGGCGCTCCGCGGTGAGCGCCTGACCCCGA---------GGGCC--------GGGCCCTCCCCACCCTTGCGGTGGCCTCGCGGGTCCCAGGGGCGGGGCTGGAGCGGCAGCAGGGCCGGGGAGATGGGCGGTGGGGAGCGCGGGAGGGA".replace("-","").getBytes(); + //a simplified version of the getCigar routine in the haplotype caller to align these + final String SW_PAD = "NNNNNNNNNN"; + final String paddedsRef = SW_PAD + new String(paddedRef) + SW_PAD; + final String paddedsHap = SW_PAD + new String(paddedHap) + SW_PAD; + final String notPaddedsRef = SW_PAD + new String(notPaddedRef) + SW_PAD; + final String notpaddedsHap = SW_PAD + new String(notPaddedHap) + SW_PAD; + final SmithWaterman paddedAlignment = new SWPairwiseAlignment( paddedsRef.getBytes(), paddedsHap.getBytes(), CigarUtils.NEW_SW_PARAMETERS ); + final SmithWaterman notPaddedAlignment = new SWPairwiseAlignment( notPaddedsRef.getBytes(), notpaddedsHap.getBytes(), CigarUtils.NEW_SW_PARAMETERS ); + //Now verify that the two sequences have the same alignment and not match positions. + Cigar rawPadded = paddedAlignment.getCigar(); + Cigar notPadded= notPaddedAlignment.getCigar(); + List paddedC=rawPadded.getCigarElements(); + List notPaddedC=notPadded.getCigarElements(); + Assert.assertEquals(paddedC.size(),notPaddedC.size()); + for(int i=0;i lce = new LinkedList(SW_result.cigar.getCigarElements()); if ( matchingPrefix > 0 ) diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/smithwaterman/Parameters.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/smithwaterman/Parameters.java index 248b48a1f..46cb8bee1 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/smithwaterman/Parameters.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/smithwaterman/Parameters.java @@ -35,11 +35,10 @@ package org.broadinstitute.gatk.utils.smithwaterman; * Time: 12:03 PM */ public final class Parameters { - public final double w_match; - public final double w_mismatch; - public final double w_open; - public final double w_extend; - public final double epsilon; + public final int w_match; + public final int w_mismatch; + public final int w_open; + public final int w_extend; /** * Create a new set of SW parameters @@ -47,20 +46,17 @@ public final class Parameters { * @param w_mismatch the mismatch penalty * @param w_open the gap open penalty * @param w_extend the gap extension penalty - * @param epsilon weight comparison error + */ - public Parameters(final double w_match, final double w_mismatch, final double w_open, final double w_extend, final double epsilon) { + public Parameters(final int w_match, final int w_mismatch, final int w_open, final int w_extend) { if ( w_mismatch > 0 ) throw new IllegalArgumentException("w_mismatch must be <= 0 but got " + w_mismatch); if ( w_open> 0 ) throw new IllegalArgumentException("w_open must be <= 0 but got " + w_open); if ( w_extend> 0 ) throw new IllegalArgumentException("w_extend must be <= 0 but got " + w_extend); - if ( Double.isNaN(epsilon)) throw new IllegalArgumentException("epsilon cannot be a NaN"); - if ( epsilon < 0.0 ) throw new IllegalArgumentException("epsilon cannot be negative"); this.w_match = w_match; this.w_mismatch = w_mismatch; this.w_open = w_open; this.w_extend = w_extend; - this.epsilon = epsilon; } } diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/smithwaterman/SWPairwiseAlignment.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/smithwaterman/SWPairwiseAlignment.java index f65cffae9..aa38c064d 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/smithwaterman/SWPairwiseAlignment.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/smithwaterman/SWPairwiseAlignment.java @@ -99,7 +99,7 @@ public class SWPairwiseAlignment implements SmithWaterman { /** * The SW scoring matrix, stored for debugging purposes if keepScoringMatrix is true */ - protected double[] SW = null; + protected int[][] SW = null; /** * Only for testing purposes in the SWPairwiseAlignmentMain function @@ -113,10 +113,8 @@ public class SWPairwiseAlignment implements SmithWaterman { * @deprecated in favor of constructors using the Parameter or ParameterSet class */ @Deprecated - public SWPairwiseAlignment(final byte[] seq1, final byte[] seq2, - final double match, final double mismatch, final double open, final double extend, - final double epsilon ) { - this(seq1, seq2, new Parameters(match, mismatch, open, extend, epsilon)); + public SWPairwiseAlignment(byte[] seq1, byte[] seq2, int match, int mismatch, int open, int extend ) { + this(seq1, seq2, new Parameters(match, mismatch, open, extend)); } /** @@ -149,22 +147,6 @@ public class SWPairwiseAlignment implements SmithWaterman { align(seq1, seq2); } - /** - * Create a new SW pairwise aligner - * - * After creating the object the two sequences are aligned with an internal call to align(seq1, seq2, parameters, strategy) - * - * @param seq1 the first sequence we want to align - * @param seq2 the second sequence we want to align - * @param parameters the parameters to use - * @param strategy the overhang strategy to use - */ - public SWPairwiseAlignment(final byte[] seq1, final byte[] seq2, final Parameters parameters, final OVERHANG_STRATEGY strategy) { - this(parameters); - overhang_strategy = strategy; - align(seq1, seq2); - } - /** * Create a new SW pairwise aligner, without actually doing any alignment yet * @@ -207,14 +189,14 @@ public class SWPairwiseAlignment implements SmithWaterman { if ( reference == null || reference.length == 0 || alternate == null || alternate.length == 0 ) throw new IllegalArgumentException("Non-null, non-empty sequences are required for the Smith-Waterman calculation"); - final int n = reference.length; - final int m = alternate.length; - double [] sw = new double[(n+1)*(m+1)]; + final int n = reference.length+1; + final int m = alternate.length+1; + int[][] sw = new int[n][m]; if ( keepScoringMatrix ) SW = sw; - int [] btrack = new int[(n+1)*(m+1)]; + int[][] btrack=new int[n][m]; calculateMatrix(reference, alternate, sw, btrack); - alignmentResult = calculateCigar(n, m, sw, btrack, overhang_strategy); // length of the segment (continuous matches, insertions or deletions) + alignmentResult = calculateCigar(sw, btrack, overhang_strategy); // length of the segment (continuous matches, insertions or deletions) } /** @@ -225,7 +207,7 @@ public class SWPairwiseAlignment implements SmithWaterman { * @param sw the Smith-Waterman matrix to populate * @param btrack the back track matrix to populate */ - protected void calculateMatrix(final byte[] reference, final byte[] alternate, double[] sw, int[] btrack) { + protected void calculateMatrix(final byte[] reference, final byte[] alternate, final int[][] sw, final int[][] btrack) { calculateMatrix(reference, alternate, sw, btrack, overhang_strategy); } @@ -238,62 +220,54 @@ public class SWPairwiseAlignment implements SmithWaterman { * @param btrack the back track matrix to populate * @param overhang_strategy the strategy to use for dealing with overhangs */ - protected void calculateMatrix(final byte[] reference, final byte[] alternate, double[] sw, int[] btrack, final OVERHANG_STRATEGY overhang_strategy) { + protected void calculateMatrix(final byte[] reference, final byte[] alternate, final int[][] sw, final int[][] btrack, final OVERHANG_STRATEGY overhang_strategy) { if ( reference.length == 0 || alternate.length == 0 ) throw new IllegalArgumentException("Non-null, non-empty sequences are required for the Smith-Waterman calculation"); - final int n = reference.length+1; - final int m = alternate.length+1; + final int ncol = sw[0].length;//alternate.length+1; formerly m + final int nrow = sw.length;// reference.length+1; formerly n - //final double MATRIX_MIN_CUTOFF=-1e100; // never let matrix elements drop below this cutoff - final double MATRIX_MIN_CUTOFF; // never let matrix elements drop below this cutoff - if ( cutoff ) MATRIX_MIN_CUTOFF = 0.0; - else MATRIX_MIN_CUTOFF = -1e100; + final int MATRIX_MIN_CUTOFF; // never let matrix elements drop below this cutoff + if ( cutoff ) MATRIX_MIN_CUTOFF = 0; + else MATRIX_MIN_CUTOFF = (int) -1e8; - final double[] best_gap_v = new double[m+1]; - Arrays.fill(best_gap_v, -1.0e40); - final int[] gap_size_v = new int[m+1]; - final double[] best_gap_h = new double[n+1]; - Arrays.fill(best_gap_h,-1.0e40); - final int[] gap_size_h = new int[n+1]; + int lowInitValue=Integer.MIN_VALUE/2; + final int[] best_gap_v = new int[ncol+1]; + Arrays.fill(best_gap_v, lowInitValue); + final int[] gap_size_v = new int[ncol+1]; + final int[] best_gap_h = new int[nrow+1]; + Arrays.fill(best_gap_h,lowInitValue); + final int[] gap_size_h = new int[nrow+1]; // we need to initialize the SW matrix with gap penalties if we want to keep track of indels at the edges of alignments if ( overhang_strategy == OVERHANG_STRATEGY.INDEL || overhang_strategy == OVERHANG_STRATEGY.LEADING_INDEL ) { // initialize the first row - sw[1] = parameters.w_open; - double currentValue = parameters.w_open; - for ( int i = 2; i < m; i++ ) { + int[] topRow=sw[0]; + topRow[1]=parameters.w_open; + int currentValue = parameters.w_open; + for ( int i = 2; i < topRow.length; i++ ) { currentValue += parameters.w_extend; - sw[i] = currentValue; + topRow[i]=currentValue; } - // initialize the first column - sw[m] = parameters.w_open; + sw[1][0]=parameters.w_open; currentValue = parameters.w_open; - for ( int i = 2; i < n; i++ ) { + for ( int i = 2; i < sw.length; i++ ) { currentValue += parameters.w_extend; - sw[i*m] = currentValue; + sw[i][0]=currentValue; } } - // build smith-waterman matrix and keep backtrack info: - for ( int i = 1, row_offset_1 = 0 ; i < n ; i++ ) { // we do NOT update row_offset_1 here, see comment at the end of this outer loop - byte a_base = reference[i-1]; // letter in a at the current pos - - final int row_offset = row_offset_1 + m; - - // On the entrance into the loop, row_offset_1 is the (linear) offset - // of the first element of row (i-1) and row_offset is the linear offset of the - // start of row i - - for ( int j = 1, data_offset_1 = row_offset_1 ; j < m ; j++, data_offset_1++ ) { - - // data_offset_1 is linearized offset of element [i-1][j-1] - + int[] curRow=sw[0]; + for ( int i = 1; i 0 ) { + if ( prev_gap > best_gap_v[j] ) { // opening a gap just before the current cell results in better score than extending by one // the best previously opened gap. This will hold for ALL cells below: since any gap // once opened always costs w_extend to extend by another base, we will always get a better score @@ -317,7 +289,7 @@ public class SWPairwiseAlignment implements SmithWaterman { gap_size_v[j]++; } - final double step_down = best_gap_v[j] ; + final int step_down = best_gap_v[j] ; final int kd = gap_size_v[j]; // optimized "traversal" of all the matrix cells to the left of the current one (i.e. traversing @@ -325,11 +297,9 @@ public class SWPairwiseAlignment implements SmithWaterman { // does exactly the same thing as the commented out loop below. IMPORTANT: // the optimization works ONLY for linear w(k)=wopen+(k-1)*wextend!!!! - final int data_offset = row_offset + j; // linearized offset of element [i][j] - prev_gap = sw[data_offset-1]+parameters.w_open; // what would it cost us to open length 1 gap just to the left from current cell + prev_gap =curRow[j-1] + parameters.w_open; // what would it cost us to open length 1 gap just to the left from current cell best_gap_h[i] += parameters.w_extend; // previous best gap would cost us that much if extended by another base - - if ( compareScores(prev_gap,best_gap_h[i]) > 0 ) { + if ( prev_gap > best_gap_h[i] ) { // newly opened gap is better (score-wise) than any previous gap with the same row index i; since // gap penalty is linear with k, this new gap location is going to remain better than any previous ones best_gap_h[i] = prev_gap; @@ -338,34 +308,26 @@ public class SWPairwiseAlignment implements SmithWaterman { gap_size_h[i]++; } - final double step_right = best_gap_h[i]; + final int step_right = best_gap_h[i]; final int ki = gap_size_h[i]; - if ( compareScores(step_down, step_right) > 0 ) { - if ( compareScores(step_down,step_diag) > 0) { - sw[data_offset] = Math.max(MATRIX_MIN_CUTOFF,step_down); - btrack[data_offset] = kd ; // positive=vertical - } else { - sw[data_offset] = Math.max(MATRIX_MIN_CUTOFF,step_diag); - btrack[data_offset] = 0; // 0 = diagonal - } - } else { - // step_down <= step_right - if ( compareScores(step_right, step_diag) > 0 ) { - sw[data_offset] = Math.max(MATRIX_MIN_CUTOFF,step_right); - btrack[data_offset] = -ki; // negative = horizontal - } else { - sw[data_offset] = Math.max(MATRIX_MIN_CUTOFF,step_diag); - btrack[data_offset] = 0; // 0 = diagonal - } + //priority here will be step diagonal, step right, step down + final boolean diagHighestOrEqual = (step_diag >= step_down) + && (step_diag >= step_right); + + if ( diagHighestOrEqual ) { + curRow[j]=Math.max(MATRIX_MIN_CUTOFF,step_diag); + curBackTrackRow[j]=0; + } + else if(step_right>=step_down) { //moving right is the highest + curRow[j]=Math.max(MATRIX_MIN_CUTOFF,step_right); + curBackTrackRow[j]=-ki; // negative = horizontal + } + else { + curRow[j]=Math.max(MATRIX_MIN_CUTOFF,step_down); + curBackTrackRow[j]= kd; // positive=vertical } } - - // IMPORTANT, IMPORTANT, IMPORTANT: - // note that we update this (secondary) outer loop variable here, - // so that we DO NOT need to update it - // in the for() statement itself. - row_offset_1 = row_offset; } } @@ -381,40 +343,22 @@ public class SWPairwiseAlignment implements SmithWaterman { } } - /** - * Compares alternative (sub)alignments scores considering - * a round-off error: {@link #parameters#epsilon}. - * - * @param score1 first score. - * @param score2 second score. - * - * @return -1 if {@code score1} is less than {@code score2}, 0 if the are the same, 1 if {@code score2} is - * greater than {@code score1}. - */ - private int compareScores(final double score1, final double score2) { - if (score1 == score2) - return 0; - else if (score1 < score2) - return score2 - score1 > parameters.epsilon ? -1 : 0; - else - return score1 - score2 > parameters.epsilon ? +1 : 0; - } - /** * Calculates the CIGAR for the alignment from the back track matrix * - * @param refLength length of the reference sequence - * @param altLength length of the alternate sequence * @param sw the Smith-Waterman matrix to use * @param btrack the back track matrix to use * @param overhang_strategy the strategy to use for dealing with overhangs * @return non-null SWPairwiseAlignmentResult object */ - protected SWPairwiseAlignmentResult calculateCigar(final int refLength, final int altLength, final double[] sw, final int[] btrack, final OVERHANG_STRATEGY overhang_strategy) { + protected SWPairwiseAlignmentResult calculateCigar(final int[][] sw, final int[][] btrack, final OVERHANG_STRATEGY overhang_strategy) { // p holds the position we start backtracking from; we will be assembling a cigar in the backwards order int p1 = 0, p2 = 0; - double maxscore = Double.NEGATIVE_INFINITY; // sw scores are allowed to be negative + int refLength = sw.length-1; + int altLength = sw[0].length-1; + + int maxscore = Integer.MIN_VALUE; // sw scores are allowed to be negative int segment_length = 0; // length of the segment (continuous matches, insertions or deletions) // if we want to consider overhangs as legitimate operators, then just start from the corner of the matrix @@ -424,31 +368,34 @@ public class SWPairwiseAlignment implements SmithWaterman { } else { // look for the largest score on the rightmost column. we use >= combined with the traversal direction // to ensure that if two scores are equal, the one closer to diagonal gets picked - for ( int i = 1, data_offset = altLength+1+altLength ; i < refLength+1 ; i++, data_offset += (altLength+1) ) { - // data_offset is the offset of [i][m] + //Note: this is not technically smith-waterman, as by only looking for max values on the right we are + //excluding high scoring local alignments + p2=altLength; - if ( compareScores(sw[data_offset],maxscore) >= 0 ) { // sw[data_offset] >= maxscore - p1 = i; p2 = altLength ; maxscore = sw[data_offset]; - } + for(int i=1;i= maxscore ) { + p1 = i; + maxscore = curScore; + } } - // now look for a larger score on the bottom-most row if ( overhang_strategy != OVERHANG_STRATEGY.LEADING_INDEL ) { - for ( int j = 1, data_offset = refLength*(altLength+1)+1 ; j < altLength+1 ; j++, data_offset++ ) { + final int[] bottomRow=sw[refLength]; + for ( int j = 1 ; j < bottomRow.length; j++) { + int curScore=bottomRow[j]; // data_offset is the offset of [n][j] - final int scoreComparison = compareScores(sw[data_offset],maxscore); - if ( scoreComparison > 0 || scoreComparison == 0 && Math.abs(refLength-j) < Math.abs(p1 - p2)) { + if ( curScore > maxscore || + (curScore == maxscore && Math.abs(refLength-j) < Math.abs(p1 - p2) ) ) { p1 = refLength; p2 = j ; - maxscore = sw[data_offset]; + maxscore = curScore; segment_length = altLength - j ; // end of sequence 2 is overhanging; we will just record it as 'M' segment } } } } - final List lce = new ArrayList(5); - if ( segment_length > 0 && overhang_strategy == OVERHANG_STRATEGY.SOFTCLIP ) { lce.add(makeElement(State.CLIP, segment_length)); segment_length = 0; @@ -458,14 +405,10 @@ public class SWPairwiseAlignment implements SmithWaterman { // to that sequence State state = State.MATCH; - - int data_offset = p1*(altLength+1)+p2; // offset of element [p1][p2] do { - int btr = btrack[data_offset]; - + int btr = btrack[p1][p2]; State new_state; int step_length = 1; - if ( btr > 0 ) { new_state = State.DELETION; step_length = btr; @@ -476,9 +419,9 @@ public class SWPairwiseAlignment implements SmithWaterman { // move to next best location in the sw matrix: switch( new_state ) { - case MATCH: data_offset -= (altLength+2); p1--; p2--; break; // move back along the diag in the sw matrix - case INSERTION: data_offset -= step_length; p2 -= step_length; break; // move left - case DELETION: data_offset -= (altLength+1)*step_length; p1 -= step_length; break; // move up + case MATCH: p1--; p2--; break; // move back along the diag in the sw matrix + case INSERTION: p2 -= step_length; break; // move left + case DELETION: p1 -= step_length; break; // move up } // now let's see if the state actually changed: @@ -537,7 +480,8 @@ public class SWPairwiseAlignment implements SmithWaterman { return new CigarElement(length, op); } - private double wd(byte x, byte y) { + + private int wd(final byte x, final byte y) { return (x == y ? parameters.w_match : parameters.w_mismatch); } diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/smithwaterman/SWPairwiseAlignmentMain.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/smithwaterman/SWPairwiseAlignmentMain.java index f111a64d5..006acefd6 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/smithwaterman/SWPairwiseAlignmentMain.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/smithwaterman/SWPairwiseAlignmentMain.java @@ -90,19 +90,16 @@ public class SWPairwiseAlignmentMain { System.exit(1); } - double w_match; - double w_mismatch; - double w_open; - double w_extend; + final int w_match, w_mismatch, w_open, w_extend; - w_match = (m == null ? 30.0 : m.doubleValue()); - w_mismatch = (mm == null ? -10.0 : mm.doubleValue()); - w_open = (open == null ? -10.0 : open.doubleValue()); - w_extend = (ext == null ? -2.0 : ext.doubleValue()); + w_match = (m == null ? 30 : m.intValue()); + w_mismatch = (mm == null ? -10 : mm.intValue()); + w_open = (open == null ? -10 : open.intValue()); + w_extend = (ext == null ? -2 : ext.intValue()); SWPairwiseAlignment.keepScoringMatrix = true; - SWPairwiseAlignment a = new SWPairwiseAlignment(ref.getBytes(),read.getBytes(),w_match,w_mismatch,w_open,w_extend, 0.0001); + SWPairwiseAlignment a = new SWPairwiseAlignment(ref.getBytes(),read.getBytes(),w_match,w_mismatch,w_open,w_extend); System.out.println("start="+a.getAlignmentStart2wrt1()+", cigar="+a.getCigar()+ " length1="+ref.length()+" length2="+read.length()); @@ -117,19 +114,19 @@ public class SWPairwiseAlignmentMain { } } - private static void print(double[] s, byte[] a, byte[] b) { + private static void print(final int[][] s, final byte[] a, final byte[] b) { int n = a.length+1; int m = b.length+1; System.out.print(" "); for ( int j = 1 ; j < m ; j++) System.out.printf(" %5c",(char)b[j-1]) ; System.out.println(); - for ( int i = 0, row_offset = 0 ; i < n ; i++, row_offset+=m) { + for ( int i = 0 ; i < n ; i++) { if ( i > 0 ) System.out.print((char)a[i-1]); else System.out.print(' '); System.out.print(" "); for ( int j = 0; j < m ; j++ ) { - System.out.printf(" %5.1f",s[row_offset+j]); + System.out.printf(" %5.1f",s[i][j]); } System.out.println(); } diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/smithwaterman/SWParameterSet.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/smithwaterman/SWParameterSet.java index 9bb2bad5d..7226a98f5 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/smithwaterman/SWParameterSet.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/smithwaterman/SWParameterSet.java @@ -34,12 +34,12 @@ package org.broadinstitute.gatk.utils.smithwaterman; */ public enum SWParameterSet { // match=1, mismatch = -1/3, gap=-(1+k/3) - ORIGINAL_DEFAULT(new Parameters(1.0,-1.0/3.0,-1.0-1.0/3.0,-1.0/3.0,0.00001)), + ORIGINAL_DEFAULT(new Parameters(3,-1,-4,-3)), /** * A standard set of values for NGS alignments */ - STANDARD_NGS(new Parameters(5.0, -10.0, -22.0, -1.2,0.00001)); + STANDARD_NGS(new Parameters(25, -50, -110, -6)); protected Parameters parameters;