Change SmithWaterman to use integers instead of doubles.
This commit is contained in:
parent
cfde6e72bf
commit
cc45e62e8e
|
|
@ -339,18 +339,18 @@ public class HaplotypeResolver extends RodWalker<Integer, Integer> {
|
|||
return overlap;
|
||||
}
|
||||
|
||||
private static final double SW_MATCH = 4.0;
|
||||
private static final double SW_MISMATCH = -10.0;
|
||||
private static final double SW_GAP = -25.0;
|
||||
private static final double SW_GAP_EXTEND = -1.3;
|
||||
private static final double SW_EPSILON = 0.00001;
|
||||
private static final int SW_MATCH = 40;
|
||||
private static final int SW_MISMATCH = -100;
|
||||
private static final int SW_GAP = -250;
|
||||
private static final int SW_GAP_EXTEND = -13;
|
||||
|
||||
private void resolveByHaplotype(final ReferenceContext refContext) {
|
||||
|
||||
final byte[] source1Haplotype = generateHaplotype(sourceVCs1, refContext);
|
||||
final byte[] source2Haplotype = generateHaplotype(sourceVCs2, refContext);
|
||||
|
||||
final SWPairwiseAlignment swConsensus1 = new SWPairwiseAlignment( refContext.getBases(), source1Haplotype, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND, SW_EPSILON );
|
||||
final SWPairwiseAlignment swConsensus2 = new SWPairwiseAlignment( refContext.getBases(), source2Haplotype, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND, SW_EPSILON );
|
||||
final SWPairwiseAlignment swConsensus1 = new SWPairwiseAlignment( refContext.getBases(), source1Haplotype, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
|
||||
final SWPairwiseAlignment swConsensus2 = new SWPairwiseAlignment( refContext.getBases(), source2Haplotype, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
|
||||
|
||||
// protect against SW failures
|
||||
if( swConsensus1.getCigar().toString().contains("S") || swConsensus1.getCigar().getReferenceLength() < 20 ||
|
||||
|
|
|
|||
|
|
@ -329,7 +329,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
// fraction of mismatches that need to no longer mismatch for a column to be considered cleaned
|
||||
private static final double MISMATCH_COLUMN_CLEANED_FRACTION = 0.75;
|
||||
|
||||
private final static Parameters swParameters = new Parameters(30.0, -10.0, -10.0, -2.0,0.0001);
|
||||
private final static Parameters swParameters = new Parameters(30, -10, -10, -2);
|
||||
|
||||
// reference base padding size
|
||||
// TODO -- make this a command-line argument if the need arises
|
||||
|
|
|
|||
|
|
@ -203,7 +203,7 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
public Map<Integer,VariantContext> calcAlignment() {
|
||||
final SWPairwiseAlignment alignment = new SWPairwiseAlignment(ref, hap, new Parameters(1.0,-1.0/3.0,-1 - 1./3., -1./3.,0.00001));
|
||||
final SWPairwiseAlignment alignment = new SWPairwiseAlignment(ref, hap, new Parameters(3,-1,-4, -1));
|
||||
final Haplotype h = new Haplotype(hap, false, alignment.getAlignmentStart2wrt1(), alignment.getCigar());
|
||||
return HaplotypeCallerGenotypingEngine.generateVCsFromAlignment(h, ref, genomeLocParser.createGenomeLoc("4", 1, 1 + ref.length), "name");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,6 +46,10 @@
|
|||
|
||||
package org.broadinstitute.gatk.utils.smithwaterman;
|
||||
|
||||
import htsjdk.samtools.Cigar;
|
||||
import htsjdk.samtools.CigarElement;
|
||||
import htsjdk.samtools.CigarOperator;
|
||||
import org.broadinstitute.gatk.utils.sam.CigarUtils;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
@ -78,16 +82,16 @@ public class SWPairwiseAlignmentUnitTest extends BaseTest {
|
|||
|
||||
final String ref1 = "AAAGACTACTG";
|
||||
final String read1 = "AACGGACACTG";
|
||||
tests.add(new Object[]{ref1, read1, 5.0, -10.0, -22.0, -1.2, 0.0001, 1, "2M2I3M1D4M"});
|
||||
tests.add(new Object[]{ref1, read1, 20.0, -5.0, -30.0, -2.2, 0.0001, 0, "11M"});
|
||||
tests.add(new Object[]{ref1, read1, 50, -100, -220, -12, 1, "2M2I3M1D4M"});
|
||||
tests.add(new Object[]{ref1, read1, 200, -50, -300, -22, 0, "11M"});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "OddNoAlignment", enabled = true)
|
||||
public void testOddNoAlignment(final String reference, final String read, final double match, final double mismatch, final double gap, final double gap_extend, final double epsilon,
|
||||
public void testOddNoAlignment(final String reference, final String read, final int match, final int mismatch, final int gap, final int gap_extend,
|
||||
final int expectedStart, final String expectedCigar) {
|
||||
final SWPairwiseAlignment sw = new SWPairwiseAlignment(reference.getBytes(), read.getBytes(), match, mismatch, gap, gap_extend, epsilon);
|
||||
final SWPairwiseAlignment sw = new SWPairwiseAlignment(reference.getBytes(), read.getBytes(), match, mismatch, gap, gap_extend);
|
||||
Assert.assertEquals(sw.getAlignmentStart2wrt1(), expectedStart);
|
||||
Assert.assertEquals(sw.getCigar().toString(), expectedCigar);
|
||||
}
|
||||
|
|
@ -117,4 +121,46 @@ public class SWPairwiseAlignmentUnitTest extends BaseTest {
|
|||
Assert.assertEquals(sw.getAlignmentStart2wrt1(), expectedStart);
|
||||
Assert.assertEquals(sw.getCigar().toString(), expectedCigar);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testForIdenticalAlignmentsWithDifferingFlankLengths() {
|
||||
//This test is designed to ensure that the indels are correctly placed
|
||||
//if the region flanking these indels is extended by a varying amount.
|
||||
//It checks for problems caused by floating point rounding leading to different
|
||||
//paths being selected.
|
||||
|
||||
//Create two versions of the same sequence with different flanking regions.
|
||||
byte[] paddedRef="GCGTCGCAGTCTTAAGGCCCCGCCTTTTCAGACAGCTTCCGCTGGGCCTGGGCCGCTGCGGGGCGGTCACGGCCCCTTTAAGCCTGAGCCCCGCCCCCTGGCTCCCCGCCCCCTCTTCTCCCCTCCCCCAAGCCAGCACCTGGTGCCCCGGCGGGTCGTGCGGCGCGGCGCTCCGCGGTGAGCGCCTGACCCCGAGGGGGCCCGGGGCCGCGTCCCTGGGCCCTCCCCACCCTTGCGGTGGCCTCGCGGGTCCCAGGGGCGGGGCTGGAGCGGCAGCAGGGCCGGGGAGATGGGCGGTGGGGAGCGCGGGAGGGACCGGGCCGAGCCGGGGGAAGGGCTCCGGTGACT".getBytes();
|
||||
byte[] paddedHap="GCGTCGCAGTCTTAAGGCCCCGCCTTTTCAGACAGCTTCCGCTGGGCCTGGGCCGCTGCGGGGCGGTCACGGCCCCTTTAAGCCTGAGCCCCGCCCCCTGGCTCCCCGCCCCCTCTTCTCCCCTCCCCCAAGCCAGCACCTGGTGCCCCGGCGGGTCGTGCGGCGCGGCGCTCCGCGGTGAGCGCCTGACCCCGA--GGGCC---------------GGGCCCTCCCCACCCTTGCGGTGGCCTCGCGGGTCCCAGGGGCGGGGCTGGAGCGGCAGCAGGGCCGGGGAGATGGGCGGTGGGGAGCGCGGGAGGGACCGGGCCGAGCCGGGGGAAGGGCTCCGGTGACT".replace("-","").getBytes();
|
||||
byte[] notPaddedRef= "CTTTAAGCCTGAGCCCCGCCCCCTGGCTCCCCGCCCCCTCTTCTCCCCTCCCCCAAGCCAGCACCTGGTGCCCCGGCGGGTCGTGCGGCGCGGCGCTCCGCGGTGAGCGCCTGACCCCGAGGGGGCCCGGGGCCGCGTCCCTGGGCCCTCCCCACCCTTGCGGTGGCCTCGCGGGTCCCAGGGGCGGGGCTGGAGCGGCAGCAGGGCCGGGGAGATGGGCGGTGGGGAGCGCGGGAGGGA".getBytes();
|
||||
byte[] notPaddedHap= "CTTTAAGCCTGAGCCCCGCCCCCTGGCTCCCCGCCCCCTCTTCTCCCCTCCCCCAAGCCAGCACCTGGTGCCCCGGCGGGTCGTGCGGCGCGGCGCTCCGCGGTGAGCGCCTGACCCCGA---------GGGCC--------GGGCCCTCCCCACCCTTGCGGTGGCCTCGCGGGTCCCAGGGGCGGGGCTGGAGCGGCAGCAGGGCCGGGGAGATGGGCGGTGGGGAGCGCGGGAGGGA".replace("-","").getBytes();
|
||||
//a simplified version of the getCigar routine in the haplotype caller to align these
|
||||
final String SW_PAD = "NNNNNNNNNN";
|
||||
final String paddedsRef = SW_PAD + new String(paddedRef) + SW_PAD;
|
||||
final String paddedsHap = SW_PAD + new String(paddedHap) + SW_PAD;
|
||||
final String notPaddedsRef = SW_PAD + new String(notPaddedRef) + SW_PAD;
|
||||
final String notpaddedsHap = SW_PAD + new String(notPaddedHap) + SW_PAD;
|
||||
final SmithWaterman paddedAlignment = new SWPairwiseAlignment( paddedsRef.getBytes(), paddedsHap.getBytes(), CigarUtils.NEW_SW_PARAMETERS );
|
||||
final SmithWaterman notPaddedAlignment = new SWPairwiseAlignment( notPaddedsRef.getBytes(), notpaddedsHap.getBytes(), CigarUtils.NEW_SW_PARAMETERS );
|
||||
//Now verify that the two sequences have the same alignment and not match positions.
|
||||
Cigar rawPadded = paddedAlignment.getCigar();
|
||||
Cigar notPadded= notPaddedAlignment.getCigar();
|
||||
List<CigarElement> paddedC=rawPadded.getCigarElements();
|
||||
List<CigarElement> notPaddedC=notPadded.getCigarElements();
|
||||
Assert.assertEquals(paddedC.size(),notPaddedC.size());
|
||||
for(int i=0;i<notPaddedC.size();i++)
|
||||
{
|
||||
CigarElement pc=paddedC.get(i);
|
||||
CigarElement npc=notPaddedC.get(i);
|
||||
if(pc.getOperator()== CigarOperator.M && npc.getOperator()==CigarOperator.M)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
int l1=pc.getLength();
|
||||
int l2=npc.getLength();
|
||||
Assert.assertEquals(l1,l2);
|
||||
Assert.assertEquals(pc.getOperator(),npc.getOperator());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -172,7 +172,7 @@ public class CigarUtils {
|
|||
|
||||
// used in the bubble state machine to apply Smith-Waterman to the bubble sequence
|
||||
// these values were chosen via optimization against the NA12878 knowledge base
|
||||
public static final Parameters NEW_SW_PARAMETERS = new Parameters(20.0, -15.0, -26.0, -1.1, 0.00001);
|
||||
public static final Parameters NEW_SW_PARAMETERS = new Parameters(200, -150, -260, -11);
|
||||
|
||||
private final static String SW_PAD = "NNNNNNNNNN";
|
||||
|
||||
|
|
|
|||
|
|
@ -121,35 +121,29 @@ public final class GlobalEdgeGreedySWPairwiseAlignment extends SWPairwiseAlignme
|
|||
final byte[] refToAlign = Utils.trimArray(reference, forwardEdgeMatch, reverseEdgeMatch);
|
||||
final byte[] altToAlign = Utils.trimArray(alternate, forwardEdgeMatch, reverseEdgeMatch);
|
||||
|
||||
final double[] sw = new double[(sizeOfRefToAlign+1)*(sizeOfAltToAlign+1)];
|
||||
final int[][] sw = new int[(sizeOfRefToAlign+1)][(sizeOfAltToAlign+1)];
|
||||
if ( keepScoringMatrix ) SW = sw;
|
||||
final int[] btrack = new int[(sizeOfRefToAlign+1)*(sizeOfAltToAlign+1)];
|
||||
final int[][] btrack = new int[(sizeOfRefToAlign+1)][(sizeOfAltToAlign+1)];
|
||||
|
||||
calculateMatrix(refToAlign, altToAlign, sw, btrack, OVERHANG_STRATEGY.INDEL);
|
||||
|
||||
if ( DEBUG_MODE ) {
|
||||
System.out.println(new String(refToAlign) + " vs. " + new String(altToAlign));
|
||||
debugMatrix(sw, sizeOfRefToAlign+1, sizeOfAltToAlign+1);
|
||||
debugMatrix(sw);
|
||||
System.out.println("----");
|
||||
debugMatrix(btrack, sizeOfRefToAlign + 1, sizeOfAltToAlign + 1);
|
||||
debugMatrix(btrack);
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
alignmentResult = calculateCigar(forwardEdgeMatch, reverseEdgeMatch, sizeOfRefToAlign, sizeOfAltToAlign, sw, btrack);
|
||||
alignmentResult = calculateCigar(forwardEdgeMatch, reverseEdgeMatch, sw, btrack);
|
||||
}
|
||||
|
||||
private void debugMatrix(final double[] matrix, final int dim1, final int dim2) {
|
||||
for ( int i = 0; i < dim1; i++ ) {
|
||||
for ( int j = 0; j < dim2; j++ )
|
||||
System.out.print(String.format("%.1f ", matrix[i * dim2 + j]));
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
|
||||
private void debugMatrix(final int[] matrix, final int dim1, final int dim2) {
|
||||
for ( int i = 0; i < dim1; i++ ) {
|
||||
for ( int j = 0; j < dim2; j++ )
|
||||
System.out.print(matrix[i*dim2 + j] + " ");
|
||||
private void debugMatrix(final int[][] matrix) {
|
||||
for ( int i = 0; i < matrix.length; i++ ) {
|
||||
int [] cur = matrix[i];
|
||||
for ( int j = 0; j < cur.length; j++ )
|
||||
System.out.print(cur[j] + " ");
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
|
|
@ -194,17 +188,14 @@ public final class GlobalEdgeGreedySWPairwiseAlignment extends SWPairwiseAlignme
|
|||
*
|
||||
* @param matchingPrefix the prefix match size
|
||||
* @param matchingSuffix the suffix match size
|
||||
* @param refLength length of the reference sequence
|
||||
* @param altLength length of the alternate sequence
|
||||
* @param sw the Smith-Waterman matrix to use
|
||||
* @param btrack the back track matrix to use
|
||||
* @return non-null SWPairwiseAlignmentResult object
|
||||
*/
|
||||
protected SWPairwiseAlignmentResult calculateCigar(final int matchingPrefix, final int matchingSuffix,
|
||||
final int refLength, final int altLength,
|
||||
final double[] sw, final int[] btrack) {
|
||||
final int[][] sw, final int[][] btrack) {
|
||||
|
||||
final SWPairwiseAlignmentResult SW_result = calculateCigar(refLength, altLength, sw, btrack, OVERHANG_STRATEGY.INDEL);
|
||||
final SWPairwiseAlignmentResult SW_result = calculateCigar(sw, btrack, OVERHANG_STRATEGY.INDEL);
|
||||
|
||||
final LinkedList<CigarElement> lce = new LinkedList<CigarElement>(SW_result.cigar.getCigarElements());
|
||||
if ( matchingPrefix > 0 )
|
||||
|
|
|
|||
|
|
@ -35,11 +35,10 @@ package org.broadinstitute.gatk.utils.smithwaterman;
|
|||
* Time: 12:03 PM
|
||||
*/
|
||||
public final class Parameters {
|
||||
public final double w_match;
|
||||
public final double w_mismatch;
|
||||
public final double w_open;
|
||||
public final double w_extend;
|
||||
public final double epsilon;
|
||||
public final int w_match;
|
||||
public final int w_mismatch;
|
||||
public final int w_open;
|
||||
public final int w_extend;
|
||||
|
||||
/**
|
||||
* Create a new set of SW parameters
|
||||
|
|
@ -47,20 +46,17 @@ public final class Parameters {
|
|||
* @param w_mismatch the mismatch penalty
|
||||
* @param w_open the gap open penalty
|
||||
* @param w_extend the gap extension penalty
|
||||
* @param epsilon weight comparison error
|
||||
|
||||
*/
|
||||
public Parameters(final double w_match, final double w_mismatch, final double w_open, final double w_extend, final double epsilon) {
|
||||
public Parameters(final int w_match, final int w_mismatch, final int w_open, final int w_extend) {
|
||||
if ( w_mismatch > 0 ) throw new IllegalArgumentException("w_mismatch must be <= 0 but got " + w_mismatch);
|
||||
if ( w_open> 0 ) throw new IllegalArgumentException("w_open must be <= 0 but got " + w_open);
|
||||
if ( w_extend> 0 ) throw new IllegalArgumentException("w_extend must be <= 0 but got " + w_extend);
|
||||
if ( Double.isNaN(epsilon)) throw new IllegalArgumentException("epsilon cannot be a NaN");
|
||||
if ( epsilon < 0.0 ) throw new IllegalArgumentException("epsilon cannot be negative");
|
||||
|
||||
this.w_match = w_match;
|
||||
this.w_mismatch = w_mismatch;
|
||||
this.w_open = w_open;
|
||||
this.w_extend = w_extend;
|
||||
this.epsilon = epsilon;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -99,7 +99,7 @@ public class SWPairwiseAlignment implements SmithWaterman {
|
|||
/**
|
||||
* The SW scoring matrix, stored for debugging purposes if keepScoringMatrix is true
|
||||
*/
|
||||
protected double[] SW = null;
|
||||
protected int[][] SW = null;
|
||||
|
||||
/**
|
||||
* Only for testing purposes in the SWPairwiseAlignmentMain function
|
||||
|
|
@ -113,10 +113,8 @@ public class SWPairwiseAlignment implements SmithWaterman {
|
|||
* @deprecated in favor of constructors using the Parameter or ParameterSet class
|
||||
*/
|
||||
@Deprecated
|
||||
public SWPairwiseAlignment(final byte[] seq1, final byte[] seq2,
|
||||
final double match, final double mismatch, final double open, final double extend,
|
||||
final double epsilon ) {
|
||||
this(seq1, seq2, new Parameters(match, mismatch, open, extend, epsilon));
|
||||
public SWPairwiseAlignment(byte[] seq1, byte[] seq2, int match, int mismatch, int open, int extend ) {
|
||||
this(seq1, seq2, new Parameters(match, mismatch, open, extend));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -149,22 +147,6 @@ public class SWPairwiseAlignment implements SmithWaterman {
|
|||
align(seq1, seq2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new SW pairwise aligner
|
||||
*
|
||||
* After creating the object the two sequences are aligned with an internal call to align(seq1, seq2, parameters, strategy)
|
||||
*
|
||||
* @param seq1 the first sequence we want to align
|
||||
* @param seq2 the second sequence we want to align
|
||||
* @param parameters the parameters to use
|
||||
* @param strategy the overhang strategy to use
|
||||
*/
|
||||
public SWPairwiseAlignment(final byte[] seq1, final byte[] seq2, final Parameters parameters, final OVERHANG_STRATEGY strategy) {
|
||||
this(parameters);
|
||||
overhang_strategy = strategy;
|
||||
align(seq1, seq2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new SW pairwise aligner, without actually doing any alignment yet
|
||||
*
|
||||
|
|
@ -207,14 +189,14 @@ public class SWPairwiseAlignment implements SmithWaterman {
|
|||
if ( reference == null || reference.length == 0 || alternate == null || alternate.length == 0 )
|
||||
throw new IllegalArgumentException("Non-null, non-empty sequences are required for the Smith-Waterman calculation");
|
||||
|
||||
final int n = reference.length;
|
||||
final int m = alternate.length;
|
||||
double [] sw = new double[(n+1)*(m+1)];
|
||||
final int n = reference.length+1;
|
||||
final int m = alternate.length+1;
|
||||
int[][] sw = new int[n][m];
|
||||
if ( keepScoringMatrix ) SW = sw;
|
||||
int [] btrack = new int[(n+1)*(m+1)];
|
||||
int[][] btrack=new int[n][m];
|
||||
|
||||
calculateMatrix(reference, alternate, sw, btrack);
|
||||
alignmentResult = calculateCigar(n, m, sw, btrack, overhang_strategy); // length of the segment (continuous matches, insertions or deletions)
|
||||
alignmentResult = calculateCigar(sw, btrack, overhang_strategy); // length of the segment (continuous matches, insertions or deletions)
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -225,7 +207,7 @@ public class SWPairwiseAlignment implements SmithWaterman {
|
|||
* @param sw the Smith-Waterman matrix to populate
|
||||
* @param btrack the back track matrix to populate
|
||||
*/
|
||||
protected void calculateMatrix(final byte[] reference, final byte[] alternate, double[] sw, int[] btrack) {
|
||||
protected void calculateMatrix(final byte[] reference, final byte[] alternate, final int[][] sw, final int[][] btrack) {
|
||||
calculateMatrix(reference, alternate, sw, btrack, overhang_strategy);
|
||||
}
|
||||
|
||||
|
|
@ -238,62 +220,54 @@ public class SWPairwiseAlignment implements SmithWaterman {
|
|||
* @param btrack the back track matrix to populate
|
||||
* @param overhang_strategy the strategy to use for dealing with overhangs
|
||||
*/
|
||||
protected void calculateMatrix(final byte[] reference, final byte[] alternate, double[] sw, int[] btrack, final OVERHANG_STRATEGY overhang_strategy) {
|
||||
protected void calculateMatrix(final byte[] reference, final byte[] alternate, final int[][] sw, final int[][] btrack, final OVERHANG_STRATEGY overhang_strategy) {
|
||||
if ( reference.length == 0 || alternate.length == 0 )
|
||||
throw new IllegalArgumentException("Non-null, non-empty sequences are required for the Smith-Waterman calculation");
|
||||
|
||||
final int n = reference.length+1;
|
||||
final int m = alternate.length+1;
|
||||
final int ncol = sw[0].length;//alternate.length+1; formerly m
|
||||
final int nrow = sw.length;// reference.length+1; formerly n
|
||||
|
||||
//final double MATRIX_MIN_CUTOFF=-1e100; // never let matrix elements drop below this cutoff
|
||||
final double MATRIX_MIN_CUTOFF; // never let matrix elements drop below this cutoff
|
||||
if ( cutoff ) MATRIX_MIN_CUTOFF = 0.0;
|
||||
else MATRIX_MIN_CUTOFF = -1e100;
|
||||
final int MATRIX_MIN_CUTOFF; // never let matrix elements drop below this cutoff
|
||||
if ( cutoff ) MATRIX_MIN_CUTOFF = 0;
|
||||
else MATRIX_MIN_CUTOFF = (int) -1e8;
|
||||
|
||||
final double[] best_gap_v = new double[m+1];
|
||||
Arrays.fill(best_gap_v, -1.0e40);
|
||||
final int[] gap_size_v = new int[m+1];
|
||||
final double[] best_gap_h = new double[n+1];
|
||||
Arrays.fill(best_gap_h,-1.0e40);
|
||||
final int[] gap_size_h = new int[n+1];
|
||||
int lowInitValue=Integer.MIN_VALUE/2;
|
||||
final int[] best_gap_v = new int[ncol+1];
|
||||
Arrays.fill(best_gap_v, lowInitValue);
|
||||
final int[] gap_size_v = new int[ncol+1];
|
||||
final int[] best_gap_h = new int[nrow+1];
|
||||
Arrays.fill(best_gap_h,lowInitValue);
|
||||
final int[] gap_size_h = new int[nrow+1];
|
||||
|
||||
// we need to initialize the SW matrix with gap penalties if we want to keep track of indels at the edges of alignments
|
||||
if ( overhang_strategy == OVERHANG_STRATEGY.INDEL || overhang_strategy == OVERHANG_STRATEGY.LEADING_INDEL ) {
|
||||
// initialize the first row
|
||||
sw[1] = parameters.w_open;
|
||||
double currentValue = parameters.w_open;
|
||||
for ( int i = 2; i < m; i++ ) {
|
||||
int[] topRow=sw[0];
|
||||
topRow[1]=parameters.w_open;
|
||||
int currentValue = parameters.w_open;
|
||||
for ( int i = 2; i < topRow.length; i++ ) {
|
||||
currentValue += parameters.w_extend;
|
||||
sw[i] = currentValue;
|
||||
topRow[i]=currentValue;
|
||||
}
|
||||
|
||||
// initialize the first column
|
||||
sw[m] = parameters.w_open;
|
||||
sw[1][0]=parameters.w_open;
|
||||
currentValue = parameters.w_open;
|
||||
for ( int i = 2; i < n; i++ ) {
|
||||
for ( int i = 2; i < sw.length; i++ ) {
|
||||
currentValue += parameters.w_extend;
|
||||
sw[i*m] = currentValue;
|
||||
sw[i][0]=currentValue;
|
||||
}
|
||||
}
|
||||
|
||||
// build smith-waterman matrix and keep backtrack info:
|
||||
for ( int i = 1, row_offset_1 = 0 ; i < n ; i++ ) { // we do NOT update row_offset_1 here, see comment at the end of this outer loop
|
||||
byte a_base = reference[i-1]; // letter in a at the current pos
|
||||
|
||||
final int row_offset = row_offset_1 + m;
|
||||
|
||||
// On the entrance into the loop, row_offset_1 is the (linear) offset
|
||||
// of the first element of row (i-1) and row_offset is the linear offset of the
|
||||
// start of row i
|
||||
|
||||
for ( int j = 1, data_offset_1 = row_offset_1 ; j < m ; j++, data_offset_1++ ) {
|
||||
|
||||
// data_offset_1 is linearized offset of element [i-1][j-1]
|
||||
|
||||
int[] curRow=sw[0];
|
||||
for ( int i = 1; i <sw.length ; i++ ) {
|
||||
final byte a_base = reference[i-1]; // letter in a at the current pos
|
||||
final int[] lastRow=curRow;
|
||||
curRow=sw[i];
|
||||
final int[] curBackTrackRow=btrack[i];
|
||||
for ( int j = 1; j < curRow.length; j++) {
|
||||
final byte b_base = alternate[j-1]; // letter in b at the current pos
|
||||
|
||||
// in other words, step_diag = sw[i-1][j-1] + wd(a_base,b_base);
|
||||
final double step_diag = sw[data_offset_1] + wd(a_base,b_base);
|
||||
final int step_diag = lastRow[j-1] + wd(a_base,b_base);
|
||||
|
||||
// optimized "traversal" of all the matrix cells above the current one (i.e. traversing
|
||||
// all 'step down' events that would end in the current cell. The optimized code
|
||||
|
|
@ -301,11 +275,9 @@ public class SWPairwiseAlignment implements SmithWaterman {
|
|||
// the optimization works ONLY for linear w(k)=wopen+(k-1)*wextend!!!!
|
||||
|
||||
// if a gap (length 1) was just opened above, this is the cost of arriving to the current cell:
|
||||
double prev_gap = sw[data_offset_1+1]+parameters.w_open;
|
||||
|
||||
int prev_gap = lastRow[j]+parameters.w_open;
|
||||
best_gap_v[j] += parameters.w_extend; // for the gaps that were already opened earlier, extending them by 1 costs w_extend
|
||||
|
||||
if ( compareScores(prev_gap,best_gap_v[j]) > 0 ) {
|
||||
if ( prev_gap > best_gap_v[j] ) {
|
||||
// opening a gap just before the current cell results in better score than extending by one
|
||||
// the best previously opened gap. This will hold for ALL cells below: since any gap
|
||||
// once opened always costs w_extend to extend by another base, we will always get a better score
|
||||
|
|
@ -317,7 +289,7 @@ public class SWPairwiseAlignment implements SmithWaterman {
|
|||
gap_size_v[j]++;
|
||||
}
|
||||
|
||||
final double step_down = best_gap_v[j] ;
|
||||
final int step_down = best_gap_v[j] ;
|
||||
final int kd = gap_size_v[j];
|
||||
|
||||
// optimized "traversal" of all the matrix cells to the left of the current one (i.e. traversing
|
||||
|
|
@ -325,11 +297,9 @@ public class SWPairwiseAlignment implements SmithWaterman {
|
|||
// does exactly the same thing as the commented out loop below. IMPORTANT:
|
||||
// the optimization works ONLY for linear w(k)=wopen+(k-1)*wextend!!!!
|
||||
|
||||
final int data_offset = row_offset + j; // linearized offset of element [i][j]
|
||||
prev_gap = sw[data_offset-1]+parameters.w_open; // what would it cost us to open length 1 gap just to the left from current cell
|
||||
prev_gap =curRow[j-1] + parameters.w_open; // what would it cost us to open length 1 gap just to the left from current cell
|
||||
best_gap_h[i] += parameters.w_extend; // previous best gap would cost us that much if extended by another base
|
||||
|
||||
if ( compareScores(prev_gap,best_gap_h[i]) > 0 ) {
|
||||
if ( prev_gap > best_gap_h[i] ) {
|
||||
// newly opened gap is better (score-wise) than any previous gap with the same row index i; since
|
||||
// gap penalty is linear with k, this new gap location is going to remain better than any previous ones
|
||||
best_gap_h[i] = prev_gap;
|
||||
|
|
@ -338,34 +308,26 @@ public class SWPairwiseAlignment implements SmithWaterman {
|
|||
gap_size_h[i]++;
|
||||
}
|
||||
|
||||
final double step_right = best_gap_h[i];
|
||||
final int step_right = best_gap_h[i];
|
||||
final int ki = gap_size_h[i];
|
||||
|
||||
if ( compareScores(step_down, step_right) > 0 ) {
|
||||
if ( compareScores(step_down,step_diag) > 0) {
|
||||
sw[data_offset] = Math.max(MATRIX_MIN_CUTOFF,step_down);
|
||||
btrack[data_offset] = kd ; // positive=vertical
|
||||
} else {
|
||||
sw[data_offset] = Math.max(MATRIX_MIN_CUTOFF,step_diag);
|
||||
btrack[data_offset] = 0; // 0 = diagonal
|
||||
}
|
||||
} else {
|
||||
// step_down <= step_right
|
||||
if ( compareScores(step_right, step_diag) > 0 ) {
|
||||
sw[data_offset] = Math.max(MATRIX_MIN_CUTOFF,step_right);
|
||||
btrack[data_offset] = -ki; // negative = horizontal
|
||||
} else {
|
||||
sw[data_offset] = Math.max(MATRIX_MIN_CUTOFF,step_diag);
|
||||
btrack[data_offset] = 0; // 0 = diagonal
|
||||
}
|
||||
//priority here will be step diagonal, step right, step down
|
||||
final boolean diagHighestOrEqual = (step_diag >= step_down)
|
||||
&& (step_diag >= step_right);
|
||||
|
||||
if ( diagHighestOrEqual ) {
|
||||
curRow[j]=Math.max(MATRIX_MIN_CUTOFF,step_diag);
|
||||
curBackTrackRow[j]=0;
|
||||
}
|
||||
else if(step_right>=step_down) { //moving right is the highest
|
||||
curRow[j]=Math.max(MATRIX_MIN_CUTOFF,step_right);
|
||||
curBackTrackRow[j]=-ki; // negative = horizontal
|
||||
}
|
||||
else {
|
||||
curRow[j]=Math.max(MATRIX_MIN_CUTOFF,step_down);
|
||||
curBackTrackRow[j]= kd; // positive=vertical
|
||||
}
|
||||
}
|
||||
|
||||
// IMPORTANT, IMPORTANT, IMPORTANT:
|
||||
// note that we update this (secondary) outer loop variable here,
|
||||
// so that we DO NOT need to update it
|
||||
// in the for() statement itself.
|
||||
row_offset_1 = row_offset;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -381,40 +343,22 @@ public class SWPairwiseAlignment implements SmithWaterman {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares alternative (sub)alignments scores considering
|
||||
* a round-off error: {@link #parameters#epsilon}.
|
||||
*
|
||||
* @param score1 first score.
|
||||
* @param score2 second score.
|
||||
*
|
||||
* @return -1 if {@code score1} is less than {@code score2}, 0 if the are the same, 1 if {@code score2} is
|
||||
* greater than {@code score1}.
|
||||
*/
|
||||
private int compareScores(final double score1, final double score2) {
|
||||
if (score1 == score2)
|
||||
return 0;
|
||||
else if (score1 < score2)
|
||||
return score2 - score1 > parameters.epsilon ? -1 : 0;
|
||||
else
|
||||
return score1 - score2 > parameters.epsilon ? +1 : 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the CIGAR for the alignment from the back track matrix
|
||||
*
|
||||
* @param refLength length of the reference sequence
|
||||
* @param altLength length of the alternate sequence
|
||||
* @param sw the Smith-Waterman matrix to use
|
||||
* @param btrack the back track matrix to use
|
||||
* @param overhang_strategy the strategy to use for dealing with overhangs
|
||||
* @return non-null SWPairwiseAlignmentResult object
|
||||
*/
|
||||
protected SWPairwiseAlignmentResult calculateCigar(final int refLength, final int altLength, final double[] sw, final int[] btrack, final OVERHANG_STRATEGY overhang_strategy) {
|
||||
protected SWPairwiseAlignmentResult calculateCigar(final int[][] sw, final int[][] btrack, final OVERHANG_STRATEGY overhang_strategy) {
|
||||
// p holds the position we start backtracking from; we will be assembling a cigar in the backwards order
|
||||
int p1 = 0, p2 = 0;
|
||||
|
||||
double maxscore = Double.NEGATIVE_INFINITY; // sw scores are allowed to be negative
|
||||
int refLength = sw.length-1;
|
||||
int altLength = sw[0].length-1;
|
||||
|
||||
int maxscore = Integer.MIN_VALUE; // sw scores are allowed to be negative
|
||||
int segment_length = 0; // length of the segment (continuous matches, insertions or deletions)
|
||||
|
||||
// if we want to consider overhangs as legitimate operators, then just start from the corner of the matrix
|
||||
|
|
@ -424,31 +368,34 @@ public class SWPairwiseAlignment implements SmithWaterman {
|
|||
} else {
|
||||
// look for the largest score on the rightmost column. we use >= combined with the traversal direction
|
||||
// to ensure that if two scores are equal, the one closer to diagonal gets picked
|
||||
for ( int i = 1, data_offset = altLength+1+altLength ; i < refLength+1 ; i++, data_offset += (altLength+1) ) {
|
||||
// data_offset is the offset of [i][m]
|
||||
//Note: this is not technically smith-waterman, as by only looking for max values on the right we are
|
||||
//excluding high scoring local alignments
|
||||
p2=altLength;
|
||||
|
||||
if ( compareScores(sw[data_offset],maxscore) >= 0 ) { // sw[data_offset] >= maxscore
|
||||
p1 = i; p2 = altLength ; maxscore = sw[data_offset];
|
||||
}
|
||||
for(int i=1;i<sw.length;i++) {
|
||||
final int curScore = sw[i][altLength];
|
||||
if (curScore >= maxscore ) {
|
||||
p1 = i;
|
||||
maxscore = curScore;
|
||||
}
|
||||
}
|
||||
|
||||
// now look for a larger score on the bottom-most row
|
||||
if ( overhang_strategy != OVERHANG_STRATEGY.LEADING_INDEL ) {
|
||||
for ( int j = 1, data_offset = refLength*(altLength+1)+1 ; j < altLength+1 ; j++, data_offset++ ) {
|
||||
final int[] bottomRow=sw[refLength];
|
||||
for ( int j = 1 ; j < bottomRow.length; j++) {
|
||||
int curScore=bottomRow[j];
|
||||
// data_offset is the offset of [n][j]
|
||||
final int scoreComparison = compareScores(sw[data_offset],maxscore);
|
||||
if ( scoreComparison > 0 || scoreComparison == 0 && Math.abs(refLength-j) < Math.abs(p1 - p2)) {
|
||||
if ( curScore > maxscore ||
|
||||
(curScore == maxscore && Math.abs(refLength-j) < Math.abs(p1 - p2) ) ) {
|
||||
p1 = refLength;
|
||||
p2 = j ;
|
||||
maxscore = sw[data_offset];
|
||||
maxscore = curScore;
|
||||
segment_length = altLength - j ; // end of sequence 2 is overhanging; we will just record it as 'M' segment
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final List<CigarElement> lce = new ArrayList<CigarElement>(5);
|
||||
|
||||
if ( segment_length > 0 && overhang_strategy == OVERHANG_STRATEGY.SOFTCLIP ) {
|
||||
lce.add(makeElement(State.CLIP, segment_length));
|
||||
segment_length = 0;
|
||||
|
|
@ -458,14 +405,10 @@ public class SWPairwiseAlignment implements SmithWaterman {
|
|||
// to that sequence
|
||||
|
||||
State state = State.MATCH;
|
||||
|
||||
int data_offset = p1*(altLength+1)+p2; // offset of element [p1][p2]
|
||||
do {
|
||||
int btr = btrack[data_offset];
|
||||
|
||||
int btr = btrack[p1][p2];
|
||||
State new_state;
|
||||
int step_length = 1;
|
||||
|
||||
if ( btr > 0 ) {
|
||||
new_state = State.DELETION;
|
||||
step_length = btr;
|
||||
|
|
@ -476,9 +419,9 @@ public class SWPairwiseAlignment implements SmithWaterman {
|
|||
|
||||
// move to next best location in the sw matrix:
|
||||
switch( new_state ) {
|
||||
case MATCH: data_offset -= (altLength+2); p1--; p2--; break; // move back along the diag in the sw matrix
|
||||
case INSERTION: data_offset -= step_length; p2 -= step_length; break; // move left
|
||||
case DELETION: data_offset -= (altLength+1)*step_length; p1 -= step_length; break; // move up
|
||||
case MATCH: p1--; p2--; break; // move back along the diag in the sw matrix
|
||||
case INSERTION: p2 -= step_length; break; // move left
|
||||
case DELETION: p1 -= step_length; break; // move up
|
||||
}
|
||||
|
||||
// now let's see if the state actually changed:
|
||||
|
|
@ -537,7 +480,8 @@ public class SWPairwiseAlignment implements SmithWaterman {
|
|||
return new CigarElement(length, op);
|
||||
}
|
||||
|
||||
private double wd(byte x, byte y) {
|
||||
|
||||
private int wd(final byte x, final byte y) {
|
||||
return (x == y ? parameters.w_match : parameters.w_mismatch);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -90,19 +90,16 @@ public class SWPairwiseAlignmentMain {
|
|||
System.exit(1);
|
||||
}
|
||||
|
||||
double w_match;
|
||||
double w_mismatch;
|
||||
double w_open;
|
||||
double w_extend;
|
||||
final int w_match, w_mismatch, w_open, w_extend;
|
||||
|
||||
w_match = (m == null ? 30.0 : m.doubleValue());
|
||||
w_mismatch = (mm == null ? -10.0 : mm.doubleValue());
|
||||
w_open = (open == null ? -10.0 : open.doubleValue());
|
||||
w_extend = (ext == null ? -2.0 : ext.doubleValue());
|
||||
w_match = (m == null ? 30 : m.intValue());
|
||||
w_mismatch = (mm == null ? -10 : mm.intValue());
|
||||
w_open = (open == null ? -10 : open.intValue());
|
||||
w_extend = (ext == null ? -2 : ext.intValue());
|
||||
|
||||
|
||||
SWPairwiseAlignment.keepScoringMatrix = true;
|
||||
SWPairwiseAlignment a = new SWPairwiseAlignment(ref.getBytes(),read.getBytes(),w_match,w_mismatch,w_open,w_extend, 0.0001);
|
||||
SWPairwiseAlignment a = new SWPairwiseAlignment(ref.getBytes(),read.getBytes(),w_match,w_mismatch,w_open,w_extend);
|
||||
|
||||
System.out.println("start="+a.getAlignmentStart2wrt1()+", cigar="+a.getCigar()+
|
||||
" length1="+ref.length()+" length2="+read.length());
|
||||
|
|
@ -117,19 +114,19 @@ public class SWPairwiseAlignmentMain {
|
|||
}
|
||||
}
|
||||
|
||||
private static void print(double[] s, byte[] a, byte[] b) {
|
||||
private static void print(final int[][] s, final byte[] a, final byte[] b) {
|
||||
int n = a.length+1;
|
||||
int m = b.length+1;
|
||||
System.out.print(" ");
|
||||
for ( int j = 1 ; j < m ; j++) System.out.printf(" %5c",(char)b[j-1]) ;
|
||||
System.out.println();
|
||||
|
||||
for ( int i = 0, row_offset = 0 ; i < n ; i++, row_offset+=m) {
|
||||
for ( int i = 0 ; i < n ; i++) {
|
||||
if ( i > 0 ) System.out.print((char)a[i-1]);
|
||||
else System.out.print(' ');
|
||||
System.out.print(" ");
|
||||
for ( int j = 0; j < m ; j++ ) {
|
||||
System.out.printf(" %5.1f",s[row_offset+j]);
|
||||
System.out.printf(" %5.1f",s[i][j]);
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,12 +34,12 @@ package org.broadinstitute.gatk.utils.smithwaterman;
|
|||
*/
|
||||
public enum SWParameterSet {
|
||||
// match=1, mismatch = -1/3, gap=-(1+k/3)
|
||||
ORIGINAL_DEFAULT(new Parameters(1.0,-1.0/3.0,-1.0-1.0/3.0,-1.0/3.0,0.00001)),
|
||||
ORIGINAL_DEFAULT(new Parameters(3,-1,-4,-3)),
|
||||
|
||||
/**
|
||||
* A standard set of values for NGS alignments
|
||||
*/
|
||||
STANDARD_NGS(new Parameters(5.0, -10.0, -22.0, -1.2,0.00001));
|
||||
STANDARD_NGS(new Parameters(25, -50, -110, -6));
|
||||
|
||||
protected Parameters parameters;
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue