Initial commit of v2.0 of the cleaner. DO NOT USE. (this means you, Chris)
Cleaned up SW code and started moving over everything to use byte[] instead of String or char[]. Added a wrapper class for SAMFileWriter that allows for adding reads out of order. Not even close to done, but I need to commit now to sync up with Andrey. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2712 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b8ae083d1b
commit
fddca032bb
File diff suppressed because it is too large
Load Diff
|
|
@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
|
import net.sf.samtools.util.StringUtil;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -279,7 +280,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
if ( altAlignmentsToTest.size() <= MAX_READS_FOR_CONSENSUSES ) {
|
if ( altAlignmentsToTest.size() <= MAX_READS_FOR_CONSENSUSES ) {
|
||||||
for ( AlignedRead aRead : altAlignmentsToTest ) {
|
for ( AlignedRead aRead : altAlignmentsToTest ) {
|
||||||
// do a pairwise alignment against the reference
|
// do a pairwise alignment against the reference
|
||||||
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getRead().getReadString(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);
|
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(StringUtil.stringToBytes(reference), aRead.getRead().getReadBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);
|
||||||
Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getRead().getReadBases());
|
Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getRead().getReadBases());
|
||||||
if ( c != null) {
|
if ( c != null) {
|
||||||
// if ( debugOn ) System.out.println("NEW consensus generated by SW: "+c.str ) ;
|
// if ( debugOn ) System.out.println("NEW consensus generated by SW: "+c.str ) ;
|
||||||
|
|
@ -295,7 +296,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
int index = generator.nextInt(altAlignmentsToTest.size());
|
int index = generator.nextInt(altAlignmentsToTest.size());
|
||||||
AlignedRead aRead = altAlignmentsToTest.remove(index);
|
AlignedRead aRead = altAlignmentsToTest.remove(index);
|
||||||
// do a pairwise alignment against the reference
|
// do a pairwise alignment against the reference
|
||||||
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getRead().getReadString(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);
|
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(StringUtil.stringToBytes(reference), aRead.getRead().getReadBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);
|
||||||
Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getRead().getReadBases());
|
Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getRead().getReadBases());
|
||||||
if ( c != null)
|
if ( c != null)
|
||||||
altConsenses.add(c);
|
altConsenses.add(c);
|
||||||
|
|
@ -404,7 +405,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
AlignedRead aRead = altReads.get(indexPair.first);
|
AlignedRead aRead = altReads.get(indexPair.first);
|
||||||
if ( aRead.finalizeUpdate() ) {
|
if ( aRead.finalizeUpdate() ) {
|
||||||
aRead.getRead().setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + (int)(improvement/10.0), 255));
|
aRead.getRead().setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + (int)(improvement/10.0), 255));
|
||||||
aRead.getRead().setAttribute("NM", AlignmentUtils.numMismatches(aRead.getRead(), reference, aRead.getRead().getAlignmentStart()-(int)leftmostIndex));
|
aRead.getRead().setAttribute("NM", AlignmentUtils.numMismatches(aRead.getRead(), StringUtil.stringToBytes(reference), aRead.getRead().getAlignmentStart()-(int)leftmostIndex));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,118 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||||
|
|
||||||
|
import net.sf.samtools.*;
|
||||||
|
|
||||||
|
import java.util.TreeSet;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author ebanks
|
||||||
|
* SortingSAMFileWriter
|
||||||
|
*
|
||||||
|
* this class extends the samtools SAMFileWriter class and caches reads for N loci so that reads
|
||||||
|
* can be emitted out of order (provided they are within the N-locus window)
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class SortingSAMFileWriter implements SAMFileWriter {
|
||||||
|
|
||||||
|
// the base writer from Picard
|
||||||
|
private SAMFileWriter baseWriter;
|
||||||
|
|
||||||
|
// the window over which we agree to accumulate reads
|
||||||
|
private int window;
|
||||||
|
|
||||||
|
// the reads we are accumulating
|
||||||
|
private TreeSet<SAMRecord> cachedReads = new TreeSet<SAMRecord>(new SAMRecordCoordinateComparator());
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor
|
||||||
|
*
|
||||||
|
* @param baseWriter the real SAMFileWriter
|
||||||
|
* @param window the window over which we agree to store reads
|
||||||
|
*/
|
||||||
|
public SortingSAMFileWriter(SAMFileWriter baseWriter, int window) {
|
||||||
|
this.baseWriter = baseWriter;
|
||||||
|
this.window = window;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add a read to the writer for emission
|
||||||
|
*
|
||||||
|
* @param read the read to emit
|
||||||
|
*/
|
||||||
|
public void addAlignment(SAMRecord read) {
|
||||||
|
// at a new contig, clear the cache
|
||||||
|
if ( cachedReads.size() > 0 && cachedReads.first().getReferenceIndex() < read.getReferenceIndex() )
|
||||||
|
clearCache();
|
||||||
|
|
||||||
|
long currentPos = read.getAlignmentStart();
|
||||||
|
|
||||||
|
Iterator<SAMRecord> iter = cachedReads.iterator();
|
||||||
|
while ( iter.hasNext() ) {
|
||||||
|
SAMRecord cachedRead = iter.next();
|
||||||
|
if ( currentPos - cachedRead.getAlignmentStart() >= window ) {
|
||||||
|
baseWriter.addAlignment(cachedRead);
|
||||||
|
iter.remove();
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cachedReads.add(read);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add a list of reads to the writer for emission; the reads do NOT need to be sorted
|
||||||
|
*
|
||||||
|
* @param reads the reads to emit
|
||||||
|
*/
|
||||||
|
public void addAlignments(List<SAMRecord> reads) {
|
||||||
|
if ( reads.size() == 0 )
|
||||||
|
return;
|
||||||
|
|
||||||
|
// at a new contig, clear the cache
|
||||||
|
if ( cachedReads.size() > 0 && cachedReads.first().getReferenceIndex() < reads.get(0).getReferenceIndex() )
|
||||||
|
clearCache();
|
||||||
|
|
||||||
|
cachedReads.addAll(reads);
|
||||||
|
|
||||||
|
// get the last read in the cache
|
||||||
|
SAMRecord last = cachedReads.last();
|
||||||
|
|
||||||
|
long currentPos = last.getAlignmentStart();
|
||||||
|
|
||||||
|
Iterator<SAMRecord> iter = cachedReads.iterator();
|
||||||
|
while ( iter.hasNext() ) {
|
||||||
|
SAMRecord cachedRead = iter.next();
|
||||||
|
if ( currentPos - cachedRead.getAlignmentStart() >= window ) {
|
||||||
|
baseWriter.addAlignment(cachedRead);
|
||||||
|
iter.remove();
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the SAM file header
|
||||||
|
*/
|
||||||
|
public SAMFileHeader getFileHeader() {
|
||||||
|
return baseWriter.getFileHeader();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* close this writer by clearing the cache
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
clearCache();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void clearCache() {
|
||||||
|
Iterator<SAMRecord> iter = cachedReads.iterator();
|
||||||
|
while ( iter.hasNext() )
|
||||||
|
baseWriter.addAlignment(iter.next());
|
||||||
|
cachedReads.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -4,6 +4,7 @@ import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.Cigar;
|
import net.sf.samtools.Cigar;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
|
import net.sf.samtools.util.StringUtil;
|
||||||
import net.sf.picard.reference.ReferenceSequence;
|
import net.sf.picard.reference.ReferenceSequence;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.utils.pileup.*;
|
import org.broadinstitute.sting.utils.pileup.*;
|
||||||
|
|
@ -113,11 +114,11 @@ public class AlignmentUtils {
|
||||||
/** See {@link #numMismatches(SAMRecord, ReferenceSequence)}. This method implements same functionality
|
/** See {@link #numMismatches(SAMRecord, ReferenceSequence)}. This method implements same functionality
|
||||||
* for reference sequence specified as conventional java string (of bases). By default, it is assumed that
|
* for reference sequence specified as conventional java string (of bases). By default, it is assumed that
|
||||||
* the alignment starts at (1-based) position r.getAlignmentStart() on the reference <code>refSeq</code>.
|
* the alignment starts at (1-based) position r.getAlignmentStart() on the reference <code>refSeq</code>.
|
||||||
* See {@link #numMismatches(SAMRecord, String, int)} if this is not the case.
|
* See {@link #numMismatches(SAMRecord, byte[], int)} if this is not the case.
|
||||||
*/
|
*/
|
||||||
public static int numMismatches(SAMRecord r, String refSeq ) {
|
public static int numMismatches(SAMRecord r, String refSeq ) {
|
||||||
if ( r.getReadUnmappedFlag() ) return 1000000;
|
if ( r.getReadUnmappedFlag() ) return 1000000;
|
||||||
return numMismatches(r, refSeq, r.getAlignmentStart()-1);
|
return numMismatches(r, StringUtil.stringToBytes(refSeq), r.getAlignmentStart()-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns number of mismatches in the alignment <code>r</code> to the reference sequence
|
/** Returns number of mismatches in the alignment <code>r</code> to the reference sequence
|
||||||
|
|
@ -125,6 +126,8 @@ public class AlignmentUtils {
|
||||||
* specified reference sequence; in other words, <code>refIndex</code> is used in place of alignment's own
|
* specified reference sequence; in other words, <code>refIndex</code> is used in place of alignment's own
|
||||||
* getAlignmentStart() coordinate and the latter is never used. However, the structure of the alignment <code>r</code>
|
* getAlignmentStart() coordinate and the latter is never used. However, the structure of the alignment <code>r</code>
|
||||||
* (i.e. it's cigar string with all the insertions/deletions it may specify) is fully respected.
|
* (i.e. it's cigar string with all the insertions/deletions it may specify) is fully respected.
|
||||||
|
*
|
||||||
|
* THIS CODE ASSUMES THAT ALL BYTES COME FROM UPPERCASED CHARS.
|
||||||
*
|
*
|
||||||
* @param r alignment
|
* @param r alignment
|
||||||
* @param refSeq chunk of reference sequence that subsumes the alignment completely (if alignment runs out of
|
* @param refSeq chunk of reference sequence that subsumes the alignment completely (if alignment runs out of
|
||||||
|
|
@ -132,7 +135,7 @@ public class AlignmentUtils {
|
||||||
* @param refIndex zero-based position, at which the alignment starts on the specified reference string.
|
* @param refIndex zero-based position, at which the alignment starts on the specified reference string.
|
||||||
* @return the number of mismatches
|
* @return the number of mismatches
|
||||||
*/
|
*/
|
||||||
public static int numMismatches(SAMRecord r, String refSeq, int refIndex) {
|
public static int numMismatches(SAMRecord r, byte[] refSeq, int refIndex) {
|
||||||
int readIdx = 0;
|
int readIdx = 0;
|
||||||
int mismatches = 0;
|
int mismatches = 0;
|
||||||
byte[] readSeq = r.getReadBases();
|
byte[] readSeq = r.getReadBases();
|
||||||
|
|
@ -142,15 +145,15 @@ public class AlignmentUtils {
|
||||||
switch ( ce.getOperator() ) {
|
switch ( ce.getOperator() ) {
|
||||||
case M:
|
case M:
|
||||||
for (int j = 0 ; j < ce.getLength() ; j++, refIndex++, readIdx++ ) {
|
for (int j = 0 ; j < ce.getLength() ; j++, refIndex++, readIdx++ ) {
|
||||||
if ( refIndex >= refSeq.length() )
|
if ( refIndex >= refSeq.length )
|
||||||
continue;
|
continue;
|
||||||
char refChr = refSeq.charAt(refIndex);
|
byte refChr = refSeq[refIndex];
|
||||||
char readChr = (char)readSeq[readIdx];
|
byte readChr = readSeq[readIdx];
|
||||||
// Note: we need to count X/N's as mismatches because that's what SAM requires
|
// Note: we need to count X/N's as mismatches because that's what SAM requires
|
||||||
//if ( BaseUtils.simpleBaseToBaseIndex(readChr) == -1 ||
|
//if ( BaseUtils.simpleBaseToBaseIndex(readChr) == -1 ||
|
||||||
// BaseUtils.simpleBaseToBaseIndex(refChr) == -1 )
|
// BaseUtils.simpleBaseToBaseIndex(refChr) == -1 )
|
||||||
// continue; // do not count Ns/Xs/etc ?
|
// continue; // do not count Ns/Xs/etc ?
|
||||||
if ( Character.toUpperCase(readChr) != Character.toUpperCase(refChr) )
|
if ( readChr != refChr )
|
||||||
mismatches++;
|
mismatches++;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
|
||||||
|
|
@ -16,10 +16,6 @@ import java.util.Collections;
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
public class SWPairwiseAlignment {
|
public class SWPairwiseAlignment {
|
||||||
private String s1;
|
|
||||||
private String s2;
|
|
||||||
private int i1;
|
|
||||||
private int i2;
|
|
||||||
private int alignment_offset; // offset of s2 w/respect to s1
|
private int alignment_offset; // offset of s2 w/respect to s1
|
||||||
private Cigar alignmentCigar;
|
private Cigar alignmentCigar;
|
||||||
|
|
||||||
|
|
@ -28,428 +24,48 @@ public class SWPairwiseAlignment {
|
||||||
private double w_open;
|
private double w_open;
|
||||||
private double w_extend;
|
private double w_extend;
|
||||||
|
|
||||||
private int best_mm; // mismatch count
|
|
||||||
private static final int IMPOSSIBLE = 1000000000;
|
|
||||||
private static final int MSTATE = 0;
|
private static final int MSTATE = 0;
|
||||||
private static final int ISTATE = 1;
|
private static final int ISTATE = 1;
|
||||||
private static final int DSTATE = 2;
|
private static final int DSTATE = 2;
|
||||||
|
|
||||||
public SWPairwiseAlignment(String seq1, String seq2, int id1, int id2, double match, double mismatch, double open, double extend ) {
|
// ************************************************************************
|
||||||
s1 = seq1;
|
// **** IMPORTANT NOTE: ****
|
||||||
s2 = seq2;
|
// **** This class assumes that all bytes come from UPPERCASED chars! ****
|
||||||
i1 = id1;
|
// ************************************************************************
|
||||||
i2 = id2;
|
public SWPairwiseAlignment(byte[] seq1, byte[] seq2, double match, double mismatch, double open, double extend ) {
|
||||||
w_match = match;
|
w_match = match;
|
||||||
w_mismatch = mismatch;
|
w_mismatch = mismatch;
|
||||||
w_open = open;
|
w_open = open;
|
||||||
w_extend = extend;
|
w_extend = extend;
|
||||||
best_mm = IMPOSSIBLE;
|
align(seq1,seq2);
|
||||||
//next_mm = IMPOSSIBLE;
|
|
||||||
align4(s1,s2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public SWPairwiseAlignment(String seq1, String seq2, int id1, int id2) {
|
public SWPairwiseAlignment(byte[] seq1, byte[] seq2) {
|
||||||
this(seq1,seq2,id1,id2,1.0,-1.0/3.0,-1.0-1.0/3.0,-1.0/3.0); // match=1, mismatch = -1/3, gap=-(1+k/3)
|
this(seq1,seq2,1.0,-1.0/3.0,-1.0-1.0/3.0,-1.0/3.0); // match=1, mismatch = -1/3, gap=-(1+k/3)
|
||||||
}
|
|
||||||
|
|
||||||
/** Initializes the alignment with pair of sequences (that will be immediately aligned) and
|
|
||||||
* sets their external ids to -1. Such un-annotated pairwise alignment can not be added to MultipleAlignment.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
public SWPairwiseAlignment(String seq1, String seq2) {
|
|
||||||
this(seq1,seq2,-1,-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
public SWPairwiseAlignment(String seq1, String seq2, double match, double mismatch, double open, double extend) {
|
|
||||||
this(seq1,seq2,-1,-1,match,mismatch,open, extend);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public Cigar getCigar() { return alignmentCigar ; }
|
public Cigar getCigar() { return alignmentCigar ; }
|
||||||
|
|
||||||
public int getAlignmentStart2wrt1() { return alignment_offset; }
|
public int getAlignmentStart2wrt1() { return alignment_offset; }
|
||||||
|
|
||||||
public void align(String a, String b) {
|
public void align(final byte[] a, final byte[] b) {
|
||||||
int n = a.length();
|
int n = a.length;
|
||||||
int m = b.length();
|
int m = b.length;
|
||||||
int [][] sw = new int[n+1][m+1];
|
|
||||||
|
|
||||||
// build smith-waterman matrix:
|
|
||||||
for ( int i = 1 ; i < n+1 ; i++ ) {
|
|
||||||
char a_base = Character.toUpperCase(a.charAt(i-1)); // letter in a at the current pos
|
|
||||||
for ( int j = 1 ; j < m+1 ; j++ ) {
|
|
||||||
char b_base = Character.toUpperCase(b.charAt(j-1)); // letter in b at the current pos
|
|
||||||
int step_diag = sw[i-1][j-1] + w(a_base,b_base);
|
|
||||||
int step_down = sw[i-1][j]+w(a_base,'-');
|
|
||||||
int step_right = sw[i][j-1]+w('-',b_base);
|
|
||||||
|
|
||||||
sw[i][j] = Math.max(0, Math.max(step_diag,Math.max(step_down,step_right)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// print(sw,a,b);
|
|
||||||
|
|
||||||
PrimitivePair.Int p = new PrimitivePair.Int();
|
|
||||||
int maxscore = 0;
|
|
||||||
int segment_length = 0; // length of the segment (continuous matches, insertions or deletions)
|
|
||||||
|
|
||||||
// look for largest score. we use >= combined with the traversal direction
|
|
||||||
// to ensure that if two scores are equal, the one closer to diagonal gets picked
|
|
||||||
for ( int i = 1 ; i < n+1 ; i++ ) {
|
|
||||||
if ( sw[i][m] >= maxscore ) { p.first = i; p.second = m ; maxscore = sw[i][m]; }
|
|
||||||
}
|
|
||||||
for ( int j = 1 ; j < m+1 ; j++ ) {
|
|
||||||
if ( sw[n][j] > maxscore ||
|
|
||||||
sw[n][j] == maxscore && Math.abs(n-j) < Math.abs(p.first-p.second)) {
|
|
||||||
p.first = n;
|
|
||||||
p.second = j ;
|
|
||||||
maxscore = sw[n][j];
|
|
||||||
segment_length = m - j; // end of sequence 2 is overhanging; we will just record it as 'M' segment
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// System.out.println("\ni="+p.first+"; j="+p.second);
|
|
||||||
|
|
||||||
// p holds the position we start backtracking from; we will be assembling a cigar in the backwards order
|
|
||||||
|
|
||||||
|
|
||||||
// we will be placing all insertions and deletions into sequence b, so the state are named w/regard
|
|
||||||
// to that sequence
|
|
||||||
|
|
||||||
int state = MSTATE;
|
|
||||||
|
|
||||||
int [] scores = new int[3];
|
|
||||||
|
|
||||||
List<CigarElement> lce = new ArrayList<CigarElement>(5);
|
|
||||||
|
|
||||||
do {
|
|
||||||
scores[ISTATE] = sw[p.first][p.second-1]; // moving left: same base on a, prev base on b = insertion on b
|
|
||||||
scores[DSTATE] = sw[p.first-1][p.second]; // moving up: same base on b, prev base on a = deletion on b
|
|
||||||
scores[MSTATE] = sw[p.first-1][p.second-1]; // moving diagonal : mathc/mismatch
|
|
||||||
|
|
||||||
int new_state = findMaxInd(scores,MSTATE);
|
|
||||||
|
|
||||||
// move to next best location in the sw matrix:
|
|
||||||
switch( new_state ) {
|
|
||||||
case MSTATE: p.first--; p.second--; break;
|
|
||||||
case ISTATE: p.second--; break;
|
|
||||||
case DSTATE: p.first--; break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// now let's see if the state actually changed:
|
|
||||||
if ( new_state == state ) segment_length++;
|
|
||||||
else {
|
|
||||||
// state changed, lets emit previous segment, whatever it was (Insertion Deletion, or (Mis)Match).
|
|
||||||
CigarOperator o=null;
|
|
||||||
switch(state) {
|
|
||||||
case MSTATE: o = CigarOperator.M; break;
|
|
||||||
case ISTATE: o = CigarOperator.I; break;
|
|
||||||
case DSTATE: o = CigarOperator.D; break;
|
|
||||||
}
|
|
||||||
CigarElement e = new CigarElement(segment_length,o);
|
|
||||||
lce.add(e);
|
|
||||||
segment_length = 1;
|
|
||||||
state = new_state;
|
|
||||||
}
|
|
||||||
} while ( scores[state] != 0 );
|
|
||||||
|
|
||||||
// post-process the last segment we are still keeping
|
|
||||||
CigarOperator o=null;
|
|
||||||
|
|
||||||
switch(state) {
|
|
||||||
case MSTATE: o = CigarOperator.M; break;
|
|
||||||
case ISTATE: o = CigarOperator.I; break;
|
|
||||||
case DSTATE: o = CigarOperator.D; break;
|
|
||||||
}
|
|
||||||
|
|
||||||
alignment_offset = p.first - p.second;
|
|
||||||
segment_length+=p.second;
|
|
||||||
CigarElement e = new CigarElement(segment_length,o);
|
|
||||||
lce.add(e);
|
|
||||||
Collections.reverse(lce);
|
|
||||||
alignmentCigar = new Cigar(lce);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Allows for separate gap opening end extension penalties, no direct backtracking.
|
|
||||||
*
|
|
||||||
* @param a
|
|
||||||
* @param b
|
|
||||||
*/
|
|
||||||
public void align2(String a, String b) {
|
|
||||||
int n = a.length();
|
|
||||||
int m = b.length();
|
|
||||||
double [][] sw = new double[n+1][m+1];
|
|
||||||
|
|
||||||
// build smith-waterman matrix:
|
|
||||||
for ( int i = 1 ; i < n+1 ; i++ ) {
|
|
||||||
char a_base = Character.toUpperCase(a.charAt(i-1)); // letter in a at the current pos
|
|
||||||
for ( int j = 1 ; j < m+1 ; j++ ) {
|
|
||||||
char b_base = Character.toUpperCase(b.charAt(j-1)); // letter in b at the current pos
|
|
||||||
double step_diag = sw[i-1][j-1] + wd(a_base,b_base);
|
|
||||||
double step_down = 0.0 ;
|
|
||||||
for ( int k = 1 ; k < i ; k++ ) step_down = Math.max(step_down,sw[i-k][j]+wk(a_base,'-',k));
|
|
||||||
|
|
||||||
double step_right = 0;
|
|
||||||
for ( int k = 1 ; k < j ; k++ ) step_right = Math.max(step_right,sw[i][j-k]+wk('-',b_base,k));
|
|
||||||
|
|
||||||
sw[i][j] = Math.max(0, Math.max(step_diag,Math.max(step_down,step_right)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// print(sw,a,b);
|
|
||||||
|
|
||||||
PrimitivePair.Int p = new PrimitivePair.Int();
|
|
||||||
double maxscore = 0.0;
|
|
||||||
int segment_length = 0; // length of the segment (continuous matches, insertions or deletions)
|
|
||||||
|
|
||||||
// look for largest score. we use >= combined with the traversal direction
|
|
||||||
// to ensure that if two scores are equal, the one closer to diagonal gets picked
|
|
||||||
for ( int i = 1 ; i < n+1 ; i++ ) {
|
|
||||||
if ( sw[i][m] >= maxscore ) { p.first = i; p.second = m ; maxscore = sw[i][m]; }
|
|
||||||
}
|
|
||||||
for ( int j = 1 ; j < m+1 ; j++ ) {
|
|
||||||
if ( sw[n][j] > maxscore ||
|
|
||||||
sw[n][j] == maxscore && Math.abs(n-j) < Math.abs(p.first-p.second)) {
|
|
||||||
p.first = n;
|
|
||||||
p.second = j ;
|
|
||||||
maxscore = sw[n][j];
|
|
||||||
segment_length = m - j; // end of sequence 2 is overhanging; we will just record it as 'M' segment
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// System.out.println("\ni="+p.first+"; j="+p.second);
|
|
||||||
|
|
||||||
// p holds the position we start backtracking from; we will be assembling a cigar in the backwards order
|
|
||||||
|
|
||||||
|
|
||||||
// we will be placing all insertions and deletions into sequence b, so the state are named w/regard
|
|
||||||
// to that sequence
|
|
||||||
|
|
||||||
int state = MSTATE;
|
|
||||||
|
|
||||||
double [] scores = new double[3];
|
|
||||||
|
|
||||||
List<CigarElement> lce = new ArrayList<CigarElement>(5);
|
|
||||||
|
|
||||||
do {
|
|
||||||
// moving left: same base on a, prev base on b = insertion on b:
|
|
||||||
scores[ISTATE] = sw[p.first][p.second-1] ;
|
|
||||||
scores[DSTATE] = sw[p.first - 1][p.second];
|
|
||||||
scores[MSTATE] = sw[p.first-1][p.second-1]; // moving diagonal : match/mismatch
|
|
||||||
|
|
||||||
// System.out.println("i = " + p.first + " ; j = " + p.second);
|
|
||||||
// System.out.println("s(M)="+scores[MSTATE]+"; s(D)="+scores[DSTATE]+"; s(I)=" + scores[ISTATE]);
|
|
||||||
int new_state = findMaxInd(scores,MSTATE);
|
|
||||||
|
|
||||||
// move to next best location in the sw matrix:
|
|
||||||
switch( new_state ) {
|
|
||||||
case MSTATE: p.first--; p.second--; break;
|
|
||||||
case ISTATE: p.second--; break;
|
|
||||||
case DSTATE: p.first--; break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// now let's see if the state actually changed:
|
|
||||||
if ( new_state == state ) segment_length++;
|
|
||||||
else {
|
|
||||||
// state changed, lets emit previous segment, whatever it was (Insertion Deletion, or (Mis)Match).
|
|
||||||
CigarOperator o=null;
|
|
||||||
switch(state) {
|
|
||||||
case MSTATE: o = CigarOperator.M; break;
|
|
||||||
case ISTATE: o = CigarOperator.I; break;
|
|
||||||
case DSTATE: o = CigarOperator.D; break;
|
|
||||||
}
|
|
||||||
CigarElement e = new CigarElement(segment_length,o);
|
|
||||||
lce.add(e);
|
|
||||||
segment_length = 1;
|
|
||||||
state = new_state;
|
|
||||||
}
|
|
||||||
} while ( scores[state] != 0 );
|
|
||||||
|
|
||||||
// post-process the last segment we are still keeping
|
|
||||||
CigarOperator o=null;
|
|
||||||
switch(state) {
|
|
||||||
case MSTATE: o = CigarOperator.M; break;
|
|
||||||
case ISTATE: o = CigarOperator.I; break;
|
|
||||||
case DSTATE: o = CigarOperator.D; break;
|
|
||||||
}
|
|
||||||
alignment_offset = p.first - p.second;
|
|
||||||
segment_length+=p.second;
|
|
||||||
CigarElement e = new CigarElement(segment_length,o);
|
|
||||||
lce.add(e);
|
|
||||||
Collections.reverse(lce);
|
|
||||||
alignmentCigar = new Cigar(lce);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/** Allows for separate gap opening and extension penalties, with backtracking.
|
|
||||||
*
|
|
||||||
* @param a
|
|
||||||
* @param b
|
|
||||||
*/
|
|
||||||
public void align3(String a, String b) {
|
|
||||||
int n = a.length();
|
|
||||||
int m = b.length();
|
|
||||||
double [][] sw = new double[n+1][m+1];
|
|
||||||
|
|
||||||
int [][] btrack = new int[n+1][m+1];
|
|
||||||
|
|
||||||
// build smith-waterman matrix and keep backtrack info:
|
|
||||||
for ( int i = 1 ; i < n+1 ; i++ ) {
|
|
||||||
char a_base = Character.toUpperCase(a.charAt(i-1)); // letter in a at the current pos
|
|
||||||
for ( int j = 1 ; j < m+1 ; j++ ) {
|
|
||||||
char b_base = Character.toUpperCase(b.charAt(j-1)); // letter in b at the current pos
|
|
||||||
double step_diag = sw[i-1][j-1] + wd(a_base,b_base);
|
|
||||||
double step_down = 0.0 ;
|
|
||||||
int kd = 0;
|
|
||||||
for ( int k = 1 ; k < i ; k++ ) {
|
|
||||||
if ( step_down < sw[i-k][j]+wk(a_base,'-',k) ) {
|
|
||||||
step_down=sw[i-k][j]+wk(a_base,'-',k);
|
|
||||||
kd = k;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
double step_right = 0;
|
|
||||||
int ki = 0;
|
|
||||||
for ( int k = 1 ; k < j ; k++ ) {
|
|
||||||
if ( step_right < sw[i][j-k]+wk('-',b_base,k) ) {
|
|
||||||
step_right=sw[i][j-k]+wk('-',b_base,k);
|
|
||||||
ki = k;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( step_down > step_right ) {
|
|
||||||
if ( step_down > step_diag ) {
|
|
||||||
sw[i][j] = Math.max(0,step_down);
|
|
||||||
btrack[i][j] = kd; // positive=vertical
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
sw[i][j] = Math.max(0,step_diag);
|
|
||||||
btrack[i][j] = 0; // 0 = diagonal
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// step_down < step_right
|
|
||||||
if ( step_right > step_diag ) {
|
|
||||||
sw[i][j] = Math.max(0,step_right);
|
|
||||||
btrack[i][j] = -ki; // negative = horizontal
|
|
||||||
} else {
|
|
||||||
sw[i][j] = Math.max(0,step_diag);
|
|
||||||
btrack[i][j] = 0; // 0 = diagonal
|
|
||||||
}
|
|
||||||
}
|
|
||||||
sw[i][j] = Math.max(0, Math.max(step_diag,Math.max(step_down,step_right)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// print(sw,a,b);
|
|
||||||
|
|
||||||
PrimitivePair.Int p = new PrimitivePair.Int();
|
|
||||||
double maxscore = 0.0;
|
|
||||||
int segment_length = 0; // length of the segment (continuous matches, insertions or deletions)
|
|
||||||
|
|
||||||
// look for largest score. we use >= combined with the traversal direction
|
|
||||||
// to ensure that if two scores are equal, the one closer to diagonal gets picked
|
|
||||||
for ( int i = 1 ; i < n+1 ; i++ ) {
|
|
||||||
if ( sw[i][m] >= maxscore ) { p.first = i; p.second = m ; maxscore = sw[i][m]; }
|
|
||||||
}
|
|
||||||
for ( int j = 1 ; j < m+1 ; j++ ) {
|
|
||||||
if ( sw[n][j] > maxscore ||
|
|
||||||
sw[n][j] == maxscore && Math.abs(n-j) < Math.abs(p.first-p.second)) {
|
|
||||||
p.first = n;
|
|
||||||
p.second = j ;
|
|
||||||
maxscore = sw[n][j];
|
|
||||||
segment_length = m - j ; // end of sequence 2 is overhanging; we will just record it as 'M' segment
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
System.out.println("\ni="+p.first+"; j="+p.second);
|
|
||||||
|
|
||||||
// p holds the position we start backtracking from; we will be assembling a cigar in the backwards order
|
|
||||||
|
|
||||||
|
|
||||||
// we will be placing all insertions and deletions into sequence b, so the state are named w/regard
|
|
||||||
// to that sequence
|
|
||||||
|
|
||||||
int state = MSTATE;
|
|
||||||
|
|
||||||
double [] scores = new double[3];
|
|
||||||
|
|
||||||
List<CigarElement> lce = new ArrayList<CigarElement>(5);
|
|
||||||
|
|
||||||
do {
|
|
||||||
|
|
||||||
int btr = btrack[p.first][p.second];
|
|
||||||
int step_left = ( btr < 0 ? -btr : 1);
|
|
||||||
int step_up = ( btr > 0 ? btr : 1 );
|
|
||||||
|
|
||||||
// moving left: same base on a, prev base on b = insertion on b:
|
|
||||||
scores[ISTATE] = sw[p.first][p.second-step_left] ;
|
|
||||||
scores[DSTATE] = sw[p.first - step_up][p.second];
|
|
||||||
scores[MSTATE] = sw[p.first-1][p.second-1]; // moving diagonal : match/mismatch
|
|
||||||
|
|
||||||
// System.out.println("i = " + p.first + " ; j = " + p.second);
|
|
||||||
// System.out.println("s(M)="+scores[MSTATE]+"; s(D)="+scores[DSTATE]+"; s(I)=" + scores[ISTATE]);
|
|
||||||
int new_state = findMaxInd(scores,MSTATE);
|
|
||||||
|
|
||||||
int step_length = 1;
|
|
||||||
|
|
||||||
// move to next best location in the sw matrix:
|
|
||||||
switch( new_state ) {
|
|
||||||
case MSTATE: p.first--; p.second--; break;
|
|
||||||
case ISTATE: p.second-=step_left; step_length = step_left; break;
|
|
||||||
case DSTATE: p.first-=step_up; step_length = step_up; break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// now let's see if the state actually changed:
|
|
||||||
if ( new_state == state ) segment_length+=step_length;
|
|
||||||
else {
|
|
||||||
// state changed, lets emit previous segment, whatever it was (Insertion Deletion, or (Mis)Match).
|
|
||||||
CigarOperator o=null;
|
|
||||||
switch(state) {
|
|
||||||
case MSTATE: o = CigarOperator.M; break;
|
|
||||||
case ISTATE: o = CigarOperator.I; break;
|
|
||||||
case DSTATE: o = CigarOperator.D; break;
|
|
||||||
}
|
|
||||||
CigarElement e = new CigarElement(segment_length,o);
|
|
||||||
lce.add(e);
|
|
||||||
segment_length = step_length;
|
|
||||||
state = new_state;
|
|
||||||
}
|
|
||||||
} while ( scores[state] != 0 );
|
|
||||||
|
|
||||||
// post-process the last segment we are still keeping
|
|
||||||
CigarOperator o=null;
|
|
||||||
switch(state) {
|
|
||||||
case MSTATE: o = CigarOperator.M; break;
|
|
||||||
case ISTATE: o = CigarOperator.I; break;
|
|
||||||
case DSTATE: o = CigarOperator.D; break;
|
|
||||||
}
|
|
||||||
alignment_offset = p.first - p.second;
|
|
||||||
segment_length+=p.second;
|
|
||||||
CigarElement e = new CigarElement(segment_length,o);
|
|
||||||
lce.add(e);
|
|
||||||
Collections.reverse(lce);
|
|
||||||
alignmentCigar = new Cigar(lce);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void align4(String a, String b) {
|
|
||||||
int n = a.length();
|
|
||||||
int m = b.length();
|
|
||||||
double [][] sw = new double[n+1][m+1];
|
double [][] sw = new double[n+1][m+1];
|
||||||
|
|
||||||
int [][] btrack = new int[n+1][m+1];
|
int [][] btrack = new int[n+1][m+1];
|
||||||
|
|
||||||
// build smith-waterman matrix and keep backtrack info:
|
// build smith-waterman matrix and keep backtrack info:
|
||||||
for ( int i = 1 ; i < n+1 ; i++ ) {
|
for ( int i = 1 ; i < n+1 ; i++ ) {
|
||||||
char a_base = Character.toUpperCase(a.charAt(i-1)); // letter in a at the current pos
|
byte a_base = a[i-1]; // letter in a at the current pos
|
||||||
for ( int j = 1 ; j < m+1 ; j++ ) {
|
for ( int j = 1 ; j < m+1 ; j++ ) {
|
||||||
char b_base = Character.toUpperCase(b.charAt(j-1)); // letter in b at the current pos
|
byte b_base = b[j-1]; // letter in b at the current pos
|
||||||
double step_diag = sw[i-1][j-1] + wd(a_base,b_base);
|
double step_diag = sw[i-1][j-1] + wd(a_base,b_base);
|
||||||
double step_down = 0.0 ;
|
double step_down = 0.0 ;
|
||||||
int kd = 0;
|
int kd = 0;
|
||||||
for ( int k = 1 ; k < i ; k++ ) {
|
for ( int k = 1 ; k < i ; k++ ) {
|
||||||
if ( step_down < sw[i-k][j]+wk(a_base,'-',k) ) {
|
if ( step_down < sw[i-k][j]+wk(k) ) {
|
||||||
step_down=sw[i-k][j]+wk(a_base,'-',k);
|
step_down=sw[i-k][j]+wk(k);
|
||||||
kd = k;
|
kd = k;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -457,8 +73,8 @@ public void align3(String a, String b) {
|
||||||
double step_right = 0;
|
double step_right = 0;
|
||||||
int ki = 0;
|
int ki = 0;
|
||||||
for ( int k = 1 ; k < j ; k++ ) {
|
for ( int k = 1 ; k < j ; k++ ) {
|
||||||
if ( step_right < sw[i][j-k]+wk('-',b_base,k) ) {
|
if ( step_right < sw[i][j-k]+wk(k) ) {
|
||||||
step_right=sw[i][j-k]+wk('-',b_base,k);
|
step_right=sw[i][j-k]+wk(k);
|
||||||
ki = k;
|
ki = k;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -517,8 +133,6 @@ public void align3(String a, String b) {
|
||||||
|
|
||||||
int state = MSTATE;
|
int state = MSTATE;
|
||||||
|
|
||||||
double [] scores = new double[3];
|
|
||||||
|
|
||||||
List<CigarElement> lce = new ArrayList<CigarElement>(5);
|
List<CigarElement> lce = new ArrayList<CigarElement>(5);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
|
|
@ -527,7 +141,7 @@ public void align3(String a, String b) {
|
||||||
int step_left = ( btr < 0 ? -btr : 1);
|
int step_left = ( btr < 0 ? -btr : 1);
|
||||||
int step_up = ( btr > 0 ? btr : 1 );
|
int step_up = ( btr > 0 ? btr : 1 );
|
||||||
|
|
||||||
int new_state = -1;
|
int new_state;
|
||||||
if ( btr > 0 ) new_state = DSTATE;
|
if ( btr > 0 ) new_state = DSTATE;
|
||||||
else if ( btr < 0 ) new_state = ISTATE;
|
else if ( btr < 0 ) new_state = ISTATE;
|
||||||
else new_state = MSTATE;
|
else new_state = MSTATE;
|
||||||
|
|
@ -573,51 +187,13 @@ public void align3(String a, String b) {
|
||||||
alignmentCigar = new Cigar(lce);
|
alignmentCigar = new Cigar(lce);
|
||||||
}
|
}
|
||||||
|
|
||||||
private int w(char x, char y) {
|
private double wd(byte x, byte y) {
|
||||||
if ( x == y ) return 2; // match
|
|
||||||
if ( x == '-' || y == '-' ) return -1; // gap
|
|
||||||
return -1; // mismatch
|
|
||||||
}
|
|
||||||
|
|
||||||
private double wd ( char x, char y ) {
|
|
||||||
if ( x== y ) return w_match;
|
if ( x== y ) return w_match;
|
||||||
else return w_mismatch;
|
else return w_mismatch;
|
||||||
}
|
}
|
||||||
|
|
||||||
private double wk(char x, char y, int k) {
|
private double wk(int k) {
|
||||||
return w_open+(k-1)*w_extend; // gap
|
return w_open+(k-1)*w_extend; // gap
|
||||||
// return -1.0 ; // no extension penalty
|
|
||||||
// return -1.0-Math.log(k+1); // weak extension penalty
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns index of the maximum element in array s. If there is a tie, and one of the tied indices is
|
|
||||||
* pref_id, then it will be preferred and returned.
|
|
||||||
* @param s
|
|
||||||
* @param pref_id
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
private int findMaxInd(int[] s, int pref_id) {
|
|
||||||
int imax = 0;
|
|
||||||
int maxval = s[0];
|
|
||||||
for ( int i = 1; i < s.length ; i++ ) {
|
|
||||||
if ( s[i] > maxval || i == pref_id && Math.abs(s[i] - maxval) < 0.0001 ) {
|
|
||||||
imax = i;
|
|
||||||
maxval = s[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return imax;
|
|
||||||
}
|
|
||||||
|
|
||||||
private int findMaxInd(double[] s, int pref_id) {
|
|
||||||
int imax = 0;
|
|
||||||
double maxval = s[0];
|
|
||||||
for ( int i = 1; i < s.length ; i++ ) {
|
|
||||||
if ( s[i] > maxval + 0.0001 || i == pref_id && Math.abs(s[i] - maxval) < 0.0001 ) {
|
|
||||||
imax = i;
|
|
||||||
maxval = s[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return imax;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void print(int[][] s) {
|
private void print(int[][] s) {
|
||||||
|
|
@ -673,137 +249,4 @@ public void align3(String a, String b) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
StringBuilder bmm = new StringBuilder();
|
|
||||||
StringBuilder b1 = new StringBuilder();
|
|
||||||
StringBuilder b2 = new StringBuilder();
|
|
||||||
|
|
||||||
int pos1 = 0;
|
|
||||||
int pos2 = 0;
|
|
||||||
if ( alignment_offset < 0 ) {
|
|
||||||
for ( ; pos2 < -alignment_offset ; pos2++ ) {
|
|
||||||
b1.append(' ');
|
|
||||||
b2.append(s2.charAt(pos2));
|
|
||||||
bmm.append(' ');
|
|
||||||
}
|
|
||||||
// now pos2 = -alignment_offset;
|
|
||||||
} else {
|
|
||||||
for ( ; pos1 < alignment_offset ; pos1++ ) {
|
|
||||||
b2.append(' ');
|
|
||||||
b1.append(s1.charAt(pos1));
|
|
||||||
bmm.append(' ');
|
|
||||||
}
|
|
||||||
// now pos1 = alignment_offset
|
|
||||||
}
|
|
||||||
/* debug prints: */
|
|
||||||
// System.out.println(AlignmentUtils.toString(getCigar()));
|
|
||||||
// System.out.println("seq1l="+s1.length()+"; seq2l=" + s2.length());
|
|
||||||
// System.out.println("offset="+alignment_offset);
|
|
||||||
// System.out.println("pos1="+pos1+"; pos2=" + pos2);
|
|
||||||
/**/
|
|
||||||
for ( int i = 0 ; i < getCigar().numCigarElements() ; i++ ) {
|
|
||||||
CigarElement ce = getCigar().getCigarElement(i) ;
|
|
||||||
switch( ce.getOperator() ) {
|
|
||||||
case M:
|
|
||||||
int z = ( i == 0 ? pos2 : 0); // if we are in the first element and seq overhangs to the left,
|
|
||||||
// start inside the first segment, at the position where actual matches begin
|
|
||||||
// check separately for pos1 < s1.length() since seq2 is allowed to overhang beyond seq1's end
|
|
||||||
for ( ; z < ce.getLength() && pos1 < s1.length() ; z++ ) {
|
|
||||||
// System.out.println("pos1="+pos1+"; pos2="+pos2+"; k="+z);
|
|
||||||
if ( Character.toUpperCase(s1.charAt(pos1)) !=
|
|
||||||
Character.toUpperCase(s2.charAt(pos2)) ) bmm.append('*');
|
|
||||||
else bmm.append(' ');
|
|
||||||
b1.append(s1.charAt(pos1++));
|
|
||||||
b2.append(s2.charAt(pos2++));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case I:
|
|
||||||
for ( int k = 0 ; k < ce.getLength() ; k++ ) {
|
|
||||||
b1.append('+');
|
|
||||||
bmm.append('+');
|
|
||||||
b2.append(s2.charAt(pos2++));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case D:
|
|
||||||
for ( int k = 0 ; k < ce.getLength() ; k++ ) {
|
|
||||||
b1.append(s1.charAt(pos1++));
|
|
||||||
bmm.append('-');
|
|
||||||
b2.append('-');
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bmm.append('\n');
|
|
||||||
b1.append(s1,pos1,s1.length());
|
|
||||||
bmm.append(b1);
|
|
||||||
bmm.append('\n');
|
|
||||||
b2.append(s2,pos2,s2.length());
|
|
||||||
bmm.append(b2);
|
|
||||||
bmm.append('\n');
|
|
||||||
|
|
||||||
return bmm.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void testMe() {
|
|
||||||
/* String s1 = "ACCTGGTGTATATAGGGTAAGGCTGAT";
|
|
||||||
String s2 = "TGTATATAGGGTAAGG";
|
|
||||||
|
|
||||||
testMe(s1,s2);
|
|
||||||
|
|
||||||
s1 = "GGTAAGGC";
|
|
||||||
s2 = "GGTCTCAA";
|
|
||||||
|
|
||||||
testMe(s1,s2);
|
|
||||||
|
|
||||||
s1 = "ACCTGGTGTATATAGGGTAAGGCTGAT";
|
|
||||||
s2 = "TGTTAGGGTCTCAAGG";
|
|
||||||
testMe(s1,s2);
|
|
||||||
|
|
||||||
|
|
||||||
s1 = "ACCTGGTGTATATAGGGTAAGGCTGAT";
|
|
||||||
s2 = "TAGGGTAAGGCTGATCCATGTACCG" ;
|
|
||||||
testMe(s1,s2);
|
|
||||||
|
|
||||||
s1 = "ACCTGGTGTATATAGGGTAAGGCTGAT";
|
|
||||||
s2 = "CCGTATCATTACCTGGTGTATATAGG";
|
|
||||||
testMe(s1,s2);
|
|
||||||
|
|
||||||
s1 = "GGTGTATATAGGGT" ;
|
|
||||||
s2 = "TGTTAGGG";
|
|
||||||
testMe(s1,s2);
|
|
||||||
|
|
||||||
s1 = "AGACAGAGAGAAGG";
|
|
||||||
s2 = "AGACAGAGAAGG";
|
|
||||||
testMe(s1,s2);
|
|
||||||
*/
|
|
||||||
// String s1 = "CCAGCACACAGGTATCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTGTTTTTTGA";
|
|
||||||
// String s2 = "CCAGCACACATCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTGTTTTTTGA";
|
|
||||||
|
|
||||||
// String s1 = "CCCATCTGTCTCCAATCTGCTGTTTTCCAAAAATTAGGGAACTTCAGTTTTCCCTTTGATACTCTGTATTTCTACCAACCACAACGCCAGGGCTGTCCTGCTTCTACAAGTGACAATGACAAATATAGGCCTGAAGGAAGATG";
|
|
||||||
// String s2 = "AAAATCTGTTTCCAATCTACTGTTTTCCAAAAATTAGGGAAGTTCAGTTTTCCCTTTGATACTCTGTTTCTACCAATCC";
|
|
||||||
String s1 = "CCCATCTGTCTCCAATCTGCTGTTTTCCAAAAATTAGGGAACTTCAGTTTTCCCTTTGATACTCTGTATTTCTACCAACCACAACGCCAGGGCTGTCCTGCTTCTACAAGTGACAATGACAAATATAGGCCTGAAGGAAGATG";
|
|
||||||
String s2 = "AAAATCTGTCTCCAATCTACTGTTTTCCAAAAATTAGGGAAGTTCAGTTTTCCCTTTGATACTCTGTTTCTACCAATCC";
|
|
||||||
testMe(s1,s2);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void testMe(String s1, String s2) {
|
|
||||||
|
|
||||||
SWPairwiseAlignment swpa = new SWPairwiseAlignment(s1,s2,3.0,-1.0,-4,-0.5);
|
|
||||||
|
|
||||||
System.out.println(AlignmentUtils.toString(swpa.getCigar()));
|
|
||||||
// SequencePile sp = new SequencePile(s1);
|
|
||||||
// sp.addAlignedSequence(s2,false,swpa.getCigar(),swpa.getAlignmentStart2wrt1());
|
|
||||||
// System.out.println();
|
|
||||||
// System.out.println(sp.format());
|
|
||||||
|
|
||||||
System.out.println("--------\n"+swpa.toString());
|
|
||||||
|
|
||||||
//sp.colorprint(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void main(String argv[]) {
|
|
||||||
if ( argv.length > 0 ) testMe(argv[0],argv[1]);
|
|
||||||
else testMe();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue