git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@146 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
046cecb067
commit
835e85374e
|
|
@ -0,0 +1,122 @@
|
|||
package org.broadinstitute.sting.indels;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Mar 21, 2009
|
||||
* Time: 4:25:12 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ConsensusSequence {
|
||||
private List< int[] > coverage; // counts of observations of every nucleotide at each position
|
||||
private long referencePos;// (arbitrary) reference position; when adding sequences, their offsets should be wrt this position
|
||||
private int startOffset; // offset of the leftmost base of this consensus sequence wrt referencePos; negative=left, positive=right
|
||||
private final static int NUMBINS = 4;
|
||||
|
||||
public ConsensusSequence(int refPos) {
|
||||
coverage = new ArrayList< int[] >();
|
||||
referencePos = refPos;
|
||||
startOffset = 0;
|
||||
}
|
||||
|
||||
public ConsensusSequence() {
|
||||
this(0);
|
||||
}
|
||||
|
||||
/** Adds sequence <code>se</code> to the consensus, in the sense that all bases of seq are counted
|
||||
* into observed coverage kept by the consensus. The position of the sequence is specified by the
|
||||
* <code>offset</code> with respect to the fixed reference position of the consensus (the latter does not
|
||||
* have to be consensus start), and if the sequence extends beyound the consensus on either end, the
|
||||
* consensus will be extended appropriately to accomodate the full sequence.
|
||||
* @param seq nucleotide sequence ('ACGT...')
|
||||
* @param offset position of the start of the sequence relative to the fixed reference position of the consensus
|
||||
*/
|
||||
public void addSequence(String seq, int offset) {
|
||||
// if sequence starts before than the currently held consensus oes, extend consensus to the left
|
||||
if ( offset < startOffset ) {
|
||||
coverage.addAll(0,instantiateCoverageList(startOffset-offset));
|
||||
startOffset = offset;
|
||||
}
|
||||
// if the sequence ends beyound the currently held consensus, extend consensus to the right
|
||||
if ( offset + seq.length() > startOffset + coverage.size() ) {
|
||||
coverage.addAll( instantiateCoverageList(offset+seq.length() - startOffset - coverage.size()) );
|
||||
}
|
||||
|
||||
// count bases from the sequence into the coverage
|
||||
int posOnConsensus = offset - startOffset;
|
||||
for ( int i = 0 ; i < seq.length() ; i++, posOnConsensus++ ) {
|
||||
coverage.get(posOnConsensus)[baseToInt(seq.charAt(i))]++;
|
||||
}
|
||||
}
|
||||
|
||||
/** Removes sequence <code>seq</code> from the consensus. More exactly, 1 will be subtracted from current
|
||||
* observation counts kept by the consensus for each observed base at every position of the sequence. The
|
||||
* position of the sequence is specified by the <code>offset</code> with respect to the reference position
|
||||
* of the consensus. NOTE: this method is unchecked and does not verify that the sequence being subtracted
|
||||
* was indeed previously added to the consensus and/or that the consenus does accomodate full length of
|
||||
* the sequence. If it is not the case, the results can be unpredictable or assert failure may occur.
|
||||
*
|
||||
* @param seq nucleotide sequence ('ACGT...')
|
||||
* @param offset position of the start of the sequence relative to the fixed reference position of the consensus
|
||||
*/
|
||||
public void removeSequence(String seq, int offset) {
|
||||
assert offset >= startOffset :
|
||||
"Attempt to remove from consensus a sequence that starts prior to consenus start";
|
||||
assert (offset+seq.length() < startOffset + coverage.size()) :
|
||||
"Attempt to remove from consensus a sequence that extends beyond consensus end";
|
||||
// subtract sequence bases from the coverage
|
||||
int posOnConsensus = offset - startOffset;
|
||||
for ( int i = 0 ; i < seq.length() ; i++, posOnConsensus++ ) {
|
||||
coverage.get(posOnConsensus)[ baseToInt(seq.charAt(i)) ]--;
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns the "distance" (score measuring the agreement) from the currently held consensus sequence to
|
||||
* the specified sequence <code>seq</code> starting at position <code>offset</code> wrt consenus reference position.
|
||||
* @param seq
|
||||
* @param offset
|
||||
* @return
|
||||
*/
|
||||
public double distance(String seq, int offset) {
|
||||
int posOnConsensus; // index into the currently held consensus sequence
|
||||
int i ; // index into the passed sequence argument
|
||||
if ( offset < startOffset ) {
|
||||
posOnConsensus = 0;
|
||||
i = startOffset - offset;
|
||||
} else {
|
||||
i = 0 ;
|
||||
posOnConsensus = offset - startOffset;
|
||||
}
|
||||
// stop position on the passed sequence (can be less than sequence length if consensus stops prematurely)
|
||||
int stop = Math.min(offset+seq.length(), startOffset+coverage.size() ) - offset;
|
||||
|
||||
for ( ; i < stop ; i++, posOnConsensus++ ) {
|
||||
int base = baseToInt(seq.charAt(posOnConsensus));
|
||||
int [] cov = coverage.get(posOnConsensus);
|
||||
int totalcov = cov[0]+cov[1]+cov[2]+cov[3];
|
||||
|
||||
}
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
private List<int[]> instantiateCoverageList(int n) {
|
||||
List< int[] > subseq = new ArrayList<int[] >(n);
|
||||
for ( int i = 0 ; i < n ; i++ ) subseq.add(new int[NUMBINS]);
|
||||
return subseq;
|
||||
}
|
||||
|
||||
private int baseToInt(char c) {
|
||||
int base;
|
||||
switch( Character.toUpperCase(c) ) {
|
||||
case 'A': base = 0; break;
|
||||
case 'C': base = 1; break;
|
||||
case 'G': base = 2; break;
|
||||
case 'T': base = 3; break;
|
||||
default : throw new IllegalArgumentException("Sequence can contain only ACGT symbols");
|
||||
}
|
||||
return base;
|
||||
}
|
||||
}
|
||||
|
|
@ -60,8 +60,9 @@ public class IndelInspector extends CommandLineProgram {
|
|||
ReferenceSequence contig_seq = null;
|
||||
|
||||
IndelRecordPileCollector col = null;
|
||||
PileBuilder pileBuilder = new PileBuilder();
|
||||
try {
|
||||
col = new IndelRecordPileCollector(new DiscardingReceiver(), new PileBuilder() );
|
||||
col = new IndelRecordPileCollector(new DiscardingReceiver(), pileBuilder );
|
||||
} catch(Exception e) { System.err.println(e.getMessage()); }
|
||||
if ( col == null ) return 1;
|
||||
|
||||
|
|
@ -78,6 +79,9 @@ public class IndelInspector extends CommandLineProgram {
|
|||
if ( location != null && GenomeLoc.compareContigs(cur_contig, location.getContig()) == 1 ) break;
|
||||
if ( location == null || GenomeLoc.compareContigs(cur_contig, location.getContig()) == 0 ) {
|
||||
contig_seq = reference.get(r.getReferenceIndex());
|
||||
String refstr = new String(contig_seq.getBases());
|
||||
col.setReferenceSequence(refstr);
|
||||
pileBuilder.setReferenceSequence(refstr);
|
||||
System.out.println("loaded contig "+cur_contig+" (index="+r.getReferenceIndex()+"); length="+contig_seq.getBases().length+" tst="+contig_seq.toString());
|
||||
}
|
||||
}
|
||||
|
|
@ -85,9 +89,12 @@ public class IndelInspector extends CommandLineProgram {
|
|||
// if contig is specified and wqe did not reach it yet, skip the records until we reach that contig:
|
||||
if ( location != null && GenomeLoc.compareContigs(cur_contig, location.getContig()) == -1 ) continue;
|
||||
|
||||
|
||||
if ( location != null && r.getAlignmentEnd() < location.getStart() ) continue;
|
||||
|
||||
// if stop position is specified and we are past that, stop reading:
|
||||
if ( location != null && r.getAlignmentStart() > location.getStop() ) break;
|
||||
|
||||
|
||||
if ( cur_contig.equals("chrM") || GenomeLoc.compareContigs(cur_contig,"chrY")==1 ) continue; // skip chrM and unplaced contigs for now
|
||||
|
||||
int err = -1;
|
||||
|
|
|
|||
|
|
@ -89,6 +89,8 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
|||
private RecordReceiver defaultReceiver; // we will send there records that do not overlap with regions of interest
|
||||
private RecordPileReceiver indelPileReceiver; // piles over indel regions will be sent there
|
||||
|
||||
private String referenceSequence;
|
||||
|
||||
public String memStatsString() {
|
||||
String s = "mRecordPile: ";
|
||||
return s+mRecordPile.size() + " mAllIndels: "+mAllIndels.size() + " mLastContig=" +mLastContig + " mLastStartOnref="+mLastStartOnRef;
|
||||
|
|
@ -110,6 +112,7 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
|||
}
|
||||
defaultReceiver = rr;
|
||||
indelPileReceiver = rp;
|
||||
referenceSequence = null;
|
||||
setWaitState();
|
||||
}
|
||||
|
||||
|
|
@ -124,7 +127,11 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
|||
avoiding_region = false;
|
||||
mState = WAIT_STATE; // got to do this if we were in avoid_region state
|
||||
}
|
||||
|
||||
|
||||
public void setReferenceSequence(String contig) {
|
||||
referenceSequence = contig;
|
||||
}
|
||||
|
||||
/** A utility method: emits into nonindelReceiver and purges from the currently held SAM record pile
|
||||
* all the consequtive records with alignment end positions less than or equal to the specified
|
||||
* position <code>pos</code>, until the first record is encountered that does not meet this condition. Note that
|
||||
|
|
|
|||
|
|
@ -1,22 +1,37 @@
|
|||
package org.broadinstitute.sting.indels;
|
||||
|
||||
public class Matrix<T> {
|
||||
private int size;
|
||||
private int nRows;
|
||||
private int nCols;
|
||||
private Object [][] data;
|
||||
|
||||
public Matrix(int n) {
|
||||
size = n;
|
||||
data = new Object[n][n];
|
||||
|
||||
/** Instantiates a generic matrix of objects with n rows and m cols.
|
||||
*
|
||||
* @param n number of rows
|
||||
* @param m number of columns
|
||||
*/
|
||||
public Matrix(int n, int m) {
|
||||
nRows = n;
|
||||
nCols = m;
|
||||
data = new Object[n][m];
|
||||
}
|
||||
|
||||
|
||||
/** Instantiates a square n x n matrix of objects.
|
||||
*
|
||||
* @param n size of the matrix
|
||||
*/
|
||||
public Matrix (int n) {
|
||||
this(n,n);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public T get(int i, int j) {
|
||||
assert (i<size) && (j < size) : "Matrix index is out of bounds";
|
||||
assert (i < nRows ) && (j < nCols) : "Matrix index is out of bounds";
|
||||
return (T) data[i][j];
|
||||
}
|
||||
|
||||
public void set(int i, int j, T value) {
|
||||
assert (i<size) && (j < size) : "Matrix index is out of bounds";
|
||||
assert ( i < nRows ) && ( j < nCols ) : "Matrix index is out of bounds";
|
||||
data[i][j] = value;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.indels;
|
||||
|
||||
import java.util.*;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
|
||||
public class MultipleAlignment implements Iterable<Integer> {
|
||||
|
|
@ -26,26 +27,6 @@ public class MultipleAlignment implements Iterable<Integer> {
|
|||
ext_ids.clear();
|
||||
}
|
||||
|
||||
public static <T extends Comparable> Integer[] SortPermutation( List<T> A )
|
||||
{
|
||||
final Object[] data = A.toArray();
|
||||
|
||||
class comparator implements Comparator<Integer>
|
||||
{
|
||||
public int compare(Integer a, Integer b)
|
||||
{
|
||||
return ((T)data[a]).compareTo(data[b]);
|
||||
}
|
||||
}
|
||||
Integer[] permutation = new Integer[A.size()];
|
||||
for (int i = 0; i < A.size(); i++)
|
||||
{
|
||||
permutation[i] = i;
|
||||
}
|
||||
Arrays.sort(permutation, new comparator());
|
||||
return permutation;
|
||||
}
|
||||
|
||||
/** Adds single sequence with id set to i. Pile must be empty, or IllegalStateException will be thrown
|
||||
*
|
||||
* @param seq sequence to add
|
||||
|
|
@ -108,7 +89,8 @@ public class MultipleAlignment implements Iterable<Integer> {
|
|||
alignment_offsets.add( -a.getBestOffset2wrt1() + alignment_offsets.get( second ));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** Returns sequence associated with the specified external id, or null if sequence with this external id is not found in the pile
|
||||
*
|
||||
* @param id query id
|
||||
|
|
@ -154,7 +136,13 @@ public class MultipleAlignment implements Iterable<Integer> {
|
|||
alignment_offsets.add(off2+a.getOffsetById(id));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
public void add(MultipleAlignment a, PairwiseAlignment p) {
|
||||
if ( p.id1() == -1 || p.id2() == -1 ) throw new IllegalArgumentException("Attempt to add MSA based on pairwise alignemnt with sequence ids not properly set");
|
||||
add(a,p,p.id1(),p.id2());
|
||||
}
|
||||
|
||||
/** Returns true if the alignment already contains sequence with the specified id
|
||||
*
|
||||
* @param id
|
||||
|
|
@ -168,21 +156,20 @@ public class MultipleAlignment implements Iterable<Integer> {
|
|||
return PairwiseAlignment.countMismatches(getSequenceById(i), getSequenceById(j), getOffsetById(j)-getOffsetById(i));
|
||||
}
|
||||
|
||||
/** Returns the length of the overlapping region of sequences si and sj .
|
||||
/** Returns the length of the overlapping region of the two sequences specified by their external ids i and j.
|
||||
*
|
||||
* @return overlap size
|
||||
*/
|
||||
public int getOverlap(int i, int j) {
|
||||
if ( ! contains(i) || ! contains(j) ) return -1;
|
||||
if ( ! contains(i) || ! contains(j) ) throw new RuntimeException("Sequence with specified id is not in MSA pile");
|
||||
int off = getOffsetById(j) - getOffsetById(i);
|
||||
if ( off >= 0 ) {
|
||||
return Math.min(getSequenceById(i).length()-off, getSequenceById(j).length());
|
||||
} else {
|
||||
return Math.min(getSequenceById(j).length()+off, getSequenceById(i).length());
|
||||
}
|
||||
int L;
|
||||
if ( off >= 0 ) L = Math.min(getSequenceById(i).length()-off, getSequenceById(j).length());
|
||||
else L = Math.min(getSequenceById(j).length()+off, getSequenceById(i).length());
|
||||
return ( L < 0 ? 0 : L );
|
||||
}
|
||||
|
||||
/** Given the two indices, one of which has to be already in the pile, returns the one that is not in the pile.
|
||||
/** Given the two sequence ids, one of which has to be already in the pile, returns the one that is not in the pile.
|
||||
*
|
||||
* @param i sequence id
|
||||
* @param j sequence id
|
||||
|
|
@ -199,13 +186,23 @@ public class MultipleAlignment implements Iterable<Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
public String skipN(int n) {
|
||||
/** Returns a string consisting of n spaces.
|
||||
*
|
||||
* @param n
|
||||
* @return
|
||||
*/
|
||||
private String skipN(int n) {
|
||||
StringBuilder b=new StringBuilder();
|
||||
for ( int k = 0 ; k < n ; k++ ) b.append(' ');
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
public void skipN(int n, StringBuilder b) {
|
||||
/** Prints n spaces directly into the specified string builder.
|
||||
*
|
||||
* @param n
|
||||
* @param b
|
||||
*/
|
||||
private void skipN(int n, StringBuilder b) {
|
||||
for ( int k = 0 ; k < n ; k++ ) b.append(' ');
|
||||
}
|
||||
|
||||
|
|
@ -220,12 +217,60 @@ public class MultipleAlignment implements Iterable<Integer> {
|
|||
if ( seqs.size() == 0 ) return b.toString();
|
||||
|
||||
int skip_first = 0;
|
||||
int msa_length = 0;
|
||||
for ( int i = 0 ; i < seqs.size() ; i++ ) {
|
||||
if ( -alignment_offsets.get(i) > skip_first ) skip_first = -alignment_offsets.get(i);
|
||||
msa_length = Math.max( alignment_offsets.get(i)+seqs.get(i).length() , msa_length );
|
||||
}
|
||||
|
||||
msa_length += skip_first;
|
||||
int [] cov = new int[4];
|
||||
char[] bases = { 'A' , 'C', 'G', 'T' };
|
||||
char[][] consensus = new char[4][msa_length];
|
||||
|
||||
for ( int i = 0 ; i < msa_length ; i++ ) {
|
||||
cov[0] = cov[1] = cov[2] = cov[3] = 0;
|
||||
for ( int j = 0 ; j < seqs.size(); j++ ) {
|
||||
// offset of the sequence j wrt start of the msa region
|
||||
int seq_offset = skip_first + alignment_offsets.get(j);
|
||||
if ( i < seq_offset || i >= seq_offset + seqs.get(j).length() ) continue; // sequence j has no bases at position i
|
||||
int base = -1;
|
||||
switch( Character.toUpperCase(seqs.get(j).charAt(i-seq_offset)) ) {
|
||||
case 'A': base = 0; break;
|
||||
case 'C': base = 1 ; break;
|
||||
case 'G': base = 2 ; break;
|
||||
case 'T': base = 3 ; break;
|
||||
}
|
||||
if ( base >= 0 ) cov[base]++;
|
||||
}
|
||||
int total_cov = cov[0] + cov[1] + cov[2] + cov[3];
|
||||
int bmax = 0;
|
||||
int mm = 0;
|
||||
consensus[3][i] = 'N';
|
||||
for ( int z = 0; z < 4 ; z++ ) {
|
||||
if ( cov[z] > bmax ) {
|
||||
bmax = cov[z];
|
||||
consensus[3][i] = bases[z];
|
||||
mm = total_cov - bmax;
|
||||
}
|
||||
}
|
||||
if ( mm > 0 ) {
|
||||
consensus[2][i] = '*';
|
||||
if ( mm > 9 ) consensus[0][i] = Character.forDigit(mm/10,10);
|
||||
else consensus[0][i] = ' ';
|
||||
consensus[1][i] = Character.forDigit(mm%10,10);
|
||||
} else {
|
||||
consensus[0][i] = consensus[1][i] = consensus[2][i] = ' ';
|
||||
}
|
||||
}
|
||||
|
||||
b.append(" "); b.append(consensus[0]); b.append('\n');
|
||||
b.append(" "); b.append(consensus[1]); b.append('\n');
|
||||
b.append(" "); b.append(consensus[2]); b.append('\n');
|
||||
b.append(" "); b.append(consensus[3]); b.append('\n');
|
||||
|
||||
Integer[] perm = null;
|
||||
if ( inorder ) perm = SortPermutation(alignment_offsets);
|
||||
if ( inorder ) perm = Utils.SortPermutation(alignment_offsets);
|
||||
|
||||
for ( int i = 0 ; i < seqs.size() ; i++ ) {
|
||||
int index = (inorder ? perm[i] : i);
|
||||
|
|
@ -248,8 +293,12 @@ public class MultipleAlignment implements Iterable<Integer> {
|
|||
*/
|
||||
public Iterator<Integer> sequenceIdIterator() { return index.keySet().iterator(); }
|
||||
|
||||
/** Returns an iterator over external seuqnce ids of the sequences stored in the pile, presenting them in
|
||||
* the order of ascending alignment offsets.
|
||||
* @return
|
||||
*/
|
||||
public Iterator<Integer> sequenceIdByOffsetIterator() {
|
||||
final Integer[] perm = SortPermutation(alignment_offsets);
|
||||
final Integer[] perm = Utils.SortPermutation(alignment_offsets);
|
||||
return new Iterator<Integer>() {
|
||||
private int i = 0;
|
||||
public boolean hasNext() {
|
||||
|
|
|
|||
|
|
@ -46,8 +46,26 @@ public class PairwiseAlignment {
|
|||
* if s2 is shifted left (starts earlier) wrt s1
|
||||
*/
|
||||
public int getBestOffset2wrt1() { return alignment_offset; }
|
||||
|
||||
/** Returns offset of the sequence j wrt sequence i in the best pairwise alignment found.
|
||||
*
|
||||
* @param i extrenal id of a sequence, must be one of the sequences kept by this alignment
|
||||
* @param j extrenal id of a sequence, must be one of the sequences kept by this alignment
|
||||
* @return offset of 2nd arg (j) wrt to the first arg (i)
|
||||
*/
|
||||
public int getBestOffset2wrt1(int i, int j ) {
|
||||
if ( i == i1 && j == i2 ) return alignment_offset;
|
||||
else if ( i == i2 && j == i1 ) return -alignment_offset;
|
||||
throw new RuntimeException("Specified sequence id not found in the alignment");
|
||||
}
|
||||
|
||||
public String getSequence1() { return s1; }
|
||||
public String getSequence2() { return s2; }
|
||||
public String getSequenceById(int i) {
|
||||
if ( i == i1 ) return s1;
|
||||
else if ( i == i2 ) return s2;
|
||||
throw new RuntimeException("Specified sequence id not found in the alignment");
|
||||
}
|
||||
public int id1() { return i1;}
|
||||
public int id2() { return i2;}
|
||||
|
||||
|
|
@ -78,6 +96,16 @@ public class PairwiseAlignment {
|
|||
return Math.min(s2.length()+alignment_offset, s1.length());
|
||||
}
|
||||
}
|
||||
|
||||
public static int getOverlap(String seq1, String seq2, int offset2wrt1) {
|
||||
int L ;
|
||||
if ( offset2wrt1 >= 0 ) {
|
||||
L = Math.min(seq1.length()-offset2wrt1, seq2.length());
|
||||
} else {
|
||||
L = Math.min(seq2.length()+offset2wrt1, seq1.length());
|
||||
}
|
||||
return ( L < 0 ? 0 : L );
|
||||
}
|
||||
|
||||
/** Returns true if at least one alignment, no matter how bad, was found between the two sequences
|
||||
* (i.e. the sequences have at least one kmer in common).
|
||||
|
|
@ -116,7 +144,8 @@ public class PairwiseAlignment {
|
|||
int pos2 = ( offset2wrt1 >= 0 ? 0 : -offset2wrt1 );
|
||||
int cnt = 0;
|
||||
while ( pos1 < seq1.length() && pos2 < seq2.length() ) {
|
||||
if ( seq1.charAt(pos1++) == seq2.charAt(pos2++) ) continue;
|
||||
if ( Character.toUpperCase(seq1.charAt(pos1++)) ==
|
||||
Character.toUpperCase(seq2.charAt(pos2++)) ) continue;
|
||||
cnt++; // found mismatch
|
||||
}
|
||||
return cnt;
|
||||
|
|
@ -127,7 +156,8 @@ public class PairwiseAlignment {
|
|||
int pos2 = ( offset2wrt1 >= 0 ? 0 : -offset2wrt1 );
|
||||
int cnt = 0;
|
||||
while ( pos1 < seq1.length() && pos2 < seq2.length() && cnt < maxerr ) {
|
||||
if ( seq1.charAt(pos1++) == seq2.charAt(pos2++) ) continue;
|
||||
if ( Character.toUpperCase(seq1.charAt(pos1++)) ==
|
||||
Character.toUpperCase(seq2.charAt(pos2++)) ) continue;
|
||||
cnt++; // found mismatch
|
||||
}
|
||||
return cnt;
|
||||
|
|
@ -157,6 +187,14 @@ public class PairwiseAlignment {
|
|||
return ( l / (double)L );
|
||||
}
|
||||
|
||||
public static double distance(String seq1, String seq2, int offset2wrt1) {
|
||||
int L = getOverlap(seq1,seq2,offset2wrt1);
|
||||
if ( L <= 0 ) return 1e100;
|
||||
int mm = countMismatches(seq1,seq2,offset2wrt1);
|
||||
double l = ( mm == 0 ? 1.0 : (double)mm + Math.sqrt((double)mm) );
|
||||
return ( l / (double) L );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -2,16 +2,26 @@ package org.broadinstitute.sting.indels;
|
|||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
public class PileBuilder implements RecordPileReceiver {
|
||||
private StrictlyUpperTriangularMatrix distances ;
|
||||
private SymmetricMatrix distances ;
|
||||
private Matrix<PairwiseAlignment> alignments ;
|
||||
private static final int KmerSize = 8;
|
||||
private MultipleAlignment alignments1;
|
||||
private MultipleAlignment alignments2;
|
||||
private String referenceSequence;
|
||||
private int reference_start;
|
||||
|
||||
private static class IntPair {
|
||||
int first;
|
||||
int second;
|
||||
public IntPair(int i, int j) { set(i,j); }
|
||||
public int first() { return first; }
|
||||
public int second() { return second; }
|
||||
public void set(int i, int j) { first = i; second = j; }
|
||||
}
|
||||
|
||||
private static class SelectedPair {
|
||||
private int i_;
|
||||
|
|
@ -46,32 +56,88 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
public class SelectedSequence {
|
||||
private int id_;
|
||||
private double d_;
|
||||
|
||||
private SelectedSequence(int i, double d) {
|
||||
set(i,d);
|
||||
}
|
||||
|
||||
private SelectedSequence() { this(-1,1e100) ; }
|
||||
private void set(int i, double d) { id_ = i; d_ = d; }
|
||||
|
||||
public double d() { return d_;}
|
||||
public int i() { return id_; }
|
||||
|
||||
}
|
||||
|
||||
public PileBuilder() {}
|
||||
public PileBuilder() {
|
||||
referenceSequence = null;
|
||||
reference_start = -1;
|
||||
}
|
||||
|
||||
public void setReferenceSequence(String seq, int start) {
|
||||
referenceSequence = seq;
|
||||
reference_start = start;
|
||||
}
|
||||
|
||||
public void setReferenceSequence(String seq) {
|
||||
referenceSequence = seq;
|
||||
reference_start = -1;
|
||||
}
|
||||
|
||||
public void receive(Collection<SAMRecord> c) {
|
||||
IndexedSequence[] seqs = new IndexedSequence[c.size()];
|
||||
int i = 0;
|
||||
int left = 1000000000;
|
||||
int right = 0;
|
||||
for ( SAMRecord r : c ) {
|
||||
seqs[i++] = new IndexedSequence(r.getReadString(),KmerSize);
|
||||
left = Math.min(left, r.getAlignmentStart() );
|
||||
right = Math.max(right,r.getAlignmentEnd());
|
||||
}
|
||||
doMultipleAlignment(seqs);
|
||||
System.out.print("Distance between final piles: "+distance(alignments1, alignments2));
|
||||
System.out.print("; diameter of PILE1: "+ diameter(alignments1));
|
||||
System.out.println("; diameter of PILE2: "+ diameter(alignments2));
|
||||
|
||||
System.out.println("PILE 1: \n"+alignments1.toString());
|
||||
System.out.println("PILE 2: \n"+alignments2.toString());
|
||||
SequencePile originalAligns = new SequencePile(referenceSequence.substring(left-1,right));
|
||||
for ( SAMRecord r : c ) {
|
||||
originalAligns.addAlignedSequence(r.getReadString(), r.getReadNegativeStrandFlag(),
|
||||
r.getCigar(), r.getAlignmentStart() - left );
|
||||
}
|
||||
System.out.println("\n#############################################################################");
|
||||
System.out.println("ORIGINAL ALIGNMENT: \n");
|
||||
originalAligns.colorprint(true);
|
||||
System.out.println("\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++") ;
|
||||
|
||||
List<MultipleAlignment> piles = doMultipleAlignment2(seqs);
|
||||
// System.out.print("Distance between final piles: "+distance(alignments1, alignments2));
|
||||
// System.out.print("; diameter of PILE1: "+ diameter(alignments1));
|
||||
// System.out.println("; diameter of PILE2: "+ diameter(alignments2));
|
||||
|
||||
// System.out.println("PILE 1: \n"+alignments1.toString());
|
||||
// System.out.println("PILE 2: \n"+alignments2.toString());
|
||||
|
||||
SymmetricMatrix d = new SymmetricMatrix(piles.size());
|
||||
for ( int n = 0 ; n < piles.size() ; n++ ) {
|
||||
d.set(n,n,diameter(piles.get(n)));
|
||||
for ( int m = n+1 ; m < piles.size() ; m++ ) {
|
||||
d.set(n,m,distance(piles.get(n), piles.get(m)));
|
||||
}
|
||||
}
|
||||
System.out.println(d.format("%8.4g"));
|
||||
for ( int n = 0 ; n < piles.size() ; n++ ) {
|
||||
System.out.println("PILE " + n +"\n" +piles.get(n).toString());
|
||||
}
|
||||
}
|
||||
|
||||
public void initPairwiseAlignments( IndexedSequence [] seqs ) {
|
||||
distances = new StrictlyUpperTriangularMatrix( seqs.length );
|
||||
distances = new SymmetricMatrix( seqs.length );
|
||||
alignments = new Matrix<PairwiseAlignment>( seqs.length );
|
||||
for( int i = 0; i < seqs.length ; i++ ) {
|
||||
for ( int j = i+1 ; j < seqs.length ; j++ ) {
|
||||
PairwiseAlignment a = new PairwiseAlignment(seqs[i],seqs[j],i,j); // compute pairwise alignment
|
||||
alignments.set(i, j, a); // save it
|
||||
alignments.set(j, i, a); // save it
|
||||
distances.set(i,j,a.distance());
|
||||
}
|
||||
}
|
||||
|
|
@ -146,21 +212,152 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
SelectedPair p = new SelectedPair(-1,-1,1e100);
|
||||
|
||||
for ( Integer id : a ) {
|
||||
for (int i = 0; i < id; i++) {
|
||||
for (int i = 0; i < distances.size(); i++) {
|
||||
if (a.contains(i)) continue; // a already contains both sequences (i,id)
|
||||
double d = distances.get(i, id);
|
||||
if (d < p.d() ) p.set(i,id,d);
|
||||
}
|
||||
for (int j = id + 1; j < distances.size() ; j++) {
|
||||
if (a.contains(j)) continue; // a already contains both sequences (id, j)
|
||||
double d = distances.get(id, j);
|
||||
if (d < p.d()) p.set(id,j,d);
|
||||
}
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
public SelectedPair findClosestToPileAverage(MultipleAlignment a) {
|
||||
|
||||
SelectedPair p = new SelectedPair(-1,-1,1e100);
|
||||
|
||||
// currently, we compute the average distance from each sequence to the pile, but if the average
|
||||
// distance is small enough, we will try to stitch that sequence to the pile based on the *best*
|
||||
// available pairwise alignment, best_id will keep the id of that sequence from the pile that
|
||||
// has the best alignment with the sequence that is the closest on average
|
||||
int best_id=-1;
|
||||
|
||||
Set<Integer> offsets = new HashSet<Integer>(); // all putative offsets suggested by different p-wise alignments
|
||||
for ( int i = 0 ; i < distances.size() ; i++ ) { // for each sequence i
|
||||
|
||||
if ( a.contains(i) ) continue; // sequence i is already in the pile, ignore it
|
||||
|
||||
offsets.clear();
|
||||
|
||||
for ( Integer id : a ) { // for all sequences from the msa pile
|
||||
PairwiseAlignment pa = alignments.get(i,id);
|
||||
if ( pa.getOverlap() <= 0 ) continue; // at this step we do not take into account sequences with no overlap
|
||||
// alignment pa suggests this offset of i wrt the first sequence in the msa
|
||||
offsets.add( pa.getBestOffset2wrt1(id,i)+a.getOffsetById(id));
|
||||
}
|
||||
// we got all suggested offsets; now lets recompute distances:
|
||||
|
||||
for( Integer off : offsets ) {
|
||||
SelectedPair spo = averageDistanceForOffset(a,i,off);
|
||||
if ( spo.d() < p.d() ) p.set(spo.i(),spo.j(),spo.d());
|
||||
}
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
public Matrix<SelectedPair> averageClosestDistanceMatrix(List<MultipleAlignment> la, int n) {
|
||||
|
||||
Matrix<SelectedPair> mp = new Matrix<SelectedPair>(n);
|
||||
|
||||
for ( int i = 0 ; i < n ; i++ ) {
|
||||
for ( int j = i + 1 ; j < n ; j++ ) {
|
||||
mp.set(i,j, findBestAlignment(la.get(i),la.get(j)) );
|
||||
mp.set(j,i, mp.get(i,j) );
|
||||
}
|
||||
}
|
||||
return mp;
|
||||
}
|
||||
|
||||
public SelectedPair findBestAlignment(MultipleAlignment a1, MultipleAlignment a2) {
|
||||
|
||||
Map<Integer, IntPair > all_offsets = new HashMap<Integer, IntPair >();
|
||||
SelectedPair p = new SelectedPair(-1,-1,1e100);
|
||||
|
||||
for ( Integer id1 : a1 ) {
|
||||
for ( Integer id2 : a2 ) {
|
||||
PairwiseAlignment pa = alignments.get(id1,id2);
|
||||
if ( pa.getOverlap() <= 0 ) continue; // id1 and id2 do not overlap and/or we don't have p-wise alignment
|
||||
|
||||
// record suggested offset of a2 wrt a1 (defined by their first sequences), and remember the
|
||||
// pairwise alignment that suggested it
|
||||
int suggested_offset = a1.getOffsetById(id1) + pa.getBestOffset2wrt1(id1,id2) - a2.getOffsetById(id2);
|
||||
if ( ! all_offsets.containsKey(suggested_offset) ) {
|
||||
all_offsets.put( suggested_offset , new IntPair(id1,id2)) ;
|
||||
}
|
||||
}
|
||||
}
|
||||
for ( Map.Entry<Integer,IntPair> offset_record : all_offsets.entrySet() ) {
|
||||
|
||||
double d = averageDistanceForOffset(a1,a2,offset_record.getKey());
|
||||
if ( d < p.d() ) p.set(offset_record.getValue().first(),offset_record.getValue().second(),d);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
public double averageDistanceForOffset(MultipleAlignment a1, MultipleAlignment a2, int offset) {
|
||||
SelectedPair p = new SelectedPair();
|
||||
|
||||
double d_av = 0;
|
||||
int nseq = 0;
|
||||
int i1 = -1;
|
||||
int i2 = -1;
|
||||
|
||||
for ( Integer id2 : a2 ) {
|
||||
SelectedPair spo = averageDistanceForOffset(a1,id2,offset+a2.getOffsetById(id2));
|
||||
if ( spo.d() > 1e99 ) continue;
|
||||
nseq++;
|
||||
d_av += spo.d();
|
||||
}
|
||||
if ( nseq == 0 ) return 1e100;
|
||||
d_av /= nseq;
|
||||
return d_av;
|
||||
}
|
||||
|
||||
/** Computes average distance from sequence i to multiple alignment a for the specified offset of i wrt a
|
||||
* and returns that distance and pair of sequence indices, on which the specified offset is realized
|
||||
* @param a
|
||||
* @param i
|
||||
* @param offset
|
||||
* @return
|
||||
*/
|
||||
public SelectedPair averageDistanceForOffset(MultipleAlignment a, int i, int offset) {
|
||||
|
||||
SelectedPair p = new SelectedPair(-1,-1,1e100);
|
||||
|
||||
double d = 0; // will hold average distance
|
||||
double dmin = 1e100; // used to find the nearest individual sequence in the pile
|
||||
int nseq = 0; // number of sequences in the pile that have distance to sequence i defined
|
||||
int best_id = -1;
|
||||
|
||||
for ( Integer id : a ) { // for all sequences from the msa pile
|
||||
PairwiseAlignment pa = alignments.get(i,id);
|
||||
int new_off = offset - a.getOffsetById(id); // offset of i wrt id as suggested by <offset>
|
||||
double dist_for_off; // distance between i and id for the given offset off
|
||||
|
||||
// check if p-wise alignment has data for the specified offset:
|
||||
boolean canuse = false;
|
||||
if ( pa.alignmentExists() && pa.getBestOffset2wrt1(id,i) == new_off ) {
|
||||
dist_for_off = distances.get(i,id);
|
||||
canuse = true; // can use this alignment to stitch i to a
|
||||
}
|
||||
else {
|
||||
// offset is different from what the pwise alignment suggests; recompute!
|
||||
dist_for_off = PairwiseAlignment.distance(pa.getSequenceById(id),pa.getSequenceById(i),new_off);
|
||||
}
|
||||
if ( dist_for_off > 1e99 ) continue; // at this offset, i and id do not overlap, go check next id
|
||||
d += dist_for_off;
|
||||
nseq++;
|
||||
if ( dist_for_off < dmin && canuse ) {
|
||||
dmin = dist_for_off;
|
||||
best_id = id;
|
||||
}
|
||||
|
||||
}
|
||||
if ( nseq == 0 ) return p;
|
||||
d /= (double)nseq;
|
||||
p.set(i,best_id,d);
|
||||
return p;
|
||||
}
|
||||
|
||||
/** Finds, among all sequences, the one farthest from the specified pile. Being
|
||||
* the 'farthest' is defined as having the largest lower bound of the distances to all sequences in the pile.
|
||||
*
|
||||
|
|
@ -178,7 +375,7 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
double d_min = 1e100; // smallest distance from sequence i to the pile
|
||||
|
||||
for ( Integer id : a ) {
|
||||
double d = ( i < id ? distances.get(i, id) : distances.get(id,i) );
|
||||
double d = distances.get(i, id) ;
|
||||
if (d < d_min ) d_min = d;
|
||||
}
|
||||
// d_min is the smallest distance from sequence i to pile a
|
||||
|
|
@ -195,8 +392,7 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
double d = 1e100;
|
||||
for ( Integer id1 : a1 ) {
|
||||
for ( Integer id2 : a2 ) {
|
||||
if ( id1 < id2 && distances.get(id1,id2) < d ) d = distances.get(id1,id2);
|
||||
if ( id1 > id2 && distances.get(id2,id1) < d ) d = distances.get(id2,id1);
|
||||
if ( distances.get(id1,id2) < d ) d = distances.get(id1,id2);
|
||||
}
|
||||
}
|
||||
return d;
|
||||
|
|
@ -217,7 +413,7 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
double d = 1e100; // will hold distance from id1 to its closest neighbor
|
||||
for ( Integer id2 : a ) {
|
||||
if ( id2 == id1 ) continue;
|
||||
double dpair = ( id1 < id2 ? distances.get(id1,id2) : distances.get(id2,id1) );
|
||||
double dpair = distances.get(id1,id2) ;
|
||||
d = Math.min(d,dpair);
|
||||
}
|
||||
// d = distance from id1 to its closest neighbor within the pile
|
||||
|
|
@ -240,7 +436,8 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
|
||||
PileBuilder pb = new PileBuilder();
|
||||
|
||||
pb.doMultipleAlignment(seqs);
|
||||
//pb.doMultipleAlignment(seqs);
|
||||
pb.doMultipleAlignment2(seqs);
|
||||
System.out.print("Distance between final piles: "+pb.distance(pb.alignments1, pb.alignments2));
|
||||
System.out.print("; diameter of PILE1: "+ pb.diameter(pb.alignments1));
|
||||
System.out.println("; diameter of PILE2: "+ pb.diameter(pb.alignments2));
|
||||
|
|
@ -252,6 +449,8 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
public void doMultipleAlignment(IndexedSequence[] seqs) {
|
||||
// two piles we are going to grow until all sequences are assigned to one of them.
|
||||
// we intend to keep the piles disjoint, e.g. no sequence should be placed in both
|
||||
|
||||
|
||||
MultipleAlignment pile1 = new MultipleAlignment();
|
||||
MultipleAlignment pile2 = new MultipleAlignment();
|
||||
|
||||
|
|
@ -260,7 +459,7 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
|
||||
// all the pairwise alignments are computed and disjoint best and next-best pairs are found
|
||||
|
||||
//System.out.println( distances.format("%8.4g "));
|
||||
// System.out.println( distances.format("%8.4g "));
|
||||
|
||||
|
||||
SelectedPair pworst = findWorst();
|
||||
|
|
@ -268,13 +467,14 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
pile1.add(seqs[pworst.i()].getSequence(), pworst.i());
|
||||
pile2.add(seqs[pworst.j()].getSequence(), pworst.j());
|
||||
|
||||
/*
|
||||
|
||||
// initialize piles with best and next-best pairs
|
||||
/*
|
||||
SelectedPair p_best = findClosestPair();
|
||||
SelectedPair p_nextbest = findNextClosestPairAfter(p_best);
|
||||
pile1.add( alignments.get(p_best.i(), p_best.j()),p_best.i(),p_best.j());
|
||||
pile2.add( alignments.get(p_nextbest.i(), p_nextbest.j()),p_nextbest.i(),p_nextbest.j());
|
||||
*/
|
||||
pile1.add( alignments.get(p_best.i(), p_best.j()));
|
||||
pile2.add( alignments.get(p_nextbest.i(), p_nextbest.j()));
|
||||
*/
|
||||
/*
|
||||
System.out.println("Best pair ("+p_best.i() + "," + p_best.j()+", d="+p_best.d()+"):");
|
||||
System.out.println(pile1.toString());
|
||||
|
|
@ -283,39 +483,44 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
*/
|
||||
SelectedPair p1 = null;
|
||||
SelectedPair p2 = null;
|
||||
|
||||
|
||||
// grow piles hierarchical clustering-style
|
||||
while ( pile1.size() + pile2.size() < seqs.length ) {
|
||||
// find candidate sequences closest to each of the two piles
|
||||
p1 = findClosestToPile(pile1);
|
||||
p2 = findClosestToPile(pile2);
|
||||
|
||||
// p1 = findClosestToPileAverage(pile1); // findClosestToPile(pile1);
|
||||
// p2 = findClosestToPileAverage(pile2); //findClosestToPile(pile2);
|
||||
p1 = findClosestToPile(pile1); // findClosestToPile(pile1);
|
||||
p2 = findClosestToPile(pile2); //findClosestToPile(pile2);
|
||||
int id1_cand = pile1.selectExternal(p1.i(), p1.j()); // id of the sequence closest to the pile 1
|
||||
int id2_cand = pile2.selectExternal(p2.i(), p2.j()); // id of the sequence closest to the pile 2
|
||||
if ( pile2.contains(id1_cand) && pile1.contains(id2_cand)) {
|
||||
// pile1 and pile 2 are mutually the closest, so we need to merge them.
|
||||
// if piles are mutually the closest, then p1 and p2 are the same pair (id1, id2),
|
||||
// so we just merge on one of the (redundant) instances:
|
||||
pile1.add(pile2, alignments.get( p1.i(), p1.j()),p1.i(), p1.j());
|
||||
pile1.add(pile2, alignments.get( p1.i(), p1.j()));
|
||||
pile2.clear(); // need to reset pile 2 to something else
|
||||
int z = findFarthestFromPile(pile1); // get sequence farthest from merged pile 1
|
||||
pile2.add(seqs[z].getSequence(), z); // and reinitialize pile 2 with that sequence
|
||||
} else {
|
||||
if ( p1.d() < p2.d() ) {
|
||||
if ( pile2.contains(id1_cand) ) {
|
||||
pile1.add(pile2, alignments.get( p1.i(), p1.j()),p1.i(), p1.j());
|
||||
pile1.add(pile2, alignments.get( p1.i(), p1.j()));
|
||||
pile2.clear(); // need to reset pile 2 to something else
|
||||
int z = findFarthestFromPile(pile1); // get sequence farthest from merged pile 1
|
||||
pile2.add(seqs[z].getSequence(), z); // and reinitialize pile 2 with that sequence
|
||||
} else pile1.add( alignments.get(p1.i(), p1.j()) );
|
||||
} else {
|
||||
if ( pile1.contains(id2_cand) ) {
|
||||
pile2.add(pile1, alignments.get( p2.i(), p2.j()),p2.i(), p2.j());
|
||||
pile2.add(pile1, alignments.get( p2.i(), p2.j()));
|
||||
pile1.clear(); // need to reset pile 2 to something else
|
||||
int z = findFarthestFromPile(pile2); // get sequence farthest from merged pile 1
|
||||
pile1.add(seqs[z].getSequence(), z); // and reinitialize pile 2 with that sequence
|
||||
} else pile2.add( alignments.get(p2.i(), p2.j()) );
|
||||
}
|
||||
}
|
||||
System.out.println("PILE 1: \n"+pile1.toString());
|
||||
System.out.println("PILE 2: \n"+pile2.toString());
|
||||
} // end WHILE
|
||||
|
||||
alignments1 = pile1;
|
||||
|
|
@ -327,6 +532,56 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
}
|
||||
*/
|
||||
}
|
||||
|
||||
public List<MultipleAlignment> doMultipleAlignment2(IndexedSequence[] seqs) {
|
||||
|
||||
initPairwiseAlignments(seqs);
|
||||
|
||||
List<MultipleAlignment> piles = new LinkedList<MultipleAlignment>();
|
||||
|
||||
int npiles = seqs.length;
|
||||
|
||||
for ( int i = 0 ; i < seqs.length ; i++ ) {
|
||||
MultipleAlignment m = new MultipleAlignment();
|
||||
m.add(seqs[i].getSequence(),i);
|
||||
piles.add(m);
|
||||
}
|
||||
|
||||
while ( npiles > 2 ) {
|
||||
Matrix<SelectedPair> dist = averageClosestDistanceMatrix(piles,npiles);
|
||||
int best_i = -1;
|
||||
int best_j = -1;
|
||||
int pile_i = -1;
|
||||
int pile_j = -1;
|
||||
double d = 1e100;
|
||||
for ( int i = 0 ; i < npiles ; i++ ) {
|
||||
for ( int j = i+1 ; j < npiles ; j++ ) {
|
||||
SelectedPair p = dist.get(i,j);
|
||||
if ( p.d() < d ) {
|
||||
d = p.d();
|
||||
pile_i = i;
|
||||
pile_j = j;
|
||||
best_i = p.i();
|
||||
best_j = p.j();
|
||||
}
|
||||
}
|
||||
}
|
||||
// System.out.println("joining pile "+pile_i +" and pile " + pile_j +" on seqs " + best_i +" and " + best_j );
|
||||
// got the closest pair
|
||||
piles.get(pile_i).add(piles.get(pile_j),alignments.get(best_i,best_j));
|
||||
// System.out.println("JOINED PILE: \n"+piles.get(pile_i).toString());
|
||||
piles.remove(pile_j);
|
||||
npiles--;
|
||||
}
|
||||
|
||||
alignments1 = piles.get(0);
|
||||
alignments2 = piles.get(1);
|
||||
|
||||
|
||||
// System.out.println("PILE 1: \n"+piles.get(0).toString());
|
||||
// System.out.println("PILE 2: \n"+piles.get(1).toString());
|
||||
return piles;
|
||||
}
|
||||
|
||||
public static IndexedSequence[] testSet1(int K) {
|
||||
IndexedSequence [] seqs = new IndexedSequence[9];
|
||||
|
|
|
|||
|
|
@ -1,81 +1,23 @@
|
|||
package org.broadinstitute.sting.indels;
|
||||
|
||||
public class StrictlyUpperTriangularMatrix {
|
||||
private double [] data;
|
||||
private int size;
|
||||
|
||||
public class StrictlyUpperTriangularMatrix extends SymmetricMatrix {
|
||||
|
||||
public StrictlyUpperTriangularMatrix(int dimension) {
|
||||
super(dimension);
|
||||
assert dimension >=2 : "Distance matrix can not be smaller than 2x2";
|
||||
if ( dimension % 2 == 0 ) {
|
||||
int k = dimension >> 1; // dimension/2
|
||||
data = new double[k*(dimension-1)];
|
||||
} else {
|
||||
int k = ( dimension -1 ) >> 1; // (dimension -1)/2
|
||||
data = new double[k*dimension];
|
||||
}
|
||||
size = dimension;
|
||||
}
|
||||
|
||||
public double get(int i, int j) {
|
||||
assert (i < size) && (j<size) : "Out of bound index into distance matrix detected";
|
||||
if ( i >= j ) return 0.0;
|
||||
|
||||
// we are guaranteed now that i < j; now
|
||||
// translate i,j into the linear offset into our internal data array and return the value:
|
||||
return data[ linearOffset(i,j) ];
|
||||
return super.get(i,j);
|
||||
}
|
||||
|
||||
public void set(int i, int j, double value) {
|
||||
assert (i < size) && (j<size) : "Out of bound index into distance matrix detected";
|
||||
assert i < j : "Only i < j elements can be set in strictly upper diagonal matrix" ;
|
||||
|
||||
// we are guaranteed now that i < j; now
|
||||
// translate i,j into the linear offset into our internal data array and set the value
|
||||
data[ linearOffset(i,j) ] = value;
|
||||
super.set(i,j,value);
|
||||
}
|
||||
|
||||
public int size() { return size; }
|
||||
|
||||
/** Returns ready-to-print string representing the full matrix (don't use for 1000x1000 matrices!!);
|
||||
* each element is formatted according to a default format.
|
||||
* @return
|
||||
* @see format(String f)
|
||||
*/
|
||||
public String format() {
|
||||
return format("%6.3f ");
|
||||
}
|
||||
|
||||
/** Returns ready-to-print string representing the full matrix (don't use for 1000x1000 matrices!!);
|
||||
* each element is formatted according to a specified format string (note: format string must include all desired
|
||||
* whitespaces before and/or after an element, as this method itself does not add any spaces between the formatted elements).
|
||||
* @return
|
||||
* @see format()
|
||||
*/
|
||||
public String format(String f) {
|
||||
StringBuilder b = new StringBuilder();
|
||||
java.util.Formatter frm = new java.util.Formatter(b);
|
||||
for ( int i = 0 ; i < size ; i++ ) {
|
||||
for ( int j = 0 ; j < size ; j++ ) {
|
||||
if ( i < j ) frm.format(f, get(i,j));
|
||||
else frm.format(f, 0.0);
|
||||
}
|
||||
b.append('\n');
|
||||
}
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
/** Computes linear offset into the array that keeps actual data given "row" and "column" indices into the matrix
|
||||
* this class represents; this method is unchecked, but it expects i < j otherwise the result will be incorrect.
|
||||
* @param i row index
|
||||
* @param j column index
|
||||
* @return linear offset into the data[] member of this class
|
||||
*/
|
||||
private int linearOffset(int i, int j) {
|
||||
int k = (( size << 1 ) - i - 1)*i; // [ 2*d - (i+1) ] * i
|
||||
k >>= 1; // k/=2
|
||||
// now k is the offset of the first stored element in row i
|
||||
return ( k + (j - i - 1));
|
||||
}
|
||||
|
||||
|
||||
private static void testMe() {
|
||||
StrictlyUpperTriangularMatrix m = new StrictlyUpperTriangularMatrix(3);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,87 @@
|
|||
package org.broadinstitute.sting.indels;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Mar 22, 2009
|
||||
* Time: 3:43:05 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class SymmetricMatrix {
|
||||
|
||||
protected double [] data;
|
||||
protected int size;
|
||||
|
||||
public SymmetricMatrix(int dimension) {
|
||||
assert dimension >= 0 : "Matrix size can not be negative";
|
||||
if ( dimension % 2 == 0 ) {
|
||||
int k = dimension >> 1; // dimension/2
|
||||
data = new double[k*(dimension+1)];
|
||||
} else {
|
||||
int k = ( dimension + 1 ) >> 1; // (dimension + 1)/2
|
||||
data = new double[k*dimension];
|
||||
}
|
||||
size = dimension;
|
||||
}
|
||||
|
||||
public double get(int i, int j) {
|
||||
assert (i < size) && ( j < size) : "Out of bound index into matrix detected";
|
||||
if ( i >= j ) return data[linearOffset(j,i)]; // we store only the upper triangle in memory
|
||||
return data[ linearOffset(i,j) ];
|
||||
}
|
||||
|
||||
public void set(int i, int j, double value) {
|
||||
assert (i < size) && (j < size) : "Out of bound index into matrix detected";
|
||||
|
||||
if ( i >= j ) data[ linearOffset(j,i) ] = value;
|
||||
else data[ linearOffset(i,j) ] = value;
|
||||
}
|
||||
|
||||
public int size() { return size; }
|
||||
|
||||
public int nRows() { return size; }
|
||||
public int nCols() { return size; }
|
||||
|
||||
/** Returns ready-to-print string representing the full matrix (don't use for 1000x1000 matrices!!);
|
||||
* each element is formatted according to a default format.
|
||||
* @return
|
||||
* @see #format(String f)
|
||||
*/
|
||||
public String format() {
|
||||
return format("%6.3f ");
|
||||
}
|
||||
|
||||
|
||||
/** Returns ready-to-print string representing the full matrix (don't use for 1000x1000 matrices!!);
|
||||
* each element is formatted according to a specified format string (note: format string must include all desired
|
||||
* whitespaces before and/or after an element, as this method itself does not add any spaces between the formatted elements).
|
||||
* @return
|
||||
* @see #format()
|
||||
*/
|
||||
public String format(String f) {
|
||||
StringBuilder b = new StringBuilder();
|
||||
java.util.Formatter frm = new java.util.Formatter(b);
|
||||
for ( int i = 0 ; i < size ; i++ ) {
|
||||
for ( int j = 0 ; j < size ; j++ ) {
|
||||
frm.format(f, get(i,j));
|
||||
}
|
||||
b.append('\n');
|
||||
}
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
|
||||
/** Computes linear offset into the internal array that keeps actual data, given "row" and "column" indices
|
||||
* into the matrix; this method is unchecked, but it expects i <= j otherwise the result is unspecified.
|
||||
* @param i row index
|
||||
* @param j column index
|
||||
* @return linear offset into the data[] member of this class
|
||||
*/
|
||||
private int linearOffset(int i, int j) {
|
||||
int k = (( size << 1 ) - i + 1)*i; // [ 2*d - (i+1) ] * i
|
||||
k >>= 1; // k/=2
|
||||
// now k is the offset of the first stored element in row i
|
||||
return ( k + j - i );
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue