git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@184 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8bdf49a01f
commit
40f45c2333
|
|
@ -0,0 +1,178 @@
|
|||
package org.broadinstitute.sting.playground.indels;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||
import org.broadinstitute.sting.playground.utils.CountedObject;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Mar 25, 2009
|
||||
* Time: 12:15:38 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class AlignmentUtils {
|
||||
|
||||
/** Computes number of mismatches in the read alignment to the refence <code>ref</code>
|
||||
* specified in the record <code>r</code>. Indels are completely <i>ignored</i> by this method:
|
||||
* only base mismatches in the alignment segments where both sequences are present are counted.
|
||||
* @param r
|
||||
* @param ref
|
||||
* @return
|
||||
*/
|
||||
public static int numMismatches(SAMRecord r, ReferenceSequence ref) {
|
||||
if ( r.getReadUnmappedFlag() ) return 1000000;
|
||||
int i_ref = r.getAlignmentStart()-1; // position on the ref
|
||||
int i_read = 0; // position on the read
|
||||
int mm_count = 0; // number of mismatches
|
||||
Cigar c = r.getCigar();
|
||||
for ( int k = 0 ; k < c.numCigarElements() ; k++ ) {
|
||||
CigarElement ce = c.getCigarElement(k);
|
||||
switch( ce.getOperator() ) {
|
||||
case M:
|
||||
for ( int l = 0 ; l < ce.getLength() ; l++, i_ref++, i_read++ ) {
|
||||
if ( Character.toUpperCase(r.getReadString().charAt(i_read) ) != 'N' ) continue; // do not count N's ?
|
||||
if ( Character.toUpperCase(r.getReadString().charAt(i_read) ) !=
|
||||
Character.toUpperCase((char)ref.getBases()[i_ref]) ) mm_count++;
|
||||
}
|
||||
break;
|
||||
case I: i_read += ce.getLength(); break;
|
||||
case D: i_ref += ce.getLength(); break;
|
||||
default: throw new RuntimeException("Unrecognized cigar element");
|
||||
}
|
||||
|
||||
}
|
||||
return mm_count;
|
||||
}
|
||||
|
||||
public static int numMismatches(SAMRecord r, String ref) {
|
||||
if ( r.getReadUnmappedFlag() ) return 1000000;
|
||||
int i_ref = r.getAlignmentStart()-1; // position on the ref
|
||||
int i_read = 0; // position on the read
|
||||
int mm_count = 0; // number of mismatches
|
||||
Cigar c = r.getCigar();
|
||||
for ( int k = 0 ; k < c.numCigarElements() ; k++ ) {
|
||||
CigarElement ce = c.getCigarElement(k);
|
||||
switch( ce.getOperator() ) {
|
||||
case M:
|
||||
for ( int l = 0 ; l < ce.getLength() ; l++, i_ref++, i_read++ ) {
|
||||
if ( Character.toUpperCase(r.getReadString().charAt(i_read) ) == 'N' ) continue; // do not count N's ?
|
||||
if ( Character.toUpperCase(r.getReadString().charAt(i_read) ) !=
|
||||
Character.toUpperCase(ref.charAt(i_ref)) ) mm_count++;
|
||||
}
|
||||
break;
|
||||
case I: i_read += ce.getLength(); break;
|
||||
case D: i_ref += ce.getLength(); break;
|
||||
default: throw new RuntimeException("Unrecognized cigar element");
|
||||
}
|
||||
|
||||
}
|
||||
return mm_count;
|
||||
}
|
||||
|
||||
/** Reads through the alignment cigar and returns all indels found in the alignment as a collection
|
||||
* of Indel objects.
|
||||
* @param c
|
||||
* @param start
|
||||
* @return
|
||||
*/
|
||||
public static Collection<Indel> extractIndels(final Cigar c, final int start) {
|
||||
//
|
||||
// firstpos,lastpos span of the indel will be interpreted as follows:
|
||||
// any alignment that ends strictly before firstpos or starts strictly after lastpos
|
||||
// on the *reference* (not inclusive!) does not overlap with an indel; in the case of
|
||||
// insertion it will result in firstpos > lastpos!
|
||||
// lastpos
|
||||
// | firstpos
|
||||
// | |
|
||||
// v v
|
||||
// ---------III----- Ref Insertion: bases I are not in the ref; any alignment that starts
|
||||
// after lastpos or ends before firstpos *on the reference*
|
||||
// is completely over the reference bases to the right or to
|
||||
// the left, respectively, of the insertion site
|
||||
//
|
||||
// firstpos
|
||||
// | lastpos
|
||||
// | |
|
||||
// v v
|
||||
//------------------ Ref Deletion: any alignment that ends before firstpos or starts after lastpos
|
||||
// -----DDD--- alignment on the reference does not overlap with the deletion
|
||||
int runninglength = start; // position on the original reference; start = alignment start position
|
||||
|
||||
List<Indel> indels = new ArrayList<Indel>(4);
|
||||
|
||||
if ( c.numCigarElements() <= 1 ) return indels; // most of the reads have no indels, save a few cycles by returning early
|
||||
|
||||
for ( int i = 0 ; i < c.numCigarElements() ; i++ ) {
|
||||
|
||||
final CigarElement ce = c.getCigarElement(i);
|
||||
Indel curr_indel = null;
|
||||
|
||||
switch(ce.getOperator()) {
|
||||
case I: curr_indel = new Indel(runninglength, ce.getLength(), Indel.IndelType.I); break;
|
||||
case D: curr_indel = new Indel(runninglength, ce.getLength(), Indel.IndelType.D);
|
||||
runninglength += ce.getLength();
|
||||
break;
|
||||
case M: runninglength += ce.getLength(); break; // advance along the gapless block in the alignment
|
||||
default :
|
||||
throw new IllegalArgumentException("Unexpected operator in cigar string");
|
||||
}
|
||||
|
||||
if ( curr_indel == null ) continue; // element was not an indel, go grab next element...
|
||||
|
||||
indels.add(curr_indel); // this is a new indel. Add it.
|
||||
} // end for loop over all alignment cigar elements
|
||||
|
||||
return indels;
|
||||
} // end extractIndels() method
|
||||
|
||||
/** Reads through the alignment specified in the record and returns all indels found in the alignment as a collection
|
||||
* of Indel objects. If read is not mapped, silently returns an empty collection.
|
||||
* @param r
|
||||
* @return
|
||||
*/
|
||||
public static Collection<Indel> extractIndels(SAMRecord r) {
|
||||
if ( r.getReadUnmappedFlag() ) return new ArrayList<Indel>();
|
||||
return extractIndels(r.getCigar(), r.getAlignmentStart());
|
||||
}
|
||||
|
||||
/** Extracts indels from the specified record (@see #extractIndels()) and updates the provided tree object.
|
||||
* Multiple occurences of the same indel (i.e. with support from multiple reads) will be grouped together
|
||||
* in one counted object held by the set.
|
||||
*
|
||||
* @param r
|
||||
* @param t
|
||||
*/
|
||||
public static void collectAndCountIndels(SAMRecord r, TreeSet<CountedObject<Indel> > t) {
|
||||
Collection<Indel> indels = AlignmentUtils.extractIndels(r);
|
||||
for ( Indel ind : indels ) {
|
||||
CountedObject<Indel> ci = new CountedObject<Indel>(ind);
|
||||
CountedObject<Indel> found = t.floor(ci);
|
||||
|
||||
if ( ci.equals( found ) ) found.increment(); // we did find our indel, advance the counter
|
||||
else t.add(ci); // this is a new indel. Add it.
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static String toString(Cigar cig) {
|
||||
StringBuilder b = new StringBuilder();
|
||||
|
||||
for ( int i = 0 ; i < cig.numCigarElements() ; i++ ) {
|
||||
char c='?';
|
||||
switch ( cig.getCigarElement(i).getOperator() ) {
|
||||
case M : c = 'M'; break;
|
||||
case D : c = 'D'; break;
|
||||
case I : c = 'I'; break;
|
||||
}
|
||||
b.append(c);
|
||||
b.append(cig.getCigarElement(i).getLength());
|
||||
}
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
|
|
@ -132,7 +132,7 @@ public class ConsensusSequence {
|
|||
*/
|
||||
public char baseAt(int offset) {
|
||||
assert offset >= startOffset && offset < startOffset + coverage.size() : "Offset out of bounds";
|
||||
int [] cov = coverage.get(offset+(int)referencePos);
|
||||
int [] cov = coverage.get(offset-startOffset);
|
||||
int total_cov = cov[0] + cov[1] + cov[2] + cov[3];
|
||||
int bmax = 0;
|
||||
char base = 'N';
|
||||
|
|
@ -153,7 +153,7 @@ public class ConsensusSequence {
|
|||
*/
|
||||
public Pair<Character,Integer> baseWithCountAt(int offset) {
|
||||
assert offset >= startOffset && offset < startOffset + coverage.size() : "Offset out of bounds";
|
||||
int [] cov = coverage.get(offset+(int)referencePos);
|
||||
int [] cov = coverage.get(offset-startOffset);
|
||||
int total_cov = cov[0] + cov[1] + cov[2] + cov[3];
|
||||
int bmax = 0;
|
||||
char base = 'N';
|
||||
|
|
@ -174,7 +174,7 @@ public class ConsensusSequence {
|
|||
*/
|
||||
public int coverageAt(int offset) {
|
||||
if ( offset < startOffset || offset >= startOffset + coverage.size() ) return 0;
|
||||
int [] cov = coverage.get(offset+(int)referencePos);
|
||||
int [] cov = coverage.get(offset-startOffset);
|
||||
return cov[0]+cov[1]+cov[2]+cov[3];
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -181,33 +181,7 @@ public class IndelInspector extends CommandLineProgram {
|
|||
return errs;
|
||||
}
|
||||
|
||||
private static int numMismatchesDirect(SAMRecord r, ReferenceSequence ref) {
|
||||
if ( r.getReadUnmappedFlag() ) return 1000000;
|
||||
int i_ref = r.getAlignmentStart()-1; // position on the ref
|
||||
int i_read = 0; // position on the read
|
||||
int mm_count = 0; // number of mismatches
|
||||
Cigar c = r.getCigar();
|
||||
for ( int k = 0 ; k < c.numCigarElements() ; k++ ) {
|
||||
CigarElement ce = c.getCigarElement(k);
|
||||
switch( ce.getOperator() ) {
|
||||
case M:
|
||||
for ( int l = 0 ; l < ce.getLength() ; l++ ) {
|
||||
if ( Character.toUpperCase(r.getReadString().charAt(i_read) ) !=
|
||||
Character.toUpperCase((char)ref.getBases()[i_ref]) ) mm_count++;
|
||||
i_ref++;
|
||||
i_read++;
|
||||
}
|
||||
break;
|
||||
case I: i_read += ce.getLength(); break;
|
||||
case D: i_ref += ce.getLength(); break;
|
||||
default: throw new RuntimeException("Unrecognized cigar element");
|
||||
}
|
||||
|
||||
}
|
||||
return mm_count;
|
||||
}
|
||||
|
||||
/** This method is a HACK: it is designed to work around the current bug in NM tags created at CRD */
|
||||
/** This method is a HACK: it is designed to work around the current bug in NM tags created at CRD */
|
||||
private static int numMismatches(SAMRecord r) throws RuntimeException {
|
||||
|
||||
// NM currently stores the total number of mismatches in all blocks + 1
|
||||
|
|
|
|||
|
|
@ -191,8 +191,7 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
|||
* @param r
|
||||
* @throws RuntimeException
|
||||
*/
|
||||
@Override
|
||||
public void receive(final SAMRecord r) throws RuntimeException {
|
||||
public void receive(final SAMRecord r) throws RuntimeException {
|
||||
|
||||
if ( r.getReadUnmappedFlag() ) return; // read did not align, nothing to do
|
||||
|
||||
|
|
|
|||
|
|
@ -151,8 +151,20 @@ public class MultipleAlignment implements Iterable<Integer> {
|
|||
return alignment_offsets.get(index.get(id));
|
||||
}
|
||||
|
||||
/** Returns external id of the read the offsets of this multiple alignment are based upon (i.e. all the offsets
|
||||
* are specified wrt the base read).
|
||||
* @return
|
||||
*/
|
||||
public int getBaseReadId() { return ext_ids.get(0); }
|
||||
|
||||
|
||||
/** Returns offset of the read specified by its external id wrt the start of the consensus sequence in this
|
||||
* multiple alignment (consenus sequence is a major vote union of all the reads in this alignment).
|
||||
* @param id
|
||||
* @return
|
||||
*/
|
||||
public int getOffsetWrtConsensus(int id) {
|
||||
return getOffsetById (id)- consensus.getStartOffset();
|
||||
}
|
||||
|
||||
/** Returns true if the alignment already contains sequence with the specified id.
|
||||
*
|
||||
|
|
@ -235,49 +247,16 @@ public class MultipleAlignment implements Iterable<Integer> {
|
|||
|
||||
if ( seqs.size() == 0 ) return b.toString();
|
||||
|
||||
// int first_offset = 0; // offset of the first sequence wrt the start of the MSA
|
||||
final int first_offset = -consensus.getStartOffset();
|
||||
// int msa_length = 0;
|
||||
// for ( int i = 0 ; i < seqs.size() ; i++ ) {
|
||||
// if ( -alignment_offsets.get(i) > first_offset ) first_offset = -alignment_offsets.get(i);
|
||||
// msa_length = Math.max( alignment_offsets.get(i)+seqs.get(i).length() , msa_length );
|
||||
// }
|
||||
|
||||
// msa_length += first_offset;
|
||||
final int msa_length = consensus.length();
|
||||
// int [] cov = new int[4];
|
||||
// char[] bases = { 'A' , 'C', 'G', 'T' };
|
||||
char[][] consensusString = new char[4][msa_length];
|
||||
|
||||
for ( int i = -first_offset ; i < msa_length - first_offset ; i++ ) {
|
||||
// cov[0] = cov[1] = cov[2] = cov[3] = 0;
|
||||
// for ( int j = 0 ; j < seqs.size(); j++ ) {
|
||||
// // offset of the sequence j wrt start of the msa region
|
||||
// int seq_offset = first_offset + alignment_offsets.get(j);
|
||||
// if ( i < seq_offset || i >= seq_offset + seqs.get(j).length() ) continue; // sequence j has no bases at position i
|
||||
// int base = -1;
|
||||
// switch( Character.toUpperCase(seqs.get(j).charAt(i-seq_offset)) ) {
|
||||
// case 'A': base = 0; break;
|
||||
// case 'C': base = 1 ; break;
|
||||
// case 'G': base = 2 ; break;
|
||||
// case 'T': base = 3 ; break;
|
||||
// }
|
||||
// if ( base >= 0 ) cov[base]++;
|
||||
// }
|
||||
// int total_cov = cov[0] + cov[1] + cov[2] + cov[3];
|
||||
// int bmax = 0;
|
||||
// int mm = 0;
|
||||
for ( int i = 0 ; i < msa_length ; i++ ) {
|
||||
|
||||
Pair<Character,Integer> base = consensus.baseWithCountAt(i-first_offset);
|
||||
consensusString[3][i] = base.first;
|
||||
// for ( int z = 0; z < 4 ; z++ ) {
|
||||
// if ( cov[z] > bmax ) {
|
||||
// bmax = cov[z];
|
||||
// consensus[3][i] = bases[z];
|
||||
// mm = total_cov - bmax;
|
||||
// }
|
||||
// }
|
||||
int mm = consensus.coverageAt(i) - base.second;
|
||||
int mm = consensus.coverageAt(i-first_offset) - base.second;
|
||||
if ( mm > 0 ) {
|
||||
consensusString[2][i] = '*';
|
||||
if ( mm > 9 ) consensusString[0][i] = Character.forDigit(mm/10,10);
|
||||
|
|
@ -307,6 +286,10 @@ public class MultipleAlignment implements Iterable<Integer> {
|
|||
return b.toString();
|
||||
}
|
||||
|
||||
public String getConsensus() {
|
||||
return consensus.getSequence();
|
||||
}
|
||||
|
||||
public String toString() { return toString(true); }
|
||||
|
||||
public int size() { return seqs.size(); }
|
||||
|
|
@ -338,7 +321,6 @@ public class MultipleAlignment implements Iterable<Integer> {
|
|||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<Integer> iterator() {
|
||||
return sequenceIdIterator();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -128,10 +128,26 @@ public class PairwiseAlignment {
|
|||
// lets extend these kmer matches and count mismatches:
|
||||
|
||||
for ( Integer trial_offset : offsets ) {
|
||||
int mm_cnt = countMismatches(is1.getSequence(), is2.getSequence(), trial_offset,next_mm);
|
||||
if ( mm_cnt < best_mm ) {
|
||||
int mm_cnt = countMismatches(is1.getSequence(), is2.getSequence(), trial_offset,next_mm+1);
|
||||
// if ( (i1==4||i1==8) && i2==18) {
|
||||
// if ( i1== 18 ) System.out.print("to " + i2+" : ");
|
||||
// else System.out.print("to " + i1+" : ");
|
||||
// System.out.println("offset="+trial_offset.toString()+
|
||||
// "; mm=" + countMismatches(is1.getSequence(),is2.getSequence(),trial_offset)+
|
||||
// "(mm_cnt="+mm_cnt+")"+
|
||||
// "; dist="+distance(is1.getSequence(),is2.getSequence(),trial_offset)+
|
||||
// "; overlap="+getOverlap(is1.getSequence(),is2.getSequence(),trial_offset));
|
||||
// }
|
||||
// save current offset if alignment at this offset has fewer mismatches tham everything we've
|
||||
// seen so far, or if it has same number of mismatches but has larger overlap (i.e. distance
|
||||
// between sequences is smaller)
|
||||
if ( mm_cnt < best_mm ||
|
||||
( ( mm_cnt == best_mm ) &&
|
||||
getOverlap(is1.getSequence(),is2.getSequence(),alignment_offset) <
|
||||
0.8*getOverlap(is1.getSequence(),is2.getSequence(),trial_offset) ) ) {
|
||||
// if ( (i1==4||i1==8) && i2==18) System.out.println("Saved offset "+trial_offset.toString());
|
||||
alignment_offset = trial_offset;
|
||||
next_mm = best_mm;
|
||||
next_mm = best_mm;
|
||||
best_mm = mm_cnt;
|
||||
} else {
|
||||
if ( mm_cnt < next_mm ) next_mm = mm_cnt;
|
||||
|
|
|
|||
|
|
@ -1,9 +1,13 @@
|
|||
package org.broadinstitute.sting.playground.indels;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.TextCigarCodec;
|
||||
|
||||
import java.util.*;
|
||||
import org.broadinstitute.sting.utils.PrimitivePair;
|
||||
import org.broadinstitute.sting.playground.utils.CountedObject;
|
||||
|
||||
|
||||
public class PileBuilder implements RecordPileReceiver {
|
||||
|
|
@ -84,28 +88,35 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
public void receive(Collection<SAMRecord> c) {
|
||||
IndexedSequence[] seqs = new IndexedSequence[c.size()];
|
||||
int i = 0;
|
||||
int left = 1000000000;
|
||||
int right = 0;
|
||||
int startOnRef = 1000000000; // absolute start (leftmost) position of the pile of reads on the ref
|
||||
int stopOnRef = 0; // absolute stop (rightmost) position of the pile of reads on the ref (rightmost alignment end)
|
||||
for ( SAMRecord r : c ) {
|
||||
seqs[i++] = new IndexedSequence(r.getReadString(),KmerSize);
|
||||
left = Math.min(left, r.getAlignmentStart() );
|
||||
right = Math.max(right,r.getAlignmentEnd());
|
||||
startOnRef = Math.min(startOnRef, r.getAlignmentStart() );
|
||||
stopOnRef = Math.max(stopOnRef,r.getAlignmentEnd());
|
||||
}
|
||||
|
||||
SequencePile originalAligns = new SequencePile(referenceSequence.substring(left-1,right));
|
||||
// part of the reference covered by original alignments:
|
||||
String pileRef = referenceSequence.substring(startOnRef-1,stopOnRef);
|
||||
|
||||
int totalMismatches = 0; // total mismatches across all reads
|
||||
TreeSet< CountedObject<Indel> > all_indels = new TreeSet< CountedObject<Indel> >();
|
||||
|
||||
SequencePile originalAligns = new SequencePile(pileRef);
|
||||
for ( SAMRecord r : c ) {
|
||||
originalAligns.addAlignedSequence(r.getReadString(), r.getReadNegativeStrandFlag(),
|
||||
r.getCigar(), r.getAlignmentStart() - left );
|
||||
r.getCigar(), r.getAlignmentStart() - startOnRef );
|
||||
totalMismatches += AlignmentUtils.numMismatches(r,referenceSequence);
|
||||
AlignmentUtils.collectAndCountIndels(r,all_indels);
|
||||
}
|
||||
|
||||
System.out.println("\n#############################################################################");
|
||||
System.out.println("ORIGINAL ALIGNMENT: \n");
|
||||
originalAligns.colorprint(true);
|
||||
originalAligns.dotprint(true);
|
||||
System.out.println("\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++") ;
|
||||
|
||||
List<MultipleAlignment> piles = doMultipleAlignment2(seqs);
|
||||
|
||||
|
||||
|
||||
// System.out.print("Distance between final piles: "+distance(alignments1, alignments2));
|
||||
// System.out.print("; diameter of PILE1: "+ diameter(alignments1));
|
||||
// System.out.println("; diameter of PILE2: "+ diameter(alignments2));
|
||||
|
|
@ -120,10 +131,68 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
d.set(n,m,distance(piles.get(n), piles.get(m)));
|
||||
}
|
||||
}
|
||||
|
||||
int new_mismatches = 0 ; // number of mismatches after re-alignment:
|
||||
TreeSet< CountedObject<Indel> > new_indels = new TreeSet< CountedObject<Indel> >(); // new indels after realignment
|
||||
int shifted_reads = 0;
|
||||
|
||||
List<SAMRecord> as_list = (List<SAMRecord>)c; // ugly hack; need this to access records by ids
|
||||
|
||||
System.out.println(d.format("%8.4g"));
|
||||
for ( int n = 0 ; n < piles.size() ; n++ ) {
|
||||
System.out.println("PILE " + n +"\n" +piles.get(n).toString());
|
||||
// SWPairwiseAlignment consToRef = new SWPairwiseAlignment(pileRef,piles.get(n).getConsensus(),2.0,-10.0,-2.0,-1.0);
|
||||
SWPairwiseAlignment consToRef = new SWPairwiseAlignment(pileRef,piles.get(n).getConsensus(),3.0,-1.0,-4,-0.5);
|
||||
|
||||
MultipleAlignment ma = piles.get(n);
|
||||
for ( Integer id : ma ) {
|
||||
SAMRecord r = as_list.get(id);
|
||||
int cons_offset = ma.getOffsetWrtConsensus(id); // offset of the read 'id' wrt multiple alignment's full consensus seq
|
||||
int ref_offset = cons_offset + startOnRef + consToRef.getAlignmentStart2wrt1();
|
||||
if ( ref_offset != r.getAlignmentStart()) shifted_reads++;
|
||||
Cigar cig = buildCigar(cons_offset, r.getReadLength(), consToRef.getCigar());
|
||||
System.out.println(AlignmentUtils.toString(cig));
|
||||
}
|
||||
|
||||
System.out.println("PILE " + n + " to REF ("+ (consToRef.getCigar().numCigarElements()-1)/2 +" indels):");
|
||||
System.out.println(consToRef.toString());
|
||||
System.out.println("PILE " + n +" (READS):\n" +piles.get(n).toString());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** Assuming that a read of length l has a gapless, fully consumed align starting at s (ZERO-based) to some sequence X,
|
||||
* and that sequence's alignment to some reference Y is described by baseCigar, builds a cigar for the direct
|
||||
* alignment of the read to Y (i.e. if the alignment of X to Y contains indel(s) and the read spans them, the
|
||||
* indels will be inserted into the new cigar for read-Y alignment).
|
||||
* @param s
|
||||
* @param l
|
||||
* @param baseCigar
|
||||
* @return
|
||||
*/
|
||||
private Cigar buildCigar(int s, int l, Cigar baseCigar) {
|
||||
int readpos = 0;
|
||||
int refpos = 0;
|
||||
|
||||
List<CigarElement> lce = new ArrayList<CigarElement>(5); // enough to keep 2 indels. should cover 99.999% of cases...
|
||||
|
||||
CigarElement celem = null;
|
||||
int i = 0;
|
||||
while ( refpos <= s ) {
|
||||
celem = baseCigar.getCigarElement(i);
|
||||
refpos+=celem.getLength();
|
||||
i++;
|
||||
}
|
||||
// we now sit on cigar element that contains start s, and refpos points to the end of that element; i points to next element
|
||||
|
||||
lce.add( new CigarElement(Math.min(refpos-s,l),celem.getOperator()) );
|
||||
|
||||
while ( refpos < s+l ) {
|
||||
celem = baseCigar.getCigarElement(i);
|
||||
lce.add( new CigarElement(Math.min(celem.getLength(),l - refpos), celem.getOperator()) );
|
||||
refpos += celem.getLength();
|
||||
i++;
|
||||
}
|
||||
return new Cigar(lce);
|
||||
}
|
||||
|
||||
public void initPairwiseAlignments( IndexedSequence [] seqs ) {
|
||||
|
|
@ -308,7 +377,7 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
return d_av;
|
||||
}
|
||||
|
||||
/** Computes average distance from sequence i to multiple alignment a for the specified offset of i wrt a
|
||||
/** Computes average distance from sequence i to multiple alignment a for the specified offset of 'i' wrt 'a'
|
||||
* and returns that distance and pair of sequence indices, on which the specified offset is realized
|
||||
* @param a
|
||||
* @param i
|
||||
|
|
|
|||
|
|
@ -24,20 +24,33 @@ public class SWPairwiseAlignment {
|
|||
private int alignment_offset; // offset of s2 w/respect to s1
|
||||
private Cigar alignmentCigar;
|
||||
|
||||
private double w_match;
|
||||
private double w_mismatch;
|
||||
private double w_open;
|
||||
private double w_extend;
|
||||
|
||||
private int best_mm; // mismatch count
|
||||
private static final int IMPOSSIBLE = 1000000000;
|
||||
private static final int MSTATE = 0;
|
||||
private static final int ISTATE = 1;
|
||||
private static final int DSTATE = 2;
|
||||
|
||||
public SWPairwiseAlignment(String seq1, String seq2, int id1, int id2 ) {
|
||||
public SWPairwiseAlignment(String seq1, String seq2, int id1, int id2, double match, double mismatch, double open, double extend ) {
|
||||
s1 = seq1;
|
||||
s2 = seq2;
|
||||
i1 = id1;
|
||||
i2 = id2;
|
||||
w_match = match;
|
||||
w_mismatch = mismatch;
|
||||
w_open = open;
|
||||
w_extend = extend;
|
||||
best_mm = IMPOSSIBLE;
|
||||
//next_mm = IMPOSSIBLE;
|
||||
align2(s1,s2);
|
||||
align4(s1,s2);
|
||||
}
|
||||
|
||||
public SWPairwiseAlignment(String seq1, String seq2, int id1, int id2) {
|
||||
this(seq1,seq2,id1,id2,1.0,-1.0/3.0,-1.0-1.0/3.0,-1.0/3.0); // match=1, mismatch = -1/3, gap=-(1+k/3)
|
||||
}
|
||||
|
||||
/** Initializes the alignment with pair of sequences (that will be immediately aligned) and
|
||||
|
|
@ -48,6 +61,10 @@ public class SWPairwiseAlignment {
|
|||
this(seq1,seq2,-1,-1);
|
||||
}
|
||||
|
||||
public SWPairwiseAlignment(String seq1, String seq2, double match, double mismatch, double open, double extend) {
|
||||
this(seq1,seq2,-1,-1,match,mismatch,open, extend);
|
||||
}
|
||||
|
||||
public Cigar getCigar() { return alignmentCigar ; }
|
||||
|
||||
public int getAlignmentStart2wrt1() { return alignment_offset; }
|
||||
|
|
@ -258,6 +275,8 @@ public class SWPairwiseAlignment {
|
|||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/** Allows for separate gap opening and extension penalties, with backtracking.
|
||||
*
|
||||
* @param a
|
||||
|
|
@ -405,6 +424,143 @@ public void align3(String a, String b) {
|
|||
alignment_offset = p.first - p.second;
|
||||
}
|
||||
|
||||
public void align4(String a, String b) {
|
||||
int n = a.length();
|
||||
int m = b.length();
|
||||
double [][] sw = new double[n+1][m+1];
|
||||
|
||||
int [][] btrack = new int[n+1][m+1];
|
||||
|
||||
// build smith-waterman matrix and keep backtrack info:
|
||||
for ( int i = 1 ; i < n+1 ; i++ ) {
|
||||
char a_base = Character.toUpperCase(a.charAt(i-1)); // letter in a at the current pos
|
||||
for ( int j = 1 ; j < m+1 ; j++ ) {
|
||||
char b_base = Character.toUpperCase(b.charAt(j-1)); // letter in b at the current pos
|
||||
double step_diag = sw[i-1][j-1] + wd(a_base,b_base);
|
||||
double step_down = 0.0 ;
|
||||
int kd = 0;
|
||||
for ( int k = 1 ; k < i ; k++ ) {
|
||||
if ( step_down < sw[i-k][j]+wk(a_base,'-',k) ) {
|
||||
step_down=sw[i-k][j]+wk(a_base,'-',k);
|
||||
kd = k;
|
||||
}
|
||||
}
|
||||
|
||||
double step_right = 0;
|
||||
int ki = 0;
|
||||
for ( int k = 1 ; k < j ; k++ ) {
|
||||
if ( step_right < sw[i][j-k]+wk('-',b_base,k) ) {
|
||||
step_right=sw[i][j-k]+wk('-',b_base,k);
|
||||
ki = k;
|
||||
}
|
||||
}
|
||||
|
||||
if ( step_down > step_right ) {
|
||||
if ( step_down > step_diag ) {
|
||||
sw[i][j] = Math.max(0,step_down);
|
||||
btrack[i][j] = kd; // positive=vertical
|
||||
}
|
||||
else {
|
||||
sw[i][j] = Math.max(0,step_diag);
|
||||
btrack[i][j] = 0; // 0 = diagonal
|
||||
}
|
||||
} else {
|
||||
// step_down < step_right
|
||||
if ( step_right > step_diag ) {
|
||||
sw[i][j] = Math.max(0,step_right);
|
||||
btrack[i][j] = -ki; // negative = horizontal
|
||||
} else {
|
||||
sw[i][j] = Math.max(0,step_diag);
|
||||
btrack[i][j] = 0; // 0 = diagonal
|
||||
}
|
||||
}
|
||||
sw[i][j] = Math.max(0, Math.max(step_diag,Math.max(step_down,step_right)));
|
||||
}
|
||||
}
|
||||
|
||||
// print(sw,a,b);
|
||||
|
||||
PrimitivePair.Int p = new PrimitivePair.Int();
|
||||
double maxscore = 0.0;
|
||||
// look for largest score. we use >= combined with the traversal direction
|
||||
// to ensure that if two scores are equal, the one closer to diagonal gets picked
|
||||
for ( int i = 1 ; i < n+1 ; i++ ) {
|
||||
if ( sw[i][m] >= maxscore ) { p.first = i; p.second = m ; maxscore = sw[i][m]; }
|
||||
}
|
||||
for ( int j = 1 ; j < m+1 ; j++ ) {
|
||||
if ( sw[n][j] > maxscore ||
|
||||
sw[n][j] == maxscore && Math.abs(n-j) < Math.abs(p.first-p.second)) {
|
||||
p.first = n;
|
||||
p.second = j ;
|
||||
maxscore = sw[n][j];
|
||||
}
|
||||
}
|
||||
|
||||
// System.out.println("\ni="+p.first+"; j="+p.second);
|
||||
|
||||
// p holds the position we start backtracking from; we will be assembling a cigar in the backwards order
|
||||
|
||||
|
||||
// we will be placing all insertions and deletions into sequence b, so the state are named w/regard
|
||||
// to that sequence
|
||||
|
||||
int state = MSTATE;
|
||||
int segment_length = 0; // length of the segment (continuous matches, insertions or deletions)
|
||||
|
||||
double [] scores = new double[3];
|
||||
|
||||
List<CigarElement> lce = new ArrayList<CigarElement>(5);
|
||||
|
||||
do {
|
||||
|
||||
int btr = btrack[p.first][p.second];
|
||||
int step_left = ( btr < 0 ? -btr : 1);
|
||||
int step_up = ( btr > 0 ? btr : 1 );
|
||||
|
||||
int new_state = -1;
|
||||
if ( btr > 0 ) new_state = DSTATE;
|
||||
else if ( btr < 0 ) new_state = ISTATE;
|
||||
else new_state = MSTATE;
|
||||
|
||||
int step_length = 1;
|
||||
|
||||
// move to next best location in the sw matrix:
|
||||
switch( new_state ) {
|
||||
case MSTATE: p.first--; p.second--; break;
|
||||
case ISTATE: p.second-=step_left; step_length = step_left; break;
|
||||
case DSTATE: p.first-=step_up; step_length = step_up; break;
|
||||
}
|
||||
|
||||
// now let's see if the state actually changed:
|
||||
if ( new_state == state ) segment_length+=step_length;
|
||||
else {
|
||||
// state changed, lets emit previous segment, whatever it was (Insertion Deletion, or (Mis)Match).
|
||||
CigarOperator o=null;
|
||||
switch(state) {
|
||||
case MSTATE: o = CigarOperator.M; break;
|
||||
case ISTATE: o = CigarOperator.I; break;
|
||||
case DSTATE: o = CigarOperator.D; break;
|
||||
}
|
||||
CigarElement e = new CigarElement(segment_length,o);
|
||||
lce.add(e);
|
||||
segment_length = step_length;
|
||||
state = new_state;
|
||||
}
|
||||
} while ( sw[p.first][p.second] != 0 );
|
||||
|
||||
// post-process the last segment we are still keeping
|
||||
CigarOperator o=null;
|
||||
switch(state) {
|
||||
case MSTATE: o = CigarOperator.M; break;
|
||||
case ISTATE: o = CigarOperator.I; break;
|
||||
case DSTATE: o = CigarOperator.D; break;
|
||||
}
|
||||
CigarElement e = new CigarElement(segment_length,o);
|
||||
lce.add(e);
|
||||
Collections.reverse(lce);
|
||||
alignmentCigar = new Cigar(lce);
|
||||
alignment_offset = p.first - p.second;
|
||||
}
|
||||
|
||||
private int w(char x, char y) {
|
||||
if ( x == y ) return 2; // match
|
||||
|
|
@ -413,12 +569,12 @@ public void align3(String a, String b) {
|
|||
}
|
||||
|
||||
private double wd ( char x, char y ) {
|
||||
if ( x== y ) return 2.0;
|
||||
else return -1.0;
|
||||
if ( x== y ) return w_match;
|
||||
else return w_mismatch;
|
||||
}
|
||||
|
||||
private double wk(char x, char y, int k) {
|
||||
return -2.0-k/6; // gap
|
||||
return w_open+(k-1)*w_extend; // gap
|
||||
// return -1.0 ; // no extension penalty
|
||||
// return -1.0-Math.log(k+1); // weak extension penalty
|
||||
}
|
||||
|
|
@ -507,6 +663,7 @@ public void align3(String a, String b) {
|
|||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuilder bmm = new StringBuilder();
|
||||
StringBuilder b1 = new StringBuilder();
|
||||
StringBuilder b2 = new StringBuilder();
|
||||
|
||||
|
|
@ -516,12 +673,14 @@ public void align3(String a, String b) {
|
|||
for ( ; pos2 < -alignment_offset ; pos2++ ) {
|
||||
b1.append(' ');
|
||||
b2.append(s2.charAt(pos2));
|
||||
bmm.append(' ');
|
||||
}
|
||||
// now pos2 = -alignment_offset;
|
||||
} else {
|
||||
for ( ; pos1 < alignment_offset ; pos1++ ) {
|
||||
b2.append(' ');
|
||||
b1.append(s1.charAt(pos1));
|
||||
bmm.append(' ');
|
||||
}
|
||||
// now pos1 = alignment_offset
|
||||
}
|
||||
|
|
@ -530,73 +689,85 @@ public void align3(String a, String b) {
|
|||
CigarElement ce = getCigar().getCigarElement(i) ;
|
||||
switch( ce.getOperator() ) {
|
||||
case M:
|
||||
int z = ce.getLength();
|
||||
b1.append(s1, pos1, pos1+z);
|
||||
b2.append(s2, pos2, pos2+z);
|
||||
pos1+=z; pos2+=z;
|
||||
for ( int k = 0 ; k < ce.getLength() ; k++ ) {
|
||||
if ( Character.toUpperCase(s1.charAt(pos1)) != Character.toUpperCase(s2.charAt(pos2)) ) bmm.append('*');
|
||||
else bmm.append(' ');
|
||||
b1.append(s1.charAt(pos1++));
|
||||
b2.append(s2.charAt(pos2++));
|
||||
}
|
||||
break;
|
||||
case I:
|
||||
for ( int k = 0 ; k < ce.getLength() ; k++ ) {
|
||||
b1.append('+');
|
||||
bmm.append('+');
|
||||
b2.append(s2.charAt(pos2++));
|
||||
}
|
||||
break;
|
||||
case D:
|
||||
for ( int k = 0 ; k < ce.getLength() ; k++ ) {
|
||||
b1.append(s1.charAt(pos1++));
|
||||
bmm.append('-');
|
||||
b2.append('-');
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
bmm.append('\n');
|
||||
b1.append(s1,pos1,s1.length());
|
||||
bmm.append(b1);
|
||||
bmm.append('\n');
|
||||
b2.append(s2,pos2,s2.length());
|
||||
b1.append('\n');
|
||||
b1.append(b2);
|
||||
b1.append('\n');
|
||||
return b1.toString();
|
||||
bmm.append(b2);
|
||||
bmm.append('\n');
|
||||
return bmm.toString();
|
||||
}
|
||||
|
||||
public static void testMe() {
|
||||
// String s1 = "ACCTGGTGTATATAGGGTAAGGCTGAT";
|
||||
// String s2 = "TGTATATAGGGTAAGG";
|
||||
/* String s1 = "ACCTGGTGTATATAGGGTAAGGCTGAT";
|
||||
String s2 = "TGTATATAGGGTAAGG";
|
||||
|
||||
// String s1 = "GGTAAGGC";
|
||||
// String s2 = "GGTCTCAA";
|
||||
|
||||
// String s1 = "ACCTGGTGTATATAGGGTAAGGCTGAT";
|
||||
// String s2 = "TGTTAGGGTCTCAAGG";
|
||||
|
||||
// String s1 = "ACCTGGTGTATATAGGGTAAGGCTGAT";
|
||||
// String s2 = "TAGGGTAAGGCTGATCCATGTACCG" ;
|
||||
|
||||
String s1 = "ACCTGGTGTATATAGGGTAAGGCTGAT";
|
||||
String s2 = "CCGTATCATTACCTGGTGTATATAGG";
|
||||
|
||||
// String s1 = "GGTGTATATAGGGT" ;
|
||||
// String s2 = "TGTTAGGG";
|
||||
testMe(s1,s2);
|
||||
|
||||
s1 = "GGTAAGGC";
|
||||
s2 = "GGTCTCAA";
|
||||
|
||||
testMe(s1,s2);
|
||||
|
||||
s1 = "ACCTGGTGTATATAGGGTAAGGCTGAT";
|
||||
s2 = "TGTTAGGGTCTCAAGG";
|
||||
testMe(s1,s2);
|
||||
|
||||
|
||||
s1 = "ACCTGGTGTATATAGGGTAAGGCTGAT";
|
||||
s2 = "TAGGGTAAGGCTGATCCATGTACCG" ;
|
||||
testMe(s1,s2);
|
||||
|
||||
s1 = "ACCTGGTGTATATAGGGTAAGGCTGAT";
|
||||
s2 = "CCGTATCATTACCTGGTGTATATAGG";
|
||||
testMe(s1,s2);
|
||||
|
||||
s1 = "GGTGTATATAGGGT" ;
|
||||
s2 = "TGTTAGGG";
|
||||
testMe(s1,s2);
|
||||
|
||||
s1 = "AGACAGAGAGAAGG";
|
||||
s2 = "AGACAGAGAAGG";
|
||||
testMe(s1,s2);
|
||||
*/
|
||||
String s1 = "CCAGCACACAGGTATCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTGTTTTTTGA";
|
||||
String s2 = "CCAGCACACATCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTGTTTTTTGA";
|
||||
testMe(s1,s2);
|
||||
}
|
||||
|
||||
public static void testMe(String s1, String s2) {
|
||||
|
||||
SWPairwiseAlignment swpa = new SWPairwiseAlignment(s1,s2);
|
||||
SWPairwiseAlignment swpa = new SWPairwiseAlignment(s1,s2,3.0,-1.0,-4,-0.5);
|
||||
|
||||
|
||||
for ( int i = 0 ; i < swpa.getCigar().numCigarElements() ; i++ ) {
|
||||
char c=' ';
|
||||
switch ( swpa.getCigar().getCigarElement(i).getOperator() ) {
|
||||
case M : c = 'M'; break;
|
||||
case D : c = 'D'; break;
|
||||
case I : c = 'I'; break;
|
||||
}
|
||||
System.out.print(c);
|
||||
System.out.print(swpa.getCigar().getCigarElement(i).getLength());
|
||||
}
|
||||
SequencePile sp = new SequencePile(s1);
|
||||
sp.addAlignedSequence(s2,false,swpa.getCigar(),swpa.getAlignmentStart2wrt1());
|
||||
System.out.println();
|
||||
System.out.println(sp.format());
|
||||
System.out.println(AlignmentUtils.toString(swpa.getCigar()));
|
||||
// SequencePile sp = new SequencePile(s1);
|
||||
// sp.addAlignedSequence(s2,false,swpa.getCigar(),swpa.getAlignmentStart2wrt1());
|
||||
// System.out.println();
|
||||
// System.out.println(sp.format());
|
||||
|
||||
System.out.println("--------\n"+swpa.toString());
|
||||
|
||||
|
|
|
|||
|
|
@ -6,13 +6,16 @@ import net.sf.samtools.*;
|
|||
|
||||
public class SequencePile {
|
||||
private List<MSAColumn> mSeqGrid;
|
||||
private StringBuffer mRefGrid;
|
||||
private StringBuilder mRefGrid;
|
||||
private StringBuilder headerGrid;
|
||||
private int mDepth;
|
||||
private List<Boolean> mSeqRC;
|
||||
|
||||
|
||||
public SequencePile(String ref) {
|
||||
mRefGrid = new StringBuffer( ref );
|
||||
mRefGrid = new StringBuilder( ref );
|
||||
headerGrid = new StringBuilder();
|
||||
for ( int i = 0; i < ref.length(); i++ ) headerGrid.append(' ');
|
||||
mSeqGrid = new ArrayList<MSAColumn>();
|
||||
for ( int i = 0 ; i < mRefGrid.length(); i++ ) {
|
||||
mSeqGrid.add(new MSAColumn());
|
||||
|
|
@ -39,19 +42,21 @@ public class SequencePile {
|
|||
// alignedSeq = ReverseComplement(seq);
|
||||
// } else alignedSeq = seq;
|
||||
mSeqRC.add(isRC);
|
||||
|
||||
int pos = 0; // actual position on the grid (reference can have insertions)
|
||||
|
||||
// will hold actual position on the grid; reference can have insertions on the grid,
|
||||
// so position on the grid where we should start placing the read is not refpos!
|
||||
int pos = 0;
|
||||
for ( int i = 0 ; i < refpos ; i++ ) { // i is the position on the original reference
|
||||
// if we got some insertions on the reference prior to refpos, we need to take care of them:
|
||||
// if we got some insertions on the reference prior to refpos, we need to count them in:
|
||||
while( mRefGrid.charAt(pos) == '+' ) {
|
||||
mSeqGrid.get(pos).add(' ');
|
||||
mSeqGrid.get(pos).add(' '); // add additional spaces in the line that will hold sequence seq
|
||||
pos++;
|
||||
}
|
||||
mSeqGrid.get(pos).add(' '); // fill with ' ' to the left of the read
|
||||
pos++;
|
||||
}
|
||||
|
||||
// we reached start position of the alignment
|
||||
// we reached start position of the alignment on the reference grid
|
||||
|
||||
int readpos = 0; // position on the read
|
||||
|
||||
|
|
@ -66,18 +71,17 @@ public class SequencePile {
|
|||
if ( pos >= 0 ) {
|
||||
if ( mRefGrid.charAt(pos) !='+' ) { // there was no insertion here yet: add it now!
|
||||
mRefGrid.insert(pos, '+');
|
||||
headerGrid.insert(pos,'+');
|
||||
MSAColumn c = new MSAColumn();
|
||||
// reads up to the previous depth (prior to adding current read) did not
|
||||
// have an insertion here, so we insert '*' into all of them:
|
||||
for ( int k = 0 ; k < mDepth ; k++ ) {
|
||||
if ( mSeqGrid.get(pos-1).charAt(k) == ' ') c.add(' ');
|
||||
else c.add('*');
|
||||
}
|
||||
mSeqGrid.add(pos, c);
|
||||
}
|
||||
try {
|
||||
mSeqGrid.get(pos).add(alignedSeq.charAt(readpos));
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new IllegalArgumentException(e.getMessage()+": "+seq);
|
||||
mSeqGrid.add(pos, c); // finally, add the base from the current read
|
||||
}
|
||||
mSeqGrid.get(pos).add(alignedSeq.charAt(readpos));
|
||||
}
|
||||
readpos++;
|
||||
pos++;
|
||||
|
|
@ -91,6 +95,7 @@ public class SequencePile {
|
|||
}
|
||||
if ( pos >= mRefGrid.length() ) break;
|
||||
mSeqGrid.get(pos).add('-'); // mark deletion
|
||||
headerGrid.setCharAt(pos,'-');
|
||||
pos++;
|
||||
}
|
||||
break;
|
||||
|
|
@ -102,11 +107,10 @@ public class SequencePile {
|
|||
pos++;
|
||||
}
|
||||
if ( pos >= mRefGrid.length() ) break;
|
||||
try {
|
||||
mSeqGrid.get(pos).add(alignedSeq.charAt(readpos));
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new IllegalArgumentException(e.getMessage()+": "+seq);
|
||||
}
|
||||
mSeqGrid.get(pos).add(alignedSeq.charAt(readpos));
|
||||
if ( Character.toUpperCase(alignedSeq.charAt(readpos)) !=
|
||||
Character.toUpperCase(mRefGrid.charAt(pos))
|
||||
&& headerGrid.charAt(pos)== ' ') headerGrid.setCharAt(pos,'*');
|
||||
pos++;
|
||||
readpos++;
|
||||
}
|
||||
|
|
@ -158,35 +162,114 @@ public class SequencePile {
|
|||
|
||||
public void colorprint() { colorprint(false); }
|
||||
|
||||
public void colorprint(boolean printId) {
|
||||
if ( printId ) System.out.print(" ");
|
||||
else System.out.print(" ");
|
||||
System.out.println(mRefGrid);
|
||||
public void dotprint(boolean printId) {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
java.util.Formatter frmt = new java.util.Formatter(sb);
|
||||
|
||||
String skip = null;
|
||||
if ( printId ) skip = new String(" ");
|
||||
else skip = new String(" ");
|
||||
|
||||
System.out.print(formatHeader(skip));
|
||||
System.out.print(skip);
|
||||
System.out.println(mRefGrid);
|
||||
|
||||
try {
|
||||
for ( int i = 0 ; i < mDepth; i++ ) {
|
||||
if ( printId ) System.out.printf("%3d",i);
|
||||
if ( mSeqRC.get(i).booleanValue() ) System.out.print("<-");
|
||||
else System.out.print("->");
|
||||
for ( int j = 0 ; j < mRefGrid.length() ; j++) {
|
||||
char seqbase = mSeqGrid.get(j).charAt(i);
|
||||
char refbase = mRefGrid.charAt(j);
|
||||
if ( isBase(refbase) && isBase(seqbase) &&
|
||||
Character.toUpperCase(refbase) ==
|
||||
Character.toUpperCase(seqbase) ) {
|
||||
if ( mSeqRC.get(i) ) System.out.print(',');
|
||||
else System.out.print('.');
|
||||
}
|
||||
else System.out.print(seqbase);
|
||||
}
|
||||
System.out.print('\n');
|
||||
}
|
||||
} catch (Exception e) {}
|
||||
}
|
||||
|
||||
|
||||
public void colorprint(boolean printId) {
|
||||
|
||||
String skip = null;
|
||||
if ( printId ) skip = new String(" ");
|
||||
else skip = new String(" ");
|
||||
|
||||
System.out.print(formatHeader(skip));
|
||||
System.out.print(skip);
|
||||
System.out.println(mRefGrid);
|
||||
|
||||
try {
|
||||
for ( int i = 0 ; i < mDepth; i++ ) {
|
||||
if ( printId ) {
|
||||
sb.delete(0,sb.length());
|
||||
frmt.format("%3d:", i);
|
||||
System.out.print(frmt.out().toString());
|
||||
}
|
||||
if ( printId ) System.out.printf("%3d",i);
|
||||
if ( mSeqRC.get(i).booleanValue() ) System.out.print("<-");
|
||||
else System.out.print("->");
|
||||
for ( int j = 0 ; j < mRefGrid.length() ; j++) {
|
||||
char seqbase = mSeqGrid.get(j).charAt(i);
|
||||
char refbase = mRefGrid.charAt(j);
|
||||
if ( isBase(refbase) && isBase(seqbase) && refbase != seqbase ) System.out.print("\033[31m"+seqbase+"\033[30m");
|
||||
if ( isBase(refbase) && isBase(seqbase) &&
|
||||
Character.toUpperCase(refbase) !=
|
||||
Character.toUpperCase(seqbase) ) System.out.print("\033[31m"+seqbase+"\033[30m");
|
||||
else System.out.print(seqbase);
|
||||
}
|
||||
System.out.print('\n');
|
||||
}
|
||||
} catch (Exception e) {}
|
||||
}
|
||||
|
||||
|
||||
private String formatHeader(String leadString) {
|
||||
char [][] mm_strings = new char[2][mRefGrid.length()];
|
||||
for ( int i = 0 ; i < mRefGrid.length() ; i++ ) {
|
||||
int count = 0;
|
||||
char refC = mRefGrid.charAt(i);
|
||||
MSAColumn col = mSeqGrid.get(i);
|
||||
if ( refC == '+' ) {
|
||||
// count number of observations for insertion
|
||||
for ( int j = 0 ; j < col.size() ; j++ ) {
|
||||
if ( col.charAt(j) != '*' ) count++;
|
||||
}
|
||||
} else {
|
||||
if ( headerGrid.charAt(i) == '-' ) {
|
||||
// count number of observations for deletion
|
||||
for ( int j = 0 ; j < col.size() ; j++ ) {
|
||||
if ( col.charAt(j) == '-' ) count++;
|
||||
}
|
||||
} else {
|
||||
if ( headerGrid.charAt(i) == '*') {
|
||||
for ( int j = 0 ; j < col.size() ; j++ ) {
|
||||
if ( col.charAt(j)!=' ' &&
|
||||
Character.toUpperCase(col.charAt(j)) !=
|
||||
Character.toUpperCase(refC) ) count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( count > 9 ) mm_strings[0][i] = Character.forDigit(count/10,10);
|
||||
else mm_strings[0][i] = ' ';
|
||||
if ( count > 0 ) mm_strings[1][i] = Character.forDigit(count%10,10);
|
||||
else mm_strings[1][i] = ' ';
|
||||
}
|
||||
|
||||
StringBuilder b = new StringBuilder();
|
||||
b.append(leadString);
|
||||
b.append(mm_strings[0]);
|
||||
b.append('\n');
|
||||
b.append(leadString);
|
||||
b.append(mm_strings[1]);
|
||||
b.append('\n');
|
||||
b.append(leadString);
|
||||
b.append(headerGrid);
|
||||
b.append('\n');
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
private boolean isBase(char b) {
|
||||
b = Character.toUpperCase(b);
|
||||
return ( b=='A' ||b == 'C' || b=='G' || b=='T' || b=='N');
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue