git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@166 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a3b8830855
commit
3d1e0bf079
|
|
@ -51,7 +51,9 @@ public class ConsensusSequence {
|
||||||
// count bases from the sequence into the coverage
|
// count bases from the sequence into the coverage
|
||||||
int posOnConsensus = offset - startOffset;
|
int posOnConsensus = offset - startOffset;
|
||||||
for ( int i = 0 ; i < seq.length() ; i++, posOnConsensus++ ) {
|
for ( int i = 0 ; i < seq.length() ; i++, posOnConsensus++ ) {
|
||||||
coverage.get(posOnConsensus)[baseToInt(seq.charAt(i))]++;
|
char base = Character.toUpperCase(seq.charAt(i));
|
||||||
|
if ( base == 'N') continue;
|
||||||
|
coverage.get(posOnConsensus)[baseToInt(base)]++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -73,10 +75,24 @@ public class ConsensusSequence {
|
||||||
// subtract sequence bases from the coverage
|
// subtract sequence bases from the coverage
|
||||||
int posOnConsensus = offset - startOffset;
|
int posOnConsensus = offset - startOffset;
|
||||||
for ( int i = 0 ; i < seq.length() ; i++, posOnConsensus++ ) {
|
for ( int i = 0 ; i < seq.length() ; i++, posOnConsensus++ ) {
|
||||||
coverage.get(posOnConsensus)[ baseToInt(seq.charAt(i)) ]--;
|
char base = Character.toUpperCase(seq.charAt(i));
|
||||||
|
if ( base == 'N') continue;
|
||||||
|
coverage.get(posOnConsensus)[ baseToInt(base) ]--;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns offset of the start of consensus sequence with respect to the reference position the
|
||||||
|
* consensus is pinned to.
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public int getStartOffset() { return startOffset; }
|
||||||
|
|
||||||
|
/** Returns the length (number of bases) of the consensus sequence.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public int length() { return coverage.size(); }
|
||||||
|
|
||||||
/** Returns the "distance" (score measuring the agreement) from the currently held consensus sequence to
|
/** Returns the "distance" (score measuring the agreement) from the currently held consensus sequence to
|
||||||
* the specified sequence <code>seq</code> starting at position <code>offset</code> wrt consenus reference position.
|
* the specified sequence <code>seq</code> starting at position <code>offset</code> wrt consenus reference position.
|
||||||
* @param seq
|
* @param seq
|
||||||
|
|
@ -97,7 +113,7 @@ public class ConsensusSequence {
|
||||||
int stop = Math.min(offset+seq.length(), startOffset+coverage.size() ) - offset;
|
int stop = Math.min(offset+seq.length(), startOffset+coverage.size() ) - offset;
|
||||||
|
|
||||||
for ( ; i < stop ; i++, posOnConsensus++ ) {
|
for ( ; i < stop ; i++, posOnConsensus++ ) {
|
||||||
int base = baseToInt(seq.charAt(posOnConsensus));
|
int base = baseToInt(Character.toUpperCase(seq.charAt(posOnConsensus)));
|
||||||
int [] cov = coverage.get(posOnConsensus);
|
int [] cov = coverage.get(posOnConsensus);
|
||||||
int totalcov = cov[0]+cov[1]+cov[2]+cov[3];
|
int totalcov = cov[0]+cov[1]+cov[2]+cov[3];
|
||||||
|
|
||||||
|
|
@ -109,7 +125,8 @@ public class ConsensusSequence {
|
||||||
* Specified offset must be within the span of currently held consensus sequence. Consensus base is the
|
* Specified offset must be within the span of currently held consensus sequence. Consensus base is the
|
||||||
* one with the maximum count of observations. If two different nucleotides were observed exactly the
|
* one with the maximum count of observations. If two different nucleotides were observed exactly the
|
||||||
* same number of times (and that number is greater than the number of observations for othe nucleotides),
|
* same number of times (and that number is greater than the number of observations for othe nucleotides),
|
||||||
* the "lesser" one, (order being ACGT) will be returned.
|
* the "lesser" one, (order being ACGT) will be returned. If coverage at specified position is zero, 'N' will
|
||||||
|
* be returned.
|
||||||
* @param offset
|
* @param offset
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
|
|
@ -149,6 +166,29 @@ public class ConsensusSequence {
|
||||||
return new Pair<Character,Integer>(base,bmax);
|
return new Pair<Character,Integer>(base,bmax);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns total coverage (all observations regardless of what base what observed) at position
|
||||||
|
* specified by offset with respect to the conensus' reference position. offset does not have to be within
|
||||||
|
* the bounds of the currently kept consensus sequence, if it falls outside, a 0 will be silently returned.
|
||||||
|
* @param offset
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public int coverageAt(int offset) {
|
||||||
|
if ( offset < startOffset || offset >= startOffset + coverage.size() ) return 0;
|
||||||
|
int [] cov = coverage.get(offset+(int)referencePos);
|
||||||
|
return cov[0]+cov[1]+cov[2]+cov[3];
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns consesus sequence as a astring of bases (ACGTN); N will be returned for positions with zero
|
||||||
|
* coverage.
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public String getSequence() {
|
||||||
|
char [] b = new char[coverage.size()];
|
||||||
|
for ( int i = 0 ; i < b.length ; i++ ) {
|
||||||
|
b[i] = baseAt(i+startOffset);
|
||||||
|
}
|
||||||
|
return new String(b);
|
||||||
|
}
|
||||||
|
|
||||||
private List<int[]> instantiateCoverageList(int n) {
|
private List<int[]> instantiateCoverageList(int n) {
|
||||||
List< int[] > subseq = new ArrayList<int[] >(n);
|
List< int[] > subseq = new ArrayList<int[] >(n);
|
||||||
|
|
@ -163,7 +203,8 @@ public class ConsensusSequence {
|
||||||
case 'C': base = 1; break;
|
case 'C': base = 1; break;
|
||||||
case 'G': base = 2; break;
|
case 'G': base = 2; break;
|
||||||
case 'T': base = 3; break;
|
case 'T': base = 3; break;
|
||||||
default : throw new IllegalArgumentException("Sequence can contain only ACGT symbols");
|
case 'N': base = -1; break;
|
||||||
|
default : throw new IllegalArgumentException("Sequence can contain only ACGTN symbols");
|
||||||
}
|
}
|
||||||
return base;
|
return base;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -36,7 +36,7 @@ public class IndelInspector extends CommandLineProgram {
|
||||||
|
|
||||||
protected int doWork() {
|
protected int doWork() {
|
||||||
|
|
||||||
System.out.println("I am at version 0.2");
|
System.out.println("I am at version 0.3");
|
||||||
GenomeLoc location = null;
|
GenomeLoc location = null;
|
||||||
if ( GENOME_LOCATION != null ) {
|
if ( GENOME_LOCATION != null ) {
|
||||||
location = GenomeLoc.parseGenomeLoc(GENOME_LOCATION);
|
location = GenomeLoc.parseGenomeLoc(GENOME_LOCATION);
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.playground.indels;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
|
|
||||||
|
|
||||||
public class MultipleAlignment implements Iterable<Integer> {
|
public class MultipleAlignment implements Iterable<Integer> {
|
||||||
|
|
@ -12,12 +13,14 @@ public class MultipleAlignment implements Iterable<Integer> {
|
||||||
private List<Integer> alignment_offsets; // offset of seqs[i] w/respect to seqs[0] (i.e. in order the seqs were added)
|
private List<Integer> alignment_offsets; // offset of seqs[i] w/respect to seqs[0] (i.e. in order the seqs were added)
|
||||||
private int best_mm; // mismatch count
|
private int best_mm; // mismatch count
|
||||||
private int next_mm; // next-best mismatch count
|
private int next_mm; // next-best mismatch count
|
||||||
|
private ConsensusSequence consensus;
|
||||||
|
|
||||||
public MultipleAlignment() {
|
public MultipleAlignment() {
|
||||||
index = new HashMap<Integer,Integer>();
|
index = new HashMap<Integer,Integer>();
|
||||||
seqs = new ArrayList<String>();
|
seqs = new ArrayList<String>();
|
||||||
alignment_offsets = new ArrayList<Integer>();
|
alignment_offsets = new ArrayList<Integer>();
|
||||||
ext_ids = new ArrayList<Integer>();
|
ext_ids = new ArrayList<Integer>();
|
||||||
|
consensus = new ConsensusSequence(); // we use reference position 0, e.g. we hook onto the first read in the pile
|
||||||
}
|
}
|
||||||
|
|
||||||
public void clear() {
|
public void clear() {
|
||||||
|
|
@ -31,37 +34,46 @@ public class MultipleAlignment implements Iterable<Integer> {
|
||||||
*
|
*
|
||||||
* @param seq sequence to add
|
* @param seq sequence to add
|
||||||
* @param i id of the sequence (can be use later to query the pile)
|
* @param i id of the sequence (can be use later to query the pile)
|
||||||
|
* @see #add(String,int,int)
|
||||||
*/
|
*/
|
||||||
public void add( String seq, int i ) throws IllegalStateException {
|
public void add( String seq, int i ) throws IllegalStateException {
|
||||||
if ( size() != 0 ) throw new IllegalStateException("Single sequence can be added to an empty pile only");
|
if ( size() != 0 ) throw new IllegalStateException("Single sequence can be added to an empty pile only");
|
||||||
index.put(i,0);
|
add(seq,i,0);
|
||||||
ext_ids.add(i);
|
|
||||||
seqs.add(seq);
|
|
||||||
alignment_offsets.add(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Adds single sequence with id set to i and places it at the specified offset wrt the first sequence
|
||||||
|
* in this pile (i.e. wrt reference position 0).
|
||||||
|
*
|
||||||
|
* @param seq sequence to add
|
||||||
|
* @param i id of the sequence (can be use later to query the pile)
|
||||||
|
* @see #add(String,int)
|
||||||
|
*/
|
||||||
|
public void add( String seq, int i, int offset ) throws IllegalStateException {
|
||||||
|
index.put(i,index.size());
|
||||||
|
ext_ids.add(i);
|
||||||
|
seqs.add(seq);
|
||||||
|
alignment_offsets.add(offset);
|
||||||
|
consensus.addSequence(seq,offset);
|
||||||
|
}
|
||||||
|
|
||||||
public void add( PairwiseAlignment a) {
|
public void add( PairwiseAlignment a) {
|
||||||
if ( a.id1() == -1 || a.id2() == -1 ) throw new IllegalArgumentException("Attempt to add pairwise alignemnt with sequence ids not properly set");
|
if ( a.id1() == -1 || a.id2() == -1 ) throw new IllegalArgumentException("Attempt to add pairwise alignemnt with sequence ids not properly set");
|
||||||
add(a,a.id1(),a.id2());
|
add(a,a.id1(),a.id2());
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Adds pair of aligned sequences to the pile, with the external ids of the first and second sequences being i and j,
|
/** Adds pair of aligned sequences to the pile, with the external ids of the first and second sequences being i and j,
|
||||||
* respectively. Pairwise alignment can be always added to an empty pile. If the pile is non-empty and either both or
|
* respectively. Pairwise alignment can be always added to an empty pile. If the pile is non-empty, exactly
|
||||||
* none of the specified ids are already in the pile, an IllegalStateException will be thrown
|
* one of the sequences held by the pair-wise alignment should be already in the pile; this sequence (and the
|
||||||
|
* pairwise alignment itself) will be used to stitch the other sequence to the pile. If either both or
|
||||||
|
* none of the specified ids are already in the pile, an IllegalStateException will be thrown.
|
||||||
* @param a
|
* @param a
|
||||||
* @param i
|
* @param i
|
||||||
* @param j
|
* @param j
|
||||||
*/
|
*/
|
||||||
public void add( PairwiseAlignment a, int i, int j ) throws IllegalStateException {
|
public void add( PairwiseAlignment a, int i, int j ) throws IllegalStateException {
|
||||||
if ( seqs.size() == 0 ) {
|
if ( seqs.size() == 0 ) {
|
||||||
index.put(i,0);
|
add(a.getSequence1(),i,0);
|
||||||
ext_ids.add(i);
|
add(a.getSequence2(),j,a.getBestOffset2wrt1());
|
||||||
seqs.add(a.getSequence1());
|
|
||||||
index.put(j,1);
|
|
||||||
ext_ids.add(j);
|
|
||||||
seqs.add(a.getSequence2());
|
|
||||||
alignment_offsets.add(0);
|
|
||||||
alignment_offsets.add(a.getBestOffset2wrt1());
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -76,22 +88,49 @@ public class MultipleAlignment implements Iterable<Integer> {
|
||||||
throw new IllegalStateException("Attempt to add pairwise alignment for two sequences none of which is already in the pile");
|
throw new IllegalStateException("Attempt to add pairwise alignment for two sequences none of which is already in the pile");
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( second == null ) {
|
if ( second == null ) add(a.getSequence2(),j, a.getBestOffset2wrt1() + alignment_offsets.get( first ) );
|
||||||
index.put(j,seqs.size());
|
else add(a.getSequence1(),i, -a.getBestOffset2wrt1() + alignment_offsets.get( second ) );
|
||||||
seqs.add(a.getSequence2());
|
|
||||||
ext_ids.add(j);
|
|
||||||
alignment_offsets.add( a.getBestOffset2wrt1() + alignment_offsets.get( first ));
|
|
||||||
} else {
|
|
||||||
// first = null
|
|
||||||
index.put(i,seqs.size());
|
|
||||||
seqs.add(a.getSequence1());
|
|
||||||
ext_ids.add(i);
|
|
||||||
alignment_offsets.add( -a.getBestOffset2wrt1() + alignment_offsets.get( second ));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Adds another pile of aligned sequences to this pile, stitching them together using specified pairwise alignment
|
||||||
|
* p of the sequences with external ids i and j. One of the indices i, j must be in this pile, and the other in
|
||||||
|
* the pile being added, otherwise an IllegalArgumentException is thrown. Sequence id's i and j MUST be the ids
|
||||||
|
* of the first and second sequences in the pairwise alignment, in that order. Specified ids override
|
||||||
|
* ids, if any, set for the sequences in the pairwise alignment; it is not checked whether the specified and
|
||||||
|
* stored ids match. The piles can not overlap.
|
||||||
|
*/
|
||||||
|
public void add(MultipleAlignment a, PairwiseAlignment p, int i, int j) {
|
||||||
|
int off2; // offset of the first sequence in pile 'a' wrt the first sequence in this pile
|
||||||
|
if ( this.contains(i) ) {
|
||||||
|
if ( ! a.contains(j)) throw new IllegalArgumentException("Sequence is not in the pile");
|
||||||
|
off2 = getOffsetById(i)+p.getBestOffset2wrt1()-a.getOffsetById(j);
|
||||||
|
} else {
|
||||||
|
if ( this.contains(j)) {
|
||||||
|
if ( ! a.contains(i)) throw new IllegalArgumentException("Sequence is not in the pile");
|
||||||
|
off2 = getOffsetById(j)-p.getBestOffset2wrt1()-a.getOffsetById(i);
|
||||||
|
} else throw new IllegalArgumentException("Sequence is not in the pile");
|
||||||
|
}
|
||||||
|
// stitch sequences from a into this pile:
|
||||||
|
for ( Integer id : a ) {
|
||||||
|
if ( this.contains(id) ) throw new IllegalArgumentException("Attempt to add a pile that shares sequences with the current one");
|
||||||
|
add(a.getSequenceById(id),id,off2+a.getOffsetById(id));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/** Returns sequence associated with the specified external id, or null if sequence with this external id is not found in the pile
|
|
||||||
|
/** Adds another pile of aligned sequences (a) to this pile, stitching them together using specified
|
||||||
|
* pairwise alignment p. Sequence ids must be set in the pairwise alignment, and one of those ids
|
||||||
|
* must be in this pile, and the other in the pile 'a' being added, otherwise an IllegalArgumentException
|
||||||
|
* is thrown. If pairwise alignment does not have sequence ids set, IllegalArgumentException is thrown.
|
||||||
|
* The piles can not overlap.
|
||||||
|
*/
|
||||||
|
public void add(MultipleAlignment a, PairwiseAlignment p) {
|
||||||
|
if ( p.id1() == -1 || p.id2() == -1 ) throw new IllegalArgumentException("Attempt to add MSA based on pairwise alignemnt with sequence ids not properly set");
|
||||||
|
add(a,p,p.id1(),p.id2());
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns sequence associated with the specified external id, or null if sequence with this external id is
|
||||||
|
* not found in the pile
|
||||||
*
|
*
|
||||||
* @param id query id
|
* @param id query id
|
||||||
* @return sequence for specified id or null
|
* @return sequence for specified id or null
|
||||||
|
|
@ -101,49 +140,21 @@ public class MultipleAlignment implements Iterable<Integer> {
|
||||||
return seqs.get(index.get(id));
|
return seqs.get(index.get(id));
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns offset relative to the first sequence in the pile for sequence associated with the specified external id
|
/** Returns offset relative to the first sequence in the pile for sequence associated with the specified
|
||||||
|
* external id. If sequence with specified id is not found in the pile, RuntimeException is thrown.
|
||||||
*
|
*
|
||||||
* @param id query id
|
* @param id query id
|
||||||
* @return offset for sequence with specified id
|
* @return offset for sequence with specified id
|
||||||
*/
|
*/
|
||||||
public int getOffsetById(int id) {
|
public int getOffsetById(int id) {
|
||||||
//TODO: do something meaningful when id is not in the pile (exception?)
|
if ( ! contains(id) ) throw new RuntimeException("Specified id is not in the pile");
|
||||||
if ( ! contains(id)) return 0;
|
|
||||||
return alignment_offsets.get(index.get(id));
|
return alignment_offsets.get(index.get(id));
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Adds pile of aligned sequences to this pile, stitching them together using specified pairwise alignment
|
|
||||||
* p of the sequences with external ids i and j. One of the indices i, j must be in this pile, and the other in the pile being added,
|
|
||||||
* otherwise an IllegalArgumentException is thrown. The piles can not overlap.
|
|
||||||
*/
|
|
||||||
public void add(MultipleAlignment a, PairwiseAlignment p, int i, int j) {
|
|
||||||
int off2; // offset of the first sequence in pile 'a' wrt the first sequence in this pile
|
|
||||||
if ( this.contains(i) ) {
|
|
||||||
if ( ! a.contains(j)) throw new IllegalArgumentException("Sequence is not in the pile");
|
|
||||||
off2 = getOffsetById(i)+p.getBestOffset2wrt1()-a.getOffsetById(j);
|
|
||||||
} else {
|
|
||||||
if ( this.contains(j)) {
|
|
||||||
if ( ! a.contains(i)) throw new IllegalArgumentException("Sequence is not in the pile");
|
|
||||||
off2 = getOffsetById(j)-p.getBestOffset2wrt1()-a.getOffsetById(i);
|
|
||||||
} else throw new IllegalArgumentException("Sequence is not in the pile");
|
|
||||||
}
|
|
||||||
// stitch sequences from a into this pile:
|
|
||||||
for ( Integer id : a ) {
|
|
||||||
if ( this.contains(id) ) throw new IllegalArgumentException("Attempt to add a pile that shares sequences with the current one");
|
|
||||||
index.put(id,seqs.size());
|
|
||||||
ext_ids.add(id);
|
|
||||||
seqs.add(a.getSequenceById(id));
|
|
||||||
alignment_offsets.add(off2+a.getOffsetById(id));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public void add(MultipleAlignment a, PairwiseAlignment p) {
|
|
||||||
if ( p.id1() == -1 || p.id2() == -1 ) throw new IllegalArgumentException("Attempt to add MSA based on pairwise alignemnt with sequence ids not properly set");
|
|
||||||
add(a,p,p.id1(),p.id2());
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns true if the alignment already contains sequence with the specified id
|
/** Returns true if the alignment already contains sequence with the specified id.
|
||||||
*
|
*
|
||||||
* @param id
|
* @param id
|
||||||
* @return
|
* @return
|
||||||
|
|
@ -151,7 +162,15 @@ public class MultipleAlignment implements Iterable<Integer> {
|
||||||
public boolean contains(int id) {
|
public boolean contains(int id) {
|
||||||
return index.containsKey(id);
|
return index.containsKey(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns number of mismatches between sequences i and j (external ids) in the currently held multiple alignment.
|
||||||
|
* Will return 0 if sequences do not overlap. Will throw RuntimeException if any of the specified ids is not
|
||||||
|
* found in the current pile.
|
||||||
|
* @param i id of the first sequence
|
||||||
|
* @param j id of the second sequence
|
||||||
|
* @return mismatch count
|
||||||
|
*
|
||||||
|
* */
|
||||||
public int countMismatches(int i, int j) {
|
public int countMismatches(int i, int j) {
|
||||||
return PairwiseAlignment.countMismatches(getSequenceById(i), getSequenceById(j), getOffsetById(j)-getOffsetById(i));
|
return PairwiseAlignment.countMismatches(getSequenceById(i), getSequenceById(j), getOffsetById(j)-getOffsetById(i));
|
||||||
}
|
}
|
||||||
|
|
@ -216,58 +235,63 @@ public class MultipleAlignment implements Iterable<Integer> {
|
||||||
|
|
||||||
if ( seqs.size() == 0 ) return b.toString();
|
if ( seqs.size() == 0 ) return b.toString();
|
||||||
|
|
||||||
int skip_first = 0;
|
// int first_offset = 0; // offset of the first sequence wrt the start of the MSA
|
||||||
int msa_length = 0;
|
final int first_offset = -consensus.getStartOffset();
|
||||||
for ( int i = 0 ; i < seqs.size() ; i++ ) {
|
// int msa_length = 0;
|
||||||
if ( -alignment_offsets.get(i) > skip_first ) skip_first = -alignment_offsets.get(i);
|
// for ( int i = 0 ; i < seqs.size() ; i++ ) {
|
||||||
msa_length = Math.max( alignment_offsets.get(i)+seqs.get(i).length() , msa_length );
|
// if ( -alignment_offsets.get(i) > first_offset ) first_offset = -alignment_offsets.get(i);
|
||||||
}
|
// msa_length = Math.max( alignment_offsets.get(i)+seqs.get(i).length() , msa_length );
|
||||||
|
// }
|
||||||
|
|
||||||
msa_length += skip_first;
|
// msa_length += first_offset;
|
||||||
int [] cov = new int[4];
|
final int msa_length = consensus.length();
|
||||||
char[] bases = { 'A' , 'C', 'G', 'T' };
|
// int [] cov = new int[4];
|
||||||
char[][] consensus = new char[4][msa_length];
|
// char[] bases = { 'A' , 'C', 'G', 'T' };
|
||||||
|
char[][] consensusString = new char[4][msa_length];
|
||||||
|
|
||||||
for ( int i = 0 ; i < msa_length ; i++ ) {
|
for ( int i = -first_offset ; i < msa_length - first_offset ; i++ ) {
|
||||||
cov[0] = cov[1] = cov[2] = cov[3] = 0;
|
// cov[0] = cov[1] = cov[2] = cov[3] = 0;
|
||||||
for ( int j = 0 ; j < seqs.size(); j++ ) {
|
// for ( int j = 0 ; j < seqs.size(); j++ ) {
|
||||||
// offset of the sequence j wrt start of the msa region
|
// // offset of the sequence j wrt start of the msa region
|
||||||
int seq_offset = skip_first + alignment_offsets.get(j);
|
// int seq_offset = first_offset + alignment_offsets.get(j);
|
||||||
if ( i < seq_offset || i >= seq_offset + seqs.get(j).length() ) continue; // sequence j has no bases at position i
|
// if ( i < seq_offset || i >= seq_offset + seqs.get(j).length() ) continue; // sequence j has no bases at position i
|
||||||
int base = -1;
|
// int base = -1;
|
||||||
switch( Character.toUpperCase(seqs.get(j).charAt(i-seq_offset)) ) {
|
// switch( Character.toUpperCase(seqs.get(j).charAt(i-seq_offset)) ) {
|
||||||
case 'A': base = 0; break;
|
// case 'A': base = 0; break;
|
||||||
case 'C': base = 1 ; break;
|
// case 'C': base = 1 ; break;
|
||||||
case 'G': base = 2 ; break;
|
// case 'G': base = 2 ; break;
|
||||||
case 'T': base = 3 ; break;
|
// case 'T': base = 3 ; break;
|
||||||
}
|
// }
|
||||||
if ( base >= 0 ) cov[base]++;
|
// if ( base >= 0 ) cov[base]++;
|
||||||
}
|
// }
|
||||||
int total_cov = cov[0] + cov[1] + cov[2] + cov[3];
|
// int total_cov = cov[0] + cov[1] + cov[2] + cov[3];
|
||||||
int bmax = 0;
|
// int bmax = 0;
|
||||||
int mm = 0;
|
// int mm = 0;
|
||||||
consensus[3][i] = 'N';
|
|
||||||
for ( int z = 0; z < 4 ; z++ ) {
|
Pair<Character,Integer> base = consensus.baseWithCountAt(i-first_offset);
|
||||||
if ( cov[z] > bmax ) {
|
consensusString[3][i] = base.first;
|
||||||
bmax = cov[z];
|
// for ( int z = 0; z < 4 ; z++ ) {
|
||||||
consensus[3][i] = bases[z];
|
// if ( cov[z] > bmax ) {
|
||||||
mm = total_cov - bmax;
|
// bmax = cov[z];
|
||||||
}
|
// consensus[3][i] = bases[z];
|
||||||
}
|
// mm = total_cov - bmax;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
int mm = consensus.coverageAt(i) - base.second;
|
||||||
if ( mm > 0 ) {
|
if ( mm > 0 ) {
|
||||||
consensus[2][i] = '*';
|
consensusString[2][i] = '*';
|
||||||
if ( mm > 9 ) consensus[0][i] = Character.forDigit(mm/10,10);
|
if ( mm > 9 ) consensusString[0][i] = Character.forDigit(mm/10,10);
|
||||||
else consensus[0][i] = ' ';
|
else consensusString[0][i] = ' ';
|
||||||
consensus[1][i] = Character.forDigit(mm%10,10);
|
consensusString[1][i] = Character.forDigit(mm%10,10);
|
||||||
} else {
|
} else {
|
||||||
consensus[0][i] = consensus[1][i] = consensus[2][i] = ' ';
|
consensusString[0][i] = consensusString[1][i] = consensusString[2][i] = ' ';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
b.append(" "); b.append(consensus[0]); b.append('\n');
|
b.append(" "); b.append(consensusString[0]); b.append('\n');
|
||||||
b.append(" "); b.append(consensus[1]); b.append('\n');
|
b.append(" "); b.append(consensusString[1]); b.append('\n');
|
||||||
b.append(" "); b.append(consensus[2]); b.append('\n');
|
b.append(" "); b.append(consensusString[2]); b.append('\n');
|
||||||
b.append(" "); b.append(consensus[3]); b.append('\n');
|
b.append(" "); b.append(consensusString[3]); b.append('\n');
|
||||||
|
|
||||||
Integer[] perm = null;
|
Integer[] perm = null;
|
||||||
if ( inorder ) perm = Utils.SortPermutation(alignment_offsets);
|
if ( inorder ) perm = Utils.SortPermutation(alignment_offsets);
|
||||||
|
|
@ -275,7 +299,7 @@ public class MultipleAlignment implements Iterable<Integer> {
|
||||||
for ( int i = 0 ; i < seqs.size() ; i++ ) {
|
for ( int i = 0 ; i < seqs.size() ; i++ ) {
|
||||||
int index = (inorder ? perm[i] : i);
|
int index = (inorder ? perm[i] : i);
|
||||||
frmt.format("%3d:", ext_ids.get(index));
|
frmt.format("%3d:", ext_ids.get(index));
|
||||||
skipN(alignment_offsets.get(index)+skip_first,b);
|
skipN(alignment_offsets.get(index)+ first_offset,b);
|
||||||
b.append(seqs.get(index));
|
b.append(seqs.get(index));
|
||||||
b.append('\n');
|
b.append('\n');
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -103,6 +103,9 @@ public class PileBuilder implements RecordPileReceiver {
|
||||||
System.out.println("\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++") ;
|
System.out.println("\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++") ;
|
||||||
|
|
||||||
List<MultipleAlignment> piles = doMultipleAlignment2(seqs);
|
List<MultipleAlignment> piles = doMultipleAlignment2(seqs);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// System.out.print("Distance between final piles: "+distance(alignments1, alignments2));
|
// System.out.print("Distance between final piles: "+distance(alignments1, alignments2));
|
||||||
// System.out.print("; diameter of PILE1: "+ diameter(alignments1));
|
// System.out.print("; diameter of PILE1: "+ diameter(alignments1));
|
||||||
// System.out.println("; diameter of PILE2: "+ diameter(alignments2));
|
// System.out.println("; diameter of PILE2: "+ diameter(alignments2));
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue