auxiliary class for SequencePile, just one column of the MSA

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@150 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
asivache 2009-03-23 14:37:49 +00:00
parent 29136ee892
commit 2dd14d7c17
3 changed files with 88 additions and 12 deletions

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.indels;
import org.broadinstitute.sting.utils.Pair;
import java.util.List;
import java.util.ArrayList;
@ -15,6 +17,7 @@ public class ConsensusSequence {
private long referencePos;// (arbitrary) reference position; when adding sequences, their offsets should be wrt this position
private int startOffset; // offset of the leftmost base of this consensus sequence wrt referencePos; negative=left, positive=right
private final static int NUMBINS = 4;
private final static char[] BASES = { 'A','C','G','T' };
public ConsensusSequence(int refPos) {
coverage = new ArrayList< int[] >();
@ -102,6 +105,51 @@ public class ConsensusSequence {
return 0.0;
}
/** Returns consensus base at the specified offset wrt the consesus sequence's reference position.
* Specified offset must be within the span of currently held consensus sequence. Consensus base is the
* one with the maximum count of observations. If two different nucleotides were observed exactly the
* same number of times (and that number is greater than the number of observations for othe nucleotides),
* the "lesser" one, (order being ACGT) will be returned.
* @param offset
* @return
*/
public char baseAt(int offset) {
assert offset >= startOffset && offset < startOffset + coverage.size() : "Offset out of bounds";
int [] cov = coverage.get(offset+(int)referencePos);
int total_cov = cov[0] + cov[1] + cov[2] + cov[3];
int bmax = 0;
char base = 'N';
for ( int z = 0; z < 4 ; z++ ) {
if ( cov[z] > bmax ) {
bmax = cov[z];
base = BASES[z];
}
}
return base;
}
/** Returns consensus base at the specified offset together with its observation count.
*
* @param offset
* @return
* @see #baseAt(int)
*/
public Pair<Character,Integer> baseWithCountAt(int offset) {
assert offset >= startOffset && offset < startOffset + coverage.size() : "Offset out of bounds";
int [] cov = coverage.get(offset+(int)referencePos);
int total_cov = cov[0] + cov[1] + cov[2] + cov[3];
int bmax = 0;
char base = 'N';
for ( int z = 0; z < 4 ; z++ ) {
if ( cov[z] > bmax ) {
bmax = cov[z];
base = BASES[z];
}
}
return base;
}
private List<int[]> instantiateCoverageList(int n) {
List< int[] > subseq = new ArrayList<int[] >(n);
for ( int i = 0 ; i < n ; i++ ) subseq.add(new int[NUMBINS]);

View File

@ -0,0 +1,35 @@
package org.broadinstitute.sting.indels;
import java.util.List;
import java.util.ArrayList;
public class MSAColumn {
private List<Character> mBytes;
public MSAColumn() {
mBytes = new ArrayList<Character>();
}
/** Adds specified byte to the end of the column */
public void add(char b) throws IllegalArgumentException {
if ( b == 'A' || b == 'C' || b == 'T' || b == 'G' || b == '-' || b==' ' || b == '*' || b=='N') {
mBytes.add(b);
} else {
throw new IllegalArgumentException("Invalid base letter passed to MSAColumn");
}
}
/** Removes first element from the column */
public void removeFirst() throws IndexOutOfBoundsException {
mBytes.remove(0);
}
/** Removes value at the specified position from the column */
public void remove (int index) throws IndexOutOfBoundsException {
mBytes.remove(index);
}
public int size() { return mBytes.size(); }
public Character charAt(int offset) { return mBytes.get(offset); }
}

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.indels;
import net.sf.samtools.SAMRecord;
import java.util.*;
import org.broadinstitute.sting.utils.PrimitivePair;
public class PileBuilder implements RecordPileReceiver {
@ -14,14 +15,6 @@ public class PileBuilder implements RecordPileReceiver {
private String referenceSequence;
private int reference_start;
private static class IntPair {
int first;
int second;
public IntPair(int i, int j) { set(i,j); }
public int first() { return first; }
public int second() { return second; }
public void set(int i, int j) { first = i; second = j; }
}
private static class SelectedPair {
private int i_;
@ -269,7 +262,7 @@ public class PileBuilder implements RecordPileReceiver {
public SelectedPair findBestAlignment(MultipleAlignment a1, MultipleAlignment a2) {
Map<Integer, IntPair > all_offsets = new HashMap<Integer, IntPair >();
Map<Integer, PrimitivePair.Int > all_offsets = new HashMap<Integer, PrimitivePair.Int >();
SelectedPair p = new SelectedPair(-1,-1,1e100);
for ( Integer id1 : a1 ) {
@ -281,14 +274,14 @@ public class PileBuilder implements RecordPileReceiver {
// pairwise alignment that suggested it
int suggested_offset = a1.getOffsetById(id1) + pa.getBestOffset2wrt1(id1,id2) - a2.getOffsetById(id2);
if ( ! all_offsets.containsKey(suggested_offset) ) {
all_offsets.put( suggested_offset , new IntPair(id1,id2)) ;
all_offsets.put( suggested_offset , new PrimitivePair.Int(id1,id2)) ;
}
}
}
for ( Map.Entry<Integer,IntPair> offset_record : all_offsets.entrySet() ) {
for ( Map.Entry<Integer,PrimitivePair.Int> offset_record : all_offsets.entrySet() ) {
double d = averageDistanceForOffset(a1,a2,offset_record.getKey());
if ( d < p.d() ) p.set(offset_record.getValue().first(),offset_record.getValue().second(),d);
if ( d < p.d() ) p.set(offset_record.getValue().first,offset_record.getValue().second,d);
}
return p;
}