auxiliary class for SequencePile, just one column of the MSA
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@150 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
29136ee892
commit
2dd14d7c17
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.indels;
|
||||
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
|
|
@ -15,6 +17,7 @@ public class ConsensusSequence {
|
|||
private long referencePos;// (arbitrary) reference position; when adding sequences, their offsets should be wrt this position
|
||||
private int startOffset; // offset of the leftmost base of this consensus sequence wrt referencePos; negative=left, positive=right
|
||||
private final static int NUMBINS = 4;
|
||||
private final static char[] BASES = { 'A','C','G','T' };
|
||||
|
||||
public ConsensusSequence(int refPos) {
|
||||
coverage = new ArrayList< int[] >();
|
||||
|
|
@ -102,6 +105,51 @@ public class ConsensusSequence {
|
|||
return 0.0;
|
||||
}
|
||||
|
||||
/** Returns consensus base at the specified offset wrt the consesus sequence's reference position.
|
||||
* Specified offset must be within the span of currently held consensus sequence. Consensus base is the
|
||||
* one with the maximum count of observations. If two different nucleotides were observed exactly the
|
||||
* same number of times (and that number is greater than the number of observations for othe nucleotides),
|
||||
* the "lesser" one, (order being ACGT) will be returned.
|
||||
* @param offset
|
||||
* @return
|
||||
*/
|
||||
public char baseAt(int offset) {
|
||||
assert offset >= startOffset && offset < startOffset + coverage.size() : "Offset out of bounds";
|
||||
int [] cov = coverage.get(offset+(int)referencePos);
|
||||
int total_cov = cov[0] + cov[1] + cov[2] + cov[3];
|
||||
int bmax = 0;
|
||||
char base = 'N';
|
||||
for ( int z = 0; z < 4 ; z++ ) {
|
||||
if ( cov[z] > bmax ) {
|
||||
bmax = cov[z];
|
||||
base = BASES[z];
|
||||
}
|
||||
}
|
||||
return base;
|
||||
}
|
||||
|
||||
/** Returns consensus base at the specified offset together with its observation count.
|
||||
*
|
||||
* @param offset
|
||||
* @return
|
||||
* @see #baseAt(int)
|
||||
*/
|
||||
public Pair<Character,Integer> baseWithCountAt(int offset) {
|
||||
assert offset >= startOffset && offset < startOffset + coverage.size() : "Offset out of bounds";
|
||||
int [] cov = coverage.get(offset+(int)referencePos);
|
||||
int total_cov = cov[0] + cov[1] + cov[2] + cov[3];
|
||||
int bmax = 0;
|
||||
char base = 'N';
|
||||
for ( int z = 0; z < 4 ; z++ ) {
|
||||
if ( cov[z] > bmax ) {
|
||||
bmax = cov[z];
|
||||
base = BASES[z];
|
||||
}
|
||||
}
|
||||
return base;
|
||||
}
|
||||
|
||||
|
||||
private List<int[]> instantiateCoverageList(int n) {
|
||||
List< int[] > subseq = new ArrayList<int[] >(n);
|
||||
for ( int i = 0 ; i < n ; i++ ) subseq.add(new int[NUMBINS]);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,35 @@
|
|||
package org.broadinstitute.sting.indels;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
public class MSAColumn {
|
||||
private List<Character> mBytes;
|
||||
|
||||
public MSAColumn() {
|
||||
mBytes = new ArrayList<Character>();
|
||||
}
|
||||
|
||||
/** Adds specified byte to the end of the column */
|
||||
public void add(char b) throws IllegalArgumentException {
|
||||
if ( b == 'A' || b == 'C' || b == 'T' || b == 'G' || b == '-' || b==' ' || b == '*' || b=='N') {
|
||||
mBytes.add(b);
|
||||
} else {
|
||||
throw new IllegalArgumentException("Invalid base letter passed to MSAColumn");
|
||||
}
|
||||
}
|
||||
|
||||
/** Removes first element from the column */
|
||||
public void removeFirst() throws IndexOutOfBoundsException {
|
||||
mBytes.remove(0);
|
||||
}
|
||||
|
||||
/** Removes value at the specified position from the column */
|
||||
public void remove (int index) throws IndexOutOfBoundsException {
|
||||
mBytes.remove(index);
|
||||
}
|
||||
|
||||
public int size() { return mBytes.size(); }
|
||||
|
||||
public Character charAt(int offset) { return mBytes.get(offset); }
|
||||
}
|
||||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.indels;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
import org.broadinstitute.sting.utils.PrimitivePair;
|
||||
|
||||
|
||||
public class PileBuilder implements RecordPileReceiver {
|
||||
|
|
@ -14,14 +15,6 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
private String referenceSequence;
|
||||
private int reference_start;
|
||||
|
||||
private static class IntPair {
|
||||
int first;
|
||||
int second;
|
||||
public IntPair(int i, int j) { set(i,j); }
|
||||
public int first() { return first; }
|
||||
public int second() { return second; }
|
||||
public void set(int i, int j) { first = i; second = j; }
|
||||
}
|
||||
|
||||
private static class SelectedPair {
|
||||
private int i_;
|
||||
|
|
@ -269,7 +262,7 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
|
||||
public SelectedPair findBestAlignment(MultipleAlignment a1, MultipleAlignment a2) {
|
||||
|
||||
Map<Integer, IntPair > all_offsets = new HashMap<Integer, IntPair >();
|
||||
Map<Integer, PrimitivePair.Int > all_offsets = new HashMap<Integer, PrimitivePair.Int >();
|
||||
SelectedPair p = new SelectedPair(-1,-1,1e100);
|
||||
|
||||
for ( Integer id1 : a1 ) {
|
||||
|
|
@ -281,14 +274,14 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
// pairwise alignment that suggested it
|
||||
int suggested_offset = a1.getOffsetById(id1) + pa.getBestOffset2wrt1(id1,id2) - a2.getOffsetById(id2);
|
||||
if ( ! all_offsets.containsKey(suggested_offset) ) {
|
||||
all_offsets.put( suggested_offset , new IntPair(id1,id2)) ;
|
||||
all_offsets.put( suggested_offset , new PrimitivePair.Int(id1,id2)) ;
|
||||
}
|
||||
}
|
||||
}
|
||||
for ( Map.Entry<Integer,IntPair> offset_record : all_offsets.entrySet() ) {
|
||||
for ( Map.Entry<Integer,PrimitivePair.Int> offset_record : all_offsets.entrySet() ) {
|
||||
|
||||
double d = averageDistanceForOffset(a1,a2,offset_record.getKey());
|
||||
if ( d < p.d() ) p.set(offset_record.getValue().first(),offset_record.getValue().second(),d);
|
||||
if ( d < p.d() ) p.set(offset_record.getValue().first,offset_record.getValue().second,d);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue