From 2dd14d7c17b6809d91d96defdc5437451f4fcffb Mon Sep 17 00:00:00 2001 From: asivache Date: Mon, 23 Mar 2009 14:37:49 +0000 Subject: [PATCH] auxiliary class for SequencePile, just one column of the MSA git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@150 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/indels/ConsensusSequence.java | 48 +++++++++++++++++++ .../sting/indels/MSAColumn.java | 35 ++++++++++++++ .../sting/indels/PileBuilder.java | 17 ++----- 3 files changed, 88 insertions(+), 12 deletions(-) create mode 100755 playground/java/src/org/broadinstitute/sting/indels/MSAColumn.java diff --git a/playground/java/src/org/broadinstitute/sting/indels/ConsensusSequence.java b/playground/java/src/org/broadinstitute/sting/indels/ConsensusSequence.java index 6cbd688f0..d1666fc2a 100644 --- a/playground/java/src/org/broadinstitute/sting/indels/ConsensusSequence.java +++ b/playground/java/src/org/broadinstitute/sting/indels/ConsensusSequence.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.indels; +import org.broadinstitute.sting.utils.Pair; + import java.util.List; import java.util.ArrayList; @@ -15,6 +17,7 @@ public class ConsensusSequence { private long referencePos;// (arbitrary) reference position; when adding sequences, their offsets should be wrt this position private int startOffset; // offset of the leftmost base of this consensus sequence wrt referencePos; negative=left, positive=right private final static int NUMBINS = 4; + private final static char[] BASES = { 'A','C','G','T' }; public ConsensusSequence(int refPos) { coverage = new ArrayList< int[] >(); @@ -102,6 +105,51 @@ public class ConsensusSequence { return 0.0; } + /** Returns consensus base at the specified offset wrt the consesus sequence's reference position. + * Specified offset must be within the span of currently held consensus sequence. Consensus base is the + * one with the maximum count of observations. If two different nucleotides were observed exactly the + * same number of times (and that number is greater than the number of observations for othe nucleotides), + * the "lesser" one, (order being ACGT) will be returned. + * @param offset + * @return + */ + public char baseAt(int offset) { + assert offset >= startOffset && offset < startOffset + coverage.size() : "Offset out of bounds"; + int [] cov = coverage.get(offset+(int)referencePos); + int total_cov = cov[0] + cov[1] + cov[2] + cov[3]; + int bmax = 0; + char base = 'N'; + for ( int z = 0; z < 4 ; z++ ) { + if ( cov[z] > bmax ) { + bmax = cov[z]; + base = BASES[z]; + } + } + return base; + } + + /** Returns consensus base at the specified offset together with its observation count. + * + * @param offset + * @return + * @see #baseAt(int) + */ + public Pair baseWithCountAt(int offset) { + assert offset >= startOffset && offset < startOffset + coverage.size() : "Offset out of bounds"; + int [] cov = coverage.get(offset+(int)referencePos); + int total_cov = cov[0] + cov[1] + cov[2] + cov[3]; + int bmax = 0; + char base = 'N'; + for ( int z = 0; z < 4 ; z++ ) { + if ( cov[z] > bmax ) { + bmax = cov[z]; + base = BASES[z]; + } + } + return base; + } + + private List instantiateCoverageList(int n) { List< int[] > subseq = new ArrayList(n); for ( int i = 0 ; i < n ; i++ ) subseq.add(new int[NUMBINS]); diff --git a/playground/java/src/org/broadinstitute/sting/indels/MSAColumn.java b/playground/java/src/org/broadinstitute/sting/indels/MSAColumn.java new file mode 100755 index 000000000..a86ebde78 --- /dev/null +++ b/playground/java/src/org/broadinstitute/sting/indels/MSAColumn.java @@ -0,0 +1,35 @@ +package org.broadinstitute.sting.indels; + +import java.util.List; +import java.util.ArrayList; + +public class MSAColumn { + private List mBytes; + + public MSAColumn() { + mBytes = new ArrayList(); + } + + /** Adds specified byte to the end of the column */ + public void add(char b) throws IllegalArgumentException { + if ( b == 'A' || b == 'C' || b == 'T' || b == 'G' || b == '-' || b==' ' || b == '*' || b=='N') { + mBytes.add(b); + } else { + throw new IllegalArgumentException("Invalid base letter passed to MSAColumn"); + } + } + + /** Removes first element from the column */ + public void removeFirst() throws IndexOutOfBoundsException { + mBytes.remove(0); + } + + /** Removes value at the specified position from the column */ + public void remove (int index) throws IndexOutOfBoundsException { + mBytes.remove(index); + } + + public int size() { return mBytes.size(); } + + public Character charAt(int offset) { return mBytes.get(offset); } +} diff --git a/playground/java/src/org/broadinstitute/sting/indels/PileBuilder.java b/playground/java/src/org/broadinstitute/sting/indels/PileBuilder.java index 396d12a72..a75db65e8 100755 --- a/playground/java/src/org/broadinstitute/sting/indels/PileBuilder.java +++ b/playground/java/src/org/broadinstitute/sting/indels/PileBuilder.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.indels; import net.sf.samtools.SAMRecord; import java.util.*; +import org.broadinstitute.sting.utils.PrimitivePair; public class PileBuilder implements RecordPileReceiver { @@ -14,14 +15,6 @@ public class PileBuilder implements RecordPileReceiver { private String referenceSequence; private int reference_start; - private static class IntPair { - int first; - int second; - public IntPair(int i, int j) { set(i,j); } - public int first() { return first; } - public int second() { return second; } - public void set(int i, int j) { first = i; second = j; } - } private static class SelectedPair { private int i_; @@ -269,7 +262,7 @@ public class PileBuilder implements RecordPileReceiver { public SelectedPair findBestAlignment(MultipleAlignment a1, MultipleAlignment a2) { - Map all_offsets = new HashMap(); + Map all_offsets = new HashMap(); SelectedPair p = new SelectedPair(-1,-1,1e100); for ( Integer id1 : a1 ) { @@ -281,14 +274,14 @@ public class PileBuilder implements RecordPileReceiver { // pairwise alignment that suggested it int suggested_offset = a1.getOffsetById(id1) + pa.getBestOffset2wrt1(id1,id2) - a2.getOffsetById(id2); if ( ! all_offsets.containsKey(suggested_offset) ) { - all_offsets.put( suggested_offset , new IntPair(id1,id2)) ; + all_offsets.put( suggested_offset , new PrimitivePair.Int(id1,id2)) ; } } } - for ( Map.Entry offset_record : all_offsets.entrySet() ) { + for ( Map.Entry offset_record : all_offsets.entrySet() ) { double d = averageDistanceForOffset(a1,a2,offset_record.getKey()); - if ( d < p.d() ) p.set(offset_record.getValue().first(),offset_record.getValue().second(),d); + if ( d < p.d() ) p.set(offset_record.getValue().first,offset_record.getValue().second,d); } return p; }