two fixes in the new cached DbSNP code:

-isBiallelic would incorrectly say triallelic sites are biallelic.
-getAlternateAlleleList was broken, since the new cached list is immutable, we couldn’t remove list items.

Also added a dbSNP validating walker to the one-offs, for testing the new b37 130 dbSNP rod.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2568 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-01-13 00:27:34 +00:00
parent a138bad95a
commit 0513690416
2 changed files with 125 additions and 7 deletions

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.refdata;
import net.sf.samtools.util.SequenceUtil;
import org.broadinstitute.sting.utils.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@ -94,9 +95,9 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
* @return an alternate allele list
*/
public List<String> getAlternateAlleleList() {
List<String> ret = getAlleleList();
for (String allele : ret)
if (allele.equals(getReference())) ret.remove(allele);
List<String> ret = new ArrayList<String>();
for (String allele : getAlleleList())
if (!allele.equals(getReference())) ret.add(allele);
return ret;
}
@ -160,8 +161,8 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
}
/**
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
* of
* gets the alternate base is the case of a SNP. Throws a StingException when we can't parse out a
* single base to return, the site isn't a snp, or the site isn't biallelic
*
* @return a char, representing the alternate base
*/
@ -175,7 +176,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
}
/**
* gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
* gets the reference base is the case of a SNP. Throws a StingException if we're not a SNP.
*
* @return a char, representing the alternate base
*/
@ -276,7 +277,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
}
public boolean isBiallelic() {
return getAlleleList().size() == 2;
return getAlternateAlleleList().size() == 1;
}
public static rodDbSNP getFirstRealSNP(RODRecordList<ReferenceOrderedDatum> dbsnpList) {

View File

@ -0,0 +1,117 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.sting.utils.Pair;
import java.io.PrintStream;
/**
* @author aaron
*
* Class ValidateDbSNPConversion
*
* a quick walker to validate the dbSNP conversion.
*/
public class ValidateDbSNPConversion extends RefWalker<Pair<Matrix.BASE, Matrix.BASE>, Matrix> {
@Override
public Pair<Matrix.BASE, Matrix.BASE> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (!tracker.hasROD("dbsnp")) return null;
rodDbSNP rod = (rodDbSNP) tracker.lookup("dbSNP", null);
if (rod != null && rod.isSNP() && rod.isBiallelic()) {
return new Pair<Matrix.BASE, Matrix.BASE>(Matrix.BASE.toBase((byte) ref.getBase()), Matrix.BASE.toBase((byte) rod.getAlternativeBaseForSNP()));
}
return null;
}
/**
* Provide an initial value for reduce computations.
*
* @return Initial value of reduce.
*/
@Override
public Matrix reduceInit() {
return new Matrix();
}
/**
* Reduces a single map with the accumulator provided as the ReduceType.
*
* @param value result of the map.
* @param sum accumulator for the reduce.
*
* @return accumulator with result of the map taken into account.
*/
@Override
public Matrix reduce(Pair<Matrix.BASE, Matrix.BASE> value, Matrix sum) {
if (value != null) sum.addValue(value.first,value.second);
return sum;
}
public void onTraversalDone(Matrix result) {
result.printStats(out);
}
}
class Matrix {
public enum BASE {
A((byte) 65), C((byte) 67), G((byte) 71), T((byte) 84), N((byte)78);
private byte val = 65;
BASE(byte val) {
this.val = val;
}
byte getVal() {
return val;
}
static BASE toBase(byte b) {
if (b > 96) b =- 32;
if (b == A.val) return A;
if (b == C.val) return C;
if (b == G.val) return G;
if (b == T.val) return T;
// we don't really care, let return N instead of: throw new UnsupportedOperationException("Unknown base " + b);
return N;
}
}
private int[][] mat = new int[BASE.values().length][BASE.values().length];
public void addValue(BASE ref, BASE alt) {
mat[ref.ordinal()][alt.ordinal()] += 1;
}
public void printStats(PrintStream stream) {
for (BASE a : BASE.values()) {
stream.print("ref " + a.toString() + ":");
for (BASE b : BASE.values()) {
stream.print(" " + mat[a.ordinal()][b.ordinal()]);
}
stream.println();
}
}
public void addMatrix(Matrix mt) {
for (BASE a : BASE.values()) {
for (BASE b : BASE.values()) {
mat[a.ordinal()][b.ordinal()] += mt.mat[a.ordinal()][b.ordinal()];
}
}
}
public Matrix() {
for (int x = 0; x < BASE.values().length; x++) {
for (int y = 0; y < BASE.values().length; y++) {
mat[x][y] = 0;
}
}
}
}