diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java index 6d145dbd8..3f87d596d 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.refdata; import net.sf.samtools.util.SequenceUtil; import org.broadinstitute.sting.utils.*; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -94,9 +95,9 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod * @return an alternate allele list */ public List getAlternateAlleleList() { - List ret = getAlleleList(); - for (String allele : ret) - if (allele.equals(getReference())) ret.remove(allele); + List ret = new ArrayList(); + for (String allele : getAlleleList()) + if (!allele.equals(getReference())) ret.add(allele); return ret; } @@ -160,8 +161,8 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod } /** - * gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case - * of + * gets the alternate base is the case of a SNP. Throws a StingException when we can't parse out a + * single base to return, the site isn't a snp, or the site isn't biallelic * * @return a char, representing the alternate base */ @@ -175,7 +176,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod } /** - * gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP + * gets the reference base is the case of a SNP. Throws a StingException if we're not a SNP. * * @return a char, representing the alternate base */ @@ -276,7 +277,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod } public boolean isBiallelic() { - return getAlleleList().size() == 2; + return getAlternateAlleleList().size() == 1; } public static rodDbSNP getFirstRealSNP(RODRecordList dbsnpList) { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateDbSNPConversion.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateDbSNPConversion.java new file mode 100644 index 000000000..bb34119af --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateDbSNPConversion.java @@ -0,0 +1,117 @@ +package org.broadinstitute.sting.oneoffprojects.walkers; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.rodDbSNP; +import org.broadinstitute.sting.gatk.walkers.RefWalker; +import org.broadinstitute.sting.utils.Pair; + +import java.io.PrintStream; + + +/** + * @author aaron + * + * Class ValidateDbSNPConversion + * + * a quick walker to validate the dbSNP conversion. + */ +public class ValidateDbSNPConversion extends RefWalker, Matrix> { + @Override + public Pair map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if (!tracker.hasROD("dbsnp")) return null; + rodDbSNP rod = (rodDbSNP) tracker.lookup("dbSNP", null); + if (rod != null && rod.isSNP() && rod.isBiallelic()) { + return new Pair(Matrix.BASE.toBase((byte) ref.getBase()), Matrix.BASE.toBase((byte) rod.getAlternativeBaseForSNP())); + } + return null; + } + + /** + * Provide an initial value for reduce computations. + * + * @return Initial value of reduce. + */ + @Override + public Matrix reduceInit() { + return new Matrix(); + } + + /** + * Reduces a single map with the accumulator provided as the ReduceType. + * + * @param value result of the map. + * @param sum accumulator for the reduce. + * + * @return accumulator with result of the map taken into account. + */ + @Override + public Matrix reduce(Pair value, Matrix sum) { + if (value != null) sum.addValue(value.first,value.second); + return sum; + } + + public void onTraversalDone(Matrix result) { + result.printStats(out); + } + +} + + +class Matrix { + public enum BASE { + A((byte) 65), C((byte) 67), G((byte) 71), T((byte) 84), N((byte)78); + private byte val = 65; + + BASE(byte val) { + this.val = val; + } + + byte getVal() { + return val; + } + + static BASE toBase(byte b) { + if (b > 96) b =- 32; + if (b == A.val) return A; + if (b == C.val) return C; + if (b == G.val) return G; + if (b == T.val) return T; + // we don't really care, let return N instead of: throw new UnsupportedOperationException("Unknown base " + b); + return N; + } + } + + private int[][] mat = new int[BASE.values().length][BASE.values().length]; + + public void addValue(BASE ref, BASE alt) { + mat[ref.ordinal()][alt.ordinal()] += 1; + } + + public void printStats(PrintStream stream) { + for (BASE a : BASE.values()) { + stream.print("ref " + a.toString() + ":"); + for (BASE b : BASE.values()) { + stream.print(" " + mat[a.ordinal()][b.ordinal()]); + } + stream.println(); + } + } + + public void addMatrix(Matrix mt) { + for (BASE a : BASE.values()) { + for (BASE b : BASE.values()) { + mat[a.ordinal()][b.ordinal()] += mt.mat[a.ordinal()][b.ordinal()]; + } + } + } + + public Matrix() { + for (int x = 0; x < BASE.values().length; x++) { + for (int y = 0; y < BASE.values().length; y++) { + mat[x][y] = 0; + } + } + } +} \ No newline at end of file