diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java index 24abc4106..46a7a1972 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java @@ -71,7 +71,8 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria * @return reference allele, forward strand */ public String getRefBasesFWD() { - return getAllelesFWD().get(0); + // fix - at least this way we ensure that we'll get the other base compared to getAltBasesFWD() + return (getAllelesFWD().get(0).equals(refBases)) ? getAllelesFWD().get(0) : getAllelesFWD().get(1); //if ( onFwdStrand() ) // return refBases; //else @@ -87,7 +88,10 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria public char getRefSnpFWD() throws IllegalStateException { //System.out.printf("refbases is %s but %s%n", refBases, toString()); if ( isIndel() ) throw new IllegalStateException("Variant is not a SNP"); - return getAllelesFWD().get(0).charAt(0); + // fix - at least this way we ensure that we'll get the other base compared to getAltBasesFWD() + List alleles = getAllelesFWD(); + String val = (alleles.get(0).equals(refBases) ? alleles.get(0) : alleles.get(1)); + return val.charAt(0); // if ( onFwdStrand() ) return refBases.charAt(0); // else return SequenceUtil.reverseComplement(refBases).charAt(0); } diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/rodDbSNPTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/rodDbSNPTest.java new file mode 100644 index 000000000..8e8f21e61 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/refdata/rodDbSNPTest.java @@ -0,0 +1,87 @@ +package org.broadinstitute.sting.gatk.refdata; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.*; + + +/** + * @author aaron + *

+ * Class rodDbSNPTest + *

+ * A descriptions should go here. Blame aaron if it's missing. + */ +public class rodDbSNPTest extends BaseTest { + private static IndexedFastaSequenceFile seq; + + @BeforeClass + public static void beforeTests() { + try { + seq = new IndexedFastaSequenceFile(new File("/broad/1KG/reference/human_b36_both.fasta")); + } catch (FileNotFoundException e) { + throw new StingException("unable to load the sequence dictionary"); + } + GenomeLocParser.setupRefContigOrdering(seq); + + } + + public BufferedReader openFile(String filename) { + try { + return new BufferedReader(new FileReader(filename)); + } catch (FileNotFoundException e) { + throw new StingException("Couldn't open file " + filename); + } + + } + + @Test + // count the number of SNP's between 10M and 11M on chr1 in dbSNP + public void testDBSNPInput() { + BufferedReader stream = openFile("/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod"); + int snpCount = 0; + int indelCount = 0; + try { + String line = stream.readLine(); + rodDbSNP rod = new rodDbSNP("db"); + boolean stop = false; + while (line != null && !stop) { + rod.parseLine(null,line.split("\t")); + rodDbSNP var = (rodDbSNP)rod; + if (rod.isSNP()) { + // quick check, if we're not triallelic, make sure the ref is right + if (var.getRefSnpFWD() == var.refBases.charAt(0) || var.getAltSnpFWD() == var.refBases.charAt(0)) + // also make sure the ref is a single character + if (var.refBases.length() == 1) + Assert.assertTrue(var.refBases.charAt(0)==var.getRefSnpFWD()); + if (var.getLocation().getContig().equals("1") && + var.getLocation().getStart() >= 10000000 && + var.getLocation().getStart() <= 11000000) { + if (var.isSNP()) { + snpCount++; + } + + } + } + if (rod.isIndel()) + indelCount++; + + stop = (var.getLocation().getContig().equals("1") && var.getLocation().getStart() > 11000000); + line = stream.readLine(); + } + Assert.assertEquals(3717,snpCount); + Assert.assertEquals(9902,indelCount); + + } catch (IOException e) { + e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. + } + } + + +} diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java index ea0189104..1f44d2a78 100644 --- a/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java @@ -19,7 +19,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalVariantROD() { List md5 = new ArrayList(); - md5.add("094c0adb8e4ae4de424f26482fd43152"); + md5.add("013d13d405f33b0fe4038f5aea087e7f"); /** * the above MD5 was calculated from running the following command: @@ -50,7 +50,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalVariantRODConfSix() { List md5 = new ArrayList(); - md5.add("eb8716965fa1df93954c5a22b0aeda7c"); + md5.add("422d94ab60a3db42b0a5dc9c254b78f4"); /** * the above MD5 was calculated from running the following command: @@ -82,7 +82,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalVariantRODOutputViolations() { List md5 = new ArrayList(); - md5.add("9449cfb668f5175013161d2f287c30ff"); + md5.add("8e613d57fd770ff04eb6531f583f4448"); /** * the above MD5 was calculated from running the following command: @@ -114,7 +114,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalGenotypeROD() { List md5 = new ArrayList(); - md5.add("ff2bf8987e288198b28201b5c3121c0e"); + md5.add("439c2983a6e1250cd845c686a0e5a085"); /** * the above MD5 was calculated after running the following command: *