From 5b41ef5f70b87e7921e9d7729637bae2ac77b830 Mon Sep 17 00:00:00 2001 From: aaron Date: Sun, 13 Sep 2009 23:48:58 +0000 Subject: [PATCH] rod DBSNP had a bug where the reference wasn't calculated correctly under certain conditions. Fixed getRefBasesFWD and getRefSnpFWD so that they were more in line with getAltBasesFWD and getAltSnpFWD. Also updated Variant Eval tests to reflect this change. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1609 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/refdata/rodDbSNP.java | 8 +- .../sting/gatk/refdata/rodDbSNPTest.java | 87 +++++++++++++++++++ .../VariantEvalWalkerIntegrationTest.java | 8 +- 3 files changed, 97 insertions(+), 6 deletions(-) create mode 100644 java/test/org/broadinstitute/sting/gatk/refdata/rodDbSNPTest.java diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java index 24abc4106..46a7a1972 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java @@ -71,7 +71,8 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria * @return reference allele, forward strand */ public String getRefBasesFWD() { - return getAllelesFWD().get(0); + // fix - at least this way we ensure that we'll get the other base compared to getAltBasesFWD() + return (getAllelesFWD().get(0).equals(refBases)) ? getAllelesFWD().get(0) : getAllelesFWD().get(1); //if ( onFwdStrand() ) // return refBases; //else @@ -87,7 +88,10 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria public char getRefSnpFWD() throws IllegalStateException { //System.out.printf("refbases is %s but %s%n", refBases, toString()); if ( isIndel() ) throw new IllegalStateException("Variant is not a SNP"); - return getAllelesFWD().get(0).charAt(0); + // fix - at least this way we ensure that we'll get the other base compared to getAltBasesFWD() + List alleles = getAllelesFWD(); + String val = (alleles.get(0).equals(refBases) ? alleles.get(0) : alleles.get(1)); + return val.charAt(0); // if ( onFwdStrand() ) return refBases.charAt(0); // else return SequenceUtil.reverseComplement(refBases).charAt(0); } diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/rodDbSNPTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/rodDbSNPTest.java new file mode 100644 index 000000000..8e8f21e61 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/refdata/rodDbSNPTest.java @@ -0,0 +1,87 @@ +package org.broadinstitute.sting.gatk.refdata; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.*; + + +/** + * @author aaron + *

+ * Class rodDbSNPTest + *

+ * A descriptions should go here. Blame aaron if it's missing. + */ +public class rodDbSNPTest extends BaseTest { + private static IndexedFastaSequenceFile seq; + + @BeforeClass + public static void beforeTests() { + try { + seq = new IndexedFastaSequenceFile(new File("/broad/1KG/reference/human_b36_both.fasta")); + } catch (FileNotFoundException e) { + throw new StingException("unable to load the sequence dictionary"); + } + GenomeLocParser.setupRefContigOrdering(seq); + + } + + public BufferedReader openFile(String filename) { + try { + return new BufferedReader(new FileReader(filename)); + } catch (FileNotFoundException e) { + throw new StingException("Couldn't open file " + filename); + } + + } + + @Test + // count the number of SNP's between 10M and 11M on chr1 in dbSNP + public void testDBSNPInput() { + BufferedReader stream = openFile("/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod"); + int snpCount = 0; + int indelCount = 0; + try { + String line = stream.readLine(); + rodDbSNP rod = new rodDbSNP("db"); + boolean stop = false; + while (line != null && !stop) { + rod.parseLine(null,line.split("\t")); + rodDbSNP var = (rodDbSNP)rod; + if (rod.isSNP()) { + // quick check, if we're not triallelic, make sure the ref is right + if (var.getRefSnpFWD() == var.refBases.charAt(0) || var.getAltSnpFWD() == var.refBases.charAt(0)) + // also make sure the ref is a single character + if (var.refBases.length() == 1) + Assert.assertTrue(var.refBases.charAt(0)==var.getRefSnpFWD()); + if (var.getLocation().getContig().equals("1") && + var.getLocation().getStart() >= 10000000 && + var.getLocation().getStart() <= 11000000) { + if (var.isSNP()) { + snpCount++; + } + + } + } + if (rod.isIndel()) + indelCount++; + + stop = (var.getLocation().getContig().equals("1") && var.getLocation().getStart() > 11000000); + line = stream.readLine(); + } + Assert.assertEquals(3717,snpCount); + Assert.assertEquals(9902,indelCount); + + } catch (IOException e) { + e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. + } + } + + +} diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java index ea0189104..1f44d2a78 100644 --- a/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java @@ -19,7 +19,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalVariantROD() { List md5 = new ArrayList(); - md5.add("094c0adb8e4ae4de424f26482fd43152"); + md5.add("013d13d405f33b0fe4038f5aea087e7f"); /** * the above MD5 was calculated from running the following command: @@ -50,7 +50,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalVariantRODConfSix() { List md5 = new ArrayList(); - md5.add("eb8716965fa1df93954c5a22b0aeda7c"); + md5.add("422d94ab60a3db42b0a5dc9c254b78f4"); /** * the above MD5 was calculated from running the following command: @@ -82,7 +82,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalVariantRODOutputViolations() { List md5 = new ArrayList(); - md5.add("9449cfb668f5175013161d2f287c30ff"); + md5.add("8e613d57fd770ff04eb6531f583f4448"); /** * the above MD5 was calculated from running the following command: @@ -114,7 +114,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalGenotypeROD() { List md5 = new ArrayList(); - md5.add("ff2bf8987e288198b28201b5c3121c0e"); + md5.add("439c2983a6e1250cd845c686a0e5a085"); /** * the above MD5 was calculated after running the following command: *