rod DBSNP had a bug where the reference wasn't calculated correctly under certain conditions. Fixed getRefBasesFWD and getRefSnpFWD so that they were more in line with getAltBasesFWD and getAltSnpFWD. Also updated Variant Eval tests to reflect this change.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1609 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-09-13 23:48:58 +00:00
parent 5cf1d6c104
commit 5b41ef5f70
3 changed files with 97 additions and 6 deletions

View File

@ -71,7 +71,8 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
* @return reference allele, forward strand
*/
public String getRefBasesFWD() {
return getAllelesFWD().get(0);
// fix - at least this way we ensure that we'll get the other base compared to getAltBasesFWD()
return (getAllelesFWD().get(0).equals(refBases)) ? getAllelesFWD().get(0) : getAllelesFWD().get(1);
//if ( onFwdStrand() )
// return refBases;
//else
@ -87,7 +88,10 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
public char getRefSnpFWD() throws IllegalStateException {
//System.out.printf("refbases is %s but %s%n", refBases, toString());
if ( isIndel() ) throw new IllegalStateException("Variant is not a SNP");
return getAllelesFWD().get(0).charAt(0);
// fix - at least this way we ensure that we'll get the other base compared to getAltBasesFWD()
List<String> alleles = getAllelesFWD();
String val = (alleles.get(0).equals(refBases) ? alleles.get(0) : alleles.get(1));
return val.charAt(0);
// if ( onFwdStrand() ) return refBases.charAt(0);
// else return SequenceUtil.reverseComplement(refBases).charAt(0);
}

View File

@ -0,0 +1,87 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.*;
/**
* @author aaron
* <p/>
* Class rodDbSNPTest
* <p/>
* A descriptions should go here. Blame aaron if it's missing.
*/
public class rodDbSNPTest extends BaseTest {
private static IndexedFastaSequenceFile seq;
@BeforeClass
public static void beforeTests() {
try {
seq = new IndexedFastaSequenceFile(new File("/broad/1KG/reference/human_b36_both.fasta"));
} catch (FileNotFoundException e) {
throw new StingException("unable to load the sequence dictionary");
}
GenomeLocParser.setupRefContigOrdering(seq);
}
public BufferedReader openFile(String filename) {
try {
return new BufferedReader(new FileReader(filename));
} catch (FileNotFoundException e) {
throw new StingException("Couldn't open file " + filename);
}
}
@Test
// count the number of SNP's between 10M and 11M on chr1 in dbSNP
public void testDBSNPInput() {
BufferedReader stream = openFile("/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod");
int snpCount = 0;
int indelCount = 0;
try {
String line = stream.readLine();
rodDbSNP rod = new rodDbSNP("db");
boolean stop = false;
while (line != null && !stop) {
rod.parseLine(null,line.split("\t"));
rodDbSNP var = (rodDbSNP)rod;
if (rod.isSNP()) {
// quick check, if we're not triallelic, make sure the ref is right
if (var.getRefSnpFWD() == var.refBases.charAt(0) || var.getAltSnpFWD() == var.refBases.charAt(0))
// also make sure the ref is a single character
if (var.refBases.length() == 1)
Assert.assertTrue(var.refBases.charAt(0)==var.getRefSnpFWD());
if (var.getLocation().getContig().equals("1") &&
var.getLocation().getStart() >= 10000000 &&
var.getLocation().getStart() <= 11000000) {
if (var.isSNP()) {
snpCount++;
}
}
}
if (rod.isIndel())
indelCount++;
stop = (var.getLocation().getContig().equals("1") && var.getLocation().getStart() > 11000000);
line = stream.readLine();
}
Assert.assertEquals(3717,snpCount);
Assert.assertEquals(9902,indelCount);
} catch (IOException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
}
}
}

View File

@ -19,7 +19,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
@Test
public void testEvalVariantROD() {
List<String> md5 = new ArrayList<String>();
md5.add("094c0adb8e4ae4de424f26482fd43152");
md5.add("013d13d405f33b0fe4038f5aea087e7f");
/**
* the above MD5 was calculated from running the following command:
@ -50,7 +50,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
@Test
public void testEvalVariantRODConfSix() {
List<String> md5 = new ArrayList<String>();
md5.add("eb8716965fa1df93954c5a22b0aeda7c");
md5.add("422d94ab60a3db42b0a5dc9c254b78f4");
/**
* the above MD5 was calculated from running the following command:
@ -82,7 +82,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
@Test
public void testEvalVariantRODOutputViolations() {
List<String> md5 = new ArrayList<String>();
md5.add("9449cfb668f5175013161d2f287c30ff");
md5.add("8e613d57fd770ff04eb6531f583f4448");
/**
* the above MD5 was calculated from running the following command:
@ -114,7 +114,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
@Test
public void testEvalGenotypeROD() {
List<String> md5 = new ArrayList<String>();
md5.add("ff2bf8987e288198b28201b5c3121c0e");
md5.add("439c2983a6e1250cd845c686a0e5a085");
/**
* the above MD5 was calculated after running the following command:
*