rod DBSNP had a bug where the reference wasn't calculated correctly under certain conditions. Fixed getRefBasesFWD and getRefSnpFWD so that they were more in line with getAltBasesFWD and getAltSnpFWD. Also updated Variant Eval tests to reflect this change.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1609 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
5cf1d6c104
commit
5b41ef5f70
|
|
@ -71,7 +71,8 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
|
|||
* @return reference allele, forward strand
|
||||
*/
|
||||
public String getRefBasesFWD() {
|
||||
return getAllelesFWD().get(0);
|
||||
// fix - at least this way we ensure that we'll get the other base compared to getAltBasesFWD()
|
||||
return (getAllelesFWD().get(0).equals(refBases)) ? getAllelesFWD().get(0) : getAllelesFWD().get(1);
|
||||
//if ( onFwdStrand() )
|
||||
// return refBases;
|
||||
//else
|
||||
|
|
@ -87,7 +88,10 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
|
|||
public char getRefSnpFWD() throws IllegalStateException {
|
||||
//System.out.printf("refbases is %s but %s%n", refBases, toString());
|
||||
if ( isIndel() ) throw new IllegalStateException("Variant is not a SNP");
|
||||
return getAllelesFWD().get(0).charAt(0);
|
||||
// fix - at least this way we ensure that we'll get the other base compared to getAltBasesFWD()
|
||||
List<String> alleles = getAllelesFWD();
|
||||
String val = (alleles.get(0).equals(refBases) ? alleles.get(0) : alleles.get(1));
|
||||
return val.charAt(0);
|
||||
// if ( onFwdStrand() ) return refBases.charAt(0);
|
||||
// else return SequenceUtil.reverseComplement(refBases).charAt(0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,87 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.junit.Assert;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class rodDbSNPTest
|
||||
* <p/>
|
||||
* A descriptions should go here. Blame aaron if it's missing.
|
||||
*/
|
||||
public class rodDbSNPTest extends BaseTest {
|
||||
private static IndexedFastaSequenceFile seq;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeTests() {
|
||||
try {
|
||||
seq = new IndexedFastaSequenceFile(new File("/broad/1KG/reference/human_b36_both.fasta"));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("unable to load the sequence dictionary");
|
||||
}
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
|
||||
}
|
||||
|
||||
public BufferedReader openFile(String filename) {
|
||||
try {
|
||||
return new BufferedReader(new FileReader(filename));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Couldn't open file " + filename);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
// count the number of SNP's between 10M and 11M on chr1 in dbSNP
|
||||
public void testDBSNPInput() {
|
||||
BufferedReader stream = openFile("/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod");
|
||||
int snpCount = 0;
|
||||
int indelCount = 0;
|
||||
try {
|
||||
String line = stream.readLine();
|
||||
rodDbSNP rod = new rodDbSNP("db");
|
||||
boolean stop = false;
|
||||
while (line != null && !stop) {
|
||||
rod.parseLine(null,line.split("\t"));
|
||||
rodDbSNP var = (rodDbSNP)rod;
|
||||
if (rod.isSNP()) {
|
||||
// quick check, if we're not triallelic, make sure the ref is right
|
||||
if (var.getRefSnpFWD() == var.refBases.charAt(0) || var.getAltSnpFWD() == var.refBases.charAt(0))
|
||||
// also make sure the ref is a single character
|
||||
if (var.refBases.length() == 1)
|
||||
Assert.assertTrue(var.refBases.charAt(0)==var.getRefSnpFWD());
|
||||
if (var.getLocation().getContig().equals("1") &&
|
||||
var.getLocation().getStart() >= 10000000 &&
|
||||
var.getLocation().getStart() <= 11000000) {
|
||||
if (var.isSNP()) {
|
||||
snpCount++;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
if (rod.isIndel())
|
||||
indelCount++;
|
||||
|
||||
stop = (var.getLocation().getContig().equals("1") && var.getLocation().getStart() > 11000000);
|
||||
line = stream.readLine();
|
||||
}
|
||||
Assert.assertEquals(3717,snpCount);
|
||||
Assert.assertEquals(9902,indelCount);
|
||||
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -19,7 +19,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testEvalVariantROD() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("094c0adb8e4ae4de424f26482fd43152");
|
||||
md5.add("013d13d405f33b0fe4038f5aea087e7f");
|
||||
|
||||
/**
|
||||
* the above MD5 was calculated from running the following command:
|
||||
|
|
@ -50,7 +50,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testEvalVariantRODConfSix() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("eb8716965fa1df93954c5a22b0aeda7c");
|
||||
md5.add("422d94ab60a3db42b0a5dc9c254b78f4");
|
||||
|
||||
/**
|
||||
* the above MD5 was calculated from running the following command:
|
||||
|
|
@ -82,7 +82,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testEvalVariantRODOutputViolations() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("9449cfb668f5175013161d2f287c30ff");
|
||||
md5.add("8e613d57fd770ff04eb6531f583f4448");
|
||||
|
||||
/**
|
||||
* the above MD5 was calculated from running the following command:
|
||||
|
|
@ -114,7 +114,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testEvalGenotypeROD() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("ff2bf8987e288198b28201b5c3121c0e");
|
||||
md5.add("439c2983a6e1250cd845c686a0e5a085");
|
||||
/**
|
||||
* the above MD5 was calculated after running the following command:
|
||||
*
|
||||
|
|
|
|||
Loading…
Reference in New Issue