From b3bdcd0e60ec8572bf0bf33d35df1fd050699c75 Mon Sep 17 00:00:00 2001 From: aaron Date: Thu, 3 Dec 2009 06:59:27 +0000 Subject: [PATCH] make sure we close the error log stream in CommandLineProgram if it's opened; unit tests and clean-up for BasicVariation git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2241 348d0f76-0448-11de-a6fe-93d51630548a --- .../utils/cmdLine/CommandLineProgram.java | 1 + .../sting/utils/genotype/BasicVariation.java | 31 ++--- .../utils/genotype/BasicVariationTest.java | 117 ++++++++++++++++++ 3 files changed, 135 insertions(+), 14 deletions(-) create mode 100644 java/test/org/broadinstitute/sting/utils/genotype/BasicVariationTest.java diff --git a/java/src/org/broadinstitute/sting/utils/cmdLine/CommandLineProgram.java b/java/src/org/broadinstitute/sting/utils/cmdLine/CommandLineProgram.java index 128665202..2f7e4e0ad 100644 --- a/java/src/org/broadinstitute/sting/utils/cmdLine/CommandLineProgram.java +++ b/java/src/org/broadinstitute/sting/utils/cmdLine/CommandLineProgram.java @@ -261,6 +261,7 @@ public abstract class CommandLineProgram { throw new RuntimeException(e); } clp.generateErrorLog(stream, e); + stream.close(); } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/BasicVariation.java b/java/src/org/broadinstitute/sting/utils/genotype/BasicVariation.java index de1e0fac8..0ebe8ceb9 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/BasicVariation.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/BasicVariation.java @@ -5,7 +5,9 @@ import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.Utils; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; /** * User: aaron @@ -65,14 +67,14 @@ public class BasicVariation implements Variation { */ @Override public VARIANT_TYPE getType() { - if (mLength != 0) return VARIANT_TYPE.INSERTION; + if (mLength > 0) return VARIANT_TYPE.INSERTION; + if (mLength < 
0) return VARIANT_TYPE.DELETION; return (isSNP()) ? VARIANT_TYPE.SNP : VARIANT_TYPE.REFERENCE; } @Override public boolean isSNP() { - if (mLength == 0) return true; - return false; + return ((mLength == 0) && (new HashSet(getAlternateAlleleList()).size() == 1)); } @Override @@ -95,10 +97,15 @@ public class BasicVariation implements Variation { return (mRef); } - /** are we bi-allelic? */ + /** + * are we bi-allelic? In this case we always + * count the reference as an allele + */ @Override public boolean isBiallelic() { - return (getAlternateAlleleList().size() == 1); + // put the alternate alleles into a set, there may be duplicates (i.e. hom var) + Set alleles = new HashSet(getAlternateAlleleList()); + return (alleles.size() == 1); // if the alt list contained one unique non-ref base, we're biallelic } @Override @@ -133,20 +140,16 @@ public class BasicVariation implements Variation { @Override public List getAlleleList() { List list = new ArrayList(); - if (this.mBases.contains(mRef)) list.add(mRef); for (char c : this.mBases.toCharArray()) - if (c != Utils.stringToChar(mRef)) - list.add(String.valueOf(c)); + list.add(String.valueOf(c)); return list; } @Override public boolean isReference() { if (mLength != 0) return false; - int refIndex = 0; - for (char c : mBases.toCharArray()) { - if (mRef.charAt(refIndex) != c) return false; - } + for (String str : getAlleleList()) + if (!str.equals(mRef)) return false; return true; } @@ -169,8 +172,8 @@ public class BasicVariation implements Variation { @Override public char getAlternativeBaseForSNP() { if (!this.isSNP()) throw new IllegalStateException("we're not a SNP"); - if (!this.isBiallelic() || this.getAlternateAlleleList().size() != 1) throw new IllegalStateException("we're not biallelic"); - return Utils.stringToChar(this.getAlternateAlleleList().get(0)); + if (!this.isBiallelic()) throw new IllegalStateException("we're not biallelic"); + return Utils.stringToChar((new 
HashSet(getAlternateAlleleList())).iterator().next()); } /** diff --git a/java/test/org/broadinstitute/sting/utils/genotype/BasicVariationTest.java b/java/test/org/broadinstitute/sting/utils/genotype/BasicVariationTest.java new file mode 100644 index 000000000..e78023df6 --- /dev/null +++ b/java/test/org/broadinstitute/sting/utils/genotype/BasicVariationTest.java @@ -0,0 +1,117 @@ +package org.broadinstitute.sting.utils.genotype; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.File; +import java.io.FileNotFoundException; + +/** + * Created by IntelliJ IDEA. + * User: aaron + * Date: Dec 2, 2009 + * Time: 1:05:58 AM + *

+ * some quick tests for the BasicVariation class + */ +public class BasicVariationTest extends BaseTest { + private static IndexedFastaSequenceFile seq; + + @BeforeClass + public static void beforeTests() { + try { + seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); + } catch (FileNotFoundException e) { + throw new StingException("unable to load the sequence dictionary"); + } + GenomeLocParser.setupRefContigOrdering(seq); + + } + + @Test + public void testIsBiallelic() { + BasicVariation var = new BasicVariation("CC", "C", 0, GenomeLocParser.createGenomeLoc(1, 1, 1), 1.22); + Assert.assertTrue(!var.isBiallelic()); + BasicVariation var2 = new BasicVariation("CA", "C", 0, GenomeLocParser.createGenomeLoc(1, 1, 1), 1.22); + Assert.assertTrue(var2.isBiallelic()); + BasicVariation var3 = new BasicVariation("CC", "A", 0, GenomeLocParser.createGenomeLoc(1, 1, 1), 1.22); + Assert.assertTrue(var3.isBiallelic()); + } + + @Test + public void testVariantType() { + // test reference + BasicVariation var = new BasicVariation("CC", "C", 0, GenomeLocParser.createGenomeLoc(1, 1, 1), 1.22); + Assert.assertTrue(var.getType() == Variation.VARIANT_TYPE.REFERENCE); + + // test SNP's + BasicVariation var2 = new BasicVariation("CA", "C", 0, GenomeLocParser.createGenomeLoc(1, 1, 1), 1.22); + Assert.assertTrue(var2.getType() == Variation.VARIANT_TYPE.SNP); + BasicVariation var3 = new BasicVariation("AA", "C", 0, GenomeLocParser.createGenomeLoc(1, 1, 1), 1.22); + Assert.assertTrue(var3.getType() == Variation.VARIANT_TYPE.SNP); + + // test deletions + BasicVariation var4 = new BasicVariation("", "C", -10, GenomeLocParser.createGenomeLoc(1, 1, 1), 1.22); + Assert.assertTrue(var4.getType() == Variation.VARIANT_TYPE.DELETION); + + // test insertions + BasicVariation var5 = new BasicVariation("ACACACACACA", "C", "ACACACACACA".length(), GenomeLocParser.createGenomeLoc(1, 1, 1), 1.22); + 
Assert.assertTrue(var5.getType() == Variation.VARIANT_TYPE.INSERTION); + + } + + @Test(expected = IllegalStateException.class) + public void testGetAlternativeBaseForSNPNotASNP() { + // test reference + BasicVariation var = new BasicVariation("CC", "C", 0, GenomeLocParser.createGenomeLoc(1, 1, 1), 1.22); + var.getAlternativeBaseForSNP(); + } + + @Test(expected = IllegalStateException.class) + public void testGetAlternativeBaseForSNPFromIndel() { + // test insertion + BasicVariation var = new BasicVariation("ACACACACACA", "C", "ACACACACACA".length(), GenomeLocParser.createGenomeLoc(1, 1, 1), 1.22); + var.getAlternativeBaseForSNP(); + } + + @Test(expected = IllegalStateException.class) + public void testGetAlternativeBaseForSNPFromDel() { + // test deletion + BasicVariation var = new BasicVariation("", "C", -10, GenomeLocParser.createGenomeLoc(1, 1, 1), 1.22); + var.getAlternativeBaseForSNP(); + } + + @Test + public void testGetAlternativeBaseForSNP() { + // test SNP's + BasicVariation var = new BasicVariation("CA", "C", 0, GenomeLocParser.createGenomeLoc(1, 1, 1), 1.22); + Assert.assertEquals('A', var.getAlternativeBaseForSNP()); + var = new BasicVariation("AC", "C", 0, GenomeLocParser.createGenomeLoc(1, 1, 1), 1.22); + Assert.assertEquals('A', var.getAlternativeBaseForSNP()); + var = new BasicVariation("AA", "C", 0, GenomeLocParser.createGenomeLoc(1, 1, 1), 1.22); + Assert.assertEquals('A', var.getAlternativeBaseForSNP()); + } + + @Test + public void testGetAlleleList() { + BasicVariation var = new BasicVariation("CA", "C", 0, GenomeLocParser.createGenomeLoc(1, 1, 1), 1.22); + Assert.assertTrue(var.getAlleleList().size() == 2); + Assert.assertTrue(var.getAlleleList().contains("C")); + Assert.assertTrue(var.getAlleleList().contains("A")); + + var = new BasicVariation("AC", "C", 0, GenomeLocParser.createGenomeLoc(1, 1, 1), 1.22); + Assert.assertTrue(var.getAlleleList().size() == 2); + Assert.assertTrue(var.getAlleleList().contains("C")); + 
Assert.assertTrue(var.getAlleleList().contains("A")); + + var = new BasicVariation("AA", "C", 0, GenomeLocParser.createGenomeLoc(1, 1, 1), 1.22); + Assert.assertTrue(var.getAlleleList().size() == 2); + Assert.assertTrue(var.getAlleleList().get(0).equals("A")); + Assert.assertTrue(var.getAlleleList().get(1).equals("A")); + } +}