From b4adb5133aca8950a45641b8f6c0d569dabb61df Mon Sep 17 00:00:00 2001 From: aaron Date: Tue, 21 Jul 2009 00:55:52 +0000 Subject: [PATCH] GLF rod as a AllelicVariant object. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1282 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/refdata/RefMetaDataTracker.java | 9 +- .../sting/gatk/refdata/RodGLF.java | 450 ++++++++++++++---- .../varianteval/VariantEvalWalker.java | 58 +-- .../sting/utils/genotype/glf/GLFRecord.java | 12 +- .../sting/gatk/refdata/RodGLFTest.java | 241 +++++++--- .../utils/genotype/glf/GLFReaderTest.java | 2 + 6 files changed, 573 insertions(+), 199 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index 0b3074693..949c1c7c5 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -2,9 +2,8 @@ package org.broadinstitute.sting.gatk.refdata; import org.apache.log4j.Logger; -import java.util.HashMap; -import java.util.List; import java.util.Collection; +import java.util.HashMap; /** * This class represents the Reference Metadata available at a particular site in the genome. It can be @@ -70,10 +69,10 @@ public class RefMetaDataTracker { /** * Is there a binding at this site to a ROD with name? * - * @param name - * @return + * @param name the name of the rod + * @return true if it has the rod */ - public Object hasROD(final String name) { + public boolean hasROD(final String name) { return map.containsKey(canonicalName(name)); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java index cc00c61a1..ce0b94bd3 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java @@ -2,14 +2,20 @@ package org.broadinstitute.sting.gatk.refdata; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.genotype.LikelihoodObject; import org.broadinstitute.sting.utils.genotype.glf.GLFReader; import org.broadinstitute.sting.utils.genotype.glf.GLFRecord; import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall; import org.broadinstitute.sting.utils.genotype.glf.VariableLengthCall; import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; +import java.util.ArrayList; import java.util.Iterator; +import java.util.List; /** @@ -19,109 +25,385 @@ import java.util.Iterator; *

* the rod class for GLF data. */ -public class RodGLF extends BasicReferenceOrderedDatum { - public GLFRecord mRecord; +public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator { + static int count = 0; + public GLFReader mReader; + private final String mName; private GenomeLoc mLoc; - private static GLFRODIterator mWrap; - private String contig; - private int contigLength; + public GLFRecord mRecord; public RodGLF(String name) { - super(name); + mName = name; } + /** + * get the name + * @return the name + */ + @Override + public String getName() { + return mName; + } + + /** + * Backdoor hook to read header, meta-data, etc. associated with the file. Will be + * called by the ROD system before streaming starts + * + * @param source source data file on disk from which this rod stream will be pulled + * + * @return a header object that will be passed to parseLine command + */ + @Override + public Object initialize(File source) throws FileNotFoundException { + mReader = new GLFReader(source); + return null; + } + + @Override + public String toSimpleString() { + return toString(); + } + + /** + * @return a string representation of the ROD GLF object + */ + public String toString() { + return String.format("%s\t%d\t%s\t%d\t%d\t%4.4f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f", + mLoc.getContig(), + mLoc.getStart(), + mRecord.getRefBase(), + mRecord.getReadDepth(), + mRecord.getRmsMapQ(), + getBestGenotypeValue(1), + ((SinglePointCall)mRecord).getLikelihoods()[0], + ((SinglePointCall)mRecord).getLikelihoods()[1], + ((SinglePointCall)mRecord).getLikelihoods()[2], + ((SinglePointCall)mRecord).getLikelihoods()[3], + ((SinglePointCall)mRecord).getLikelihoods()[4], + ((SinglePointCall)mRecord).getLikelihoods()[5], + ((SinglePointCall)mRecord).getLikelihoods()[6], + ((SinglePointCall)mRecord).getLikelihoods()[7], + ((SinglePointCall)mRecord).getLikelihoods()[8], + ((SinglePointCall)mRecord).getLikelihoods()[9] + + + ); + } + + @Override + public String repl() { + return this.toString(); + } + + /** + * Used by the ROD system to determine how to split input lines + * + * @return Regex string delimiter separating fields + */ + @Override + public String delimiterRegex() { + return ""; + } + + /** + * return a genome loc representing the current location + * @return the geonome loc + */ + @Override + public GenomeLoc getLocation() { + return mLoc; + } + + /** + * Returns bases in the reference allele as a String. String can be empty (as in insertion into + * the reference), can contain a single character (as in SNP or one-base deletion), or multiple characters + * (for longer indels). + * + * @return reference allele, forward strand + */ + @Override + public String getRefBasesFWD() { + if (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) return ""; + return String.valueOf(mRecord.getRefBase()); + } + + /** + * Returns reference (major) allele base for a SNP variant as a character; should throw IllegalStateException + * if variant is not a SNP. + * + * This doesn't make much sense to me, what if the best genotype is hom non-ref? + * + * @return reference base on the forward strand + */ + @Override + public char getRefSnpFWD() throws IllegalStateException { + if (!isSNP()) throw new IllegalStateException("Current GLF Record is not a SNP"); + return mRecord.getRefBase().toChar(); + } + + /** + * Returns bases in the alternative allele as a String. String can be empty (as in deletion from + * the reference), can contain a single character (as in SNP or one-base insertion), or multiple characters + * (for longer indels). + * + * @return alternative allele, forward strand + */ + @Override + public String getAltBasesFWD() { + return getBestGenotype(2).toString(); + } + + /** + * Returns alternative (minor) allele base for a SNP variant as a character; should throw IllegalStateException + * if variant is not a SNP. + * + * @return alternative allele base on the forward starnd + */ + @Override + public char getAltSnpFWD() throws IllegalStateException { + if (!isSNP()) { + throw new IllegalStateException("Not a SNP"); + } + String str = getBestGenotype(1).toString(); + if (String.valueOf(str.charAt(0)).equals(mRecord.getRefBase().toString())) { + return str.charAt(1); + } + return str.charAt(0); + } + + /** + * Returns true if all observed alleles are reference alleles. All is methods (where Variant=SNP,Insertion, etc) should + * return false at such site to ensure consistency. This method is included for use with genotyping calls (isGenotype()==true), it makes + * no sense for, e.g. dbSNP and should return false for the latter. + * + * @return + */ + @Override + public boolean isReference() { + return (!isSNP()); + } + + /** + * Is this variant a SNP? + * + * @return true or false + */ + @Override + public boolean isSNP() { + return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.SINGLE) && + (!getBestGenotype(1).toString().equals(refString(mRecord.getRefBase().toChar())))); + } + + /** + * return a string representing the reference + * @param ref the reference character + * @return a string for the homozygous ref in a diploid + */ + private static String refString(char ref) { + return new String(new char[]{ref, ref}); + } + + /** + * Get the nth best genotype (one based), i.e. to get the best genotype pass in 1, + * the second best 2, etdc. + * + * @param nthBest the nth best genotype to get + * + * @return a GENOTYPE object representing the nth best genotype + */ + public LikelihoodObject.GENOTYPE getBestGenotype(int nthBest) { + Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods()); + return LikelihoodObject.GENOTYPE.values()[sorted[nthBest-1]]; + } + + /** + * Get the nth best genotype value (one based), i.e. to get the best genotype pass in 1, + * the second best 2, etdc. + * + * @param nthBest the nth best genotype value to get + * + * @return a GENOTYPE object representing the nth best genotype + */ + public double getBestGenotypeValue(int nthBest) { + Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods()); + return (((SinglePointCall) mRecord).getLikelihoods())[sorted[nthBest-1]]; + } + + /** + * Is this variant an insertion? The contract requires isIndel() to return true + * if this method returns true. + * + * @return true or false + */ + @Override + public boolean isInsertion() { + return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) && + ((VariableLengthCall) mRecord).getIndelLen1() > 0); + } + + /** + * Is this variant a deletion? The contract requires isIndel() to return true + * if isDeletion() returns true. + * + * @return true or false + */ + @Override + public boolean isDeletion() { + return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) && + ((VariableLengthCall) mRecord).getIndelLen1() < 0); + } + + /** + * Is this variant an insertion or a deletion? The contract requires + * this to be true if either isInsertion() or isDeletion() returns true. However, + * this method is currently allowed to return true even if neither of isInsertion() + * and isDeletion() does. + * + * @return + */ + @Override + public boolean isIndel() { + return (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE); + } + + /** + * Returns minor allele frequency. + * + * @return + */ + @Override + public double getMAF() { + throw new UnsupportedOperationException("getMAF is unsupported"); + } + + /** + * Returns heterozygosity, a more accessible general feature of a variant. + * + * @return + */ + @Override + public double getHeterozygosity() { + throw new UnsupportedOperationException("getHeterozygosity is unsupported"); + } + + /** + * Is this variant an actual genotype (such as individual call from sequencing, HapMap chip etc), or + * population allelic variant (call from pooled sequencing, dbSNP site etc). Only if variant is a genotype, there + * is a meaningful question of, e.g., whether it is a het, or homogeneous non-ref. + * + * @return true if this variant is an actual genotype. + */ + @Override + public boolean isGenotype() { + return true; + } + + /** + * Returns phred-mapped confidence in variation event (e.g. MAQ's SNP confidence, or AlleleCaller's best vs. ref). + * + * @return + */ + @Override + public double getVariationConfidence() { + String ref = new String() + mRecord.getRefBase() + mRecord.getRefBase(); + int index = 0; + for (LikelihoodObject.GENOTYPE g: LikelihoodObject.GENOTYPE.values()) { + if (g.toString().equals(ref)) break; + index++; + } + return Math.abs(getBestGenotypeValue(1) - ((SinglePointCall)mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR; + } + + /** + * Returns phred-mapped confidence in called genotype (e.g. MAQ's consensus confidence, or AlleleCaller's + * best vs next-best. + * + * @return + */ + @Override + public double getConsensusConfidence() { + return Math.abs(getBestGenotypeValue(1) - getBestGenotypeValue(2)) / GLFRecord.LIKELIHOOD_SCALE_FACTOR; + } + + /** + * Returns actual observed genotype. Allowed to return more than two alleles (@see #getPloidy()). If this variant + * is not a genotype, should throw an IllegalStateException. + * + * @return + */ + @Override + public List getGenotype() throws IllegalStateException { + List ret = new ArrayList(); + ret.add(this.getBestGenotype(1).toString()); + return ret; + } + + /** + * Return actual number of observed alleles (chromosomes) in the genotype. If this variant is not a genotype, + * should throw IllegalStateException. + * + * @return + */ + @Override + public int getPloidy() throws IllegalStateException { + return 2; // we're so haploid it hurts + } + + /** + * Returns true if the site has at most two known or observed alleles (ref and non-ref), or false if there are > 2 allelic variants known or observed. When + * the implementing class is a genotype, alleles should be always counted including the reference allele whether it was observed in the particular + * individual or not: i.e. if the reference is 'C', then both 'CA' and 'AA' genotypes must be reported as bi-allelic, while 'AT' is not bi-allelic (since there are + * two allelic variants, 'A' and 'T' in addition to the (known) reference variant 'C'). + * + * @return + */ + @Override + public boolean isBiallelic() { + return false; + } + + @Override + public int compareTo(ReferenceOrderedDatum that) { + return this.mLoc.compareTo(that.getLocation()); + } + + /** + * the parse line, which is not used by the GLF rod + * @param header the header to pass in + * @param parts the string object + * @return false, alwayss + * @throws java.io.IOException + */ @Override public boolean parseLine(Object header, String[] parts) throws IOException { return false; //To change body of implemented methods use File | Settings | File Templates. } @Override - public String toString() { - final StringBuilder builder = new StringBuilder(); - builder.append(contig); - builder.append("\t"); - builder.append(mRecord.getOffset()); - builder.append("\t"); - builder.append(mRecord.getRefBase()); - builder.append("\t"); - builder.append(mRecord.getReadDepth()); - builder.append("\t"); - builder.append(mRecord.getRmsMapQ()); - builder.append("\t"); - builder.append(mRecord.getMinimumLikelihood()); - - if (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.SINGLE) { - for (double d : ((SinglePointCall) mRecord).getLikelihoods()) { - builder.append("\t"); - builder.append(d); - } - } else if (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) { - VariableLengthCall call = (VariableLengthCall) mRecord; - builder.append("\t"); - builder.append(call.getLkHom1()); - builder.append("\t"); - builder.append(call.getLkHom2()); - builder.append("\t"); - builder.append(call.getLkHet()); - builder.append("\t"); - builder.append(call.getIndelLen1()); - builder.append("\t"); - builder.append(call.getIndelSeq1()); - builder.append("\t"); - builder.append(call.getIndelLen2()); - builder.append("\t"); - builder.append(call.getIndelSeq2()); - } - return builder.toString(); + public boolean hasNext() { + return (mReader.hasNext()); } @Override - public GenomeLoc getLocation() { - return mLoc; + public RodGLF next() { + mRecord = mReader.next(); + mLoc = GenomeLocParser.createGenomeLoc(mReader.getReferenceName(), mReader.getCurrentLocation(), mReader.getCurrentLocation()); + return this; } - public static Iterator createIterator(String name, File file) { - if (mWrap == null) { - return new RodGLF.GLFWrapper(name, file); - } - return mWrap; + @Override + public void remove() { + throw new UnsupportedOperationException("GLF Rods don't support the remove() function"); } - static void setGLFWrapper(GLFRODIterator iter) { - mWrap = iter; - } - - private static class GLFWrapper implements GLFRODIterator { - private GLFReader mReader; - private String mName; - - public GLFWrapper(String name, File file) { - mName = name; - mReader = new GLFReader(file); - } - - @Override - public boolean hasNext() { - return (mReader.hasNext()); - } - - @Override - public RodGLF next() { - RodGLF glf = new RodGLF(mName); - glf.mRecord = mReader.next(); - glf.mLoc = GenomeLocParser.createGenomeLoc(mReader.getReferenceName(), mReader.getCurrentLocation()); - glf.contig = mReader.getReferenceName(); - glf.contigLength = mReader.getReferenceLength(); - return glf; - } - - @Override - public void remove() { - throw new UnsupportedOperationException("GLF Rods don't support the remove() function"); + public static RodGLF createIterator(String name, File file) { + RodGLF glf = new RodGLF(name); + try { + glf.initialize(file); + } catch (FileNotFoundException e) { + throw new StingException("Unable to find file " + file); } + return glf; } } -// for testing -interface GLFRODIterator extends Iterator { -} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java index b892b2479..cf6b1e76d 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java @@ -1,18 +1,19 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.LocusContext; -import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.gatk.refdata.AllelicVariant; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.SNPCallFromGenotypes; +import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.genotype.glf.GLFRecord; -import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.cmdLine.Argument; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.PrintStream; import java.util.*; -import java.io.*; /** * The Broad Institute @@ -146,50 +147,9 @@ public class VariantEvalWalker extends RefWalker { } } - // OMG this is painful - private rodVariants glf2geli(char ref, RodGLF glf) { - SinglePointCall rec = (SinglePointCall)glf.mRecord; - // contig pos refBase depth maxMappingQ bestGenotype lodBtr lodBtnb genotypes - Integer[] sorted = Utils.SortPermutation(rec.getLikelihoods()); - int bestIndex = sorted[0]; - char[] refs = {ref, ref}; - String homRef = new String(refs); - int refIndex = rodVariants.Genotype.valueOf(homRef).ordinal(); - double refLikelihood = rec.getLikelihoods()[refIndex]; - double bestLikelihood = rec.getLikelihoods()[bestIndex]; - double secondBestLikelihood = rec.getLikelihoods()[sorted[1]]; - - rodVariants var = new rodVariants("eval"); - var.loc = glf.getLocation(); - var.refBase = ref; - var.depth = rec.getReadDepth(); - var.maxMappingQuality = rec.getRmsMapQ(); - var.bestGenotype = rodVariants.Genotype.values()[bestIndex].toString(); - var.lodBtr = Math.abs((bestLikelihood - refLikelihood) / GLFRecord.LIKELIHOOD_SCALE_FACTOR); - var.lodBtnb = Math.abs((bestLikelihood - secondBestLikelihood) / GLFRecord.LIKELIHOOD_SCALE_FACTOR); - var.genotypeLikelihoods = rec.getLikelihoods(); - for ( int i = 0; i < var.genotypeLikelihoods.length; i++ ) - var.genotypeLikelihoods[i] /= GLFRecord.LIKELIHOOD_SCALE_FACTOR; - - if ( false ) { - System.out.printf("Converting : %s%n", glf); - System.out.printf(" homRef: %s%n", homRef); - System.out.printf(" refindex : %d%n", refIndex); - System.out.printf(" bestIndex : %d%n", sorted[0]); - System.out.printf(" 2ndindex : %d%n", sorted[1]); - System.out.printf(" => %s%n", var); - } - - return var; - } - public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) { nSites++; - if ( tracker.lookup("eval", null) instanceof RodGLF) { - tracker.bind("eval", glf2geli(ref, (RodGLF)tracker.lookup("eval", null))); - } - // Iterate over each analysis, and update it AllelicVariant eval = (AllelicVariant)tracker.lookup("eval", null); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java index cf0c27aec..a38a0db1a 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java @@ -2,10 +2,6 @@ package org.broadinstitute.sting.utils.genotype.glf; import net.sf.samtools.util.BinaryCodec; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.gatk.refdata.AllelicVariant; - -import java.util.List; -import java.util.Arrays; /* @@ -87,6 +83,14 @@ public abstract class GLFRecord { fieldValue = value; } + /** + * return the character representation + * @return the char for the reference base + */ + public char toChar() { + return this.toString().charAt(0); + } + /** * static method from returning a REF_BASE given the character representation * diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java index fd8b2e1c7..1e4bf2601 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java @@ -5,80 +5,55 @@ import net.sf.picard.reference.ReferenceSequenceFileFactory; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall; +import org.broadinstitute.sting.utils.genotype.GenotypeWriter; +import org.broadinstitute.sting.utils.genotype.LikelihoodObject; +import org.broadinstitute.sting.utils.genotype.glf.GLFWriter; +import org.junit.Assert; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import java.io.File; -import java.util.Iterator; +import java.util.ArrayList; +import java.util.List; /** * Created by IntelliJ IDEA. - * User: aaronmckenna + * User: aaron * Date: Jul 15, 2009 * Time: 12:18:50 AM + *

+ * These tests work upon a very small data set, with the following samtools glfview dump: + *

+ * chrM 1 A 5 20 0 0 127 127 127 254 254 254 254 254 254 + * chrM 2 A 5 20 0 254 254 254 127 254 254 127 254 127 0 + * chrM 3 A 5 20 0 254 127 254 254 0 127 127 254 254 254 + *

+ * You'll notice that the first is a hom ref, and the other two are hom alt SNP's */ public class RodGLFTest extends BaseTest { - static final File glfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test_likelihoods.glf"); - static final int finalRecordCount = 484140; // the number of records in the above file - static final int contigCount = 25; + static final File glfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/glfTestFile.glf"); + static final int finalRecordCount = 3; // the number of records in the above file + static final int contigCount = 1; static final String ref = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"; + static ReferenceSequenceFile r; + private RodGLF iter = null; @BeforeClass public static void before() { - ReferenceSequenceFile r = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(ref)); + r = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(ref)); GenomeLocParser.setupRefContigOrdering(r); } - static class TestRODClass implements GLFRODIterator { - private int count = 0; - private int readCount = 0; - - public TestRODClass(int count) { - this.count = count; - } - - @Override - public boolean hasNext() { - if (count <= 0) { - return false; - } - return true; - } - - @Override - public RodGLF next() { - RodGLF glf = new RodGLF("Test"); - glf.mRecord = new SinglePointCall('A', readCount, 0, (short) 0, new double[]{0.0, 0.0}); - --count; - readCount++; - return glf; - } - - @Override - public void remove() { - throw new UnsupportedOperationException("BOO"); - } - } - - //@Test - public void testNext() { - RodGLF.setGLFWrapper(new RodGLFTest.TestRODClass(10)); - Iterator iter = RodGLF.createIterator("test", new File("")); - int counter = 0; - while (iter.hasNext()) { - iter.next(); - counter++; - } - assertEquals(10, counter); + @Before + public void setup() { + iter = RodGLF.createIterator("test", glfFile); } @Test public void testRodCount() { - - Iterator iter = RodGLF.createIterator("test", glfFile); int counter = 0; while (iter.hasNext()) { RodGLF glf = iter.next(); @@ -87,21 +62,173 @@ public class RodGLFTest extends BaseTest { assertEquals(finalRecordCount, counter); } + /** make sure we're returning true to biallelic */ + @Test + public void testIsBiallelic() { + RodGLF glf = iter.next(); + Assert.assertFalse(iter.isBiallelic()); + } + + // best vs next-best + @Test + public void testGetConsensusConfidence() { + RodGLF glf = iter.next(); + assertEquals(120.0, iter.getConsensusConfidence(), 0.005); + glf = iter.next(); + assertEquals(120.0, iter.getConsensusConfidence(), 0.005); + } + + // best vs. ref + @Test + public void testGetVariationConfidence() { + RodGLF glf = iter.next(); + assertEquals(0.0, iter.getVariationConfidence(), 0.005); + glf = iter.next(); + assertEquals(250.0, iter.getVariationConfidence(), 0.005); + } + + + @Test + public void testGetGenotype() { + RodGLF glf = iter.next(); + List str = iter.getGenotype(); + Assert.assertTrue(str.get(0).equals("AA")); + } + + @Test + public void testIsSNP() { + RodGLF glf = iter.next(); + Assert.assertFalse(iter.isSNP()); + glf = iter.next(); + Assert.assertTrue(iter.isSNP()); + glf = iter.next(); + Assert.assertTrue(iter.isSNP()); + } + + @Test + public void testIsReference() { + RodGLF glf = iter.next(); + Assert.assertTrue(iter.isReference()); + glf = iter.next(); + Assert.assertFalse(iter.isReference()); + glf = iter.next(); + Assert.assertFalse(iter.isReference()); + } + + @Test(expected = IllegalStateException.class) + public void testGetAltSnpFWDIllegalException() { + RodGLF glf = iter.next(); + iter.getAltSnpFWD(); + } + + + + + @Test + public void testCompareTo() { + RodGLF iter2 = RodGLF.createIterator("test", glfFile); + RodGLF glf = iter.next(); + glf = iter2.next(); + assertEquals(0, iter.compareTo(iter2)); + RodGLF glf2 = iter.next(); + assertEquals(-1, iter2.compareTo(iter)); + assertEquals(1, iter.compareTo(iter2)); + + } + + @Test + public void testGetAltSnpFWD() { + RodGLF glf = iter.next(); + glf = iter.next(); + Assert.assertEquals('T', iter.getAltSnpFWD()); + } + + @Test + public void testGetRefSnpFWD() { + RodGLF glf = iter.next(); + glf = iter.next(); + Assert.assertEquals('A', iter.getRefSnpFWD()); + } + + @Test + public void testGetRefBasesFWD() { + RodGLF glf = iter.next(); + Assert.assertTrue("A".equals(iter.getRefBasesFWD())); + glf = iter.next(); + Assert.assertTrue("A".equals(iter.getRefBasesFWD())); + } + + /** + * move to the second and third bases, and check that the + * alternate bases are correct. + */ + @Test + public void testGetAltBasesFWD() { + RodGLF glf = iter.next(); + glf = iter.next(); + Assert.assertTrue("GT".equals(iter.getAltBasesFWD())); + glf = iter.next(); + Assert.assertTrue("CT".equals(iter.getAltBasesFWD())); + + } + @Test public void testRodLocations() { - - Iterator iter = RodGLF.createIterator("test", glfFile); GenomeLoc loc = null; while (iter.hasNext()) { RodGLF glf = iter.next(); if (loc != null) { - if (glf.getLocation().isBefore(loc)) { - fail("locations in the GLF came out of order loc = " + loc.toString() + " new loc = " + glf.getLocation().toString()); + if (iter.getLocation().isBefore(loc)) { + fail("locations in the GLF came out of order loc = " + loc.toString() + " new loc = " + iter.getLocation().toString()); } } - loc = glf.getLocation(); - } + loc = iter.getLocation(); + } + } + + //@Test + /** + * create the example glf file for the test, you can uncomment the above test line to have this + * test run, regenerating the file. + */ + public void createRodFile() { + GenotypeWriter writer = new GLFWriter("", new File("glfTestFile.glf")); + int location = 1; + int x = 0; + writer.addGenotypeCall(r.getSequenceDictionary().getSequence(0), 1, 20, 'A', 5, createLikelihood('A')); + writer.addGenotypeCall(r.getSequenceDictionary().getSequence(0), 2, 20, 'A', 5, createLikelihood('T')); + writer.addGenotypeCall(r.getSequenceDictionary().getSequence(0), 3, 20, 'A', 5, createLikelihood('C')); + writer.close(); + } + + /** + * create a likelihood object, given the appropriate reference base + * + * @param ref the reference base + * + * @return the likelihood object + */ + private LikelihoodObject createLikelihood(char ref) { + ArrayList vals = new ArrayList(); + for (LikelihoodObject.GENOTYPE type : LikelihoodObject.GENOTYPE.values()) { + double x = (type.toString().charAt(0) == ref) ? 0 : 127 - (10 * Math.random()); + x += (type.toString().charAt(1) == ref) ? 0 : 127 - (10 * Math.random()); + vals.add(x); + } + double ret[] = new double[vals.size()]; + for (int x = 0; x < vals.size(); x++) { + ret[x] = vals.get(x); + } + return new LikelihoodObject(ret, LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG); } -} + /** + * just make sure that we do get a string back, and no exceptions are thrown + */ + @Test + public void testToString() { + RodGLF glf = iter.next(); + iter.toString(); + } +} \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFReaderTest.java b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFReaderTest.java index b0dd75c14..8923de939 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFReaderTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFReaderTest.java @@ -21,7 +21,9 @@ public class GLFReaderTest extends BaseTest { // our test file static final File glfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test_likelihoods.glf"); + //static final File glfFile = new File("CALLS.glf"); static final int finalRecordCount = 484140; // the number of records in the above file + //static final int finalRecordCount = 484445; static final int contigCount = 25; /** read in the records from the file */