diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java
index 0b3074693..949c1c7c5 100644
--- a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java
@@ -2,9 +2,8 @@ package org.broadinstitute.sting.gatk.refdata;
import org.apache.log4j.Logger;
-import java.util.HashMap;
-import java.util.List;
import java.util.Collection;
+import java.util.HashMap;
/**
* This class represents the Reference Metadata available at a particular site in the genome. It can be
@@ -70,10 +69,10 @@ public class RefMetaDataTracker {
/**
* Is there a binding at this site to a ROD with name?
*
- * @param name
- * @return
+ * @param name the name of the rod
+ * @return true if it has the rod
*/
- public Object hasROD(final String name) {
+ public boolean hasROD(final String name) {
return map.containsKey(canonicalName(name));
}
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java
index cc00c61a1..ce0b94bd3 100644
--- a/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java
@@ -2,14 +2,20 @@ package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.StingException;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.glf.GLFReader;
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall;
import org.broadinstitute.sting.utils.genotype.glf.VariableLengthCall;
import java.io.File;
+import java.io.FileNotFoundException;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Iterator;
+import java.util.List;
/**
@@ -19,109 +25,385 @@ import java.util.Iterator;
*
* the rod class for GLF data.
*/
-public class RodGLF extends BasicReferenceOrderedDatum {
- public GLFRecord mRecord;
+public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator {
+ static int count = 0;
+ public GLFReader mReader;
+ private final String mName;
private GenomeLoc mLoc;
- private static GLFRODIterator mWrap;
- private String contig;
- private int contigLength;
+ public GLFRecord mRecord;
public RodGLF(String name) {
- super(name);
+ mName = name;
}
+ /**
+ * get the name
+ * @return the name
+ */
+ @Override
+ public String getName() {
+ return mName;
+ }
+
+ /**
+ * Backdoor hook to read header, meta-data, etc. associated with the file. Will be
+ * called by the ROD system before streaming starts
+ *
+ * @param source source data file on disk from which this rod stream will be pulled
+ *
+ * @return a header object that will be passed to parseLine command
+ */
+ @Override
+ public Object initialize(File source) throws FileNotFoundException {
+ mReader = new GLFReader(source);
+ return null;
+ }
+
+ @Override
+ public String toSimpleString() {
+ return toString();
+ }
+
+ /**
+ * @return a string representation of the ROD GLF object
+ */
+ public String toString() {
+ return String.format("%s\t%d\t%s\t%d\t%d\t%4.4f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f",
+ mLoc.getContig(),
+ mLoc.getStart(),
+ mRecord.getRefBase(),
+ mRecord.getReadDepth(),
+ mRecord.getRmsMapQ(),
+ getBestGenotypeValue(1),
+ ((SinglePointCall)mRecord).getLikelihoods()[0],
+ ((SinglePointCall)mRecord).getLikelihoods()[1],
+ ((SinglePointCall)mRecord).getLikelihoods()[2],
+ ((SinglePointCall)mRecord).getLikelihoods()[3],
+ ((SinglePointCall)mRecord).getLikelihoods()[4],
+ ((SinglePointCall)mRecord).getLikelihoods()[5],
+ ((SinglePointCall)mRecord).getLikelihoods()[6],
+ ((SinglePointCall)mRecord).getLikelihoods()[7],
+ ((SinglePointCall)mRecord).getLikelihoods()[8],
+ ((SinglePointCall)mRecord).getLikelihoods()[9]
+
+
+ );
+ }
+
+ @Override
+ public String repl() {
+ return this.toString();
+ }
+
+ /**
+ * Used by the ROD system to determine how to split input lines
+ *
+ * @return Regex string delimiter separating fields
+ */
+ @Override
+ public String delimiterRegex() {
+ return "";
+ }
+
+ /**
+ * return a genome loc representing the current location
+ * @return the geonome loc
+ */
+ @Override
+ public GenomeLoc getLocation() {
+ return mLoc;
+ }
+
+ /**
+ * Returns bases in the reference allele as a String. String can be empty (as in insertion into
+ * the reference), can contain a single character (as in SNP or one-base deletion), or multiple characters
+ * (for longer indels).
+ *
+ * @return reference allele, forward strand
+ */
+ @Override
+ public String getRefBasesFWD() {
+ if (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) return "";
+ return String.valueOf(mRecord.getRefBase());
+ }
+
+ /**
+ * Returns reference (major) allele base for a SNP variant as a character; should throw IllegalStateException
+ * if variant is not a SNP.
+ *
+ * This doesn't make much sense to me, what if the best genotype is hom non-ref?
+ *
+ * @return reference base on the forward strand
+ */
+ @Override
+ public char getRefSnpFWD() throws IllegalStateException {
+ if (!isSNP()) throw new IllegalStateException("Current GLF Record is not a SNP");
+ return mRecord.getRefBase().toChar();
+ }
+
+ /**
+ * Returns bases in the alternative allele as a String. String can be empty (as in deletion from
+ * the reference), can contain a single character (as in SNP or one-base insertion), or multiple characters
+ * (for longer indels).
+ *
+ * @return alternative allele, forward strand
+ */
+ @Override
+ public String getAltBasesFWD() {
+ return getBestGenotype(2).toString();
+ }
+
+ /**
+ * Returns alternative (minor) allele base for a SNP variant as a character; should throw IllegalStateException
+ * if variant is not a SNP.
+ *
+ * @return alternative allele base on the forward starnd
+ */
+ @Override
+ public char getAltSnpFWD() throws IllegalStateException {
+ if (!isSNP()) {
+ throw new IllegalStateException("Not a SNP");
+ }
+ String str = getBestGenotype(1).toString();
+ if (String.valueOf(str.charAt(0)).equals(mRecord.getRefBase().toString())) {
+ return str.charAt(1);
+ }
+ return str.charAt(0);
+ }
+
+ /**
+ * Returns true if all observed alleles are reference alleles. All is methods (where Variant=SNP,Insertion, etc) should
+ * return false at such site to ensure consistency. This method is included for use with genotyping calls (isGenotype()==true), it makes
+ * no sense for, e.g. dbSNP and should return false for the latter.
+ *
+ * @return
+ */
+ @Override
+ public boolean isReference() {
+ return (!isSNP());
+ }
+
+ /**
+ * Is this variant a SNP?
+ *
+ * @return true or false
+ */
+ @Override
+ public boolean isSNP() {
+ return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.SINGLE) &&
+ (!getBestGenotype(1).toString().equals(refString(mRecord.getRefBase().toChar()))));
+ }
+
+ /**
+ * return a string representing the reference
+ * @param ref the reference character
+ * @return a string for the homozygous ref in a diploid
+ */
+ private static String refString(char ref) {
+ return new String(new char[]{ref, ref});
+ }
+
+ /**
+ * Get the nth best genotype (one based), i.e. to get the best genotype pass in 1,
+ * the second best 2, etdc.
+ *
+ * @param nthBest the nth best genotype to get
+ *
+ * @return a GENOTYPE object representing the nth best genotype
+ */
+ public LikelihoodObject.GENOTYPE getBestGenotype(int nthBest) {
+ Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods());
+ return LikelihoodObject.GENOTYPE.values()[sorted[nthBest-1]];
+ }
+
+ /**
+ * Get the nth best genotype value (one based), i.e. to get the best genotype pass in 1,
+ * the second best 2, etdc.
+ *
+ * @param nthBest the nth best genotype value to get
+ *
+ * @return a GENOTYPE object representing the nth best genotype
+ */
+ public double getBestGenotypeValue(int nthBest) {
+ Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods());
+ return (((SinglePointCall) mRecord).getLikelihoods())[sorted[nthBest-1]];
+ }
+
+ /**
+ * Is this variant an insertion? The contract requires isIndel() to return true
+ * if this method returns true.
+ *
+ * @return true or false
+ */
+ @Override
+ public boolean isInsertion() {
+ return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) &&
+ ((VariableLengthCall) mRecord).getIndelLen1() > 0);
+ }
+
+ /**
+ * Is this variant a deletion? The contract requires isIndel() to return true
+ * if isDeletion() returns true.
+ *
+ * @return true or false
+ */
+ @Override
+ public boolean isDeletion() {
+ return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) &&
+ ((VariableLengthCall) mRecord).getIndelLen1() < 0);
+ }
+
+ /**
+ * Is this variant an insertion or a deletion? The contract requires
+ * this to be true if either isInsertion() or isDeletion() returns true. However,
+ * this method is currently allowed to return true even if neither of isInsertion()
+ * and isDeletion() does.
+ *
+ * @return
+ */
+ @Override
+ public boolean isIndel() {
+ return (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE);
+ }
+
+ /**
+ * Returns minor allele frequency.
+ *
+ * @return
+ */
+ @Override
+ public double getMAF() {
+ throw new UnsupportedOperationException("getMAF is unsupported");
+ }
+
+ /**
+ * Returns heterozygosity, a more accessible general feature of a variant.
+ *
+ * @return
+ */
+ @Override
+ public double getHeterozygosity() {
+ throw new UnsupportedOperationException("getHeterozygosity is unsupported");
+ }
+
+ /**
+ * Is this variant an actual genotype (such as individual call from sequencing, HapMap chip etc), or
+ * population allelic variant (call from pooled sequencing, dbSNP site etc). Only if variant is a genotype, there
+ * is a meaningful question of, e.g., whether it is a het, or homogeneous non-ref.
+ *
+ * @return true if this variant is an actual genotype.
+ */
+ @Override
+ public boolean isGenotype() {
+ return true;
+ }
+
+ /**
+ * Returns phred-mapped confidence in variation event (e.g. MAQ's SNP confidence, or AlleleCaller's best vs. ref).
+ *
+ * @return
+ */
+ @Override
+ public double getVariationConfidence() {
+ String ref = new String() + mRecord.getRefBase() + mRecord.getRefBase();
+ int index = 0;
+ for (LikelihoodObject.GENOTYPE g: LikelihoodObject.GENOTYPE.values()) {
+ if (g.toString().equals(ref)) break;
+ index++;
+ }
+ return Math.abs(getBestGenotypeValue(1) - ((SinglePointCall)mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
+ }
+
+ /**
+ * Returns phred-mapped confidence in called genotype (e.g. MAQ's consensus confidence, or AlleleCaller's
+ * best vs next-best.
+ *
+ * @return
+ */
+ @Override
+ public double getConsensusConfidence() {
+ return Math.abs(getBestGenotypeValue(1) - getBestGenotypeValue(2)) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
+ }
+
+ /**
+ * Returns actual observed genotype. Allowed to return more than two alleles (@see #getPloidy()). If this variant
+ * is not a genotype, should throw an IllegalStateException.
+ *
+ * @return
+ */
+ @Override
+ public List getGenotype() throws IllegalStateException {
+ List ret = new ArrayList();
+ ret.add(this.getBestGenotype(1).toString());
+ return ret;
+ }
+
+ /**
+ * Return actual number of observed alleles (chromosomes) in the genotype. If this variant is not a genotype,
+ * should throw IllegalStateException.
+ *
+ * @return
+ */
+ @Override
+ public int getPloidy() throws IllegalStateException {
+ return 2; // we're so haploid it hurts
+ }
+
+ /**
+ * Returns true if the site has at most two known or observed alleles (ref and non-ref), or false if there are > 2 allelic variants known or observed. When
+ * the implementing class is a genotype, alleles should be always counted including the reference allele whether it was observed in the particular
+ * individual or not: i.e. if the reference is 'C', then both 'CA' and 'AA' genotypes must be reported as bi-allelic, while 'AT' is not bi-allelic (since there are
+ * two allelic variants, 'A' and 'T' in addition to the (known) reference variant 'C').
+ *
+ * @return
+ */
+ @Override
+ public boolean isBiallelic() {
+ return false;
+ }
+
+ @Override
+ public int compareTo(ReferenceOrderedDatum that) {
+ return this.mLoc.compareTo(that.getLocation());
+ }
+
+ /**
+ * the parse line, which is not used by the GLF rod
+ * @param header the header to pass in
+ * @param parts the string object
+ * @return false, alwayss
+ * @throws java.io.IOException
+ */
@Override
public boolean parseLine(Object header, String[] parts) throws IOException {
return false; //To change body of implemented methods use File | Settings | File Templates.
}
@Override
- public String toString() {
- final StringBuilder builder = new StringBuilder();
- builder.append(contig);
- builder.append("\t");
- builder.append(mRecord.getOffset());
- builder.append("\t");
- builder.append(mRecord.getRefBase());
- builder.append("\t");
- builder.append(mRecord.getReadDepth());
- builder.append("\t");
- builder.append(mRecord.getRmsMapQ());
- builder.append("\t");
- builder.append(mRecord.getMinimumLikelihood());
-
- if (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.SINGLE) {
- for (double d : ((SinglePointCall) mRecord).getLikelihoods()) {
- builder.append("\t");
- builder.append(d);
- }
- } else if (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) {
- VariableLengthCall call = (VariableLengthCall) mRecord;
- builder.append("\t");
- builder.append(call.getLkHom1());
- builder.append("\t");
- builder.append(call.getLkHom2());
- builder.append("\t");
- builder.append(call.getLkHet());
- builder.append("\t");
- builder.append(call.getIndelLen1());
- builder.append("\t");
- builder.append(call.getIndelSeq1());
- builder.append("\t");
- builder.append(call.getIndelLen2());
- builder.append("\t");
- builder.append(call.getIndelSeq2());
- }
- return builder.toString();
+ public boolean hasNext() {
+ return (mReader.hasNext());
}
@Override
- public GenomeLoc getLocation() {
- return mLoc;
+ public RodGLF next() {
+ mRecord = mReader.next();
+ mLoc = GenomeLocParser.createGenomeLoc(mReader.getReferenceName(), mReader.getCurrentLocation(), mReader.getCurrentLocation());
+ return this;
}
- public static Iterator createIterator(String name, File file) {
- if (mWrap == null) {
- return new RodGLF.GLFWrapper(name, file);
- }
- return mWrap;
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException("GLF Rods don't support the remove() function");
}
- static void setGLFWrapper(GLFRODIterator iter) {
- mWrap = iter;
- }
-
- private static class GLFWrapper implements GLFRODIterator {
- private GLFReader mReader;
- private String mName;
-
- public GLFWrapper(String name, File file) {
- mName = name;
- mReader = new GLFReader(file);
- }
-
- @Override
- public boolean hasNext() {
- return (mReader.hasNext());
- }
-
- @Override
- public RodGLF next() {
- RodGLF glf = new RodGLF(mName);
- glf.mRecord = mReader.next();
- glf.mLoc = GenomeLocParser.createGenomeLoc(mReader.getReferenceName(), mReader.getCurrentLocation());
- glf.contig = mReader.getReferenceName();
- glf.contigLength = mReader.getReferenceLength();
- return glf;
- }
-
- @Override
- public void remove() {
- throw new UnsupportedOperationException("GLF Rods don't support the remove() function");
+ public static RodGLF createIterator(String name, File file) {
+ RodGLF glf = new RodGLF(name);
+ try {
+ glf.initialize(file);
+ } catch (FileNotFoundException e) {
+ throw new StingException("Unable to find file " + file);
}
+ return glf;
}
}
-// for testing
-interface GLFRODIterator extends Iterator {
-}
diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java
index b892b2479..cf6b1e76d 100755
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java
@@ -1,18 +1,19 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
-import org.broadinstitute.sting.gatk.walkers.*;
-import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.LocusContext;
-import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.refdata.SNPCallFromGenotypes;
+import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.StingException;
-import org.broadinstitute.sting.utils.GenomeLocParser;
-import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
-import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall;
+import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.cmdLine.Argument;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.PrintStream;
import java.util.*;
-import java.io.*;
/**
* The Broad Institute
@@ -146,50 +147,9 @@ public class VariantEvalWalker extends RefWalker {
}
}
- // OMG this is painful
- private rodVariants glf2geli(char ref, RodGLF glf) {
- SinglePointCall rec = (SinglePointCall)glf.mRecord;
- // contig pos refBase depth maxMappingQ bestGenotype lodBtr lodBtnb genotypes
- Integer[] sorted = Utils.SortPermutation(rec.getLikelihoods());
- int bestIndex = sorted[0];
- char[] refs = {ref, ref};
- String homRef = new String(refs);
- int refIndex = rodVariants.Genotype.valueOf(homRef).ordinal();
- double refLikelihood = rec.getLikelihoods()[refIndex];
- double bestLikelihood = rec.getLikelihoods()[bestIndex];
- double secondBestLikelihood = rec.getLikelihoods()[sorted[1]];
-
- rodVariants var = new rodVariants("eval");
- var.loc = glf.getLocation();
- var.refBase = ref;
- var.depth = rec.getReadDepth();
- var.maxMappingQuality = rec.getRmsMapQ();
- var.bestGenotype = rodVariants.Genotype.values()[bestIndex].toString();
- var.lodBtr = Math.abs((bestLikelihood - refLikelihood) / GLFRecord.LIKELIHOOD_SCALE_FACTOR);
- var.lodBtnb = Math.abs((bestLikelihood - secondBestLikelihood) / GLFRecord.LIKELIHOOD_SCALE_FACTOR);
- var.genotypeLikelihoods = rec.getLikelihoods();
- for ( int i = 0; i < var.genotypeLikelihoods.length; i++ )
- var.genotypeLikelihoods[i] /= GLFRecord.LIKELIHOOD_SCALE_FACTOR;
-
- if ( false ) {
- System.out.printf("Converting : %s%n", glf);
- System.out.printf(" homRef: %s%n", homRef);
- System.out.printf(" refindex : %d%n", refIndex);
- System.out.printf(" bestIndex : %d%n", sorted[0]);
- System.out.printf(" 2ndindex : %d%n", sorted[1]);
- System.out.printf(" => %s%n", var);
- }
-
- return var;
- }
-
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
nSites++;
- if ( tracker.lookup("eval", null) instanceof RodGLF) {
- tracker.bind("eval", glf2geli(ref, (RodGLF)tracker.lookup("eval", null)));
- }
-
// Iterate over each analysis, and update it
AllelicVariant eval = (AllelicVariant)tracker.lookup("eval", null);
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java
index cf0c27aec..a38a0db1a 100755
--- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java
@@ -2,10 +2,6 @@ package org.broadinstitute.sting.utils.genotype.glf;
import net.sf.samtools.util.BinaryCodec;
import org.broadinstitute.sting.utils.StingException;
-import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
-
-import java.util.List;
-import java.util.Arrays;
/*
@@ -87,6 +83,14 @@ public abstract class GLFRecord {
fieldValue = value;
}
+ /**
+ * return the character representation
+ * @return the char for the reference base
+ */
+ public char toChar() {
+ return this.toString().charAt(0);
+ }
+
/**
* static method from returning a REF_BASE given the character representation
*
diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java
index fd8b2e1c7..1e4bf2601 100644
--- a/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java
@@ -5,80 +5,55 @@ import net.sf.picard.reference.ReferenceSequenceFileFactory;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
-import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall;
+import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
+import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
+import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
+import org.junit.Assert;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
+import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.File;
-import java.util.Iterator;
+import java.util.ArrayList;
+import java.util.List;
/**
* Created by IntelliJ IDEA.
- * User: aaronmckenna
+ * User: aaron
* Date: Jul 15, 2009
* Time: 12:18:50 AM
+ *
+ * These tests work upon a very small data set, with the following samtools glfview dump:
+ *
+ * chrM 1 A 5 20 0 0 127 127 127 254 254 254 254 254 254
+ * chrM 2 A 5 20 0 254 254 254 127 254 254 127 254 127 0
+ * chrM 3 A 5 20 0 254 127 254 254 0 127 127 254 254 254
+ *
+ * You'll notice that the first is a hom ref, and the other two are hom alt SNP's
*/
public class RodGLFTest extends BaseTest {
- static final File glfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test_likelihoods.glf");
- static final int finalRecordCount = 484140; // the number of records in the above file
- static final int contigCount = 25;
+ static final File glfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/glfTestFile.glf");
+ static final int finalRecordCount = 3; // the number of records in the above file
+ static final int contigCount = 1;
static final String ref = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta";
+ static ReferenceSequenceFile r;
+ private RodGLF iter = null;
@BeforeClass
public static void before() {
- ReferenceSequenceFile r = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(ref));
+ r = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(ref));
GenomeLocParser.setupRefContigOrdering(r);
}
- static class TestRODClass implements GLFRODIterator {
- private int count = 0;
- private int readCount = 0;
-
- public TestRODClass(int count) {
- this.count = count;
- }
-
- @Override
- public boolean hasNext() {
- if (count <= 0) {
- return false;
- }
- return true;
- }
-
- @Override
- public RodGLF next() {
- RodGLF glf = new RodGLF("Test");
- glf.mRecord = new SinglePointCall('A', readCount, 0, (short) 0, new double[]{0.0, 0.0});
- --count;
- readCount++;
- return glf;
- }
-
- @Override
- public void remove() {
- throw new UnsupportedOperationException("BOO");
- }
- }
-
- //@Test
- public void testNext() {
- RodGLF.setGLFWrapper(new RodGLFTest.TestRODClass(10));
- Iterator iter = RodGLF.createIterator("test", new File(""));
- int counter = 0;
- while (iter.hasNext()) {
- iter.next();
- counter++;
- }
- assertEquals(10, counter);
+ @Before
+ public void setup() {
+ iter = RodGLF.createIterator("test", glfFile);
}
@Test
public void testRodCount() {
-
- Iterator iter = RodGLF.createIterator("test", glfFile);
int counter = 0;
while (iter.hasNext()) {
RodGLF glf = iter.next();
@@ -87,21 +62,173 @@ public class RodGLFTest extends BaseTest {
assertEquals(finalRecordCount, counter);
}
+ /** make sure we're returning true to biallelic */
+ @Test
+ public void testIsBiallelic() {
+ RodGLF glf = iter.next();
+ Assert.assertFalse(iter.isBiallelic());
+ }
+
+ // best vs next-best
+ @Test
+ public void testGetConsensusConfidence() {
+ RodGLF glf = iter.next();
+ assertEquals(120.0, iter.getConsensusConfidence(), 0.005);
+ glf = iter.next();
+ assertEquals(120.0, iter.getConsensusConfidence(), 0.005);
+ }
+
+ // best vs. ref
+ @Test
+ public void testGetVariationConfidence() {
+ RodGLF glf = iter.next();
+ assertEquals(0.0, iter.getVariationConfidence(), 0.005);
+ glf = iter.next();
+ assertEquals(250.0, iter.getVariationConfidence(), 0.005);
+ }
+
+
+ @Test
+ public void testGetGenotype() {
+ RodGLF glf = iter.next();
+ List str = iter.getGenotype();
+ Assert.assertTrue(str.get(0).equals("AA"));
+ }
+
+ @Test
+ public void testIsSNP() {
+ RodGLF glf = iter.next();
+ Assert.assertFalse(iter.isSNP());
+ glf = iter.next();
+ Assert.assertTrue(iter.isSNP());
+ glf = iter.next();
+ Assert.assertTrue(iter.isSNP());
+ }
+
+ @Test
+ public void testIsReference() {
+ RodGLF glf = iter.next();
+ Assert.assertTrue(iter.isReference());
+ glf = iter.next();
+ Assert.assertFalse(iter.isReference());
+ glf = iter.next();
+ Assert.assertFalse(iter.isReference());
+ }
+
+ @Test(expected = IllegalStateException.class)
+ public void testGetAltSnpFWDIllegalException() {
+ RodGLF glf = iter.next();
+ iter.getAltSnpFWD();
+ }
+
+
+
+
+ @Test
+ public void testCompareTo() {
+ RodGLF iter2 = RodGLF.createIterator("test", glfFile);
+ RodGLF glf = iter.next();
+ glf = iter2.next();
+ assertEquals(0, iter.compareTo(iter2));
+ RodGLF glf2 = iter.next();
+ assertEquals(-1, iter2.compareTo(iter));
+ assertEquals(1, iter.compareTo(iter2));
+
+ }
+
+ @Test
+ public void testGetAltSnpFWD() {
+ RodGLF glf = iter.next();
+ glf = iter.next();
+ Assert.assertEquals('T', iter.getAltSnpFWD());
+ }
+
+ @Test
+ public void testGetRefSnpFWD() {
+ RodGLF glf = iter.next();
+ glf = iter.next();
+ Assert.assertEquals('A', iter.getRefSnpFWD());
+ }
+
+ @Test
+ public void testGetRefBasesFWD() {
+ RodGLF glf = iter.next();
+ Assert.assertTrue("A".equals(iter.getRefBasesFWD()));
+ glf = iter.next();
+ Assert.assertTrue("A".equals(iter.getRefBasesFWD()));
+ }
+
+ /**
+ * move to the second and third bases, and check that the
+ * alternate bases are correct.
+ */
+ @Test
+ public void testGetAltBasesFWD() {
+ RodGLF glf = iter.next();
+ glf = iter.next();
+ Assert.assertTrue("GT".equals(iter.getAltBasesFWD()));
+ glf = iter.next();
+ Assert.assertTrue("CT".equals(iter.getAltBasesFWD()));
+
+ }
+
@Test
public void testRodLocations() {
-
- Iterator iter = RodGLF.createIterator("test", glfFile);
GenomeLoc loc = null;
while (iter.hasNext()) {
RodGLF glf = iter.next();
if (loc != null) {
- if (glf.getLocation().isBefore(loc)) {
- fail("locations in the GLF came out of order loc = " + loc.toString() + " new loc = " + glf.getLocation().toString());
+ if (iter.getLocation().isBefore(loc)) {
+ fail("locations in the GLF came out of order loc = " + loc.toString() + " new loc = " + iter.getLocation().toString());
}
}
- loc = glf.getLocation();
- }
+ loc = iter.getLocation();
+ }
+ }
+
+ //@Test
+ /**
+ * create the example glf file for the test, you can uncomment the above test line to have this
+ * test run, regenerating the file.
+ */
+ public void createRodFile() {
+ GenotypeWriter writer = new GLFWriter("", new File("glfTestFile.glf"));
+ int location = 1;
+ int x = 0;
+ writer.addGenotypeCall(r.getSequenceDictionary().getSequence(0), 1, 20, 'A', 5, createLikelihood('A'));
+ writer.addGenotypeCall(r.getSequenceDictionary().getSequence(0), 2, 20, 'A', 5, createLikelihood('T'));
+ writer.addGenotypeCall(r.getSequenceDictionary().getSequence(0), 3, 20, 'A', 5, createLikelihood('C'));
+ writer.close();
+ }
+
+ /**
+ * create a likelihood object, given the appropriate reference base
+ *
+ * @param ref the reference base
+ *
+ * @return the likelihood object
+ */
+ private LikelihoodObject createLikelihood(char ref) {
+ ArrayList vals = new ArrayList();
+ for (LikelihoodObject.GENOTYPE type : LikelihoodObject.GENOTYPE.values()) {
+ double x = (type.toString().charAt(0) == ref) ? 0 : 127 - (10 * Math.random());
+ x += (type.toString().charAt(1) == ref) ? 0 : 127 - (10 * Math.random());
+ vals.add(x);
+ }
+ double ret[] = new double[vals.size()];
+ for (int x = 0; x < vals.size(); x++) {
+ ret[x] = vals.get(x);
+ }
+ return new LikelihoodObject(ret, LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG);
}
-}
+ /**
+ * just make sure that we do get a string back, and no exceptions are thrown
+ */
+ @Test
+ public void testToString() {
+ RodGLF glf = iter.next();
+ iter.toString();
+ }
+}
\ No newline at end of file
diff --git a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFReaderTest.java b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFReaderTest.java
index b0dd75c14..8923de939 100644
--- a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFReaderTest.java
+++ b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFReaderTest.java
@@ -21,7 +21,9 @@ public class GLFReaderTest extends BaseTest {
// our test file
static final File glfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test_likelihoods.glf");
+ //static final File glfFile = new File("CALLS.glf");
static final int finalRecordCount = 484140; // the number of records in the above file
+ //static final int finalRecordCount = 484445;
static final int contigCount = 25;
/** read in the records from the file */