removing the GLF ROD.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3624 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
17d2043354
commit
8a9b2f4256
|
|
@ -1,371 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFReader;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFSingleCall;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFVariableLengthCall;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class RodGLF
|
||||
* <p/>
|
||||
* the rod class for GLF data.
|
||||
*/
|
||||
public class RodGLF implements Iterator<RodGLF>, ReferenceOrderedDatum {
|
||||
public GLFReader mReader;
|
||||
private final String mName;
|
||||
private GenomeLoc mLoc;
|
||||
public GLFRecord mRecord;
|
||||
|
||||
public RodGLF(String name) {
|
||||
mName = name;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the name
|
||||
*
|
||||
* @return the name
|
||||
*/
|
||||
public String getName() {
|
||||
return mName;
|
||||
}
|
||||
|
||||
/**
|
||||
* Backdoor hook to read header, meta-data, etc. associated with the file. Will be
|
||||
* called by the ROD system before streaming starts
|
||||
*
|
||||
* @param source source data file on disk from which this rod stream will be pulled
|
||||
*
|
||||
* @return a header object that will be passed to parseLine command
|
||||
*/
|
||||
public Object initialize(File source) throws FileNotFoundException {
|
||||
mReader = new GLFReader(source);
|
||||
return null;
|
||||
}
|
||||
|
||||
public String toSimpleString() {
|
||||
return toString();
|
||||
}
|
||||
|
||||
/** @return a string representation of the ROD GLF object */
|
||||
public String toString() {
|
||||
return String.format("%s\t%d\t%s\t%d\t%d\t%4.4f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f",
|
||||
mLoc.getContig(),
|
||||
mLoc.getStart(),
|
||||
mRecord.getRefBase(),
|
||||
mRecord.getReadDepth(),
|
||||
mRecord.getRmsMapQ(),
|
||||
getBestGenotypeValue(1),
|
||||
((GLFSingleCall) mRecord).getLikelihoods()[0],
|
||||
((GLFSingleCall) mRecord).getLikelihoods()[1],
|
||||
((GLFSingleCall) mRecord).getLikelihoods()[2],
|
||||
((GLFSingleCall) mRecord).getLikelihoods()[3],
|
||||
((GLFSingleCall) mRecord).getLikelihoods()[4],
|
||||
((GLFSingleCall) mRecord).getLikelihoods()[5],
|
||||
((GLFSingleCall) mRecord).getLikelihoods()[6],
|
||||
((GLFSingleCall) mRecord).getLikelihoods()[7],
|
||||
((GLFSingleCall) mRecord).getLikelihoods()[8],
|
||||
((GLFSingleCall) mRecord).getLikelihoods()[9]
|
||||
|
||||
|
||||
);
|
||||
}
|
||||
|
||||
public String repl() {
|
||||
return this.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Used by the ROD system to determine how to split input lines
|
||||
*
|
||||
* @return Regex string delimiter separating fields
|
||||
*/
|
||||
public String delimiterRegex() {
|
||||
return "";
|
||||
}
|
||||
|
||||
/**
|
||||
* return a genome loc representing the current location
|
||||
*
|
||||
* @return the geonome loc
|
||||
*/
|
||||
public GenomeLoc getLocation() {
|
||||
return mLoc;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference base(s) at this position
|
||||
*
|
||||
* @return the reference base or bases, as a string
|
||||
*/
|
||||
public String getReference() {
|
||||
return mRecord.getRefBase().toString();
|
||||
}
|
||||
|
||||
/** are we bi-allelic? */
|
||||
public boolean isBiallelic() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if all observed alleles are reference alleles. All is<Variant> methods (where Variant=SNP,Insertion, etc) should
|
||||
* return false at such site to ensure consistency. This method is included for use with genotyping calls (isGenotype()==true), it makes
|
||||
* no sense for, e.g. dbSNP and should return false for the latter.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public boolean isReference() {
|
||||
return (!isSNP());
|
||||
}
|
||||
|
||||
/**
|
||||
* are we an insertion or a deletion? yes, then return true. No? Well, false it is.
|
||||
*
|
||||
* @return true if we're an insertion or deletion
|
||||
*/
|
||||
public boolean isIndel() {
|
||||
return (isDeletion() || isInsertion());
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
|
||||
* of
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
public char getAlternativeBaseForSNP() {
|
||||
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||
List<String> alleles = this.getAlternateAlleleList();
|
||||
if (alleles.size() != 1) throw new StingException("We're not biAllelic()");
|
||||
return Utils.stringToChar(alleles.get(0));
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
public char getReferenceForSNP() {
|
||||
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||
return Utils.stringToChar(getReference());
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this variant a SNP?
|
||||
*
|
||||
* @return true or false
|
||||
*/
|
||||
public boolean isSNP() {
|
||||
return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.SINGLE) &&
|
||||
(!getBestGenotype(1).toString().equals(refString(mRecord.getRefBase().toChar()))));
|
||||
}
|
||||
|
||||
/**
|
||||
* return a string representing the reference
|
||||
*
|
||||
* @param ref the reference character
|
||||
*
|
||||
* @return a string for the homozygous ref in a diploid
|
||||
*/
|
||||
private static String refString(char ref) {
|
||||
return new String(new char[]{ref, ref});
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the nth best genotype (one based), i.e. to get the best genotype pass in 1,
|
||||
* the second best 2, etdc.
|
||||
*
|
||||
* @param nthBest the nth best genotype to get (1 based, NOT ZERO BASED)
|
||||
*
|
||||
* @return a GENOTYPE object representing the nth best genotype
|
||||
*/
|
||||
public LikelihoodObject.GENOTYPE getBestGenotype(int nthBest) {
|
||||
Integer[] sorted = MathUtils.sortPermutation(((GLFSingleCall) mRecord).getLikelihoods());
|
||||
return LikelihoodObject.GENOTYPE.values()[sorted[nthBest - 1]];
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the nth best genotype value (one based), i.e. to get the best genotype pass in 1,
|
||||
* the second best 2, etdc.
|
||||
*
|
||||
* @param nthBest the nth best genotype value to get
|
||||
*
|
||||
* @return a GENOTYPE object representing the nth best genotype
|
||||
*/
|
||||
public double getBestGenotypeValue(int nthBest) {
|
||||
Integer[] sorted = MathUtils.sortPermutation(((GLFSingleCall) mRecord).getLikelihoods());
|
||||
return (((GLFSingleCall) mRecord).getLikelihoods())[sorted[nthBest - 1]];
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this variant an insertion? The contract requires isIndel() to return true
|
||||
* if this method returns true.
|
||||
*
|
||||
* @return true or false
|
||||
*/
|
||||
public boolean isInsertion() {
|
||||
return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) &&
|
||||
((GLFVariableLengthCall) mRecord).getIndelLen1() > 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this variant a deletion? The contract requires isIndel() to return true
|
||||
* if isDeletion() returns true.
|
||||
*
|
||||
* @return true or false
|
||||
*/
|
||||
public boolean isDeletion() {
|
||||
return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) &&
|
||||
((GLFVariableLengthCall) mRecord).getIndelLen1() < 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns minor allele frequency.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public double getNonRefAlleleFrequency() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns phred-mapped confidence in variation event (e.g. MAQ's SNP confidence, or AlleleCaller's best vs. ref).
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public double getNegLog10PError() {
|
||||
String ref = new String() + mRecord.getRefBase() + mRecord.getRefBase();
|
||||
int index = 0;
|
||||
for (LikelihoodObject.GENOTYPE g : LikelihoodObject.GENOTYPE.values()) {
|
||||
if (g.toString().equals(ref)) break;
|
||||
index++;
|
||||
}
|
||||
return Math.abs(getBestGenotypeValue(1) - ((GLFSingleCall) mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public List<String> getAlternateAlleleList() {
|
||||
LikelihoodObject.GENOTYPE genotype = getBestGenotype(1);
|
||||
List<String> ret = new ArrayList<String>();
|
||||
for (char c : genotype.toString().toCharArray()) {
|
||||
if (!String.valueOf(c).equals(this.getReference())) ret.add(String.valueOf(c));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles.
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public List<String> getAlleleList() {
|
||||
LikelihoodObject.GENOTYPE genotype = getBestGenotype(1);
|
||||
List<String> list = new ArrayList<String>();
|
||||
if (genotype.toString().contains(this.getReference())) list.add(this.getReference());
|
||||
for (char c : genotype.toString().toCharArray())
|
||||
if (c != Utils.stringToChar(getReference()))
|
||||
list.add(String.valueOf(c));
|
||||
return list;
|
||||
}
|
||||
|
||||
public int length() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
public int compareTo(ReferenceOrderedDatum that) {
|
||||
return this.mLoc.compareTo(that.getLocation());
|
||||
}
|
||||
|
||||
/**
|
||||
* the parse line, which is not used by the GLF rod
|
||||
*
|
||||
* @param header the header to pass in
|
||||
* @param parts the string object
|
||||
*
|
||||
* @return false, alwayss
|
||||
* @throws java.io.IOException
|
||||
*/
|
||||
public boolean parseLine(Object header, String[] parts) throws IOException {
|
||||
return false; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return (mReader.hasNext());
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the next element in the iteration.
|
||||
* @throws NoSuchElementException - iterator has no more elements.
|
||||
*/
|
||||
@Override
|
||||
public RodGLF next() {
|
||||
if (!this.hasNext()) throw new NoSuchElementException("RodGLF next called on iterator with no more elements");
|
||||
mRecord = mReader.next();
|
||||
mLoc = GenomeLocParser.createGenomeLoc(mRecord.getContig(), mRecord.getPosition(), mRecord.getPosition());
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("GLF Rods don't support the remove() function");
|
||||
}
|
||||
|
||||
public static RodGLF createIterator(String name, File file) {
|
||||
RodGLF glf = new RodGLF(name);
|
||||
try {
|
||||
glf.initialize(file);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Unable to find file " + file);
|
||||
}
|
||||
return glf;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -49,7 +49,6 @@ public class VariantContextAdaptors {
|
|||
adaptors.put(VCFRecord.class, new VCFRecordAdaptor());
|
||||
adaptors.put(PlinkRod.class, new PlinkRodAdaptor());
|
||||
adaptors.put(HapMapROD.class, new HapMapAdaptor());
|
||||
adaptors.put(RodGLF.class, new GLFAdaptor());
|
||||
adaptors.put(GeliTextFeature.class, new GeliTextAdaptor());
|
||||
adaptors.put(rodGELI.class, new GeliAdaptor());
|
||||
adaptors.put(VariantContext.class, new VariantContextAdaptor());
|
||||
|
|
@ -505,77 +504,6 @@ public class VariantContextAdaptors {
|
|||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// GLF to VariantContext
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
private static class GLFAdaptor extends VCAdaptor {
|
||||
/**
|
||||
* convert to a Variant Context, given:
|
||||
* @param name the name of the ROD
|
||||
* @param input the Rod object, in this case a RodGLF
|
||||
* @return a VariantContext object
|
||||
*/
|
||||
VariantContext convert(String name, Object input) {
|
||||
return convert(name, input, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* convert to a Variant Context, given:
|
||||
* @param name the name of the ROD
|
||||
* @param input the Rod object, in this case a RodGLF
|
||||
* @param ref the reference context
|
||||
* @return a VariantContext object
|
||||
*/
|
||||
VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
RodGLF glf = (RodGLF)input;
|
||||
|
||||
if ( ! Allele.acceptableAlleleBases(glf.getReference(),true) )
|
||||
return null;
|
||||
Allele refAllele = Allele.create(glf.getReference(), true);
|
||||
|
||||
// make sure we can convert it
|
||||
if ( glf.isSNP() || glf.isIndel()) {
|
||||
// add the reference allele
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
alleles.add(refAllele);
|
||||
|
||||
// add all of the alt alleles
|
||||
for ( String alt : glf.getAlternateAlleleList() ) {
|
||||
if ( ! Allele.acceptableAlleleBases(alt,false) ) {
|
||||
return null;
|
||||
}
|
||||
Allele allele = Allele.create(alt, false);
|
||||
if (!alleles.contains(allele)) alleles.add(allele);
|
||||
}
|
||||
|
||||
|
||||
Map<String, String> attributes = new HashMap<String, String>();
|
||||
Collection<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
MutableGenotype call = new MutableGenotype(name, alleles);
|
||||
|
||||
if (glf.mRecord instanceof GLFSingleCall) {
|
||||
// transform the likelihoods from negative log (positive double values) to log values (negitive values)
|
||||
LikelihoodObject obj = new LikelihoodObject(((GLFSingleCall)glf.mRecord).getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.NEGATIVE_LOG);
|
||||
obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||
|
||||
// set the likelihoods, depth, and RMS mapping quality values
|
||||
call.putAttribute(CalledGenotype.LIKELIHOODS_ATTRIBUTE_KEY,obj.toDoubleArray());
|
||||
call.putAttribute(VCFGenotypeRecord.DEPTH_KEY,(glf.mRecord.getReadDepth()));
|
||||
call.putAttribute(GLFWriter.RMS_MAPPING_QUAL, (double) glf.mRecord.getRmsMapQ());
|
||||
} else {
|
||||
throw new UnsupportedOperationException("We don't currenly support indel calls");
|
||||
}
|
||||
|
||||
// add the call to the genotype list, and then use this list to create a VariantContext
|
||||
genotypes.add(call);
|
||||
VariantContext vc = new VariantContext(name, glf.getLocation(), alleles, genotypes, glf.getNegLog10PError(), null, attributes);
|
||||
return vc;
|
||||
} else
|
||||
return null; // can't handle anything else
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
|
|||
|
|
@ -58,7 +58,6 @@ public class RODTrackBuilder implements RMDTrackBuilder {
|
|||
Types.put("Table", TabularROD.class);
|
||||
Types.put("HapMap", HapMapROD.class);
|
||||
Types.put("Intervals", IntervalRod.class);
|
||||
Types.put("GLF", RodGLF.class);
|
||||
Types.put("PicardDbSNP", rodPicardDbSNP.class);
|
||||
Types.put("Beagle", BeagleROD.class);
|
||||
Types.put("Plink", PlinkRod.class);
|
||||
|
|
|
|||
|
|
@ -1,191 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.junit.Assert;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: aaron
|
||||
* Date: Jul 15, 2009
|
||||
* Time: 12:18:50 AM
|
||||
* <p/>
|
||||
* These tests work upon a very small data set, with the following samtools glfview dump:
|
||||
* <p/>
|
||||
* chrM 1 A 5 20 0 0 127 127 127 254 254 254 254 254 254
|
||||
* chrM 2 A 5 20 0 254 254 254 127 254 254 127 254 127 0
|
||||
* chrM 3 A 5 20 0 254 127 254 254 0 127 127 254 254 254
|
||||
* <p/>
|
||||
* You'll notice that the first is a hom ref, and the other two are hom alt SNP's
|
||||
*/
|
||||
public class RodGLFUnitTest extends BaseTest {
|
||||
static final File glfFile = new File(validationDataLocation + "glfTestFile.glf");
|
||||
static final int finalRecordCount = 100; // the number of records in the above file
|
||||
static final int contigCount = 1;
|
||||
static final String ref = oneKGLocation + "reference/human_b36_both.fasta";
|
||||
static ReferenceSequenceFile r;
|
||||
private RodGLF iter = null;
|
||||
|
||||
@BeforeClass
|
||||
public static void before() {
|
||||
r = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(ref));
|
||||
GenomeLocParser.setupRefContigOrdering(r);
|
||||
}
|
||||
|
||||
@Before
|
||||
public void setup() {
|
||||
iter = RodGLF.createIterator("test", glfFile);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRodCount() {
|
||||
int counter = 0;
|
||||
while (iter.hasNext()) {
|
||||
RodGLF glf = iter.next();
|
||||
counter++;
|
||||
}
|
||||
assertEquals(finalRecordCount, counter);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void testIsSNP() {
|
||||
RodGLF glf = iter.next();
|
||||
Assert.assertFalse(iter.isSNP());
|
||||
glf = iter.next();
|
||||
Assert.assertTrue(iter.isSNP());
|
||||
glf = iter.next();
|
||||
Assert.assertTrue(iter.isSNP());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIsReference() {
|
||||
RodGLF glf = iter.next();
|
||||
Assert.assertTrue(iter.isReference());
|
||||
glf = iter.next();
|
||||
Assert.assertFalse(iter.isReference());
|
||||
glf = iter.next();
|
||||
Assert.assertFalse(iter.isReference());
|
||||
}
|
||||
|
||||
@Test(expected = IllegalStateException.class)
|
||||
public void testGetAltSnpFWDIllegalException() {
|
||||
RodGLF glf = iter.next();
|
||||
iter.getAlternativeBaseForSNP();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void testCompareTo() {
|
||||
RodGLF iter2 = RodGLF.createIterator("test", glfFile);
|
||||
RodGLF glf = iter.next();
|
||||
RodGLF glf2 = iter2.next();
|
||||
assertEquals(0, glf.compareTo(glf2));
|
||||
glf2 = iter2.next();
|
||||
assertEquals(-1, glf.compareTo(glf2));
|
||||
assertEquals(1, glf2.compareTo(glf));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetAltSnpFWD() {
|
||||
RodGLF glf = iter.next();
|
||||
glf = iter.next();
|
||||
Assert.assertEquals('C', iter.getAlternativeBaseForSNP());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetRefSnpFWD() {
|
||||
RodGLF glf = iter.next();
|
||||
glf = iter.next();
|
||||
glf = iter.next();
|
||||
Assert.assertEquals('A', iter.getReferenceForSNP());
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* move to the second and third bases, and check that the
|
||||
* alternate bases are correct.
|
||||
*
|
||||
@Test
|
||||
public void testGetAltBasesFWD() {
|
||||
RodGLF glf = iter.next();
|
||||
glf = iter.next();
|
||||
Assert.assertTrue("GT".equals(iter.getAltBasesFWD()));
|
||||
glf = iter.next();
|
||||
Assert.assertTrue("CT".equals(iter.getAltBasesFWD()));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRodLocations() {
|
||||
GenomeLoc loc = null;
|
||||
while (iter.hasNext()) {
|
||||
RodGLF glf = iter.next();
|
||||
if (loc != null) {
|
||||
if (iter.getLocation().isBefore(loc)) {
|
||||
Assert.fail("locations in the GLF came out of order loc = " + loc.toString() + " new loc = " + iter.getLocation().toString());
|
||||
}
|
||||
}
|
||||
loc = iter.getLocation();
|
||||
}
|
||||
}
|
||||
|
||||
//@Test
|
||||
/**
|
||||
* create the example glf file for the test, you can uncomment the above test line to have this
|
||||
* test run, regenerating the file.
|
||||
*
|
||||
public void createRodFile() {
|
||||
GenotypeWriter writer = new GLFWriter("", new File("glfTestFile.glf"));
|
||||
int location = 1;
|
||||
int x = 0;
|
||||
writer.addGenotypeCall(r.getSequenceDictionary().getSequence(0), 1, 20, 'A', 5, createLikelihood('A'));
|
||||
writer.addGenotypeCall(r.getSequenceDictionary().getSequence(0), 2, 20, 'A', 5, createLikelihood('T'));
|
||||
writer.addGenotypeCall(r.getSequenceDictionary().getSequence(0), 3, 20, 'A', 5, createLikelihood('C'));
|
||||
writer.close();
|
||||
}*/
|
||||
|
||||
/**
|
||||
* create a likelihood object, given the appropriate reference base
|
||||
*
|
||||
* @param ref the reference base
|
||||
*
|
||||
* @return the likelihood object
|
||||
*
|
||||
private LikelihoodObject createLikelihood(char ref) {
|
||||
ArrayList<Double> vals = new ArrayList<Double>();
|
||||
for (LikelihoodObject.GENOTYPE type : LikelihoodObject.GENOTYPE.values()) {
|
||||
double x = (type.toString().charAt(0) == ref) ? 0 : 127 - (10 * Math.random());
|
||||
x += (type.toString().charAt(1) == ref) ? 0 : 127 - (10 * Math.random());
|
||||
vals.add(x);
|
||||
}
|
||||
double ret[] = new double[vals.size()];
|
||||
for (int x = 0; x < vals.size(); x++) {
|
||||
ret[x] = vals.get(x);
|
||||
}
|
||||
return new LikelihoodObject(ret, LikelihoodObject.LIKELIHOOD_TYPE.NEGATIVE_LOG);
|
||||
}*/
|
||||
|
||||
|
||||
/**
|
||||
* just make sure that we do get a string back, and no exceptions are thrown
|
||||
*/
|
||||
@Test
|
||||
public void testToString() {
|
||||
RodGLF glf = iter.next();
|
||||
iter.toString();
|
||||
}
|
||||
}
|
||||
|
|
@ -51,70 +51,6 @@ public class VariantContextAdaptorsUnitTest extends BaseTest {
|
|||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* this test takes a known GLF file, reads in the records (storing them into an array),
|
||||
* and creates VariantContext records. These VC records are then outputted through a genotype writer,
|
||||
* and then read back in off of disk and compared to the original records. This way we are positive all
|
||||
* the information that encodes a GLF makes it into the VC and then out to disk.
|
||||
*/
|
||||
@Test
|
||||
public void testVariantContextGLFToGLF() {
|
||||
|
||||
// our input and output files
|
||||
File referenceFile = new File(validationDataLocation + "/well_formed.glf"); // our known good GLF
|
||||
File tempFile = new File("temp.glf"); // our temporary GLF output -> input file
|
||||
tempFile.deleteOnExit(); // delete when we're done
|
||||
|
||||
// create our genotype writer for GLFs
|
||||
GenotypeWriter gw = GenotypeWriterFactory.create(GenotypeWriterFactory.GENOTYPE_FORMAT.GLF,tempFile);
|
||||
((GLFWriter)gw).writeHeader("");
|
||||
|
||||
RodGLF glf = new RodGLF("myROD"); // now cycle the input file to the output file
|
||||
try {
|
||||
glf.initialize(referenceFile);
|
||||
} catch (FileNotFoundException e) {
|
||||
Assert.fail("Unable to open GLF file" + referenceFile);
|
||||
}
|
||||
|
||||
// buffer the records we see
|
||||
List<GLFSingleCall> records = new ArrayList<GLFSingleCall>();
|
||||
|
||||
// while we have records, make a Variant Context and output it to a GLF file
|
||||
while (glf.hasNext()) {
|
||||
glf.next();
|
||||
records.add((GLFSingleCall)glf.mRecord); // we know they're all single calls in the reference file
|
||||
VariantContext vc = VariantContextAdaptors.toVariantContext("GLF",glf, null);
|
||||
gw.addCall(vc,null);
|
||||
}
|
||||
gw.close(); // close the file
|
||||
|
||||
|
||||
// now reopen the file with the temp GLF file and read it back in, compare against what we first stored
|
||||
glf = new RodGLF("myROD");
|
||||
try {
|
||||
glf.initialize(tempFile);
|
||||
} catch (FileNotFoundException e) {
|
||||
Assert.fail("Unable to open GLF file" + tempFile);
|
||||
}
|
||||
|
||||
// buffer the new records we see
|
||||
List<GLFSingleCall> records2 = new ArrayList<GLFSingleCall>();
|
||||
|
||||
// while we have records, make a Variant Context and output it to a GLF file
|
||||
while (glf.hasNext()) {
|
||||
glf.next();
|
||||
records2.add((GLFSingleCall)glf.mRecord); // we know they're all single calls in the reference file
|
||||
}
|
||||
|
||||
// compare sizes
|
||||
Assert.assertEquals("The input GLF file doesn't contain the same number of records as we saw in the first file", records.size(),records2.size());
|
||||
|
||||
// now compare each record
|
||||
for (int x = 0; x < records.size(); x++)
|
||||
Assert.assertTrue("GLF Records were not preserved when cycling them to and from disc", records.get(x).equals(records2.get(x)));
|
||||
}
|
||||
|
||||
/**
|
||||
* this test takes a known Geli file, reads in the records (storing them into an array),
|
||||
* and creates VariantContext records. These VC records are then outputted through a genotype writer,
|
||||
|
|
|
|||
|
|
@ -211,7 +211,7 @@ public class VCF4UnitTest extends BaseTest {
|
|||
// test that we're getting the right genotype for what appears to be a multi-base polymorphism, but is really just a SNP
|
||||
String MNPLine2 = "20\t14370\trs6054257\tGT\tAT\t29\tPASS\tNS=3;DP=14;AF=0.5;DB;H2\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5:.,.";
|
||||
@Test
|
||||
public void testMNP2Validation() {
|
||||
public void testMNPWannabeButReallyASNPValidation() {
|
||||
TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile);
|
||||
VariantContext vc = (VariantContext)testSetup.codec.decode(MNPLine2);
|
||||
Map<String, Genotype> genotypes = vc.getGenotypes();
|
||||
|
|
@ -287,8 +287,7 @@ public class VCF4UnitTest extends BaseTest {
|
|||
try {
|
||||
testSetup.codec.decode(line);
|
||||
} catch (Exception e) {
|
||||
System.err.println(e.getMessage() + " -> " + line);
|
||||
System.err.println(line);
|
||||
Assert.fail("Bad record from line " + line + " message = " + e.getMessage());
|
||||
badRecordCount++;
|
||||
}
|
||||
line = reader.readLine();
|
||||
|
|
|
|||
Loading…
Reference in New Issue