GLF rod as a AllelicVariant object.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1282 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f314ef8d84
commit
b4adb5133a
|
|
@ -2,9 +2,8 @@ package org.broadinstitute.sting.gatk.refdata;
|
|||
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
|
||||
* This class represents the Reference Metadata available at a particular site in the genome. It can be
|
||||
|
|
@ -70,10 +69,10 @@ public class RefMetaDataTracker {
|
|||
/**
|
||||
* Is there a binding at this site to a ROD with name?
|
||||
*
|
||||
* @param name
|
||||
* @return
|
||||
* @param name the name of the rod
|
||||
* @return true if it has the rod
|
||||
*/
|
||||
public Object hasROD(final String name) {
|
||||
public boolean hasROD(final String name) {
|
||||
return map.containsKey(canonicalName(name));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,14 +2,20 @@ package org.broadinstitute.sting.gatk.refdata;
|
|||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFReader;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.VariableLengthCall;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -19,109 +25,385 @@ import java.util.Iterator;
|
|||
* <p/>
|
||||
* the rod class for GLF data.
|
||||
*/
|
||||
public class RodGLF extends BasicReferenceOrderedDatum {
|
||||
public GLFRecord mRecord;
|
||||
public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<RodGLF> {
|
||||
static int count = 0;
|
||||
public GLFReader mReader;
|
||||
private final String mName;
|
||||
private GenomeLoc mLoc;
|
||||
private static GLFRODIterator mWrap;
|
||||
private String contig;
|
||||
private int contigLength;
|
||||
public GLFRecord mRecord;
|
||||
|
||||
public RodGLF(String name) {
|
||||
super(name);
|
||||
mName = name;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the name
|
||||
* @return the name
|
||||
*/
|
||||
@Override
|
||||
public String getName() {
|
||||
return mName;
|
||||
}
|
||||
|
||||
/**
|
||||
* Backdoor hook to read header, meta-data, etc. associated with the file. Will be
|
||||
* called by the ROD system before streaming starts
|
||||
*
|
||||
* @param source source data file on disk from which this rod stream will be pulled
|
||||
*
|
||||
* @return a header object that will be passed to parseLine command
|
||||
*/
|
||||
@Override
|
||||
public Object initialize(File source) throws FileNotFoundException {
|
||||
mReader = new GLFReader(source);
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toSimpleString() {
|
||||
return toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return a string representation of the ROD GLF object
|
||||
*/
|
||||
public String toString() {
|
||||
return String.format("%s\t%d\t%s\t%d\t%d\t%4.4f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f",
|
||||
mLoc.getContig(),
|
||||
mLoc.getStart(),
|
||||
mRecord.getRefBase(),
|
||||
mRecord.getReadDepth(),
|
||||
mRecord.getRmsMapQ(),
|
||||
getBestGenotypeValue(1),
|
||||
((SinglePointCall)mRecord).getLikelihoods()[0],
|
||||
((SinglePointCall)mRecord).getLikelihoods()[1],
|
||||
((SinglePointCall)mRecord).getLikelihoods()[2],
|
||||
((SinglePointCall)mRecord).getLikelihoods()[3],
|
||||
((SinglePointCall)mRecord).getLikelihoods()[4],
|
||||
((SinglePointCall)mRecord).getLikelihoods()[5],
|
||||
((SinglePointCall)mRecord).getLikelihoods()[6],
|
||||
((SinglePointCall)mRecord).getLikelihoods()[7],
|
||||
((SinglePointCall)mRecord).getLikelihoods()[8],
|
||||
((SinglePointCall)mRecord).getLikelihoods()[9]
|
||||
|
||||
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String repl() {
|
||||
return this.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Used by the ROD system to determine how to split input lines
|
||||
*
|
||||
* @return Regex string delimiter separating fields
|
||||
*/
|
||||
@Override
|
||||
public String delimiterRegex() {
|
||||
return "";
|
||||
}
|
||||
|
||||
/**
|
||||
* return a genome loc representing the current location
|
||||
* @return the geonome loc
|
||||
*/
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return mLoc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns bases in the reference allele as a String. String can be empty (as in insertion into
|
||||
* the reference), can contain a single character (as in SNP or one-base deletion), or multiple characters
|
||||
* (for longer indels).
|
||||
*
|
||||
* @return reference allele, forward strand
|
||||
*/
|
||||
@Override
|
||||
public String getRefBasesFWD() {
|
||||
if (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) return "";
|
||||
return String.valueOf(mRecord.getRefBase());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns reference (major) allele base for a SNP variant as a character; should throw IllegalStateException
|
||||
* if variant is not a SNP.
|
||||
*
|
||||
* This doesn't make much sense to me, what if the best genotype is hom non-ref?
|
||||
*
|
||||
* @return reference base on the forward strand
|
||||
*/
|
||||
@Override
|
||||
public char getRefSnpFWD() throws IllegalStateException {
|
||||
if (!isSNP()) throw new IllegalStateException("Current GLF Record is not a SNP");
|
||||
return mRecord.getRefBase().toChar();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns bases in the alternative allele as a String. String can be empty (as in deletion from
|
||||
* the reference), can contain a single character (as in SNP or one-base insertion), or multiple characters
|
||||
* (for longer indels).
|
||||
*
|
||||
* @return alternative allele, forward strand
|
||||
*/
|
||||
@Override
|
||||
public String getAltBasesFWD() {
|
||||
return getBestGenotype(2).toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns alternative (minor) allele base for a SNP variant as a character; should throw IllegalStateException
|
||||
* if variant is not a SNP.
|
||||
*
|
||||
* @return alternative allele base on the forward starnd
|
||||
*/
|
||||
@Override
|
||||
public char getAltSnpFWD() throws IllegalStateException {
|
||||
if (!isSNP()) {
|
||||
throw new IllegalStateException("Not a SNP");
|
||||
}
|
||||
String str = getBestGenotype(1).toString();
|
||||
if (String.valueOf(str.charAt(0)).equals(mRecord.getRefBase().toString())) {
|
||||
return str.charAt(1);
|
||||
}
|
||||
return str.charAt(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if all observed alleles are reference alleles. All is<Variant> methods (where Variant=SNP,Insertion, etc) should
|
||||
* return false at such site to ensure consistency. This method is included for use with genotyping calls (isGenotype()==true), it makes
|
||||
* no sense for, e.g. dbSNP and should return false for the latter.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public boolean isReference() {
|
||||
return (!isSNP());
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this variant a SNP?
|
||||
*
|
||||
* @return true or false
|
||||
*/
|
||||
@Override
|
||||
public boolean isSNP() {
|
||||
return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.SINGLE) &&
|
||||
(!getBestGenotype(1).toString().equals(refString(mRecord.getRefBase().toChar()))));
|
||||
}
|
||||
|
||||
/**
|
||||
* return a string representing the reference
|
||||
* @param ref the reference character
|
||||
* @return a string for the homozygous ref in a diploid
|
||||
*/
|
||||
private static String refString(char ref) {
|
||||
return new String(new char[]{ref, ref});
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the nth best genotype (one based), i.e. to get the best genotype pass in 1,
|
||||
* the second best 2, etdc.
|
||||
*
|
||||
* @param nthBest the nth best genotype to get
|
||||
*
|
||||
* @return a GENOTYPE object representing the nth best genotype
|
||||
*/
|
||||
public LikelihoodObject.GENOTYPE getBestGenotype(int nthBest) {
|
||||
Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods());
|
||||
return LikelihoodObject.GENOTYPE.values()[sorted[nthBest-1]];
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the nth best genotype value (one based), i.e. to get the best genotype pass in 1,
|
||||
* the second best 2, etdc.
|
||||
*
|
||||
* @param nthBest the nth best genotype value to get
|
||||
*
|
||||
* @return a GENOTYPE object representing the nth best genotype
|
||||
*/
|
||||
public double getBestGenotypeValue(int nthBest) {
|
||||
Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods());
|
||||
return (((SinglePointCall) mRecord).getLikelihoods())[sorted[nthBest-1]];
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this variant an insertion? The contract requires isIndel() to return true
|
||||
* if this method returns true.
|
||||
*
|
||||
* @return true or false
|
||||
*/
|
||||
@Override
|
||||
public boolean isInsertion() {
|
||||
return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) &&
|
||||
((VariableLengthCall) mRecord).getIndelLen1() > 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this variant a deletion? The contract requires isIndel() to return true
|
||||
* if isDeletion() returns true.
|
||||
*
|
||||
* @return true or false
|
||||
*/
|
||||
@Override
|
||||
public boolean isDeletion() {
|
||||
return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) &&
|
||||
((VariableLengthCall) mRecord).getIndelLen1() < 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this variant an insertion or a deletion? The contract requires
|
||||
* this to be true if either isInsertion() or isDeletion() returns true. However,
|
||||
* this method is currently allowed to return true even if neither of isInsertion()
|
||||
* and isDeletion() does.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public boolean isIndel() {
|
||||
return (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns minor allele frequency.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public double getMAF() {
|
||||
throw new UnsupportedOperationException("getMAF is unsupported");
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns heterozygosity, a more accessible general feature of a variant.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public double getHeterozygosity() {
|
||||
throw new UnsupportedOperationException("getHeterozygosity is unsupported");
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this variant an actual genotype (such as individual call from sequencing, HapMap chip etc), or
|
||||
* population allelic variant (call from pooled sequencing, dbSNP site etc). Only if variant is a genotype, there
|
||||
* is a meaningful question of, e.g., whether it is a het, or homogeneous non-ref.
|
||||
*
|
||||
* @return true if this variant is an actual genotype.
|
||||
*/
|
||||
@Override
|
||||
public boolean isGenotype() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns phred-mapped confidence in variation event (e.g. MAQ's SNP confidence, or AlleleCaller's best vs. ref).
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public double getVariationConfidence() {
|
||||
String ref = new String() + mRecord.getRefBase() + mRecord.getRefBase();
|
||||
int index = 0;
|
||||
for (LikelihoodObject.GENOTYPE g: LikelihoodObject.GENOTYPE.values()) {
|
||||
if (g.toString().equals(ref)) break;
|
||||
index++;
|
||||
}
|
||||
return Math.abs(getBestGenotypeValue(1) - ((SinglePointCall)mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns phred-mapped confidence in called genotype (e.g. MAQ's consensus confidence, or AlleleCaller's
|
||||
* best vs next-best.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public double getConsensusConfidence() {
|
||||
return Math.abs(getBestGenotypeValue(1) - getBestGenotypeValue(2)) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns actual observed genotype. Allowed to return more than two alleles (@see #getPloidy()). If this variant
|
||||
* is not a genotype, should throw an IllegalStateException.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<String> getGenotype() throws IllegalStateException {
|
||||
List<String> ret = new ArrayList<String>();
|
||||
ret.add(this.getBestGenotype(1).toString());
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return actual number of observed alleles (chromosomes) in the genotype. If this variant is not a genotype,
|
||||
* should throw IllegalStateException.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public int getPloidy() throws IllegalStateException {
|
||||
return 2; // we're so haploid it hurts
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the site has at most two known or observed alleles (ref and non-ref), or false if there are > 2 allelic variants known or observed. When
|
||||
* the implementing class is a genotype, alleles should be always counted including the reference allele whether it was observed in the particular
|
||||
* individual or not: i.e. if the reference is 'C', then both 'CA' and 'AA' genotypes must be reported as bi-allelic, while 'AT' is <i>not</i> bi-allelic (since there are
|
||||
* two allelic variants, 'A' and 'T' <i>in addition</i> to the (known) reference variant 'C').
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public boolean isBiallelic() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(ReferenceOrderedDatum that) {
|
||||
return this.mLoc.compareTo(that.getLocation());
|
||||
}
|
||||
|
||||
/**
|
||||
* the parse line, which is not used by the GLF rod
|
||||
* @param header the header to pass in
|
||||
* @param parts the string object
|
||||
* @return false, alwayss
|
||||
* @throws java.io.IOException
|
||||
*/
|
||||
@Override
|
||||
public boolean parseLine(Object header, String[] parts) throws IOException {
|
||||
return false; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder builder = new StringBuilder();
|
||||
builder.append(contig);
|
||||
builder.append("\t");
|
||||
builder.append(mRecord.getOffset());
|
||||
builder.append("\t");
|
||||
builder.append(mRecord.getRefBase());
|
||||
builder.append("\t");
|
||||
builder.append(mRecord.getReadDepth());
|
||||
builder.append("\t");
|
||||
builder.append(mRecord.getRmsMapQ());
|
||||
builder.append("\t");
|
||||
builder.append(mRecord.getMinimumLikelihood());
|
||||
|
||||
if (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.SINGLE) {
|
||||
for (double d : ((SinglePointCall) mRecord).getLikelihoods()) {
|
||||
builder.append("\t");
|
||||
builder.append(d);
|
||||
}
|
||||
} else if (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) {
|
||||
VariableLengthCall call = (VariableLengthCall) mRecord;
|
||||
builder.append("\t");
|
||||
builder.append(call.getLkHom1());
|
||||
builder.append("\t");
|
||||
builder.append(call.getLkHom2());
|
||||
builder.append("\t");
|
||||
builder.append(call.getLkHet());
|
||||
builder.append("\t");
|
||||
builder.append(call.getIndelLen1());
|
||||
builder.append("\t");
|
||||
builder.append(call.getIndelSeq1());
|
||||
builder.append("\t");
|
||||
builder.append(call.getIndelLen2());
|
||||
builder.append("\t");
|
||||
builder.append(call.getIndelSeq2());
|
||||
}
|
||||
return builder.toString();
|
||||
public boolean hasNext() {
|
||||
return (mReader.hasNext());
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return mLoc;
|
||||
public RodGLF next() {
|
||||
mRecord = mReader.next();
|
||||
mLoc = GenomeLocParser.createGenomeLoc(mReader.getReferenceName(), mReader.getCurrentLocation(), mReader.getCurrentLocation());
|
||||
return this;
|
||||
}
|
||||
|
||||
public static Iterator<RodGLF> createIterator(String name, File file) {
|
||||
if (mWrap == null) {
|
||||
return new RodGLF.GLFWrapper(name, file);
|
||||
}
|
||||
return mWrap;
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("GLF Rods don't support the remove() function");
|
||||
}
|
||||
|
||||
static void setGLFWrapper(GLFRODIterator iter) {
|
||||
mWrap = iter;
|
||||
}
|
||||
|
||||
private static class GLFWrapper implements GLFRODIterator {
|
||||
private GLFReader mReader;
|
||||
private String mName;
|
||||
|
||||
public GLFWrapper(String name, File file) {
|
||||
mName = name;
|
||||
mReader = new GLFReader(file);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return (mReader.hasNext());
|
||||
}
|
||||
|
||||
@Override
|
||||
public RodGLF next() {
|
||||
RodGLF glf = new RodGLF(mName);
|
||||
glf.mRecord = mReader.next();
|
||||
glf.mLoc = GenomeLocParser.createGenomeLoc(mReader.getReferenceName(), mReader.getCurrentLocation());
|
||||
glf.contig = mReader.getReferenceName();
|
||||
glf.contigLength = mReader.getReferenceLength();
|
||||
return glf;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("GLF Rods don't support the remove() function");
|
||||
public static RodGLF createIterator(String name, File file) {
|
||||
RodGLF glf = new RodGLF(name);
|
||||
try {
|
||||
glf.initialize(file);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Unable to find file " + file);
|
||||
}
|
||||
return glf;
|
||||
}
|
||||
}
|
||||
|
||||
// for testing
|
||||
interface GLFRODIterator extends Iterator<RodGLF> {
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,18 +1,19 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.SNPCallFromGenotypes;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
import java.io.*;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
|
|
@ -146,50 +147,9 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
// OMG this is painful
|
||||
private rodVariants glf2geli(char ref, RodGLF glf) {
|
||||
SinglePointCall rec = (SinglePointCall)glf.mRecord;
|
||||
// contig pos refBase depth maxMappingQ bestGenotype lodBtr lodBtnb genotypes
|
||||
Integer[] sorted = Utils.SortPermutation(rec.getLikelihoods());
|
||||
int bestIndex = sorted[0];
|
||||
char[] refs = {ref, ref};
|
||||
String homRef = new String(refs);
|
||||
int refIndex = rodVariants.Genotype.valueOf(homRef).ordinal();
|
||||
double refLikelihood = rec.getLikelihoods()[refIndex];
|
||||
double bestLikelihood = rec.getLikelihoods()[bestIndex];
|
||||
double secondBestLikelihood = rec.getLikelihoods()[sorted[1]];
|
||||
|
||||
rodVariants var = new rodVariants("eval");
|
||||
var.loc = glf.getLocation();
|
||||
var.refBase = ref;
|
||||
var.depth = rec.getReadDepth();
|
||||
var.maxMappingQuality = rec.getRmsMapQ();
|
||||
var.bestGenotype = rodVariants.Genotype.values()[bestIndex].toString();
|
||||
var.lodBtr = Math.abs((bestLikelihood - refLikelihood) / GLFRecord.LIKELIHOOD_SCALE_FACTOR);
|
||||
var.lodBtnb = Math.abs((bestLikelihood - secondBestLikelihood) / GLFRecord.LIKELIHOOD_SCALE_FACTOR);
|
||||
var.genotypeLikelihoods = rec.getLikelihoods();
|
||||
for ( int i = 0; i < var.genotypeLikelihoods.length; i++ )
|
||||
var.genotypeLikelihoods[i] /= GLFRecord.LIKELIHOOD_SCALE_FACTOR;
|
||||
|
||||
if ( false ) {
|
||||
System.out.printf("Converting : %s%n", glf);
|
||||
System.out.printf(" homRef: %s%n", homRef);
|
||||
System.out.printf(" refindex : %d%n", refIndex);
|
||||
System.out.printf(" bestIndex : %d%n", sorted[0]);
|
||||
System.out.printf(" 2ndindex : %d%n", sorted[1]);
|
||||
System.out.printf(" => %s%n", var);
|
||||
}
|
||||
|
||||
return var;
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||
nSites++;
|
||||
|
||||
if ( tracker.lookup("eval", null) instanceof RodGLF) {
|
||||
tracker.bind("eval", glf2geli(ref, (RodGLF)tracker.lookup("eval", null)));
|
||||
}
|
||||
|
||||
// Iterate over each analysis, and update it
|
||||
AllelicVariant eval = (AllelicVariant)tracker.lookup("eval", null);
|
||||
|
||||
|
|
|
|||
|
|
@ -2,10 +2,6 @@ package org.broadinstitute.sting.utils.genotype.glf;
|
|||
|
||||
import net.sf.samtools.util.BinaryCodec;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
|
||||
|
||||
/*
|
||||
|
|
@ -87,6 +83,14 @@ public abstract class GLFRecord {
|
|||
fieldValue = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* return the character representation
|
||||
* @return the char for the reference base
|
||||
*/
|
||||
public char toChar() {
|
||||
return this.toString().charAt(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* static method from returning a REF_BASE given the character representation
|
||||
*
|
||||
|
|
|
|||
|
|
@ -5,80 +5,55 @@ import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
|
||||
import org.junit.Assert;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.fail;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Iterator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: aaronmckenna
|
||||
* User: aaron
|
||||
* Date: Jul 15, 2009
|
||||
* Time: 12:18:50 AM
|
||||
* <p/>
|
||||
* These tests work upon a very small data set, with the following samtools glfview dump:
|
||||
* <p/>
|
||||
* chrM 1 A 5 20 0 0 127 127 127 254 254 254 254 254 254
|
||||
* chrM 2 A 5 20 0 254 254 254 127 254 254 127 254 127 0
|
||||
* chrM 3 A 5 20 0 254 127 254 254 0 127 127 254 254 254
|
||||
* <p/>
|
||||
* You'll notice that the first is a hom ref, and the other two are hom alt SNP's
|
||||
*/
|
||||
public class RodGLFTest extends BaseTest {
|
||||
static final File glfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test_likelihoods.glf");
|
||||
static final int finalRecordCount = 484140; // the number of records in the above file
|
||||
static final int contigCount = 25;
|
||||
static final File glfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/glfTestFile.glf");
|
||||
static final int finalRecordCount = 3; // the number of records in the above file
|
||||
static final int contigCount = 1;
|
||||
static final String ref = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta";
|
||||
static ReferenceSequenceFile r;
|
||||
private RodGLF iter = null;
|
||||
|
||||
@BeforeClass
|
||||
public static void before() {
|
||||
ReferenceSequenceFile r = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(ref));
|
||||
r = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(ref));
|
||||
GenomeLocParser.setupRefContigOrdering(r);
|
||||
}
|
||||
|
||||
static class TestRODClass implements GLFRODIterator {
|
||||
private int count = 0;
|
||||
private int readCount = 0;
|
||||
|
||||
public TestRODClass(int count) {
|
||||
this.count = count;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
if (count <= 0) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public RodGLF next() {
|
||||
RodGLF glf = new RodGLF("Test");
|
||||
glf.mRecord = new SinglePointCall('A', readCount, 0, (short) 0, new double[]{0.0, 0.0});
|
||||
--count;
|
||||
readCount++;
|
||||
return glf;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("BOO");
|
||||
}
|
||||
}
|
||||
|
||||
//@Test
|
||||
public void testNext() {
|
||||
RodGLF.setGLFWrapper(new RodGLFTest.TestRODClass(10));
|
||||
Iterator<RodGLF> iter = RodGLF.createIterator("test", new File(""));
|
||||
int counter = 0;
|
||||
while (iter.hasNext()) {
|
||||
iter.next();
|
||||
counter++;
|
||||
}
|
||||
assertEquals(10, counter);
|
||||
@Before
|
||||
public void setup() {
|
||||
iter = RodGLF.createIterator("test", glfFile);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRodCount() {
|
||||
|
||||
Iterator<RodGLF> iter = RodGLF.createIterator("test", glfFile);
|
||||
int counter = 0;
|
||||
while (iter.hasNext()) {
|
||||
RodGLF glf = iter.next();
|
||||
|
|
@ -87,21 +62,173 @@ public class RodGLFTest extends BaseTest {
|
|||
assertEquals(finalRecordCount, counter);
|
||||
}
|
||||
|
||||
/** make sure we're returning true to biallelic */
|
||||
@Test
|
||||
public void testIsBiallelic() {
|
||||
RodGLF glf = iter.next();
|
||||
Assert.assertFalse(iter.isBiallelic());
|
||||
}
|
||||
|
||||
// best vs next-best
|
||||
@Test
|
||||
public void testGetConsensusConfidence() {
|
||||
RodGLF glf = iter.next();
|
||||
assertEquals(120.0, iter.getConsensusConfidence(), 0.005);
|
||||
glf = iter.next();
|
||||
assertEquals(120.0, iter.getConsensusConfidence(), 0.005);
|
||||
}
|
||||
|
||||
// best vs. ref
|
||||
@Test
|
||||
public void testGetVariationConfidence() {
|
||||
RodGLF glf = iter.next();
|
||||
assertEquals(0.0, iter.getVariationConfidence(), 0.005);
|
||||
glf = iter.next();
|
||||
assertEquals(250.0, iter.getVariationConfidence(), 0.005);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testGetGenotype() {
|
||||
RodGLF glf = iter.next();
|
||||
List<String> str = iter.getGenotype();
|
||||
Assert.assertTrue(str.get(0).equals("AA"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIsSNP() {
|
||||
RodGLF glf = iter.next();
|
||||
Assert.assertFalse(iter.isSNP());
|
||||
glf = iter.next();
|
||||
Assert.assertTrue(iter.isSNP());
|
||||
glf = iter.next();
|
||||
Assert.assertTrue(iter.isSNP());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIsReference() {
|
||||
RodGLF glf = iter.next();
|
||||
Assert.assertTrue(iter.isReference());
|
||||
glf = iter.next();
|
||||
Assert.assertFalse(iter.isReference());
|
||||
glf = iter.next();
|
||||
Assert.assertFalse(iter.isReference());
|
||||
}
|
||||
|
||||
@Test(expected = IllegalStateException.class)
|
||||
public void testGetAltSnpFWDIllegalException() {
|
||||
RodGLF glf = iter.next();
|
||||
iter.getAltSnpFWD();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void testCompareTo() {
|
||||
RodGLF iter2 = RodGLF.createIterator("test", glfFile);
|
||||
RodGLF glf = iter.next();
|
||||
glf = iter2.next();
|
||||
assertEquals(0, iter.compareTo(iter2));
|
||||
RodGLF glf2 = iter.next();
|
||||
assertEquals(-1, iter2.compareTo(iter));
|
||||
assertEquals(1, iter.compareTo(iter2));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetAltSnpFWD() {
|
||||
RodGLF glf = iter.next();
|
||||
glf = iter.next();
|
||||
Assert.assertEquals('T', iter.getAltSnpFWD());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetRefSnpFWD() {
|
||||
RodGLF glf = iter.next();
|
||||
glf = iter.next();
|
||||
Assert.assertEquals('A', iter.getRefSnpFWD());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetRefBasesFWD() {
|
||||
RodGLF glf = iter.next();
|
||||
Assert.assertTrue("A".equals(iter.getRefBasesFWD()));
|
||||
glf = iter.next();
|
||||
Assert.assertTrue("A".equals(iter.getRefBasesFWD()));
|
||||
}
|
||||
|
||||
/**
|
||||
* move to the second and third bases, and check that the
|
||||
* alternate bases are correct.
|
||||
*/
|
||||
@Test
|
||||
public void testGetAltBasesFWD() {
|
||||
RodGLF glf = iter.next();
|
||||
glf = iter.next();
|
||||
Assert.assertTrue("GT".equals(iter.getAltBasesFWD()));
|
||||
glf = iter.next();
|
||||
Assert.assertTrue("CT".equals(iter.getAltBasesFWD()));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRodLocations() {
|
||||
|
||||
Iterator<RodGLF> iter = RodGLF.createIterator("test", glfFile);
|
||||
GenomeLoc loc = null;
|
||||
while (iter.hasNext()) {
|
||||
RodGLF glf = iter.next();
|
||||
if (loc != null) {
|
||||
if (glf.getLocation().isBefore(loc)) {
|
||||
fail("locations in the GLF came out of order loc = " + loc.toString() + " new loc = " + glf.getLocation().toString());
|
||||
if (iter.getLocation().isBefore(loc)) {
|
||||
fail("locations in the GLF came out of order loc = " + loc.toString() + " new loc = " + iter.getLocation().toString());
|
||||
}
|
||||
}
|
||||
loc = glf.getLocation();
|
||||
}
|
||||
loc = iter.getLocation();
|
||||
}
|
||||
}
|
||||
|
||||
//@Test
|
||||
/**
|
||||
* create the example glf file for the test, you can uncomment the above test line to have this
|
||||
* test run, regenerating the file.
|
||||
*/
|
||||
public void createRodFile() {
|
||||
GenotypeWriter writer = new GLFWriter("", new File("glfTestFile.glf"));
|
||||
int location = 1;
|
||||
int x = 0;
|
||||
writer.addGenotypeCall(r.getSequenceDictionary().getSequence(0), 1, 20, 'A', 5, createLikelihood('A'));
|
||||
writer.addGenotypeCall(r.getSequenceDictionary().getSequence(0), 2, 20, 'A', 5, createLikelihood('T'));
|
||||
writer.addGenotypeCall(r.getSequenceDictionary().getSequence(0), 3, 20, 'A', 5, createLikelihood('C'));
|
||||
writer.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* create a likelihood object, given the appropriate reference base
|
||||
*
|
||||
* @param ref the reference base
|
||||
*
|
||||
* @return the likelihood object
|
||||
*/
|
||||
private LikelihoodObject createLikelihood(char ref) {
|
||||
ArrayList<Double> vals = new ArrayList<Double>();
|
||||
for (LikelihoodObject.GENOTYPE type : LikelihoodObject.GENOTYPE.values()) {
|
||||
double x = (type.toString().charAt(0) == ref) ? 0 : 127 - (10 * Math.random());
|
||||
x += (type.toString().charAt(1) == ref) ? 0 : 127 - (10 * Math.random());
|
||||
vals.add(x);
|
||||
}
|
||||
double ret[] = new double[vals.size()];
|
||||
for (int x = 0; x < vals.size(); x++) {
|
||||
ret[x] = vals.get(x);
|
||||
}
|
||||
return new LikelihoodObject(ret, LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
/**
|
||||
* just make sure that we do get a string back, and no exceptions are thrown
|
||||
*/
|
||||
@Test
|
||||
public void testToString() {
|
||||
RodGLF glf = iter.next();
|
||||
iter.toString();
|
||||
}
|
||||
}
|
||||
|
|
@ -21,7 +21,9 @@ public class GLFReaderTest extends BaseTest {
|
|||
|
||||
// our test file
|
||||
static final File glfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test_likelihoods.glf");
|
||||
//static final File glfFile = new File("CALLS.glf");
|
||||
static final int finalRecordCount = 484140; // the number of records in the above file
|
||||
//static final int finalRecordCount = 484445;
|
||||
static final int contigCount = 25;
|
||||
|
||||
/** read in the records from the file */
|
||||
|
|
|
|||
Loading…
Reference in New Issue