2009-07-15 23:30:19 +08:00
|
|
|
package org.broadinstitute.sting.gatk.refdata;
|
|
|
|
|
|
|
|
|
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
|
|
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
2009-07-21 08:55:52 +08:00
|
|
|
import org.broadinstitute.sting.utils.StingException;
|
|
|
|
|
import org.broadinstitute.sting.utils.Utils;
|
|
|
|
|
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
2009-09-14 13:34:33 +08:00
|
|
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
2009-07-15 23:30:19 +08:00
|
|
|
import org.broadinstitute.sting.utils.genotype.glf.GLFReader;
|
|
|
|
|
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
|
|
|
|
|
import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall;
|
|
|
|
|
import org.broadinstitute.sting.utils.genotype.glf.VariableLengthCall;
|
|
|
|
|
|
|
|
|
|
import java.io.File;
|
2009-07-21 08:55:52 +08:00
|
|
|
import java.io.FileNotFoundException;
|
2009-07-15 23:30:19 +08:00
|
|
|
import java.io.IOException;
|
|
|
|
|
import java.util.Iterator;
|
2009-09-19 04:19:34 +08:00
|
|
|
import java.util.List;
|
|
|
|
|
import java.util.ArrayList;
|
2009-07-15 23:30:19 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @author aaron
|
|
|
|
|
* <p/>
|
|
|
|
|
* Class RodGLF
|
|
|
|
|
* <p/>
|
|
|
|
|
* the rod class for GLF data.
|
|
|
|
|
*/
|
2009-09-14 13:34:33 +08:00
|
|
|
public class RodGLF implements ReferenceOrderedDatum, Variation, Iterator<RodGLF> {
|
2009-07-21 08:55:52 +08:00
|
|
|
static int count = 0;
|
|
|
|
|
public GLFReader mReader;
|
|
|
|
|
private final String mName;
|
2009-07-15 23:30:19 +08:00
|
|
|
private GenomeLoc mLoc;
|
2009-07-21 08:55:52 +08:00
|
|
|
public GLFRecord mRecord;
|
2009-07-15 23:30:19 +08:00
|
|
|
|
|
|
|
|
public RodGLF(String name) {
|
2009-07-21 08:55:52 +08:00
|
|
|
mName = name;
|
2009-07-15 23:30:19 +08:00
|
|
|
}
|
|
|
|
|
|
2009-07-21 08:55:52 +08:00
|
|
|
/**
|
|
|
|
|
* get the name
|
2009-09-14 13:34:33 +08:00
|
|
|
*
|
2009-07-21 08:55:52 +08:00
|
|
|
* @return the name
|
|
|
|
|
*/
|
2009-07-15 23:30:19 +08:00
|
|
|
@Override
|
2009-07-21 08:55:52 +08:00
|
|
|
public String getName() {
|
|
|
|
|
return mName;
|
2009-07-15 23:30:19 +08:00
|
|
|
}
|
|
|
|
|
|
2009-07-21 08:55:52 +08:00
|
|
|
/**
|
|
|
|
|
* Backdoor hook to read header, meta-data, etc. associated with the file. Will be
|
|
|
|
|
* called by the ROD system before streaming starts
|
|
|
|
|
*
|
|
|
|
|
* @param source source data file on disk from which this rod stream will be pulled
|
|
|
|
|
*
|
|
|
|
|
* @return a header object that will be passed to parseLine command
|
|
|
|
|
*/
|
2009-07-15 23:30:19 +08:00
|
|
|
@Override
|
2009-07-21 08:55:52 +08:00
|
|
|
public Object initialize(File source) throws FileNotFoundException {
|
|
|
|
|
mReader = new GLFReader(source);
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public String toSimpleString() {
|
|
|
|
|
return toString();
|
|
|
|
|
}
|
|
|
|
|
|
2009-09-14 13:34:33 +08:00
|
|
|
/** @return a string representation of the ROD GLF object */
|
2009-07-15 23:30:19 +08:00
|
|
|
public String toString() {
|
2009-07-21 08:55:52 +08:00
|
|
|
return String.format("%s\t%d\t%s\t%d\t%d\t%4.4f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f",
|
2009-09-14 13:34:33 +08:00
|
|
|
mLoc.getContig(),
|
|
|
|
|
mLoc.getStart(),
|
|
|
|
|
mRecord.getRefBase(),
|
|
|
|
|
mRecord.getReadDepth(),
|
|
|
|
|
mRecord.getRmsMapQ(),
|
|
|
|
|
getBestGenotypeValue(1),
|
|
|
|
|
((SinglePointCall) mRecord).getLikelihoods()[0],
|
|
|
|
|
((SinglePointCall) mRecord).getLikelihoods()[1],
|
|
|
|
|
((SinglePointCall) mRecord).getLikelihoods()[2],
|
|
|
|
|
((SinglePointCall) mRecord).getLikelihoods()[3],
|
|
|
|
|
((SinglePointCall) mRecord).getLikelihoods()[4],
|
|
|
|
|
((SinglePointCall) mRecord).getLikelihoods()[5],
|
|
|
|
|
((SinglePointCall) mRecord).getLikelihoods()[6],
|
|
|
|
|
((SinglePointCall) mRecord).getLikelihoods()[7],
|
|
|
|
|
((SinglePointCall) mRecord).getLikelihoods()[8],
|
|
|
|
|
((SinglePointCall) mRecord).getLikelihoods()[9]
|
2009-07-21 08:55:52 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public String repl() {
|
|
|
|
|
return this.toString();
|
2009-07-15 23:30:19 +08:00
|
|
|
}
|
|
|
|
|
|
2009-07-21 08:55:52 +08:00
|
|
|
/**
|
|
|
|
|
* Used by the ROD system to determine how to split input lines
|
|
|
|
|
*
|
|
|
|
|
* @return Regex string delimiter separating fields
|
|
|
|
|
*/
|
|
|
|
|
@Override
|
|
|
|
|
public String delimiterRegex() {
|
|
|
|
|
return "";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* return a genome loc representing the current location
|
2009-09-14 13:34:33 +08:00
|
|
|
*
|
2009-07-21 08:55:52 +08:00
|
|
|
* @return the geonome loc
|
|
|
|
|
*/
|
2009-07-15 23:30:19 +08:00
|
|
|
@Override
|
|
|
|
|
public GenomeLoc getLocation() {
|
|
|
|
|
return mLoc;
|
|
|
|
|
}
|
|
|
|
|
|
2009-07-21 08:55:52 +08:00
|
|
|
/**
|
2009-09-14 13:34:33 +08:00
|
|
|
* get the reference base(s) at this position
|
2009-07-21 08:55:52 +08:00
|
|
|
*
|
2009-09-14 13:34:33 +08:00
|
|
|
* @return the reference base or bases, as a string
|
2009-07-21 08:55:52 +08:00
|
|
|
*/
|
|
|
|
|
@Override
|
2009-09-15 12:48:42 +08:00
|
|
|
public String getReference() {
|
|
|
|
|
return mRecord.getRefBase().toString();
|
2009-07-15 23:30:19 +08:00
|
|
|
}
|
|
|
|
|
|
2009-09-19 04:19:34 +08:00
|
|
|
/** are we bi-allelic? */
|
|
|
|
|
@Override
|
|
|
|
|
public boolean isBiallelic() {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2009-07-21 08:55:52 +08:00
|
|
|
/**
|
2009-09-14 13:34:33 +08:00
|
|
|
* Returns true if all observed alleles are reference alleles. All is<Variant> methods (where Variant=SNP,Insertion, etc) should
|
|
|
|
|
* return false at such site to ensure consistency. This method is included for use with genotyping calls (isGenotype()==true), it makes
|
|
|
|
|
* no sense for, e.g. dbSNP and should return false for the latter.
|
2009-07-21 08:55:52 +08:00
|
|
|
*
|
2009-09-14 13:34:33 +08:00
|
|
|
* @return
|
2009-07-21 08:55:52 +08:00
|
|
|
*/
|
|
|
|
|
@Override
|
2009-09-14 13:34:33 +08:00
|
|
|
public boolean isReference() {
|
|
|
|
|
return (!isSNP());
|
2009-07-21 08:55:52 +08:00
|
|
|
}
|
2009-07-15 23:30:19 +08:00
|
|
|
|
2009-07-21 08:55:52 +08:00
|
|
|
/**
|
2009-09-14 13:34:33 +08:00
|
|
|
* are we an insertion or a deletion? yes, then return true. No? Well, false it is.
|
2009-07-21 08:55:52 +08:00
|
|
|
*
|
2009-09-14 13:34:33 +08:00
|
|
|
* @return true if we're an insertion or deletion
|
2009-07-21 08:55:52 +08:00
|
|
|
*/
|
|
|
|
|
@Override
|
2009-09-14 13:34:33 +08:00
|
|
|
public boolean isIndel() {
|
|
|
|
|
return (isDeletion() || isInsertion());
|
2009-07-21 08:55:52 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
2009-09-14 13:34:33 +08:00
|
|
|
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
|
|
|
|
|
* of
|
2009-07-21 08:55:52 +08:00
|
|
|
*
|
2009-09-14 13:34:33 +08:00
|
|
|
* @return a char, representing the alternate base
|
2009-07-21 08:55:52 +08:00
|
|
|
*/
|
|
|
|
|
@Override
|
2009-09-14 13:34:33 +08:00
|
|
|
public char getAlternativeBaseForSNP() {
|
|
|
|
|
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
2009-09-19 04:19:34 +08:00
|
|
|
if (getAlternateBase().charAt(0) == this.getReference().charAt(0))
|
|
|
|
|
return getAlternateBase().charAt(1);
|
|
|
|
|
return getAlternateBase().charAt(0);
|
2009-09-14 13:34:33 +08:00
|
|
|
|
2009-07-21 08:55:52 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
2009-09-15 12:48:42 +08:00
|
|
|
* gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
|
|
|
|
|
*
|
|
|
|
|
* @return a char, representing the alternate base
|
|
|
|
|
*/
|
|
|
|
|
@Override
|
|
|
|
|
public char getReferenceForSNP() {
|
|
|
|
|
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
2009-09-19 04:19:34 +08:00
|
|
|
if (getAlternateBase().charAt(0) == this.getReference().charAt(0))
|
|
|
|
|
return getAlternateBase().charAt(0);
|
|
|
|
|
return getAlternateBase().charAt(1);
|
2009-09-15 12:48:42 +08:00
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
2009-07-21 08:55:52 +08:00
|
|
|
* Is this variant a SNP?
|
|
|
|
|
*
|
|
|
|
|
* @return true or false
|
|
|
|
|
*/
|
|
|
|
|
@Override
|
|
|
|
|
public boolean isSNP() {
|
|
|
|
|
return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.SINGLE) &&
|
|
|
|
|
(!getBestGenotype(1).toString().equals(refString(mRecord.getRefBase().toChar()))));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* return a string representing the reference
|
2009-09-14 13:34:33 +08:00
|
|
|
*
|
2009-07-21 08:55:52 +08:00
|
|
|
* @param ref the reference character
|
2009-09-14 13:34:33 +08:00
|
|
|
*
|
2009-07-21 08:55:52 +08:00
|
|
|
* @return a string for the homozygous ref in a diploid
|
|
|
|
|
*/
|
|
|
|
|
private static String refString(char ref) {
|
|
|
|
|
return new String(new char[]{ref, ref});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Get the nth best genotype (one based), i.e. to get the best genotype pass in 1,
|
|
|
|
|
* the second best 2, etdc.
|
|
|
|
|
*
|
|
|
|
|
* @param nthBest the nth best genotype to get
|
|
|
|
|
*
|
|
|
|
|
* @return a GENOTYPE object representing the nth best genotype
|
|
|
|
|
*/
|
|
|
|
|
public LikelihoodObject.GENOTYPE getBestGenotype(int nthBest) {
|
|
|
|
|
Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods());
|
2009-09-14 13:34:33 +08:00
|
|
|
return LikelihoodObject.GENOTYPE.values()[sorted[nthBest - 1]];
|
2009-07-21 08:55:52 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Get the nth best genotype value (one based), i.e. to get the best genotype pass in 1,
|
|
|
|
|
* the second best 2, etdc.
|
|
|
|
|
*
|
|
|
|
|
* @param nthBest the nth best genotype value to get
|
|
|
|
|
*
|
|
|
|
|
* @return a GENOTYPE object representing the nth best genotype
|
|
|
|
|
*/
|
|
|
|
|
public double getBestGenotypeValue(int nthBest) {
|
|
|
|
|
Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods());
|
2009-09-14 13:34:33 +08:00
|
|
|
return (((SinglePointCall) mRecord).getLikelihoods())[sorted[nthBest - 1]];
|
2009-07-21 08:55:52 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Is this variant an insertion? The contract requires isIndel() to return true
|
|
|
|
|
* if this method returns true.
|
|
|
|
|
*
|
|
|
|
|
* @return true or false
|
|
|
|
|
*/
|
|
|
|
|
@Override
|
|
|
|
|
public boolean isInsertion() {
|
|
|
|
|
return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) &&
|
|
|
|
|
((VariableLengthCall) mRecord).getIndelLen1() > 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Is this variant a deletion? The contract requires isIndel() to return true
|
|
|
|
|
* if isDeletion() returns true.
|
|
|
|
|
*
|
|
|
|
|
* @return true or false
|
|
|
|
|
*/
|
|
|
|
|
@Override
|
|
|
|
|
public boolean isDeletion() {
|
|
|
|
|
return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) &&
|
|
|
|
|
((VariableLengthCall) mRecord).getIndelLen1() < 0);
|
|
|
|
|
}
|
2009-07-15 23:30:19 +08:00
|
|
|
|
2009-07-21 08:55:52 +08:00
|
|
|
/**
|
2009-09-14 13:34:33 +08:00
|
|
|
* get the base representation of this Variant
|
2009-07-21 08:55:52 +08:00
|
|
|
*
|
2009-09-14 13:34:33 +08:00
|
|
|
* @return a string, of ploidy
|
2009-07-21 08:55:52 +08:00
|
|
|
*/
|
|
|
|
|
@Override
|
2009-09-19 04:19:34 +08:00
|
|
|
public String getAlternateBase() {
|
2009-09-14 13:34:33 +08:00
|
|
|
return this.getBestGenotype(0).toString();
|
2009-07-21 08:55:52 +08:00
|
|
|
}
|
|
|
|
|
|
2009-09-19 04:19:34 +08:00
|
|
|
/**
|
|
|
|
|
* gets the alternate bases. Use this method if teh allele count is greater then 2
|
|
|
|
|
*
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
@Override
|
|
|
|
|
public List<String> getAlternateBases() {
|
|
|
|
|
List<String> list = new ArrayList<String>();
|
|
|
|
|
list.add(this.getAlternateBase());
|
|
|
|
|
return list;
|
|
|
|
|
}
|
|
|
|
|
|
2009-07-21 08:55:52 +08:00
|
|
|
/**
|
|
|
|
|
* Returns minor allele frequency.
|
|
|
|
|
*
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
@Override
|
2009-09-14 13:34:33 +08:00
|
|
|
public double getNonRefAlleleFrequency() {
|
2009-07-21 11:49:15 +08:00
|
|
|
return 0;
|
2009-07-21 08:55:52 +08:00
|
|
|
}
|
|
|
|
|
|
2009-09-14 13:34:33 +08:00
|
|
|
/** @return the VARIANT_TYPE of the current variant */
|
2009-07-21 08:55:52 +08:00
|
|
|
@Override
|
2009-09-14 13:34:33 +08:00
|
|
|
public VARIANT_TYPE getType() {
|
|
|
|
|
if (this.isSNP()) return VARIANT_TYPE.SNP;
|
2009-09-19 04:19:34 +08:00
|
|
|
else if (this.isInsertion() || this.isDeletion()) return VARIANT_TYPE.INDEL;
|
2009-09-14 13:34:33 +08:00
|
|
|
else return VARIANT_TYPE.REFERENCE;
|
2009-07-21 08:55:52 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Returns phred-mapped confidence in variation event (e.g. MAQ's SNP confidence, or AlleleCaller's best vs. ref).
|
|
|
|
|
*
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
@Override
|
2009-09-14 13:34:33 +08:00
|
|
|
public double getNegLog10PError() {
|
2009-07-21 08:55:52 +08:00
|
|
|
String ref = new String() + mRecord.getRefBase() + mRecord.getRefBase();
|
|
|
|
|
int index = 0;
|
2009-09-14 13:34:33 +08:00
|
|
|
for (LikelihoodObject.GENOTYPE g : LikelihoodObject.GENOTYPE.values()) {
|
2009-07-21 08:55:52 +08:00
|
|
|
if (g.toString().equals(ref)) break;
|
|
|
|
|
index++;
|
2009-07-15 23:30:19 +08:00
|
|
|
}
|
2009-09-14 13:34:33 +08:00
|
|
|
return Math.abs(getBestGenotypeValue(1) - ((SinglePointCall) mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
|
2009-07-21 08:55:52 +08:00
|
|
|
}
|
|
|
|
|
|
2009-09-14 13:34:33 +08:00
|
|
|
public int length() {
|
|
|
|
|
return 1;
|
2009-07-21 08:55:52 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public int compareTo(ReferenceOrderedDatum that) {
|
|
|
|
|
return this.mLoc.compareTo(that.getLocation());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* the parse line, which is not used by the GLF rod
|
2009-09-14 13:34:33 +08:00
|
|
|
*
|
2009-07-21 08:55:52 +08:00
|
|
|
* @param header the header to pass in
|
2009-09-14 13:34:33 +08:00
|
|
|
* @param parts the string object
|
|
|
|
|
*
|
2009-07-21 08:55:52 +08:00
|
|
|
* @return false, alwayss
|
|
|
|
|
* @throws java.io.IOException
|
|
|
|
|
*/
|
|
|
|
|
@Override
|
|
|
|
|
public boolean parseLine(Object header, String[] parts) throws IOException {
|
|
|
|
|
return false; //To change body of implemented methods use File | Settings | File Templates.
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public boolean hasNext() {
|
|
|
|
|
return (mReader.hasNext());
|
|
|
|
|
}
|
2009-07-15 23:30:19 +08:00
|
|
|
|
2009-07-21 08:55:52 +08:00
|
|
|
@Override
|
|
|
|
|
public RodGLF next() {
|
|
|
|
|
mRecord = mReader.next();
|
|
|
|
|
mLoc = GenomeLocParser.createGenomeLoc(mReader.getReferenceName(), mReader.getCurrentLocation(), mReader.getCurrentLocation());
|
|
|
|
|
return this;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public void remove() {
|
|
|
|
|
throw new UnsupportedOperationException("GLF Rods don't support the remove() function");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static RodGLF createIterator(String name, File file) {
|
|
|
|
|
RodGLF glf = new RodGLF(name);
|
|
|
|
|
try {
|
|
|
|
|
glf.initialize(file);
|
|
|
|
|
} catch (FileNotFoundException e) {
|
|
|
|
|
throw new StingException("Unable to find file " + file);
|
2009-07-15 23:30:19 +08:00
|
|
|
}
|
2009-07-21 08:55:52 +08:00
|
|
|
return glf;
|
2009-07-15 23:30:19 +08:00
|
|
|
}
|
2009-09-11 23:01:50 +08:00
|
|
|
|
2009-07-15 23:30:19 +08:00
|
|
|
}
|
|
|
|
|
|