Changes to switch Variant Eval over to the new Variation system.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1611 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0feee9cdfd
commit
e03fccb223
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
import org.broadinstitute.sting.utils.genotype.glf.GLFReader;
|
import org.broadinstitute.sting.utils.genotype.glf.GLFReader;
|
||||||
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
|
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
|
||||||
import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall;
|
import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall;
|
||||||
|
|
@ -13,9 +14,7 @@ import org.broadinstitute.sting.utils.genotype.glf.VariableLengthCall;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -25,7 +24,7 @@ import java.util.List;
|
||||||
* <p/>
|
* <p/>
|
||||||
* the rod class for GLF data.
|
* the rod class for GLF data.
|
||||||
*/
|
*/
|
||||||
public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<RodGLF> {
|
public class RodGLF implements ReferenceOrderedDatum, Variation, Iterator<RodGLF> {
|
||||||
static int count = 0;
|
static int count = 0;
|
||||||
public GLFReader mReader;
|
public GLFReader mReader;
|
||||||
private final String mName;
|
private final String mName;
|
||||||
|
|
@ -38,6 +37,7 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get the name
|
* get the name
|
||||||
|
*
|
||||||
* @return the name
|
* @return the name
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -64,27 +64,25 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
||||||
return toString();
|
return toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** @return a string representation of the ROD GLF object */
|
||||||
* @return a string representation of the ROD GLF object
|
|
||||||
*/
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.format("%s\t%d\t%s\t%d\t%d\t%4.4f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f",
|
return String.format("%s\t%d\t%s\t%d\t%d\t%4.4f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f",
|
||||||
mLoc.getContig(),
|
mLoc.getContig(),
|
||||||
mLoc.getStart(),
|
mLoc.getStart(),
|
||||||
mRecord.getRefBase(),
|
mRecord.getRefBase(),
|
||||||
mRecord.getReadDepth(),
|
mRecord.getReadDepth(),
|
||||||
mRecord.getRmsMapQ(),
|
mRecord.getRmsMapQ(),
|
||||||
getBestGenotypeValue(1),
|
getBestGenotypeValue(1),
|
||||||
((SinglePointCall)mRecord).getLikelihoods()[0],
|
((SinglePointCall) mRecord).getLikelihoods()[0],
|
||||||
((SinglePointCall)mRecord).getLikelihoods()[1],
|
((SinglePointCall) mRecord).getLikelihoods()[1],
|
||||||
((SinglePointCall)mRecord).getLikelihoods()[2],
|
((SinglePointCall) mRecord).getLikelihoods()[2],
|
||||||
((SinglePointCall)mRecord).getLikelihoods()[3],
|
((SinglePointCall) mRecord).getLikelihoods()[3],
|
||||||
((SinglePointCall)mRecord).getLikelihoods()[4],
|
((SinglePointCall) mRecord).getLikelihoods()[4],
|
||||||
((SinglePointCall)mRecord).getLikelihoods()[5],
|
((SinglePointCall) mRecord).getLikelihoods()[5],
|
||||||
((SinglePointCall)mRecord).getLikelihoods()[6],
|
((SinglePointCall) mRecord).getLikelihoods()[6],
|
||||||
((SinglePointCall)mRecord).getLikelihoods()[7],
|
((SinglePointCall) mRecord).getLikelihoods()[7],
|
||||||
((SinglePointCall)mRecord).getLikelihoods()[8],
|
((SinglePointCall) mRecord).getLikelihoods()[8],
|
||||||
((SinglePointCall)mRecord).getLikelihoods()[9]
|
((SinglePointCall) mRecord).getLikelihoods()[9]
|
||||||
|
|
||||||
|
|
||||||
);
|
);
|
||||||
|
|
@ -107,6 +105,7 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* return a genome loc representing the current location
|
* return a genome loc representing the current location
|
||||||
|
*
|
||||||
* @return the geonome loc
|
* @return the geonome loc
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -115,62 +114,15 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns bases in the reference allele as a String. String can be empty (as in insertion into
|
* get the reference base(s) at this position
|
||||||
* the reference), can contain a single character (as in SNP or one-base deletion), or multiple characters
|
|
||||||
* (for longer indels).
|
|
||||||
*
|
*
|
||||||
* @return reference allele, forward strand
|
* @return the reference base or bases, as a string
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public String getRefBasesFWD() {
|
public char getReference() {
|
||||||
if (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) return "";
|
|
||||||
return String.valueOf(mRecord.getRefBase());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns reference (major) allele base for a SNP variant as a character; should throw IllegalStateException
|
|
||||||
* if variant is not a SNP.
|
|
||||||
*
|
|
||||||
* This doesn't make much sense to me, what if the best genotype is hom non-ref?
|
|
||||||
*
|
|
||||||
* @return reference base on the forward strand
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public char getRefSnpFWD() throws IllegalStateException {
|
|
||||||
if (!isSNP()) throw new IllegalStateException("Current GLF Record is not a SNP");
|
|
||||||
return mRecord.getRefBase().toChar();
|
return mRecord.getRefBase().toChar();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns bases in the alternative allele as a String. String can be empty (as in deletion from
|
|
||||||
* the reference), can contain a single character (as in SNP or one-base insertion), or multiple characters
|
|
||||||
* (for longer indels).
|
|
||||||
*
|
|
||||||
* @return alternative allele, forward strand
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public String getAltBasesFWD() {
|
|
||||||
return getBestGenotype(2).toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns alternative (minor) allele base for a SNP variant as a character; should throw IllegalStateException
|
|
||||||
* if variant is not a SNP.
|
|
||||||
*
|
|
||||||
* @return alternative allele base on the forward starnd
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public char getAltSnpFWD() throws IllegalStateException {
|
|
||||||
if (!isSNP()) {
|
|
||||||
throw new IllegalStateException("Not a SNP");
|
|
||||||
}
|
|
||||||
String str = getBestGenotype(1).toString();
|
|
||||||
if (String.valueOf(str.charAt(0)).equals(mRecord.getRefBase().toString())) {
|
|
||||||
return str.charAt(1);
|
|
||||||
}
|
|
||||||
return str.charAt(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if all observed alleles are reference alleles. All is<Variant> methods (where Variant=SNP,Insertion, etc) should
|
* Returns true if all observed alleles are reference alleles. All is<Variant> methods (where Variant=SNP,Insertion, etc) should
|
||||||
* return false at such site to ensure consistency. This method is included for use with genotyping calls (isGenotype()==true), it makes
|
* return false at such site to ensure consistency. This method is included for use with genotyping calls (isGenotype()==true), it makes
|
||||||
|
|
@ -183,6 +135,31 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
||||||
return (!isSNP());
|
return (!isSNP());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* are we an insertion or a deletion? yes, then return true. No? Well, false it is.
|
||||||
|
*
|
||||||
|
* @return true if we're an insertion or deletion
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean isIndel() {
|
||||||
|
return (isDeletion() || isInsertion());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
|
||||||
|
* of
|
||||||
|
*
|
||||||
|
* @return a char, representing the alternate base
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public char getAlternativeBaseForSNP() {
|
||||||
|
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||||
|
if (getAlternateBases().charAt(0) == this.getReference())
|
||||||
|
return getAlternateBases().charAt(1);
|
||||||
|
return getAlternateBases().charAt(0);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is this variant a SNP?
|
* Is this variant a SNP?
|
||||||
*
|
*
|
||||||
|
|
@ -196,7 +173,9 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* return a string representing the reference
|
* return a string representing the reference
|
||||||
|
*
|
||||||
* @param ref the reference character
|
* @param ref the reference character
|
||||||
|
*
|
||||||
* @return a string for the homozygous ref in a diploid
|
* @return a string for the homozygous ref in a diploid
|
||||||
*/
|
*/
|
||||||
private static String refString(char ref) {
|
private static String refString(char ref) {
|
||||||
|
|
@ -213,7 +192,7 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
||||||
*/
|
*/
|
||||||
public LikelihoodObject.GENOTYPE getBestGenotype(int nthBest) {
|
public LikelihoodObject.GENOTYPE getBestGenotype(int nthBest) {
|
||||||
Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods());
|
Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods());
|
||||||
return LikelihoodObject.GENOTYPE.values()[sorted[nthBest-1]];
|
return LikelihoodObject.GENOTYPE.values()[sorted[nthBest - 1]];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -226,7 +205,7 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
||||||
*/
|
*/
|
||||||
public double getBestGenotypeValue(int nthBest) {
|
public double getBestGenotypeValue(int nthBest) {
|
||||||
Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods());
|
Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods());
|
||||||
return (((SinglePointCall) mRecord).getLikelihoods())[sorted[nthBest-1]];
|
return (((SinglePointCall) mRecord).getLikelihoods())[sorted[nthBest - 1]];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -254,16 +233,13 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is this variant an insertion or a deletion? The contract requires
|
* get the base representation of this Variant
|
||||||
* this to be true if either isInsertion() or isDeletion() returns true. However,
|
|
||||||
* this method is currently allowed to return true even if neither of isInsertion()
|
|
||||||
* and isDeletion() does.
|
|
||||||
*
|
*
|
||||||
* @return
|
* @return a string, of ploidy
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean isIndel() {
|
public String getAlternateBases() {
|
||||||
return (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE);
|
return this.getBestGenotype(0).toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -272,31 +248,19 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public double getMAF() {
|
public double getNonRefAlleleFrequency() {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** @return the VARIANT_TYPE of the current variant */
|
||||||
* Returns heterozygosity, a more accessible general feature of a variant.
|
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
@Override
|
@Override
|
||||||
public double getHeterozygosity() {
|
public VARIANT_TYPE getType() {
|
||||||
return 0;
|
if (this.isSNP()) return VARIANT_TYPE.SNP;
|
||||||
|
else if (this.isInsertion()) return VARIANT_TYPE.INDEL;
|
||||||
|
else if (this.isDeletion()) return VARIANT_TYPE.DELETION;
|
||||||
|
else return VARIANT_TYPE.REFERENCE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Is this variant an actual genotype (such as individual call from sequencing, HapMap chip etc), or
|
|
||||||
* population allelic variant (call from pooled sequencing, dbSNP site etc). Only if variant is a genotype, there
|
|
||||||
* is a meaningful question of, e.g., whether it is a het, or homogeneous non-ref.
|
|
||||||
*
|
|
||||||
* @return true if this variant is an actual genotype.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public boolean isGenotype() {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns phred-mapped confidence in variation event (e.g. MAQ's SNP confidence, or AlleleCaller's best vs. ref).
|
* Returns phred-mapped confidence in variation event (e.g. MAQ's SNP confidence, or AlleleCaller's best vs. ref).
|
||||||
|
|
@ -304,66 +268,20 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public double getVariationConfidence() {
|
public double getNegLog10PError() {
|
||||||
String ref = new String() + mRecord.getRefBase() + mRecord.getRefBase();
|
String ref = new String() + mRecord.getRefBase() + mRecord.getRefBase();
|
||||||
int index = 0;
|
int index = 0;
|
||||||
for (LikelihoodObject.GENOTYPE g: LikelihoodObject.GENOTYPE.values()) {
|
for (LikelihoodObject.GENOTYPE g : LikelihoodObject.GENOTYPE.values()) {
|
||||||
if (g.toString().equals(ref)) break;
|
if (g.toString().equals(ref)) break;
|
||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
return Math.abs(getBestGenotypeValue(1) - ((SinglePointCall)mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
|
return Math.abs(getBestGenotypeValue(1) - ((SinglePointCall) mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
public int length() {
|
||||||
* Returns phred-mapped confidence in called genotype (e.g. MAQ's consensus confidence, or AlleleCaller's
|
return 1;
|
||||||
* best vs next-best.
|
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public double getConsensusConfidence() {
|
|
||||||
return Math.abs(getBestGenotypeValue(1) - getBestGenotypeValue(2)) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns actual observed genotype. Allowed to return more than two alleles (@see #getPloidy()). If this variant
|
|
||||||
* is not a genotype, should throw an IllegalStateException.
|
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public List<String> getGenotype() throws IllegalStateException {
|
|
||||||
List<String> ret = new ArrayList<String>();
|
|
||||||
ret.add(this.getBestGenotype(1).toString());
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return actual number of observed alleles (chromosomes) in the genotype. If this variant is not a genotype,
|
|
||||||
* should throw IllegalStateException.
|
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public int getPloidy() throws IllegalStateException {
|
|
||||||
return 2; // we're so haploid it hurts
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns true if the site has at most two known or observed alleles (ref and non-ref), or false if there are > 2 allelic variants known or observed. When
|
|
||||||
* the implementing class is a genotype, alleles should be always counted including the reference allele whether it was observed in the particular
|
|
||||||
* individual or not: i.e. if the reference is 'C', then both 'CA' and 'AA' genotypes must be reported as bi-allelic, while 'AT' is <i>not</i> bi-allelic (since there are
|
|
||||||
* two allelic variants, 'A' and 'T' <i>in addition</i> to the (known) reference variant 'C').
|
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public boolean isBiallelic() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int length() { return 1; }
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compareTo(ReferenceOrderedDatum that) {
|
public int compareTo(ReferenceOrderedDatum that) {
|
||||||
return this.mLoc.compareTo(that.getLocation());
|
return this.mLoc.compareTo(that.getLocation());
|
||||||
|
|
@ -371,8 +289,10 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* the parse line, which is not used by the GLF rod
|
* the parse line, which is not used by the GLF rod
|
||||||
|
*
|
||||||
* @param header the header to pass in
|
* @param header the header to pass in
|
||||||
* @param parts the string object
|
* @param parts the string object
|
||||||
|
*
|
||||||
* @return false, alwayss
|
* @return false, alwayss
|
||||||
* @throws java.io.IOException
|
* @throws java.io.IOException
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -25,14 +25,19 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.refdata;
|
package org.broadinstitute.sting.gatk.refdata;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.List;
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVariant {
|
public class RodGeliText extends BasicReferenceOrderedDatum implements Variation, VariantBackedByGenotype, AllelicVariant {
|
||||||
public enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
public enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
||||||
public GenomeLoc loc;
|
public GenomeLoc loc;
|
||||||
public char refBase = 'N';
|
public char refBase = 'N';
|
||||||
|
|
@ -96,6 +101,27 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVa
|
||||||
|
|
||||||
public GenomeLoc getLocation() { return loc; }
|
public GenomeLoc getLocation() { return loc; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the reference base(s) at this position
|
||||||
|
*
|
||||||
|
* @return the reference base or bases, as a string
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public char getReference() {
|
||||||
|
return this.refBase;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the -1 * (log 10 of the error value)
|
||||||
|
*
|
||||||
|
* @return the log based error estimate
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public double getNegLog10PError() {
|
||||||
|
return Math.abs(lodBtr);
|
||||||
|
}
|
||||||
|
|
||||||
public String getRefBasesFWD() {
|
public String getRefBasesFWD() {
|
||||||
return String.format("%c", getRefSnpFWD());
|
return String.format("%c", getRefSnpFWD());
|
||||||
}
|
}
|
||||||
|
|
@ -118,8 +144,24 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVa
|
||||||
return refBase == bestGenotype.charAt(0) && refBase == bestGenotype.charAt(1);
|
return refBase == bestGenotype.charAt(0) && refBase == bestGenotype.charAt(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the frequency of this variant
|
||||||
|
*
|
||||||
|
* @return VariantFrequency with the stored frequency
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public double getNonRefAlleleFrequency() {
|
||||||
|
return 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @return the VARIANT_TYPE of the current variant */
|
||||||
|
@Override
|
||||||
|
public VARIANT_TYPE getType() {
|
||||||
|
return VARIANT_TYPE.SNP;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isSNP() {
|
public boolean isSNP() {
|
||||||
return !isReference();
|
return (!bestGenotype.equals(Utils.dupString(this.getReference(),2)));
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isInsertion() {
|
public boolean isInsertion() {
|
||||||
|
|
@ -130,10 +172,35 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVa
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the base representation of this Variant
|
||||||
|
*
|
||||||
|
* @return a string, of ploidy
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String getAlternateBases() {
|
||||||
|
return this.bestGenotype;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isIndel() {
|
public boolean isIndel() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
|
||||||
|
* of
|
||||||
|
*
|
||||||
|
* @return a char, representing the alternate base
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public char getAlternativeBaseForSNP() {
|
||||||
|
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||||
|
if(this.bestGenotype.toString().charAt(0) == this.getReference())
|
||||||
|
return this.bestGenotype.toString().charAt(1);
|
||||||
|
return this.bestGenotype.toString().charAt(0);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
public double getMAF() {
|
public double getMAF() {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -253,4 +320,15 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVa
|
||||||
public int getPloidy() throws IllegalStateException { return 2; }
|
public int getPloidy() throws IllegalStateException { return 2; }
|
||||||
public boolean isBiallelic() { return true; }
|
public boolean isBiallelic() { return true; }
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the likelihoods
|
||||||
|
*
|
||||||
|
* @return an array in lexigraphical order of the likelihoods
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public org.broadinstitute.sting.utils.genotype.Genotype getGenotype(DiploidGenotype x) {
|
||||||
|
if (x.toString().equals(this.getAltBasesFWD())) throw new IllegalStateException("Unable to retrieve genotype");
|
||||||
|
return new BasicGenotype(this.getLocation(),this.bestGenotype,this.getRefSnpFWD(),this.getConsensusConfidence());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,9 @@ package org.broadinstitute.sting.gatk.refdata;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.*;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.regex.MatchResult;
|
import java.util.regex.MatchResult;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
@ -16,7 +18,7 @@ import java.util.regex.Pattern;
|
||||||
* Time: 10:47:14 AM
|
* Time: 10:47:14 AM
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements AllelicVariant {
|
public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements AllelicVariant, Variation, VariantBackedByGenotype {
|
||||||
private String contig, source, feature, strand, frame;
|
private String contig, source, feature, strand, frame;
|
||||||
private long start, stop;
|
private long start, stop;
|
||||||
private double score;
|
private double score;
|
||||||
|
|
@ -74,6 +76,26 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
|
||||||
return GenomeLocParser.parseGenomeLoc(contig, start, stop);
|
return GenomeLocParser.parseGenomeLoc(contig, start, stop);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the reference base(s) at this position
|
||||||
|
*
|
||||||
|
* @return the reference base or bases, as a string
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public char getReference() {
|
||||||
|
return 'N';
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the -1 * (log 10 of the error value)
|
||||||
|
*
|
||||||
|
* @return the log based error estimate
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public double getNegLog10PError() {
|
||||||
|
return 4; // 1/10000 error
|
||||||
|
}
|
||||||
|
|
||||||
public String getAttribute(final String key) {
|
public String getAttribute(final String key) {
|
||||||
return attributes.get(key);
|
return attributes.get(key);
|
||||||
}
|
}
|
||||||
|
|
@ -158,10 +180,49 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
|
||||||
public String getAltBasesFWD() { return null; }
|
public String getAltBasesFWD() { return null; }
|
||||||
public char getAltSnpFWD() throws IllegalStateException { return 0; }
|
public char getAltSnpFWD() throws IllegalStateException { return 0; }
|
||||||
public boolean isReference() { return ! isSNP(); }
|
public boolean isReference() { return ! isSNP(); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String getAlternateBases() {
|
||||||
|
return this.feature;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the frequency of this variant
|
||||||
|
*
|
||||||
|
* @return VariantFrequency with the stored frequency
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public double getNonRefAlleleFrequency() {
|
||||||
|
return this.getMAF();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @return the VARIANT_TYPE of the current variant */
|
||||||
|
@Override
|
||||||
|
public VARIANT_TYPE getType() {
|
||||||
|
return VARIANT_TYPE.SNP;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isSNP() { return false; }
|
public boolean isSNP() { return false; }
|
||||||
public boolean isInsertion() { return false; }
|
public boolean isInsertion() { return false; }
|
||||||
public boolean isDeletion() { return false; }
|
public boolean isDeletion() { return false; }
|
||||||
public boolean isIndel() { return false; }
|
public boolean isIndel() { return false; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
|
||||||
|
* of
|
||||||
|
*
|
||||||
|
* @return a char, representing the alternate base
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public char getAlternativeBaseForSNP() {
|
||||||
|
return this.getAltSnpFWD();
|
||||||
|
}
|
||||||
|
|
||||||
public double getMAF() { return 0; }
|
public double getMAF() { return 0; }
|
||||||
public double getHeterozygosity() { return 0; }
|
public double getHeterozygosity() { return 0; }
|
||||||
public boolean isGenotype() { return true; }
|
public boolean isGenotype() { return true; }
|
||||||
|
|
@ -175,4 +236,15 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
|
||||||
public int getPloidy() throws IllegalStateException { return 2; }
|
public int getPloidy() throws IllegalStateException { return 2; }
|
||||||
public boolean isBiallelic() { return true; }
|
public boolean isBiallelic() { return true; }
|
||||||
public int length() { return 1; }
|
public int length() { return 1; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the likelihoods
|
||||||
|
*
|
||||||
|
* @return an array in lexigraphical order of the likelihoods
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Genotype getGenotype(DiploidGenotype x) {
|
||||||
|
if (!x.toString().equals(this.getAltBasesFWD())) throw new IllegalStateException("Unable to retrieve genotype");
|
||||||
|
return new BasicGenotype(this.getLocation(),this.feature,this.getRefSnpFWD(),this.getConsensusConfidence());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,12 +7,15 @@ import java.util.List;
|
||||||
* chr1:1104840 A N 0.000000 -85.341265 -85.341265 0.000000 0.000000 324.000000 162 0 0
|
* chr1:1104840 A N 0.000000 -85.341265 -85.341265 0.000000 0.000000 324.000000 162 0 0
|
||||||
* chr1:1104841 C N 0.000000 -69.937928 -69.937928 0.000000 0.000000 324.000000 162 0 0
|
* chr1:1104841 C N 0.000000 -69.937928 -69.937928 0.000000 0.000000 324.000000 162 0 0
|
||||||
* chr1:1104842 A N 0.000000 -84.816002 -84.816002 0.000000 0.000000 324.000000 162 0 0
|
* chr1:1104842 A N 0.000000 -84.816002 -84.816002 0.000000 0.000000 324.000000 162 0 0
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public interface SNPCallFromGenotypes extends AllelicVariant {
|
public interface SNPCallFromGenotypes extends AllelicVariant {
|
||||||
public int nIndividuals();
|
public int nIndividuals();
|
||||||
|
|
||||||
public int nHomRefGenotypes();
|
public int nHomRefGenotypes();
|
||||||
|
|
||||||
public int nHetGenotypes();
|
public int nHetGenotypes();
|
||||||
|
|
||||||
public int nHomVarGenotypes();
|
public int nHomVarGenotypes();
|
||||||
|
|
||||||
public List<Genotype> getGenotypes();
|
public List<Genotype> getGenotypes();
|
||||||
}
|
}
|
||||||
|
|
@ -1,10 +1,12 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata;
|
package org.broadinstitute.sting.gatk.refdata;
|
||||||
|
|
||||||
import net.sf.picard.util.SequenceUtil;
|
import net.sf.picard.util.SequenceUtil;
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.*;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Example format:
|
* Example format:
|
||||||
|
|
@ -16,7 +18,7 @@ import org.broadinstitute.sting.utils.*;
|
||||||
* Time: 10:47:14 AM
|
* Time: 10:47:14 AM
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVariant {
|
public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, VariantBackedByGenotype, AllelicVariant {
|
||||||
public GenomeLoc loc; // genome location of SNP
|
public GenomeLoc loc; // genome location of SNP
|
||||||
// Reference sequence chromosome or scaffold
|
// Reference sequence chromosome or scaffold
|
||||||
// Start and stop positions in chrom
|
// Start and stop positions in chrom
|
||||||
|
|
@ -60,6 +62,26 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
|
||||||
// ----------------------------------------------------------------------
|
// ----------------------------------------------------------------------
|
||||||
public GenomeLoc getLocation() { return loc; }
|
public GenomeLoc getLocation() { return loc; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the reference base(s) at this position
|
||||||
|
*
|
||||||
|
* @return the reference base or bases, as a string
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public char getReference() {
|
||||||
|
return getRefSnpFWD();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the -1 * (log 10 of the error value)
|
||||||
|
*
|
||||||
|
* @return the log based error estimate
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public double getNegLog10PError() {
|
||||||
|
return 4; // -log10(0.0001)
|
||||||
|
}
|
||||||
|
|
||||||
public boolean onFwdStrand() {
|
public boolean onFwdStrand() {
|
||||||
return strand.equals("+");
|
return strand.equals("+");
|
||||||
}
|
}
|
||||||
|
|
@ -108,10 +130,24 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getAllelesFWDString() {
|
public String getAllelesFWDString() {
|
||||||
return Utils.join("/", getAllelesFWD());
|
return Utils.join("", getAllelesFWD());
|
||||||
}
|
}
|
||||||
|
|
||||||
// ----------------------------------------------------------------------
|
/**
|
||||||
|
* get the frequency of this variant
|
||||||
|
*
|
||||||
|
* @return VariantFrequency with the stored frequency
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public double getNonRefAlleleFrequency() {
|
||||||
|
return 0; //To change body of implemented methods use File | Settings | File Templates.
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @return the VARIANT_TYPE of the current variant */
|
||||||
|
@Override
|
||||||
|
public VARIANT_TYPE getType() {
|
||||||
|
return VARIANT_TYPE.SNP;
|
||||||
|
}// ----------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// What kind of variant are we?
|
// What kind of variant are we?
|
||||||
//
|
//
|
||||||
|
|
@ -119,7 +155,34 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
|
||||||
public boolean isSNP() { return varType.contains("single"); }
|
public boolean isSNP() { return varType.contains("single"); }
|
||||||
public boolean isInsertion() { return varType.contains("insertion"); }
|
public boolean isInsertion() { return varType.contains("insertion"); }
|
||||||
public boolean isDeletion() { return varType.contains("deletion"); }
|
public boolean isDeletion() { return varType.contains("deletion"); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the base representation of this Variant
|
||||||
|
*
|
||||||
|
* @return a string, of ploidy
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String getAlternateBases() {
|
||||||
|
return getAllelesFWDString();
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isIndel() { return isInsertion() || isDeletion() || varType.contains("in-del"); }
|
public boolean isIndel() { return isInsertion() || isDeletion() || varType.contains("in-del"); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
|
||||||
|
* of
|
||||||
|
*
|
||||||
|
* @return a char, representing the alternate base
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public char getAlternativeBaseForSNP() {
|
||||||
|
return getAltSnpFWD(); /*
|
||||||
|
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||||
|
if (getAlternateBases().charAt(0) == this.getReference())
|
||||||
|
return getAlternateBases().charAt(1);
|
||||||
|
return getAlternateBases().charAt(0); */
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isReference() { return false; } // snp locations are never "reference", there's always a variant
|
public boolean isReference() { return false; } // snp locations are never "reference", there's always a variant
|
||||||
|
|
||||||
public boolean isHapmap() { return validationStatus.contains("by-hapmap"); }
|
public boolean isHapmap() { return validationStatus.contains("by-hapmap"); }
|
||||||
|
|
@ -244,4 +307,14 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
|
||||||
|
|
||||||
public int length() { return (int)(loc.getStop() - loc.getStart() + 1); }
|
public int length() { return (int)(loc.getStop() - loc.getStart() + 1); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the likelihoods
|
||||||
|
*
|
||||||
|
* @return an array in lexigraphical order of the likelihoods
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public org.broadinstitute.sting.utils.genotype.Genotype getGenotype(DiploidGenotype x) {
|
||||||
|
if (x.toString().equals(this.getAltBasesFWD())) throw new IllegalStateException("Unable to retrieve genotype");
|
||||||
|
return new BasicGenotype(this.getLocation(),this.getAltBasesFWD(),this.getRefSnpFWD(),this.getConsensusConfidence());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.filters;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RodGeliText;
|
import org.broadinstitute.sting.gatk.refdata.RodGeliText;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
|
|
@ -50,7 +51,7 @@ public abstract class RatioFilter implements VariantExclusionCriterion {
|
||||||
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
|
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
|
||||||
|
|
||||||
boolean highGenotypeConfidence = variant.getConsensusConfidence() > minGenotypeConfidenceToTest;
|
boolean highGenotypeConfidence = variant.getConsensusConfidence() > minGenotypeConfidenceToTest;
|
||||||
boolean excludable = !excludeHetsOnly() || GenotypeUtils.isHet(variant);
|
boolean excludable = !excludeHetsOnly() || GenotypeUtils.isHet((AllelicVariant)variant);
|
||||||
exclude = excludable && highGenotypeConfidence && pointEstimateExclude(counts);
|
exclude = excludable && highGenotypeConfidence && pointEstimateExclude(counts);
|
||||||
//
|
//
|
||||||
// for printing only
|
// for printing only
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,12 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.List;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Broad Institute
|
* The Broad Institute
|
||||||
|
|
@ -65,5 +65,5 @@ public abstract class BasicVariantAnalysis implements VariantAnalysis {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public abstract String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context);
|
public abstract String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context);
|
||||||
}
|
}
|
||||||
|
|
@ -1,27 +1,30 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Broad Institute
|
* The Broad Institute
|
||||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||||
* This software and its documentation are copyright 2009 by the
|
* This software and its documentation are copyright 2009 by the
|
||||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||||
*
|
* <p/>
|
||||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public class CallableBasesAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis {
|
public class CallableBasesAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis {
|
||||||
long all_bases = 0;
|
long all_bases = 0;
|
||||||
long all_calls = 0;
|
long all_calls = 0;
|
||||||
//final static double[] Qthresholds = { 10, 20, 30, 40, 50, 100, 200, 500, 1000 };
|
//final static double[] Qthresholds = { 10, 20, 30, 40, 50, 100, 200, 500, 1000 };
|
||||||
final static double[] thresholds = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 50, 100 };
|
final static double[] thresholds = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 50, 100};
|
||||||
long[] discoverable_bases = new long[thresholds.length];
|
long[] discoverable_bases = new long[thresholds.length];
|
||||||
long[] genotypable_bases = new long[thresholds.length];
|
long[] genotypable_bases = new long[thresholds.length];
|
||||||
|
|
||||||
|
|
@ -29,36 +32,54 @@ public class CallableBasesAnalysis extends BasicVariantAnalysis implements Genot
|
||||||
super("callable_bases");
|
super("callable_bases");
|
||||||
}
|
}
|
||||||
|
|
||||||
public long nSites() { return all_bases; }
|
public long nSites() {
|
||||||
public long nCalls() { return all_calls; }
|
return all_bases;
|
||||||
public long nDiscoverable(int index) { return discoverable_bases[index]; }
|
}
|
||||||
public double percentDiscoverable(int index) { return (100.0*nDiscoverable(index)) / nSites(); }
|
|
||||||
public long nGenotypable(int index) { return genotypable_bases[index]; }
|
|
||||||
public double percentGenotypable(int index) { return (100.0*nGenotypable(index)) / nSites(); }
|
|
||||||
|
|
||||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
public long nCalls() {
|
||||||
|
return all_calls;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long nDiscoverable(int index) {
|
||||||
|
return discoverable_bases[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
public double percentDiscoverable(int index) {
|
||||||
|
return (100.0 * nDiscoverable(index)) / nSites();
|
||||||
|
}
|
||||||
|
|
||||||
|
public long nGenotypable(int index) {
|
||||||
|
return genotypable_bases[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
public double percentGenotypable(int index) {
|
||||||
|
return (100.0 * nGenotypable(index)) / nSites();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||||
all_bases++;
|
all_bases++;
|
||||||
|
|
||||||
if ( eval == null ) // no data here!
|
if (eval == null) // no data here!
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
// we actually have a record here
|
// we actually have a record here
|
||||||
if ( ! eval.isGenotype() ) { // evaluation record isn't a genotype, die!
|
if (!(eval instanceof VariantBackedByGenotype)) { // evaluation record isn't a genotype, die!
|
||||||
throw new RuntimeException("Evaluation track isn't an Genotype!");
|
throw new RuntimeException("Evaluation track isn't an Genotype!");
|
||||||
}
|
}
|
||||||
|
|
||||||
all_calls++;
|
all_calls++;
|
||||||
// For every threshold, updated discoverable and callable
|
// For every threshold, updated discoverable and callable
|
||||||
for ( int i = 0; i < thresholds.length; i++ ) {
|
for (int i = 0; i < thresholds.length; i++) {
|
||||||
double threshold = thresholds[i];
|
double threshold = thresholds[i];
|
||||||
|
DiploidGenotype g = DiploidGenotype.valueOf(Utils.dupString(ref, 2));
|
||||||
|
Genotype genotype = ((VariantBackedByGenotype) eval).getGenotype(g);
|
||||||
// update discoverable
|
// update discoverable
|
||||||
if ( eval.isSNP() && eval.getVariationConfidence() >= threshold )
|
if (eval.isSNP() && eval.getNegLog10PError() >= threshold)
|
||||||
discoverable_bases[i]++;
|
discoverable_bases[i]++;
|
||||||
if ( eval.isReference() && eval.getConsensusConfidence() >= threshold )
|
if (!eval.isSNP() && genotype.getNegLog10PError() >= threshold)
|
||||||
discoverable_bases[i]++;
|
discoverable_bases[i]++;
|
||||||
|
|
||||||
if ( eval.getConsensusConfidence() >= threshold )
|
if (genotype.getNegLog10PError() >= threshold)
|
||||||
genotypable_bases[i]++;
|
genotypable_bases[i]++;
|
||||||
|
|
||||||
//System.out.printf("Updating %s SNP=%b, REF=%b VarConf=%f ConConf=%f where threshold=%f: discoverable = %d, genotypable = %d%n",
|
//System.out.printf("Updating %s SNP=%b, REF=%b VarConf=%f ConConf=%f where threshold=%f: discoverable = %d, genotypable = %d%n",
|
||||||
|
|
@ -76,7 +97,7 @@ public class CallableBasesAnalysis extends BasicVariantAnalysis implements Genot
|
||||||
s.add(String.format(""));
|
s.add(String.format(""));
|
||||||
s.add(String.format("confidence_threshold\tdiscoverable_bases\tdiscoverable_bases_percent\tgenotypable_bases\tgenotypable_bases_percent"));
|
s.add(String.format("confidence_threshold\tdiscoverable_bases\tdiscoverable_bases_percent\tgenotypable_bases\tgenotypable_bases_percent"));
|
||||||
|
|
||||||
for ( int i = 0; i < thresholds.length; i++ ) {
|
for (int i = 0; i < thresholds.length; i++) {
|
||||||
double threshold = thresholds[i];
|
double threshold = thresholds[i];
|
||||||
s.add(String.format("%6.2f\t%d\t%.6f\t%d\t%.6f", threshold, nDiscoverable(i), percentDiscoverable(i), nGenotypable(i), percentGenotypable(i)));
|
s.add(String.format("%6.2f\t%d\t%.6f\t%d\t%.6f", threshold, nDiscoverable(i), percentDiscoverable(i), nGenotypable(i), percentGenotypable(i)));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,14 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Broad Institute
|
* The Broad Institute
|
||||||
|
|
@ -24,7 +24,7 @@ public class ClusterCounterAnalysis extends BasicVariantAnalysis implements Geno
|
||||||
ArrayList<HashSet<GenomeLoc>> variantsWithClusters;
|
ArrayList<HashSet<GenomeLoc>> variantsWithClusters;
|
||||||
int minDistanceForFlagging = 5;
|
int minDistanceForFlagging = 5;
|
||||||
int[] neighborWiseBoundries = {1, 2, 5, 10, 20, 50, 100};
|
int[] neighborWiseBoundries = {1, 2, 5, 10, 20, 50, 100};
|
||||||
AllelicVariant lastVariant = null;
|
Variation lastVariation = null;
|
||||||
GenomeLoc lastVariantInterval = null;
|
GenomeLoc lastVariantInterval = null;
|
||||||
int nSeen = 0;
|
int nSeen = 0;
|
||||||
|
|
||||||
|
|
@ -36,16 +36,16 @@ public class ClusterCounterAnalysis extends BasicVariantAnalysis implements Geno
|
||||||
variantsWithClusters.add(new HashSet<GenomeLoc>());
|
variantsWithClusters.add(new HashSet<GenomeLoc>());
|
||||||
}
|
}
|
||||||
|
|
||||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||||
String r = null;
|
String r = null;
|
||||||
|
|
||||||
if ( eval != null && eval.isSNP() ) {
|
if ( eval != null && eval.isSNP() ) {
|
||||||
IntervalRod intervalROD = (IntervalRod)tracker.lookup("interval", null);
|
IntervalRod intervalROD = (IntervalRod)tracker.lookup("interval", null);
|
||||||
GenomeLoc interval = intervalROD == null ? null : intervalROD.getLocation();
|
GenomeLoc interval = intervalROD == null ? null : intervalROD.getLocation();
|
||||||
|
|
||||||
if (lastVariant != null) {
|
if (lastVariation != null) {
|
||||||
GenomeLoc eL = eval.getLocation();
|
GenomeLoc eL = eval.getLocation();
|
||||||
GenomeLoc lvL = lastVariant.getLocation();
|
GenomeLoc lvL = lastVariation.getLocation();
|
||||||
if (eL.getContigIndex() == lvL.getContigIndex()) {
|
if (eL.getContigIndex() == lvL.getContigIndex()) {
|
||||||
long d = eL.distance(lvL);
|
long d = eL.distance(lvL);
|
||||||
if ( lastVariantInterval != null && lastVariantInterval.compareTo(interval) != 0) {
|
if ( lastVariantInterval != null && lastVariantInterval.compareTo(interval) != 0) {
|
||||||
|
|
@ -66,7 +66,7 @@ public class ClusterCounterAnalysis extends BasicVariantAnalysis implements Geno
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
lastVariant = eval;
|
lastVariation = eval;
|
||||||
lastVariantInterval = interval;
|
lastVariantInterval = interval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,21 +1,24 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.*;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Broad Institute
|
* The Broad Institute
|
||||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||||
* This software and its documentation are copyright 2009 by the
|
* This software and its documentation are copyright 2009 by the
|
||||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||||
*
|
* <p/>
|
||||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public class GenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis {
|
public class GenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis {
|
||||||
private String dbName;
|
private String dbName;
|
||||||
|
|
@ -35,48 +38,48 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
||||||
public GenotypeConcordance(final String name) {
|
public GenotypeConcordance(final String name) {
|
||||||
super("genotype_concordance");
|
super("genotype_concordance");
|
||||||
dbName = name;
|
dbName = name;
|
||||||
for ( int i = 0; i < 4; i++ ) {
|
for (int i = 0; i < 4; i++) {
|
||||||
truth_totals[i] = 0;
|
truth_totals[i] = 0;
|
||||||
calls_totals[i] = 0;
|
calls_totals[i] = 0;
|
||||||
for ( int j = 0; j < 4; j++ )
|
for (int j = 0; j < 4; j++)
|
||||||
table[i][j] = 0;
|
table[i][j] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void inc(AllelicVariant chip, AllelicVariant eval, char ref) {
|
public void inc(Variation chip, Variation eval, char ref) {
|
||||||
if ( (chip != null && !chip.isGenotype()) || (eval != null && !eval.isGenotype()) )
|
if ((chip != null && !(chip instanceof VariantBackedByGenotype) || (eval != null && !(eval instanceof VariantBackedByGenotype))))
|
||||||
throw new StingException("Failure: trying to analyze genotypes of non-genotype data");
|
throw new StingException("Failure: trying to analyze genotypes of non-genotype data");
|
||||||
|
|
||||||
|
DiploidGenotype g = DiploidGenotype.valueOf(Utils.dupString(ref, 2));
|
||||||
int truthIndex, callIndex;
|
int truthIndex, callIndex;
|
||||||
if ( chip == null )
|
if (chip == null)
|
||||||
truthIndex = UNKNOWN;
|
truthIndex = UNKNOWN;
|
||||||
else if ( chip.isReference() && Utils.countOccurrences(ref, chip.getGenotype().get(0)) == chip.getGenotype().get(0).length() )
|
else if (chip.getAlternateBases().equals(g.toString()))
|
||||||
truthIndex = REF;
|
truthIndex = REF;
|
||||||
else if ( GenotypeUtils.isHet(chip) )
|
else if (chip.getAlternateBases().charAt(0) != chip.getAlternateBases().charAt(1))
|
||||||
truthIndex = VAR_HET;
|
truthIndex = VAR_HET;
|
||||||
else
|
else
|
||||||
truthIndex = VAR_HOM;
|
truthIndex = VAR_HOM;
|
||||||
|
|
||||||
// todo -- FIXME on countOccurences
|
if (eval == null)
|
||||||
if ( eval == null )
|
|
||||||
callIndex = NO_CALL;
|
callIndex = NO_CALL;
|
||||||
else if ( eval.isReference() && Utils.countOccurrences(ref, eval.getGenotype().get(0)) == eval.getGenotype().get(0).length() )
|
else if (eval.getAlternateBases().equals(g.toString()))
|
||||||
callIndex = REF;
|
callIndex = REF;
|
||||||
else if ( GenotypeUtils.isHet(eval) )
|
else if (eval.getAlternateBases().charAt(0) != eval.getAlternateBases().charAt(1))
|
||||||
callIndex = VAR_HET;
|
callIndex = VAR_HET;
|
||||||
else
|
else
|
||||||
callIndex = VAR_HOM;
|
callIndex = VAR_HOM;
|
||||||
|
|
||||||
if ( chip != null || eval != null ) {
|
if (chip != null || eval != null) {
|
||||||
//System.out.printf("TEST: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval);
|
//System.out.printf("TEST: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval);
|
||||||
table[truthIndex][callIndex]++;
|
table[truthIndex][callIndex]++;
|
||||||
truth_totals[truthIndex]++;
|
truth_totals[truthIndex]++;
|
||||||
if ( callIndex != NO_CALL ) calls_totals[callIndex]++;
|
if (callIndex != NO_CALL) calls_totals[callIndex]++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||||
AllelicVariant chip = (AllelicVariant)tracker.lookup(dbName, null);
|
Variation chip = (Variation) tracker.lookup(dbName, null);
|
||||||
inc(chip, eval, ref);
|
inc(chip, eval, ref);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
@ -84,14 +87,14 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
||||||
private void addCalledGenotypeConcordance(List<String> s) {
|
private void addCalledGenotypeConcordance(List<String> s) {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
sb.append("CALLED_GENOTYPE_CONCORDANCE\t");
|
sb.append("CALLED_GENOTYPE_CONCORDANCE\t");
|
||||||
for ( int i = 0; i < 4; i++ ) {
|
for (int i = 0; i < 4; i++) {
|
||||||
int nConcordantCallsI = table[i][i];
|
int nConcordantCallsI = table[i][i];
|
||||||
String value = "N/A";
|
String value = "N/A";
|
||||||
if ( i != UNKNOWN )
|
if (i != UNKNOWN)
|
||||||
value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i]-table[UNKNOWN][i]));
|
value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i] - table[UNKNOWN][i]));
|
||||||
sb.append(value);
|
sb.append(value);
|
||||||
}
|
}
|
||||||
s.add(sb.toString());
|
s.add(sb.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -99,9 +102,9 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
||||||
*/
|
*/
|
||||||
private int getNCalled() {
|
private int getNCalled() {
|
||||||
int n = 0;
|
int n = 0;
|
||||||
for ( int i = 0; i < 4; i++ )
|
for (int i = 0; i < 4; i++)
|
||||||
for ( int j = 0; j < 4; j++ )
|
for (int j = 0; j < 4; j++)
|
||||||
if ( i != NO_CALL && j != NO_CALL ) n += table[i][j];
|
if (i != NO_CALL && j != NO_CALL) n += table[i][j];
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -115,23 +118,23 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
||||||
int nTrueVar = truth_totals[VAR_HET] + truth_totals[VAR_HOM];
|
int nTrueVar = truth_totals[VAR_HET] + truth_totals[VAR_HOM];
|
||||||
int nCalled = getNCalled();
|
int nCalled = getNCalled();
|
||||||
s.add(String.format("VARIANT_SENSITIVITY %s", cellPercent(nVarCalls, nTrueVar)));
|
s.add(String.format("VARIANT_SENSITIVITY %s", cellPercent(nVarCalls, nTrueVar)));
|
||||||
s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls)));
|
s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls)));
|
||||||
s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled)));
|
s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled)));
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> done() {
|
public List<String> done() {
|
||||||
List<String> s = new ArrayList<String>();
|
List<String> s = new ArrayList<String>();
|
||||||
s.add(String.format("name %s", dbName));
|
s.add(String.format("name %s", dbName));
|
||||||
s.add(String.format("TRUTH_STATE\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CALL\t\tTOTALS\tTRUE_GENOTYPE_CONCORDANCE\tGENOTYPE_SENSITIVITY"));
|
s.add(String.format("TRUTH_STATE\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CALL\t\tTOTALS\tTRUE_GENOTYPE_CONCORDANCE\tGENOTYPE_SENSITIVITY"));
|
||||||
for (int i=0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
StringBuffer sb = new StringBuffer();
|
StringBuffer sb = new StringBuffer();
|
||||||
sb.append(String.format("%15s ", TRUTH_NAMES[i]));
|
sb.append(String.format("%15s ", TRUTH_NAMES[i]));
|
||||||
for (int j=0; j < 4; j++)
|
for (int j = 0; j < 4; j++)
|
||||||
sb.append(String.format("%9d ", table[i][j]));
|
sb.append(String.format("%9d ", table[i][j]));
|
||||||
sb.append(String.format("%9d ", truth_totals[i]));
|
sb.append(String.format("%9d ", truth_totals[i]));
|
||||||
if ( i == VAR_HET || i == VAR_HOM ) {
|
if (i == VAR_HET || i == VAR_HOM) {
|
||||||
sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF]+table[i][VAR_HET]+table[i][VAR_HOM])));
|
sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM])));
|
||||||
sb.append(String.format("%s", cellPercent(truth_totals[i]-table[i][NO_CALL], truth_totals[i])));
|
sb.append(String.format("%s", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i])));
|
||||||
} else {
|
} else {
|
||||||
sb.append("\tN/A\t\t\tN/A");
|
sb.append("\tN/A\t\t\tN/A");
|
||||||
}
|
}
|
||||||
|
|
@ -141,15 +144,15 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
||||||
addCalledGenotypeConcordance(s);
|
addCalledGenotypeConcordance(s);
|
||||||
addOverallStats(s);
|
addOverallStats(s);
|
||||||
|
|
||||||
for (int i=0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
for (int j=0; j < 4; j++) {
|
for (int j = 0; j < 4; j++) {
|
||||||
s.add(String.format("%s_%s_%s %d", TRUTH_NAMES[i], CALL_NAMES[j], "NO_SITES", table[i][j]));
|
s.add(String.format("%s_%s_%s %d", TRUTH_NAMES[i], CALL_NAMES[j], "NO_SITES", table[i][j]));
|
||||||
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_TRUTH", cellPercent(table[i][j], truth_totals[i])));
|
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_TRUTH", cellPercent(table[i][j], truth_totals[i])));
|
||||||
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_CALLS", cellPercent(table[i][j], calls_totals[j])));
|
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_CALLS", cellPercent(table[i][j], calls_totals[j])));
|
||||||
}
|
}
|
||||||
if ( i == VAR_HET || i == VAR_HOM ) {
|
if (i == VAR_HET || i == VAR_HOM) {
|
||||||
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF]+table[i][VAR_HET]+table[i][VAR_HOM])));
|
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM])));
|
||||||
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i]-table[i][NO_CALL], truth_totals[i])));
|
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i])));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
|
|
@ -158,7 +161,7 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
||||||
private static String cellPercent(int count, int total) {
|
private static String cellPercent(int count, int total) {
|
||||||
StringBuffer sb = new StringBuffer();
|
StringBuffer sb = new StringBuffer();
|
||||||
total = Math.max(total, 0);
|
total = Math.max(total, 0);
|
||||||
sb.append(String.format("%.2f", (100.0*count)/total));
|
sb.append(String.format("%.2f", (100.0 * count) / total));
|
||||||
sb.append("%");
|
sb.append("%");
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,17 +1,16 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||||
|
|
||||||
|
import cern.jet.math.Arithmetic;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.SNPCallFromGenotypes;
|
import org.broadinstitute.sting.gatk.refdata.SNPCallFromGenotypes;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation;
|
import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import cern.jet.math.Arithmetic;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Broad Institute
|
* The Broad Institute
|
||||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||||
|
|
@ -32,7 +31,7 @@ public class HardyWeinbergEquilibrium extends BasicVariantAnalysis implements Po
|
||||||
this.threshold = threshold;
|
this.threshold = threshold;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||||
String r = null;
|
String r = null;
|
||||||
|
|
||||||
if ( eval != null &&
|
if ( eval != null &&
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,11 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Broad Institute
|
* The Broad Institute
|
||||||
|
|
@ -29,19 +29,19 @@ public class IndelMetricsAnalysis extends BasicVariantAnalysis implements Genoty
|
||||||
sizes[0][i] = sizes[1][i] = 0;
|
sizes[0][i] = sizes[1][i] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||||
if ( eval != null && eval.isIndel() ) {
|
if ( eval != null && eval.isInsertion() ) {
|
||||||
if ( eval.isInsertion() )
|
if ( eval.isInsertion() )
|
||||||
insertions++;
|
insertions++;
|
||||||
else if ( eval.isDeletion() )
|
else if ( eval.isDeletion() )
|
||||||
deletions++;
|
deletions++;
|
||||||
else
|
else
|
||||||
throw new RuntimeException("Variant is indel, but isn't insertion or deletion!");
|
throw new RuntimeException("Variation is indel, but isn't insertion or deletion!");
|
||||||
|
|
||||||
if ( eval.length() < 100 ) {
|
if ( eval.getAlternateBases().length() < 100 ) {
|
||||||
sizes[eval.isDeletion() ? 0 : 1][eval.length()]++;
|
sizes[eval.isDeletion() ? 0 : 1][eval.getAlternateBases().length()]++;
|
||||||
if ( eval.length() > maxSize )
|
if ( eval.getAlternateBases().length() > maxSize )
|
||||||
maxSize = eval.length();
|
maxSize = eval.getAlternateBases().length();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,15 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
|
||||||
|
import java.io.PrintStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.io.PrintStream;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Broad Institute
|
* The Broad Institute
|
||||||
|
|
@ -26,7 +26,7 @@ public class NeighborDistanceAnalysis extends BasicVariantAnalysis implements Ge
|
||||||
int minDistanceForFlagging = 5;
|
int minDistanceForFlagging = 5;
|
||||||
int[] neighborWiseBoundries = {1, 2, 5, 10, 20, 50, 100, 1000, 10000};
|
int[] neighborWiseBoundries = {1, 2, 5, 10, 20, 50, 100, 1000, 10000};
|
||||||
|
|
||||||
AllelicVariant lastVariant = null;
|
Variation lastVariation = null;
|
||||||
GenomeLoc lastVariantInterval = null;
|
GenomeLoc lastVariantInterval = null;
|
||||||
PrintStream violationsOut = null;
|
PrintStream violationsOut = null;
|
||||||
|
|
||||||
|
|
@ -35,16 +35,16 @@ public class NeighborDistanceAnalysis extends BasicVariantAnalysis implements Ge
|
||||||
neighborWiseDistances = new ArrayList<Long>();
|
neighborWiseDistances = new ArrayList<Long>();
|
||||||
}
|
}
|
||||||
|
|
||||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||||
String r = null;
|
String r = null;
|
||||||
|
|
||||||
if ( eval != null && eval.isSNP() ) {
|
if ( eval != null && eval.isSNP() ) {
|
||||||
IntervalRod intervalROD = (IntervalRod)tracker.lookup("interval", null);
|
IntervalRod intervalROD = (IntervalRod)tracker.lookup("interval", null);
|
||||||
GenomeLoc interval = intervalROD == null ? null : intervalROD.getLocation();
|
GenomeLoc interval = intervalROD == null ? null : intervalROD.getLocation();
|
||||||
|
|
||||||
if (lastVariant != null) {
|
if (lastVariation != null) {
|
||||||
GenomeLoc eL = eval.getLocation();
|
GenomeLoc eL = eval.getLocation();
|
||||||
GenomeLoc lvL = lastVariant.getLocation();
|
GenomeLoc lvL = lastVariation.getLocation();
|
||||||
if (eL.getContigIndex() == lvL.getContigIndex()) {
|
if (eL.getContigIndex() == lvL.getContigIndex()) {
|
||||||
long d = eL.distance(lvL);
|
long d = eL.distance(lvL);
|
||||||
if ( lastVariantInterval != null && lastVariantInterval.compareTo(interval) != 0) {
|
if ( lastVariantInterval != null && lastVariantInterval.compareTo(interval) != 0) {
|
||||||
|
|
@ -57,7 +57,7 @@ public class NeighborDistanceAnalysis extends BasicVariantAnalysis implements Ge
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
lastVariant = eval;
|
lastVariation = eval;
|
||||||
lastVariantInterval = interval;
|
lastVariantInterval = interval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,10 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -13,10 +14,9 @@ import java.util.List;
|
||||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||||
* This software and its documentation are copyright 2009 by the
|
* This software and its documentation are copyright 2009 by the
|
||||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||||
*
|
* <p/>
|
||||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public class TransitionTranversionAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis {
|
public class TransitionTranversionAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis {
|
||||||
long nTransitions = 0, nTransversions = 0;
|
long nTransitions = 0, nTransversions = 0;
|
||||||
|
|
@ -25,13 +25,18 @@ public class TransitionTranversionAnalysis extends BasicVariantAnalysis implemen
|
||||||
super("transitions_transversions");
|
super("transitions_transversions");
|
||||||
}
|
}
|
||||||
|
|
||||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||||
if ( eval != null && eval.isSNP() ) {
|
if (eval != null && eval.isSNP()) {
|
||||||
char refBase = eval.getRefSnpFWD();
|
if (eval.getAlternateBases().length() != 2) {
|
||||||
char altBase = eval.getAltSnpFWD();
|
throw new StingException("TransitionTranversionAnalysis works only with biallelic variants");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
char refBase = eval.getReference();
|
||||||
|
char altBase = (eval.getAlternateBases().charAt(0) == refBase) ? eval.getAlternateBases().charAt(1) : eval.getAlternateBases().charAt(0);
|
||||||
|
|
||||||
BaseUtils.BaseSubstitutionType subType = BaseUtils.SNPSubstitutionType(refBase, altBase);
|
BaseUtils.BaseSubstitutionType subType = BaseUtils.SNPSubstitutionType(refBase, altBase);
|
||||||
if ( subType == BaseUtils.BaseSubstitutionType.TRANSITION )
|
if (subType == BaseUtils.BaseSubstitutionType.TRANSITION)
|
||||||
nTransitions++;
|
nTransitions++;
|
||||||
else
|
else
|
||||||
nTransversions++;
|
nTransversions++;
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -23,7 +23,7 @@ public interface VariantAnalysis {
|
||||||
public PrintStream getCallPrintStream();
|
public PrintStream getCallPrintStream();
|
||||||
public List<String> getParams();
|
public List<String> getParams();
|
||||||
public void initialize(VariantEvalWalker master, PrintStream out, PrintStream callOut, String filename);
|
public void initialize(VariantEvalWalker master, PrintStream out, PrintStream callOut, String filename);
|
||||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context);
|
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context);
|
||||||
public void finalize(long nSites);
|
public void finalize(long nSites);
|
||||||
public List<String> done();
|
public List<String> done();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,13 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.utils.GenotypeUtils;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Broad Institute
|
* The Broad Institute
|
||||||
|
|
@ -27,10 +28,10 @@ public class VariantCounter extends BasicVariantAnalysis implements GenotypeAnal
|
||||||
super("variant_counts");
|
super("variant_counts");
|
||||||
}
|
}
|
||||||
|
|
||||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||||
nSNPs += eval == null ? 0 : 1;
|
nSNPs += eval == null ? 0 : 1;
|
||||||
|
|
||||||
if ( this.getMaster().evalContainsGenotypes && eval != null && eval.isSNP() && GenotypeUtils.isHet(eval) )
|
if ( this.getMaster().evalContainsGenotypes && eval != null && eval.isSNP() && ((VariantBackedByGenotype)eval).getGenotype( DiploidGenotype.valueOf(eval.getAlternateBases())).isHet() )
|
||||||
nHets++;
|
nHets++;
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
|
|
|
||||||
|
|
@ -1,21 +1,20 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Broad Institute
|
* The Broad Institute
|
||||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||||
* This software and its documentation are copyright 2009 by the
|
* This software and its documentation are copyright 2009 by the
|
||||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||||
*
|
* <p/>
|
||||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis {
|
public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis {
|
||||||
private String dbName;
|
private String dbName;
|
||||||
|
|
@ -31,45 +30,81 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
|
||||||
dbName = name;
|
dbName = name;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void inc(AllelicVariant dbSNP, AllelicVariant eval) {
|
public void inc(Variation dbSNP, Variation eval) {
|
||||||
boolean inDB = dbSNP != null;
|
boolean inDB = dbSNP != null;
|
||||||
boolean inEval = eval != null;
|
boolean inEval = eval != null;
|
||||||
|
|
||||||
if (inDB ) {
|
if (inDB) {
|
||||||
if ( dbSNP.isSNP() ) nDBSNPs++;
|
if (dbSNP.isSNP()) nDBSNPs++;
|
||||||
if ( dbSNP.isIndel() ) nDBIndels++;
|
if (dbSNP.isIndel()) nDBIndels++;
|
||||||
|
|
||||||
//System.out.printf("snp=%b ins=%b del=%b indel=%b %s%n", dbSNP.isSNP(), dbSNP.isInsertion(), dbSNP.isDeletion(), dbSNP.isIndel(), dbSNP);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (inEval) nEvalObs++;
|
if (inEval) nEvalObs++;
|
||||||
if (inDB && inEval) {
|
if (inDB && inEval) {
|
||||||
if ( dbSNP.isSNP() ) { // changes the calculation a bit
|
if (dbSNP.isSNP()) { // changes the calculation a bit
|
||||||
nOverlapping++;
|
nOverlapping++;
|
||||||
|
|
||||||
if ( ! discordantP(dbSNP, eval) )
|
if (!discordantP(dbSNP, eval))
|
||||||
nConcordant++;
|
nConcordant++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( dbSNP.isIndel() && eval.isSNP() )
|
if (dbSNP.isIndel() && eval.isSNP())
|
||||||
nSNPsCalledAtIndels++;
|
nSNPsCalledAtIndels++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public int nDBSNPs() { return nDBSNPs; }
|
public int callCount = 0;
|
||||||
public int nDBIndels() { return nDBIndels; }
|
|
||||||
public int nEvalSites() { return nEvalObs; }
|
|
||||||
public int nOverlappingSites() { return nOverlapping; }
|
|
||||||
public int nConcordant() { return nConcordant; }
|
|
||||||
public int nNovelSites() { return Math.abs(nEvalSites() - nOverlappingSites()); }
|
|
||||||
public int nSNPsAtIndels() { return nSNPsCalledAtIndels; }
|
|
||||||
|
|
||||||
public boolean discordantP(AllelicVariant dbSNP, AllelicVariant eval) {
|
public int nDBSNPs() {
|
||||||
if (dbSNP != null && dbSNP.isSNP() && eval != null ) {
|
return nDBSNPs;
|
||||||
return ! (dbSNP.getAltSnpFWD() == eval.getAltSnpFWD() || dbSNP.getRefSnpFWD() == eval.getAltSnpFWD());
|
}
|
||||||
} else {
|
|
||||||
return false;
|
public int nDBIndels() {
|
||||||
|
return nDBIndels;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int nEvalSites() {
|
||||||
|
return nEvalObs;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int nOverlappingSites() {
|
||||||
|
return nOverlapping;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int nConcordant() {
|
||||||
|
return nConcordant;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int nNovelSites() {
|
||||||
|
return Math.abs(nEvalSites() - nOverlappingSites());
|
||||||
|
}
|
||||||
|
|
||||||
|
public int nSNPsAtIndels() {
|
||||||
|
return nSNPsCalledAtIndels;
|
||||||
|
}
|
||||||
|
public static int count = 0;
|
||||||
|
public boolean discordantP(Variation dbSNP, Variation eval) {
|
||||||
|
if (eval != null) {
|
||||||
|
char alt = (eval.isSNP()) ? eval.getAlternativeBaseForSNP() : eval.getReference();
|
||||||
|
|
||||||
|
if (dbSNP != null && dbSNP.isSNP()) {
|
||||||
|
|
||||||
|
boolean val = !dbSNP.getAlternateBases().contains(String.valueOf(alt));
|
||||||
|
|
||||||
|
if (val) {
|
||||||
|
//System.err.print(eval.getLocation());
|
||||||
|
//System.err.print(" eval.alt = " + alt);
|
||||||
|
//System.err.println(" dbsnp.bases = " + dbSNP.getAlternateBases());
|
||||||
|
//System.err.println(val);
|
||||||
|
|
||||||
|
//System.err.println(count);
|
||||||
|
}
|
||||||
|
//else System.err.println("not count == " + count++);
|
||||||
|
|
||||||
|
return val;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -86,17 +121,19 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
|
||||||
return nConcordant() / (1.0 * nOverlappingSites());
|
return nConcordant() / (1.0 * nOverlappingSites());
|
||||||
}
|
}
|
||||||
|
|
||||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||||
|
|
||||||
// There are four cases here:
|
// There are four cases here:
|
||||||
AllelicVariant dbsnp = (AllelicVariant)tracker.lookup(dbName, null);
|
Variation dbsnp = (Variation) tracker.lookup(dbName, null);
|
||||||
boolean isSNP = dbsnp != null && dbsnp.isSNP();
|
boolean isSNP = dbsnp != null && dbsnp.isSNP();
|
||||||
inc(dbsnp, eval);
|
inc(dbsnp, eval);
|
||||||
|
|
||||||
if ( dbsnp != null && eval != null ) {
|
if (dbsnp != null && eval != null) {
|
||||||
if ( dbsnp.isSNP() && eval.isSNP() && discordantP(dbsnp, eval) ) {
|
|
||||||
|
if (dbsnp.isSNP() && eval.isSNP() && discordantP(dbsnp, eval)) {
|
||||||
return String.format("Discordant [DBSNP %s] [EVAL %s]", dbsnp, eval);
|
return String.format("Discordant [DBSNP %s] [EVAL %s]", dbsnp, eval);
|
||||||
} else if ( dbsnp.isIndel() && eval.isSNP() ) {
|
} else if (dbsnp.isIndel() && eval.isSNP()) {
|
||||||
return String.format("SNP-at-indel DBSNP=%s %s", dbsnp.getGenotype().get(0), eval);
|
return String.format("SNP-at-indel DBSNP=%s %s", dbsnp.getAlternateBases(), eval);
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return null;
|
||||||
// return "Novel " + eval;
|
// return "Novel " + eval;
|
||||||
|
|
@ -128,10 +165,10 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
|
||||||
s.add(String.format("n_concordant %d", nConcordant()));
|
s.add(String.format("n_concordant %d", nConcordant()));
|
||||||
s.add(String.format("n_novel_sites %d", nNovelSites()));
|
s.add(String.format("n_novel_sites %d", nNovelSites()));
|
||||||
|
|
||||||
s.add(String.format("percent_eval_sites_in_db %.2f", 100*fractionEvalSitesCoveredByDB()));
|
s.add(String.format("percent_eval_sites_in_db %.2f", 100 * fractionEvalSitesCoveredByDB()));
|
||||||
s.add(String.format("concordance_rate %.2f", 100*concordanceRate()));
|
s.add(String.format("concordance_rate %.2f", 100 * concordanceRate()));
|
||||||
|
|
||||||
s.add(String.format("percent_db_sites_in_eval %.2f", 100*fractionDBSitesDiscoveredInEval()));
|
s.add(String.format("percent_db_sites_in_eval %.2f", 100 * fractionDBSitesDiscoveredInEval()));
|
||||||
|
|
||||||
s.add(String.format("n_snp_calls_at_indels %d", nSNPsAtIndels()));
|
s.add(String.format("n_snp_calls_at_indels %d", nSNPsAtIndels()));
|
||||||
s.add(String.format("percent_calls_at_indels %.2f", nSNPsAtIndels() / (0.01 * nEvalSites())));
|
s.add(String.format("percent_calls_at_indels %.2f", nSNPsAtIndels() / (0.01 * nEvalSites())));
|
||||||
|
|
|
||||||
|
|
@ -2,14 +2,12 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.SNPCallFromGenotypes;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
|
|
@ -28,8 +26,8 @@ import java.util.*;
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
@By(DataSource.REFERENCE)
|
@By(DataSource.REFERENCE)
|
||||||
@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="eval",type=AllelicVariant.class)})
|
@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="eval",type=RodGeliText.class)}) // right now we have no base variant class for rods, this should change
|
||||||
@Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=AllelicVariant.class), @RMD(name="dbsnp",type=AllelicVariant.class),@RMD(name="hapmap-chip",type=AllelicVariant.class), @RMD(name="interval",type=IntervalRod.class)})
|
@Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=RodGeliText.class), @RMD(name="dbsnp",type=rodDbSNP.class),@RMD(name="hapmap-chip",type=RodGenotypeChipAsGFF.class), @RMD(name="interval",type=IntervalRod.class)})
|
||||||
public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
||||||
@Argument(shortName="minConfidenceScore", doc="Minimum confidence score to consider an evaluation SNP a variant", required=false)
|
@Argument(shortName="minConfidenceScore", doc="Minimum confidence score to consider an evaluation SNP a variant", required=false)
|
||||||
public int minConfidenceScore = -1;
|
public int minConfidenceScore = -1;
|
||||||
|
|
@ -181,19 +179,18 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
nSites++;
|
nSites++;
|
||||||
|
|
||||||
// Iterate over each analysis, and update it
|
// Iterate over each analysis, and update it
|
||||||
AllelicVariant eval = (AllelicVariant)tracker.lookup("eval", null);
|
Variation eval = (Variation)tracker.lookup("eval", null);
|
||||||
|
|
||||||
//if ( eval!=null ) System.out.printf("Eval: %f %d %b%n", eval.getVariationConfidence(), minDiscoveryQ, eval.getVariationConfidence() < minDiscoveryQ);
|
//if ( eval!=null ) System.out.printf("Eval: %f %d %b%n", eval.getVariationConfidence(), minDiscoveryQ, eval.getVariationConfidence() < minDiscoveryQ);
|
||||||
if ( eval != null ) {
|
if ( eval != null ) {
|
||||||
// comment to disable variant filtering by confidence score
|
// comment to disable variant filtering by confidence score
|
||||||
if ( evalContainsGenotypes ) {
|
if ( evalContainsGenotypes ) {
|
||||||
// Genotyping - use best vs. next best lod
|
// Genotyping - use best vs. next best lod
|
||||||
if ( eval.getConsensusConfidence() < minConfidenceScore ) eval = null;
|
if ( eval.getNegLog10PError() < minConfidenceScore ) eval = null;
|
||||||
} else {
|
} else {
|
||||||
// Variant discovery - use best vs. reference lod
|
// Variation discovery - use best vs. reference lod
|
||||||
if ( Math.abs(eval.getVariationConfidence()) < minConfidenceScore ) eval = null;
|
if ( Math.abs(eval.getNegLog10PError()) < minConfidenceScore ) eval = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -202,7 +199,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
||||||
|
|
||||||
// update the known / novel set by checking whether the knownSNPDBName track has an entry here
|
// update the known / novel set by checking whether the knownSNPDBName track has an entry here
|
||||||
if ( eval != null ) {
|
if ( eval != null ) {
|
||||||
AllelicVariant dbsnp = (AllelicVariant)tracker.lookup(knownSNPDBName, null);
|
Variation dbsnp = (Variation)tracker.lookup(knownSNPDBName, null);
|
||||||
String noveltySet = dbsnp == null ? NOVEL_SNPS : KNOWN_SNPS;
|
String noveltySet = dbsnp == null ? NOVEL_SNPS : KNOWN_SNPS;
|
||||||
updateAnalysisSet(noveltySet, eval, tracker, ref.getBase(), context);
|
updateAnalysisSet(noveltySet, eval, tracker, ref.getBase(), context);
|
||||||
}
|
}
|
||||||
|
|
@ -219,7 +216,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void updateAnalysisSet(final String analysisSetName, AllelicVariant eval,
|
public void updateAnalysisSet(final String analysisSetName, Variation eval,
|
||||||
RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||||
// Iterate over each analysis, and update it
|
// Iterate over each analysis, and update it
|
||||||
if ( getAnalysisSet(analysisSetName) != null ) {
|
if ( getAnalysisSet(analysisSetName) != null ) {
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,9 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Broad Institute
|
* The Broad Institute
|
||||||
|
|
@ -22,7 +23,7 @@ public class VariantMatcher extends BasicVariantAnalysis implements GenotypeAnal
|
||||||
dbName = name;
|
dbName = name;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||||
String r = null;
|
String r = null;
|
||||||
AllelicVariant db = (AllelicVariant)tracker.lookup(dbName, null);
|
AllelicVariant db = (AllelicVariant)tracker.lookup(dbName, null);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,143 @@
|
||||||
|
package org.broadinstitute.sting.utils.genotype;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @author aaron
|
||||||
|
*
|
||||||
|
* Class BasicGenotype
|
||||||
|
*
|
||||||
|
* represents a basic genotype object
|
||||||
|
*/
|
||||||
|
public class BasicGenotype implements Genotype {
|
||||||
|
// the genotype string
|
||||||
|
private String mGenotype;
|
||||||
|
private GenomeLoc mLocation;
|
||||||
|
private char mRef;
|
||||||
|
private double mNetLog10PError;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create a basic genotype
|
||||||
|
* @param genotype
|
||||||
|
*/
|
||||||
|
public BasicGenotype(GenomeLoc location, String genotype, char ref, double netLog10PError) {
|
||||||
|
mNetLog10PError = netLog10PError;
|
||||||
|
mGenotype = genotype;
|
||||||
|
mLocation = location;
|
||||||
|
mRef = ref;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the -1 * (log 10 of the error value)
|
||||||
|
*
|
||||||
|
* @return the log based error estimate
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public double getNegLog10PError() {
|
||||||
|
return mNetLog10PError;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the bases that represent this
|
||||||
|
*
|
||||||
|
* @return the bases, as a string
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String getBases() {
|
||||||
|
return mGenotype;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the ploidy
|
||||||
|
*
|
||||||
|
* @return the ploidy value
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int getPloidy() {
|
||||||
|
return mGenotype.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if both observed alleles are the same (regardless of whether they are ref or alt)
|
||||||
|
*
|
||||||
|
* @return true if we're homozygous, false otherwise
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean isHom() {
|
||||||
|
if (mGenotype.length() < 1)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
char base = mGenotype.charAt(0);
|
||||||
|
for (char cur : mGenotype.toCharArray()) {
|
||||||
|
if (base != cur) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if observed alleles differ (regardless of whether they are ref or alt)
|
||||||
|
*
|
||||||
|
* @return true if we're het, false otherwise
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean isHet() {
|
||||||
|
return !isHom();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the genotype's location
|
||||||
|
*
|
||||||
|
* @return a GenomeLoc representing the location
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public GenomeLoc getLocation() {
|
||||||
|
return mLocation;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* returns true if the genotype is a point genotype, false if it's a indel / deletion
|
||||||
|
*
|
||||||
|
* @return true is a SNP
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean isPointGenotype() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* given the reference, are we a variant? (non-ref)
|
||||||
|
*
|
||||||
|
* @param ref the reference base or bases
|
||||||
|
*
|
||||||
|
* @return true if we're a variant
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean isVariant(char ref) {
|
||||||
|
return !(mGenotype.charAt(0) == ref && isHom());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the reference base.
|
||||||
|
*
|
||||||
|
* @return a character, representing the reference base
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public char getReference() {
|
||||||
|
return mRef;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* return this genotype as a variant
|
||||||
|
*
|
||||||
|
* @return the variant
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Variation toVariation() {
|
||||||
|
if (!isVariant(this.mRef)) throw new IllegalStateException("this genotype is not a variant");
|
||||||
|
return new BasicVariation(this.getBases(),mRef,this.getBases().length(),mLocation,mNetLog10PError);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -8,7 +8,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
* User: aaronmckenna
|
* User: aaronmckenna
|
||||||
* Date: Sep 9, 2009
|
* Date: Sep 9, 2009
|
||||||
* Time: 9:32:34 PM
|
* Time: 9:32:34 PM
|
||||||
*
|
* <p/>
|
||||||
* a basic implementation of variant
|
* a basic implementation of variant
|
||||||
*/
|
*/
|
||||||
public class BasicVariation implements Variation {
|
public class BasicVariation implements Variation {
|
||||||
|
|
@ -18,12 +18,14 @@ public class BasicVariation implements Variation {
|
||||||
protected final int mLength;
|
protected final int mLength;
|
||||||
protected final GenomeLoc mLocation;
|
protected final GenomeLoc mLocation;
|
||||||
protected final double mConfidence;
|
protected final double mConfidence;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* the constructor
|
* the constructor
|
||||||
* @param bases the bases that this variant represents
|
*
|
||||||
|
* @param bases the bases that this variant represents
|
||||||
* @param reference the reference base
|
* @param reference the reference base
|
||||||
* @param length are we a single base variant, or a indel/deletion? length is negitive for an indel,
|
* @param length are we a single base variant, or a indel/deletion? length is negitive for an indel,
|
||||||
* positive for a indel, and 0 for a substitution
|
* positive for a indel, and 0 for a substitution
|
||||||
*/
|
*/
|
||||||
public BasicVariation(String bases, char reference, int length, GenomeLoc location, double confidence) {
|
public BasicVariation(String bases, char reference, int length, GenomeLoc location, double confidence) {
|
||||||
mBases = bases;
|
mBases = bases;
|
||||||
|
|
@ -40,14 +42,14 @@ public class BasicVariation implements Variation {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VARIANT_TYPE getType() {
|
public VARIANT_TYPE getType() {
|
||||||
if (mLength >0) return VARIANT_TYPE.INDEL;
|
if (mLength > 0) return VARIANT_TYPE.INDEL;
|
||||||
if (mLength <0) return VARIANT_TYPE.DELETION;
|
if (mLength < 0) return VARIANT_TYPE.DELETION;
|
||||||
return (isSNP()) ? VARIANT_TYPE.SNP : VARIANT_TYPE.REFERENCE;
|
return (isSNP()) ? VARIANT_TYPE.SNP : VARIANT_TYPE.REFERENCE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isSNP() {
|
public boolean isSNP() {
|
||||||
if (mLength == 0 && Utils.dupString(mRef,2).equals(mBases)) return true;
|
if (mLength == 0 && Utils.dupString(mRef, 2).equals(mBases)) return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -62,7 +64,7 @@ public class BasicVariation implements Variation {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getBases() {
|
public String getAlternateBases() {
|
||||||
return mBases;
|
return mBases;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -72,19 +74,8 @@ public class BasicVariation implements Variation {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getReference() {
|
public char getReference() {
|
||||||
return String.valueOf(mRef);
|
return (mRef);
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isHet() {
|
|
||||||
if (mLength == 0 && mBases.charAt(0) != mBases.charAt(1)) return true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isHom() {
|
|
||||||
return !isHet();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -98,12 +89,29 @@ public class BasicVariation implements Variation {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* are we an insertion or a deletion? yes, then return true. No? Well, false it is.
|
||||||
|
*
|
||||||
|
* @return true if we're an insertion or deletion
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public char getAlternateBase() {
|
public boolean isIndel() {
|
||||||
if (mLength != 0) {
|
return (isDeletion() || isInsertion());
|
||||||
throw new UnsupportedOperationException("Unable to get alternate base for indel / deletion");
|
|
||||||
}
|
|
||||||
if (mBases.charAt(0) == mRef) return mBases.charAt(1);
|
|
||||||
else return mBases.charAt(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
|
||||||
|
* of
|
||||||
|
*
|
||||||
|
* @return a char, representing the alternate base
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public char getAlternativeBaseForSNP() {
|
||||||
|
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||||
|
if (getAlternateBases().charAt(0) == this.getReference())
|
||||||
|
return getAlternateBases().charAt(1);
|
||||||
|
return getAlternateBases().charAt(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,18 @@
|
||||||
|
package org.broadinstitute.sting.utils.genotype;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author aaron
|
||||||
|
* <p/>
|
||||||
|
* Interface VariantBackedByGenotype
|
||||||
|
* <p/>
|
||||||
|
* this variant is backed by genotypic information
|
||||||
|
*/
|
||||||
|
public interface VariantBackedByGenotype {
|
||||||
|
/**
|
||||||
|
* get the likelihoods
|
||||||
|
* @return an array in lexigraphical order of the likelihoods
|
||||||
|
*/
|
||||||
|
public Genotype getGenotype(DiploidGenotype x);
|
||||||
|
}
|
||||||
|
|
@ -46,13 +46,6 @@ public interface Variation {
|
||||||
*/
|
*/
|
||||||
public boolean isDeletion();
|
public boolean isDeletion();
|
||||||
|
|
||||||
/**
|
|
||||||
* get the base representation of this Variant
|
|
||||||
*
|
|
||||||
* @return a string, of ploidy
|
|
||||||
*/
|
|
||||||
public String getBases();
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get the location that this Variant represents
|
* get the location that this Variant represents
|
||||||
*
|
*
|
||||||
|
|
@ -65,13 +58,7 @@ public interface Variation {
|
||||||
*
|
*
|
||||||
* @return the reference base or bases, as a string
|
* @return the reference base or bases, as a string
|
||||||
*/
|
*/
|
||||||
public String getReference();
|
public char getReference();
|
||||||
|
|
||||||
/** is our base representation heterozygous */
|
|
||||||
public boolean isHet();
|
|
||||||
|
|
||||||
/** is our base representation homozygous */
|
|
||||||
public boolean isHom();
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get the -1 * (log 10 of the error value)
|
* get the -1 * (log 10 of the error value)
|
||||||
|
|
@ -87,9 +74,22 @@ public interface Variation {
|
||||||
public boolean isReference();
|
public boolean isReference();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* gets the alternate base. If this is homref, throws an UnsupportedOperationException
|
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public char getAlternateBase();
|
public String getAlternateBases();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* are we an insertion or a deletion? yes, then return true. No? Well, false it is.
|
||||||
|
* @return true if we're an insertion or deletion
|
||||||
|
*/
|
||||||
|
public boolean isIndel();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
|
||||||
|
* of
|
||||||
|
* @return a char, representing the alternate base
|
||||||
|
*/
|
||||||
|
public char getAlternativeBaseForSNP();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -8,14 +8,12 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.fail;
|
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -60,38 +58,7 @@ public class RodGLFTest extends BaseTest {
|
||||||
assertEquals(finalRecordCount, counter);
|
assertEquals(finalRecordCount, counter);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** make sure we're returning true to biallelic */
|
|
||||||
@Test
|
|
||||||
public void testIsBiallelic() {
|
|
||||||
RodGLF glf = iter.next();
|
|
||||||
Assert.assertFalse(iter.isBiallelic());
|
|
||||||
}
|
|
||||||
|
|
||||||
// best vs next-best
|
|
||||||
@Test
|
|
||||||
public void testGetConsensusConfidence() {
|
|
||||||
RodGLF glf = iter.next();
|
|
||||||
assertEquals(120.0, iter.getConsensusConfidence(), 0.005);
|
|
||||||
glf = iter.next();
|
|
||||||
assertEquals(120.0, iter.getConsensusConfidence(), 0.005);
|
|
||||||
}
|
|
||||||
|
|
||||||
// best vs. ref
|
|
||||||
@Test
|
|
||||||
public void testGetVariationConfidence() {
|
|
||||||
RodGLF glf = iter.next();
|
|
||||||
assertEquals(0.0, iter.getVariationConfidence(), 0.005);
|
|
||||||
glf = iter.next();
|
|
||||||
assertEquals(250.0, iter.getVariationConfidence(), 0.005);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testGetGenotype() {
|
|
||||||
RodGLF glf = iter.next();
|
|
||||||
List<String> str = iter.getGenotype();
|
|
||||||
Assert.assertTrue(str.get(0).equals("AA"));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testIsSNP() {
|
public void testIsSNP() {
|
||||||
|
|
@ -112,7 +79,7 @@ public class RodGLFTest extends BaseTest {
|
||||||
glf = iter.next();
|
glf = iter.next();
|
||||||
Assert.assertFalse(iter.isReference());
|
Assert.assertFalse(iter.isReference());
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
@Test(expected = IllegalStateException.class)
|
@Test(expected = IllegalStateException.class)
|
||||||
public void testGetAltSnpFWDIllegalException() {
|
public void testGetAltSnpFWDIllegalException() {
|
||||||
RodGLF glf = iter.next();
|
RodGLF glf = iter.next();
|
||||||
|
|
@ -159,7 +126,7 @@ public class RodGLFTest extends BaseTest {
|
||||||
/**
|
/**
|
||||||
* move to the second and third bases, and check that the
|
* move to the second and third bases, and check that the
|
||||||
* alternate bases are correct.
|
* alternate bases are correct.
|
||||||
*/
|
*
|
||||||
@Test
|
@Test
|
||||||
public void testGetAltBasesFWD() {
|
public void testGetAltBasesFWD() {
|
||||||
RodGLF glf = iter.next();
|
RodGLF glf = iter.next();
|
||||||
|
|
@ -169,7 +136,7 @@ public class RodGLFTest extends BaseTest {
|
||||||
Assert.assertTrue("CT".equals(iter.getAltBasesFWD()));
|
Assert.assertTrue("CT".equals(iter.getAltBasesFWD()));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testRodLocations() {
|
public void testRodLocations() {
|
||||||
GenomeLoc loc = null;
|
GenomeLoc loc = null;
|
||||||
|
|
@ -177,7 +144,7 @@ public class RodGLFTest extends BaseTest {
|
||||||
RodGLF glf = iter.next();
|
RodGLF glf = iter.next();
|
||||||
if (loc != null) {
|
if (loc != null) {
|
||||||
if (iter.getLocation().isBefore(loc)) {
|
if (iter.getLocation().isBefore(loc)) {
|
||||||
fail("locations in the GLF came out of order loc = " + loc.toString() + " new loc = " + iter.getLocation().toString());
|
Assert.fail("locations in the GLF came out of order loc = " + loc.toString() + " new loc = " + iter.getLocation().toString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
loc = iter.getLocation();
|
loc = iter.getLocation();
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,62 @@
|
||||||
|
package org.broadinstitute.sting.utils.genotype;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @author aaron
|
||||||
|
*
|
||||||
|
* Class BasicGenotypeTest
|
||||||
|
*
|
||||||
|
* tests the basic genotype class
|
||||||
|
*/
|
||||||
|
public class BasicGenotypeTest extends BaseTest {
|
||||||
|
private static IndexedFastaSequenceFile seq;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeTests() {
|
||||||
|
try {
|
||||||
|
seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
throw new StingException("unable to load the sequence dictionary");
|
||||||
|
}
|
||||||
|
GenomeLocParser.setupRefContigOrdering(seq);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBasicGenotypeIsHom() {
|
||||||
|
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0);
|
||||||
|
Assert.assertTrue(gt.isHom());
|
||||||
|
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0);
|
||||||
|
Assert.assertTrue(!gt2.isHom());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBasicGenotypeIsHet() {
|
||||||
|
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0);
|
||||||
|
Assert.assertTrue(!gt.isHet());
|
||||||
|
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0);
|
||||||
|
Assert.assertTrue(gt2.isHet());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBasicGenotypeIsVariant() {
|
||||||
|
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0);
|
||||||
|
Assert.assertTrue(!gt.isVariant('A'));
|
||||||
|
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0);
|
||||||
|
Assert.assertTrue(gt2.isVariant('A'));
|
||||||
|
BasicGenotype gt3 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"TT",'A',0);
|
||||||
|
Assert.assertTrue(gt3.isVariant('A'));
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue