Changes to switch Variant Eval over to the new Variation system.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1611 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0feee9cdfd
commit
e03fccb223
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFReader;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall;
|
||||
|
|
@ -13,9 +14,7 @@ import org.broadinstitute.sting.utils.genotype.glf.VariableLengthCall;
|
|||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -25,7 +24,7 @@ import java.util.List;
|
|||
* <p/>
|
||||
* the rod class for GLF data.
|
||||
*/
|
||||
public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<RodGLF> {
|
||||
public class RodGLF implements ReferenceOrderedDatum, Variation, Iterator<RodGLF> {
|
||||
static int count = 0;
|
||||
public GLFReader mReader;
|
||||
private final String mName;
|
||||
|
|
@ -38,6 +37,7 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
|||
|
||||
/**
|
||||
* get the name
|
||||
*
|
||||
* @return the name
|
||||
*/
|
||||
@Override
|
||||
|
|
@ -64,27 +64,25 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
|||
return toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return a string representation of the ROD GLF object
|
||||
*/
|
||||
/** @return a string representation of the ROD GLF object */
|
||||
public String toString() {
|
||||
return String.format("%s\t%d\t%s\t%d\t%d\t%4.4f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f",
|
||||
mLoc.getContig(),
|
||||
mLoc.getStart(),
|
||||
mRecord.getRefBase(),
|
||||
mRecord.getReadDepth(),
|
||||
mRecord.getRmsMapQ(),
|
||||
getBestGenotypeValue(1),
|
||||
((SinglePointCall)mRecord).getLikelihoods()[0],
|
||||
((SinglePointCall)mRecord).getLikelihoods()[1],
|
||||
((SinglePointCall)mRecord).getLikelihoods()[2],
|
||||
((SinglePointCall)mRecord).getLikelihoods()[3],
|
||||
((SinglePointCall)mRecord).getLikelihoods()[4],
|
||||
((SinglePointCall)mRecord).getLikelihoods()[5],
|
||||
((SinglePointCall)mRecord).getLikelihoods()[6],
|
||||
((SinglePointCall)mRecord).getLikelihoods()[7],
|
||||
((SinglePointCall)mRecord).getLikelihoods()[8],
|
||||
((SinglePointCall)mRecord).getLikelihoods()[9]
|
||||
mLoc.getContig(),
|
||||
mLoc.getStart(),
|
||||
mRecord.getRefBase(),
|
||||
mRecord.getReadDepth(),
|
||||
mRecord.getRmsMapQ(),
|
||||
getBestGenotypeValue(1),
|
||||
((SinglePointCall) mRecord).getLikelihoods()[0],
|
||||
((SinglePointCall) mRecord).getLikelihoods()[1],
|
||||
((SinglePointCall) mRecord).getLikelihoods()[2],
|
||||
((SinglePointCall) mRecord).getLikelihoods()[3],
|
||||
((SinglePointCall) mRecord).getLikelihoods()[4],
|
||||
((SinglePointCall) mRecord).getLikelihoods()[5],
|
||||
((SinglePointCall) mRecord).getLikelihoods()[6],
|
||||
((SinglePointCall) mRecord).getLikelihoods()[7],
|
||||
((SinglePointCall) mRecord).getLikelihoods()[8],
|
||||
((SinglePointCall) mRecord).getLikelihoods()[9]
|
||||
|
||||
|
||||
);
|
||||
|
|
@ -107,6 +105,7 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
|||
|
||||
/**
|
||||
* return a genome loc representing the current location
|
||||
*
|
||||
* @return the geonome loc
|
||||
*/
|
||||
@Override
|
||||
|
|
@ -115,62 +114,15 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns bases in the reference allele as a String. String can be empty (as in insertion into
|
||||
* the reference), can contain a single character (as in SNP or one-base deletion), or multiple characters
|
||||
* (for longer indels).
|
||||
* get the reference base(s) at this position
|
||||
*
|
||||
* @return reference allele, forward strand
|
||||
* @return the reference base or bases, as a string
|
||||
*/
|
||||
@Override
|
||||
public String getRefBasesFWD() {
|
||||
if (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) return "";
|
||||
return String.valueOf(mRecord.getRefBase());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns reference (major) allele base for a SNP variant as a character; should throw IllegalStateException
|
||||
* if variant is not a SNP.
|
||||
*
|
||||
* This doesn't make much sense to me, what if the best genotype is hom non-ref?
|
||||
*
|
||||
* @return reference base on the forward strand
|
||||
*/
|
||||
@Override
|
||||
public char getRefSnpFWD() throws IllegalStateException {
|
||||
if (!isSNP()) throw new IllegalStateException("Current GLF Record is not a SNP");
|
||||
public char getReference() {
|
||||
return mRecord.getRefBase().toChar();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns bases in the alternative allele as a String. String can be empty (as in deletion from
|
||||
* the reference), can contain a single character (as in SNP or one-base insertion), or multiple characters
|
||||
* (for longer indels).
|
||||
*
|
||||
* @return alternative allele, forward strand
|
||||
*/
|
||||
@Override
|
||||
public String getAltBasesFWD() {
|
||||
return getBestGenotype(2).toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns alternative (minor) allele base for a SNP variant as a character; should throw IllegalStateException
|
||||
* if variant is not a SNP.
|
||||
*
|
||||
* @return alternative allele base on the forward starnd
|
||||
*/
|
||||
@Override
|
||||
public char getAltSnpFWD() throws IllegalStateException {
|
||||
if (!isSNP()) {
|
||||
throw new IllegalStateException("Not a SNP");
|
||||
}
|
||||
String str = getBestGenotype(1).toString();
|
||||
if (String.valueOf(str.charAt(0)).equals(mRecord.getRefBase().toString())) {
|
||||
return str.charAt(1);
|
||||
}
|
||||
return str.charAt(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if all observed alleles are reference alleles. All is<Variant> methods (where Variant=SNP,Insertion, etc) should
|
||||
* return false at such site to ensure consistency. This method is included for use with genotyping calls (isGenotype()==true), it makes
|
||||
|
|
@ -183,6 +135,31 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
|||
return (!isSNP());
|
||||
}
|
||||
|
||||
/**
|
||||
* are we an insertion or a deletion? yes, then return true. No? Well, false it is.
|
||||
*
|
||||
* @return true if we're an insertion or deletion
|
||||
*/
|
||||
@Override
|
||||
public boolean isIndel() {
|
||||
return (isDeletion() || isInsertion());
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
|
||||
* of
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
@Override
|
||||
public char getAlternativeBaseForSNP() {
|
||||
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||
if (getAlternateBases().charAt(0) == this.getReference())
|
||||
return getAlternateBases().charAt(1);
|
||||
return getAlternateBases().charAt(0);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this variant a SNP?
|
||||
*
|
||||
|
|
@ -196,7 +173,9 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
|||
|
||||
/**
|
||||
* return a string representing the reference
|
||||
*
|
||||
* @param ref the reference character
|
||||
*
|
||||
* @return a string for the homozygous ref in a diploid
|
||||
*/
|
||||
private static String refString(char ref) {
|
||||
|
|
@ -213,7 +192,7 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
|||
*/
|
||||
public LikelihoodObject.GENOTYPE getBestGenotype(int nthBest) {
|
||||
Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods());
|
||||
return LikelihoodObject.GENOTYPE.values()[sorted[nthBest-1]];
|
||||
return LikelihoodObject.GENOTYPE.values()[sorted[nthBest - 1]];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -226,7 +205,7 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
|||
*/
|
||||
public double getBestGenotypeValue(int nthBest) {
|
||||
Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods());
|
||||
return (((SinglePointCall) mRecord).getLikelihoods())[sorted[nthBest-1]];
|
||||
return (((SinglePointCall) mRecord).getLikelihoods())[sorted[nthBest - 1]];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -254,16 +233,13 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
|||
}
|
||||
|
||||
/**
|
||||
* Is this variant an insertion or a deletion? The contract requires
|
||||
* this to be true if either isInsertion() or isDeletion() returns true. However,
|
||||
* this method is currently allowed to return true even if neither of isInsertion()
|
||||
* and isDeletion() does.
|
||||
* get the base representation of this Variant
|
||||
*
|
||||
* @return
|
||||
* @return a string, of ploidy
|
||||
*/
|
||||
@Override
|
||||
public boolean isIndel() {
|
||||
return (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE);
|
||||
public String getAlternateBases() {
|
||||
return this.getBestGenotype(0).toString();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -272,31 +248,19 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
|||
* @return
|
||||
*/
|
||||
@Override
|
||||
public double getMAF() {
|
||||
public double getNonRefAlleleFrequency() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns heterozygosity, a more accessible general feature of a variant.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
/** @return the VARIANT_TYPE of the current variant */
|
||||
@Override
|
||||
public double getHeterozygosity() {
|
||||
return 0;
|
||||
public VARIANT_TYPE getType() {
|
||||
if (this.isSNP()) return VARIANT_TYPE.SNP;
|
||||
else if (this.isInsertion()) return VARIANT_TYPE.INDEL;
|
||||
else if (this.isDeletion()) return VARIANT_TYPE.DELETION;
|
||||
else return VARIANT_TYPE.REFERENCE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this variant an actual genotype (such as individual call from sequencing, HapMap chip etc), or
|
||||
* population allelic variant (call from pooled sequencing, dbSNP site etc). Only if variant is a genotype, there
|
||||
* is a meaningful question of, e.g., whether it is a het, or homogeneous non-ref.
|
||||
*
|
||||
* @return true if this variant is an actual genotype.
|
||||
*/
|
||||
@Override
|
||||
public boolean isGenotype() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns phred-mapped confidence in variation event (e.g. MAQ's SNP confidence, or AlleleCaller's best vs. ref).
|
||||
|
|
@ -304,66 +268,20 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
|||
* @return
|
||||
*/
|
||||
@Override
|
||||
public double getVariationConfidence() {
|
||||
public double getNegLog10PError() {
|
||||
String ref = new String() + mRecord.getRefBase() + mRecord.getRefBase();
|
||||
int index = 0;
|
||||
for (LikelihoodObject.GENOTYPE g: LikelihoodObject.GENOTYPE.values()) {
|
||||
for (LikelihoodObject.GENOTYPE g : LikelihoodObject.GENOTYPE.values()) {
|
||||
if (g.toString().equals(ref)) break;
|
||||
index++;
|
||||
}
|
||||
return Math.abs(getBestGenotypeValue(1) - ((SinglePointCall)mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
|
||||
return Math.abs(getBestGenotypeValue(1) - ((SinglePointCall) mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns phred-mapped confidence in called genotype (e.g. MAQ's consensus confidence, or AlleleCaller's
|
||||
* best vs next-best.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public double getConsensusConfidence() {
|
||||
return Math.abs(getBestGenotypeValue(1) - getBestGenotypeValue(2)) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
|
||||
public int length() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns actual observed genotype. Allowed to return more than two alleles (@see #getPloidy()). If this variant
|
||||
* is not a genotype, should throw an IllegalStateException.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<String> getGenotype() throws IllegalStateException {
|
||||
List<String> ret = new ArrayList<String>();
|
||||
ret.add(this.getBestGenotype(1).toString());
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return actual number of observed alleles (chromosomes) in the genotype. If this variant is not a genotype,
|
||||
* should throw IllegalStateException.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public int getPloidy() throws IllegalStateException {
|
||||
return 2; // we're so haploid it hurts
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the site has at most two known or observed alleles (ref and non-ref), or false if there are > 2 allelic variants known or observed. When
|
||||
* the implementing class is a genotype, alleles should be always counted including the reference allele whether it was observed in the particular
|
||||
* individual or not: i.e. if the reference is 'C', then both 'CA' and 'AA' genotypes must be reported as bi-allelic, while 'AT' is <i>not</i> bi-allelic (since there are
|
||||
* two allelic variants, 'A' and 'T' <i>in addition</i> to the (known) reference variant 'C').
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public boolean isBiallelic() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public int length() { return 1; }
|
||||
|
||||
@Override
|
||||
public int compareTo(ReferenceOrderedDatum that) {
|
||||
return this.mLoc.compareTo(that.getLocation());
|
||||
|
|
@ -371,8 +289,10 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
|||
|
||||
/**
|
||||
* the parse line, which is not used by the GLF rod
|
||||
*
|
||||
* @param header the header to pass in
|
||||
* @param parts the string object
|
||||
* @param parts the string object
|
||||
*
|
||||
* @return false, alwayss
|
||||
* @throws java.io.IOException
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -25,14 +25,19 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVariant {
|
||||
public class RodGeliText extends BasicReferenceOrderedDatum implements Variation, VariantBackedByGenotype, AllelicVariant {
|
||||
public enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
||||
public GenomeLoc loc;
|
||||
public char refBase = 'N';
|
||||
|
|
@ -96,6 +101,27 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVa
|
|||
|
||||
public GenomeLoc getLocation() { return loc; }
|
||||
|
||||
/**
|
||||
* get the reference base(s) at this position
|
||||
*
|
||||
* @return the reference base or bases, as a string
|
||||
*/
|
||||
@Override
|
||||
public char getReference() {
|
||||
return this.refBase;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get the -1 * (log 10 of the error value)
|
||||
*
|
||||
* @return the log based error estimate
|
||||
*/
|
||||
@Override
|
||||
public double getNegLog10PError() {
|
||||
return Math.abs(lodBtr);
|
||||
}
|
||||
|
||||
public String getRefBasesFWD() {
|
||||
return String.format("%c", getRefSnpFWD());
|
||||
}
|
||||
|
|
@ -118,8 +144,24 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVa
|
|||
return refBase == bestGenotype.charAt(0) && refBase == bestGenotype.charAt(1);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the frequency of this variant
|
||||
*
|
||||
* @return VariantFrequency with the stored frequency
|
||||
*/
|
||||
@Override
|
||||
public double getNonRefAlleleFrequency() {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
/** @return the VARIANT_TYPE of the current variant */
|
||||
@Override
|
||||
public VARIANT_TYPE getType() {
|
||||
return VARIANT_TYPE.SNP;
|
||||
}
|
||||
|
||||
public boolean isSNP() {
|
||||
return !isReference();
|
||||
return (!bestGenotype.equals(Utils.dupString(this.getReference(),2)));
|
||||
}
|
||||
|
||||
public boolean isInsertion() {
|
||||
|
|
@ -130,10 +172,35 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVa
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the base representation of this Variant
|
||||
*
|
||||
* @return a string, of ploidy
|
||||
*/
|
||||
@Override
|
||||
public String getAlternateBases() {
|
||||
return this.bestGenotype;
|
||||
}
|
||||
|
||||
public boolean isIndel() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
|
||||
* of
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
@Override
|
||||
public char getAlternativeBaseForSNP() {
|
||||
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||
if(this.bestGenotype.toString().charAt(0) == this.getReference())
|
||||
return this.bestGenotype.toString().charAt(1);
|
||||
return this.bestGenotype.toString().charAt(0);
|
||||
|
||||
}
|
||||
|
||||
public double getMAF() {
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -253,4 +320,15 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVa
|
|||
public int getPloidy() throws IllegalStateException { return 2; }
|
||||
public boolean isBiallelic() { return true; }
|
||||
*/
|
||||
|
||||
/**
|
||||
* get the likelihoods
|
||||
*
|
||||
* @return an array in lexigraphical order of the likelihoods
|
||||
*/
|
||||
@Override
|
||||
public org.broadinstitute.sting.utils.genotype.Genotype getGenotype(DiploidGenotype x) {
|
||||
if (x.toString().equals(this.getAltBasesFWD())) throw new IllegalStateException("Unable to retrieve genotype");
|
||||
return new BasicGenotype(this.getLocation(),this.bestGenotype,this.getRefSnpFWD(),this.getConsensusConfidence());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,9 @@ package org.broadinstitute.sting.gatk.refdata;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import java.util.*;
|
||||
import java.util.regex.MatchResult;
|
||||
import java.util.regex.Pattern;
|
||||
|
|
@ -16,7 +18,7 @@ import java.util.regex.Pattern;
|
|||
* Time: 10:47:14 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements AllelicVariant {
|
||||
public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements AllelicVariant, Variation, VariantBackedByGenotype {
|
||||
private String contig, source, feature, strand, frame;
|
||||
private long start, stop;
|
||||
private double score;
|
||||
|
|
@ -74,6 +76,26 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
|
|||
return GenomeLocParser.parseGenomeLoc(contig, start, stop);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference base(s) at this position
|
||||
*
|
||||
* @return the reference base or bases, as a string
|
||||
*/
|
||||
@Override
|
||||
public char getReference() {
|
||||
return 'N';
|
||||
}
|
||||
|
||||
/**
|
||||
* get the -1 * (log 10 of the error value)
|
||||
*
|
||||
* @return the log based error estimate
|
||||
*/
|
||||
@Override
|
||||
public double getNegLog10PError() {
|
||||
return 4; // 1/10000 error
|
||||
}
|
||||
|
||||
public String getAttribute(final String key) {
|
||||
return attributes.get(key);
|
||||
}
|
||||
|
|
@ -158,10 +180,49 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
|
|||
public String getAltBasesFWD() { return null; }
|
||||
public char getAltSnpFWD() throws IllegalStateException { return 0; }
|
||||
public boolean isReference() { return ! isSNP(); }
|
||||
|
||||
/**
|
||||
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public String getAlternateBases() {
|
||||
return this.feature;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the frequency of this variant
|
||||
*
|
||||
* @return VariantFrequency with the stored frequency
|
||||
*/
|
||||
@Override
|
||||
public double getNonRefAlleleFrequency() {
|
||||
return this.getMAF();
|
||||
}
|
||||
|
||||
/** @return the VARIANT_TYPE of the current variant */
|
||||
@Override
|
||||
public VARIANT_TYPE getType() {
|
||||
return VARIANT_TYPE.SNP;
|
||||
}
|
||||
|
||||
public boolean isSNP() { return false; }
|
||||
public boolean isInsertion() { return false; }
|
||||
public boolean isDeletion() { return false; }
|
||||
public boolean isIndel() { return false; }
|
||||
|
||||
/**
|
||||
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
|
||||
* of
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
@Override
|
||||
public char getAlternativeBaseForSNP() {
|
||||
return this.getAltSnpFWD();
|
||||
}
|
||||
|
||||
public double getMAF() { return 0; }
|
||||
public double getHeterozygosity() { return 0; }
|
||||
public boolean isGenotype() { return true; }
|
||||
|
|
@ -175,4 +236,15 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
|
|||
public int getPloidy() throws IllegalStateException { return 2; }
|
||||
public boolean isBiallelic() { return true; }
|
||||
public int length() { return 1; }
|
||||
|
||||
/**
|
||||
* get the likelihoods
|
||||
*
|
||||
* @return an array in lexigraphical order of the likelihoods
|
||||
*/
|
||||
@Override
|
||||
public Genotype getGenotype(DiploidGenotype x) {
|
||||
if (!x.toString().equals(this.getAltBasesFWD())) throw new IllegalStateException("Unable to retrieve genotype");
|
||||
return new BasicGenotype(this.getLocation(),this.feature,this.getRefSnpFWD(),this.getConsensusConfidence());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,12 +7,15 @@ import java.util.List;
|
|||
* chr1:1104840 A N 0.000000 -85.341265 -85.341265 0.000000 0.000000 324.000000 162 0 0
|
||||
* chr1:1104841 C N 0.000000 -69.937928 -69.937928 0.000000 0.000000 324.000000 162 0 0
|
||||
* chr1:1104842 A N 0.000000 -84.816002 -84.816002 0.000000 0.000000 324.000000 162 0 0
|
||||
*
|
||||
*/
|
||||
public interface SNPCallFromGenotypes extends AllelicVariant {
|
||||
public int nIndividuals();
|
||||
|
||||
public int nHomRefGenotypes();
|
||||
|
||||
public int nHetGenotypes();
|
||||
|
||||
public int nHomVarGenotypes();
|
||||
|
||||
public List<Genotype> getGenotypes();
|
||||
}
|
||||
|
|
@ -1,10 +1,12 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import net.sf.picard.util.SequenceUtil;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Example format:
|
||||
|
|
@ -16,7 +18,7 @@ import org.broadinstitute.sting.utils.*;
|
|||
* Time: 10:47:14 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVariant {
|
||||
public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, VariantBackedByGenotype, AllelicVariant {
|
||||
public GenomeLoc loc; // genome location of SNP
|
||||
// Reference sequence chromosome or scaffold
|
||||
// Start and stop positions in chrom
|
||||
|
|
@ -60,6 +62,26 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
|
|||
// ----------------------------------------------------------------------
|
||||
public GenomeLoc getLocation() { return loc; }
|
||||
|
||||
/**
|
||||
* get the reference base(s) at this position
|
||||
*
|
||||
* @return the reference base or bases, as a string
|
||||
*/
|
||||
@Override
|
||||
public char getReference() {
|
||||
return getRefSnpFWD();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the -1 * (log 10 of the error value)
|
||||
*
|
||||
* @return the log based error estimate
|
||||
*/
|
||||
@Override
|
||||
public double getNegLog10PError() {
|
||||
return 4; // -log10(0.0001)
|
||||
}
|
||||
|
||||
public boolean onFwdStrand() {
|
||||
return strand.equals("+");
|
||||
}
|
||||
|
|
@ -108,10 +130,24 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
|
|||
}
|
||||
|
||||
public String getAllelesFWDString() {
|
||||
return Utils.join("/", getAllelesFWD());
|
||||
return Utils.join("", getAllelesFWD());
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
/**
|
||||
* get the frequency of this variant
|
||||
*
|
||||
* @return VariantFrequency with the stored frequency
|
||||
*/
|
||||
@Override
|
||||
public double getNonRefAlleleFrequency() {
|
||||
return 0; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/** @return the VARIANT_TYPE of the current variant */
|
||||
@Override
|
||||
public VARIANT_TYPE getType() {
|
||||
return VARIANT_TYPE.SNP;
|
||||
}// ----------------------------------------------------------------------
|
||||
//
|
||||
// What kind of variant are we?
|
||||
//
|
||||
|
|
@ -119,7 +155,34 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
|
|||
public boolean isSNP() { return varType.contains("single"); }
|
||||
public boolean isInsertion() { return varType.contains("insertion"); }
|
||||
public boolean isDeletion() { return varType.contains("deletion"); }
|
||||
|
||||
/**
|
||||
* get the base representation of this Variant
|
||||
*
|
||||
* @return a string, of ploidy
|
||||
*/
|
||||
@Override
|
||||
public String getAlternateBases() {
|
||||
return getAllelesFWDString();
|
||||
}
|
||||
|
||||
public boolean isIndel() { return isInsertion() || isDeletion() || varType.contains("in-del"); }
|
||||
|
||||
/**
|
||||
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
|
||||
* of
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
@Override
|
||||
public char getAlternativeBaseForSNP() {
|
||||
return getAltSnpFWD(); /*
|
||||
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||
if (getAlternateBases().charAt(0) == this.getReference())
|
||||
return getAlternateBases().charAt(1);
|
||||
return getAlternateBases().charAt(0); */
|
||||
}
|
||||
|
||||
public boolean isReference() { return false; } // snp locations are never "reference", there's always a variant
|
||||
|
||||
public boolean isHapmap() { return validationStatus.contains("by-hapmap"); }
|
||||
|
|
@ -147,7 +210,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
|
|||
if ( isIndel() ) s += ":Indel";
|
||||
if ( isHapmap() ) s += ":Hapmap";
|
||||
if ( is2Hit2Allele() ) s += ":2Hit";
|
||||
return s;
|
||||
return s;
|
||||
}
|
||||
|
||||
public String repl() {
|
||||
|
|
@ -213,7 +276,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
|
|||
}
|
||||
|
||||
public double getMAF() {
|
||||
// Fixme: update to actually get MAF
|
||||
// Fixme: update to actually get MAF
|
||||
//return avHet;
|
||||
return -1;
|
||||
}
|
||||
|
|
@ -244,4 +307,14 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
|
|||
|
||||
public int length() { return (int)(loc.getStop() - loc.getStart() + 1); }
|
||||
|
||||
/**
|
||||
* get the likelihoods
|
||||
*
|
||||
* @return an array in lexigraphical order of the likelihoods
|
||||
*/
|
||||
@Override
|
||||
public org.broadinstitute.sting.utils.genotype.Genotype getGenotype(DiploidGenotype x) {
|
||||
if (x.toString().equals(this.getAltBasesFWD())) throw new IllegalStateException("Unable to retrieve genotype");
|
||||
return new BasicGenotype(this.getLocation(),this.getAltBasesFWD(),this.getRefSnpFWD(),this.getConsensusConfidence());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.filters;
|
|||
|
||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodGeliText;
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
|
|
@ -50,7 +51,7 @@ public abstract class RatioFilter implements VariantExclusionCriterion {
|
|||
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
|
||||
|
||||
boolean highGenotypeConfidence = variant.getConsensusConfidence() > minGenotypeConfidenceToTest;
|
||||
boolean excludable = !excludeHetsOnly() || GenotypeUtils.isHet(variant);
|
||||
boolean excludable = !excludeHetsOnly() || GenotypeUtils.isHet((AllelicVariant)variant);
|
||||
exclude = excludable && highGenotypeConfidence && pointEstimateExclude(counts);
|
||||
//
|
||||
// for printing only
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
|
|
@ -65,5 +65,5 @@ public abstract class BasicVariantAnalysis implements VariantAnalysis {
|
|||
|
||||
}
|
||||
|
||||
public abstract String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context);
|
||||
public abstract String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context);
|
||||
}
|
||||
|
|
@ -1,27 +1,30 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
public class CallableBasesAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis {
|
||||
long all_bases = 0;
|
||||
long all_calls = 0;
|
||||
//final static double[] Qthresholds = { 10, 20, 30, 40, 50, 100, 200, 500, 1000 };
|
||||
final static double[] thresholds = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 50, 100 };
|
||||
final static double[] thresholds = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 50, 100};
|
||||
long[] discoverable_bases = new long[thresholds.length];
|
||||
long[] genotypable_bases = new long[thresholds.length];
|
||||
|
||||
|
|
@ -29,36 +32,54 @@ public class CallableBasesAnalysis extends BasicVariantAnalysis implements Genot
|
|||
super("callable_bases");
|
||||
}
|
||||
|
||||
public long nSites() { return all_bases; }
|
||||
public long nCalls() { return all_calls; }
|
||||
public long nDiscoverable(int index) { return discoverable_bases[index]; }
|
||||
public double percentDiscoverable(int index) { return (100.0*nDiscoverable(index)) / nSites(); }
|
||||
public long nGenotypable(int index) { return genotypable_bases[index]; }
|
||||
public double percentGenotypable(int index) { return (100.0*nGenotypable(index)) / nSites(); }
|
||||
public long nSites() {
|
||||
return all_bases;
|
||||
}
|
||||
|
||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
public long nCalls() {
|
||||
return all_calls;
|
||||
}
|
||||
|
||||
public long nDiscoverable(int index) {
|
||||
return discoverable_bases[index];
|
||||
}
|
||||
|
||||
public double percentDiscoverable(int index) {
|
||||
return (100.0 * nDiscoverable(index)) / nSites();
|
||||
}
|
||||
|
||||
public long nGenotypable(int index) {
|
||||
return genotypable_bases[index];
|
||||
}
|
||||
|
||||
public double percentGenotypable(int index) {
|
||||
return (100.0 * nGenotypable(index)) / nSites();
|
||||
}
|
||||
|
||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
all_bases++;
|
||||
|
||||
if ( eval == null ) // no data here!
|
||||
if (eval == null) // no data here!
|
||||
return null;
|
||||
|
||||
// we actually have a record here
|
||||
if ( ! eval.isGenotype() ) { // evaluation record isn't a genotype, die!
|
||||
if (!(eval instanceof VariantBackedByGenotype)) { // evaluation record isn't a genotype, die!
|
||||
throw new RuntimeException("Evaluation track isn't an Genotype!");
|
||||
}
|
||||
|
||||
all_calls++;
|
||||
// For every threshold, updated discoverable and callable
|
||||
for ( int i = 0; i < thresholds.length; i++ ) {
|
||||
for (int i = 0; i < thresholds.length; i++) {
|
||||
double threshold = thresholds[i];
|
||||
|
||||
DiploidGenotype g = DiploidGenotype.valueOf(Utils.dupString(ref, 2));
|
||||
Genotype genotype = ((VariantBackedByGenotype) eval).getGenotype(g);
|
||||
// update discoverable
|
||||
if ( eval.isSNP() && eval.getVariationConfidence() >= threshold )
|
||||
if (eval.isSNP() && eval.getNegLog10PError() >= threshold)
|
||||
discoverable_bases[i]++;
|
||||
if ( eval.isReference() && eval.getConsensusConfidence() >= threshold )
|
||||
if (!eval.isSNP() && genotype.getNegLog10PError() >= threshold)
|
||||
discoverable_bases[i]++;
|
||||
|
||||
if ( eval.getConsensusConfidence() >= threshold )
|
||||
if (genotype.getNegLog10PError() >= threshold)
|
||||
genotypable_bases[i]++;
|
||||
|
||||
//System.out.printf("Updating %s SNP=%b, REF=%b VarConf=%f ConConf=%f where threshold=%f: discoverable = %d, genotypable = %d%n",
|
||||
|
|
@ -76,7 +97,7 @@ public class CallableBasesAnalysis extends BasicVariantAnalysis implements Genot
|
|||
s.add(String.format(""));
|
||||
s.add(String.format("confidence_threshold\tdiscoverable_bases\tdiscoverable_bases_percent\tgenotypable_bases\tgenotypable_bases_percent"));
|
||||
|
||||
for ( int i = 0; i < thresholds.length; i++ ) {
|
||||
for (int i = 0; i < thresholds.length; i++) {
|
||||
double threshold = thresholds[i];
|
||||
s.add(String.format("%6.2f\t%d\t%.6f\t%d\t%.6f", threshold, nDiscoverable(i), percentDiscoverable(i), nGenotypable(i), percentGenotypable(i)));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,14 +1,14 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
|
|
@ -24,7 +24,7 @@ public class ClusterCounterAnalysis extends BasicVariantAnalysis implements Geno
|
|||
ArrayList<HashSet<GenomeLoc>> variantsWithClusters;
|
||||
int minDistanceForFlagging = 5;
|
||||
int[] neighborWiseBoundries = {1, 2, 5, 10, 20, 50, 100};
|
||||
AllelicVariant lastVariant = null;
|
||||
Variation lastVariation = null;
|
||||
GenomeLoc lastVariantInterval = null;
|
||||
int nSeen = 0;
|
||||
|
||||
|
|
@ -36,16 +36,16 @@ public class ClusterCounterAnalysis extends BasicVariantAnalysis implements Geno
|
|||
variantsWithClusters.add(new HashSet<GenomeLoc>());
|
||||
}
|
||||
|
||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
String r = null;
|
||||
|
||||
if ( eval != null && eval.isSNP() ) {
|
||||
IntervalRod intervalROD = (IntervalRod)tracker.lookup("interval", null);
|
||||
GenomeLoc interval = intervalROD == null ? null : intervalROD.getLocation();
|
||||
|
||||
if (lastVariant != null) {
|
||||
if (lastVariation != null) {
|
||||
GenomeLoc eL = eval.getLocation();
|
||||
GenomeLoc lvL = lastVariant.getLocation();
|
||||
GenomeLoc lvL = lastVariation.getLocation();
|
||||
if (eL.getContigIndex() == lvL.getContigIndex()) {
|
||||
long d = eL.distance(lvL);
|
||||
if ( lastVariantInterval != null && lastVariantInterval.compareTo(interval) != 0) {
|
||||
|
|
@ -66,7 +66,7 @@ public class ClusterCounterAnalysis extends BasicVariantAnalysis implements Geno
|
|||
}
|
||||
}
|
||||
|
||||
lastVariant = eval;
|
||||
lastVariation = eval;
|
||||
lastVariantInterval = interval;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,21 +1,24 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
public class GenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis {
|
||||
private String dbName;
|
||||
|
|
@ -35,48 +38,48 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
|||
public GenotypeConcordance(final String name) {
|
||||
super("genotype_concordance");
|
||||
dbName = name;
|
||||
for ( int i = 0; i < 4; i++ ) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
truth_totals[i] = 0;
|
||||
calls_totals[i] = 0;
|
||||
for ( int j = 0; j < 4; j++ )
|
||||
for (int j = 0; j < 4; j++)
|
||||
table[i][j] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
public void inc(AllelicVariant chip, AllelicVariant eval, char ref) {
|
||||
if ( (chip != null && !chip.isGenotype()) || (eval != null && !eval.isGenotype()) )
|
||||
public void inc(Variation chip, Variation eval, char ref) {
|
||||
if ((chip != null && !(chip instanceof VariantBackedByGenotype) || (eval != null && !(eval instanceof VariantBackedByGenotype))))
|
||||
throw new StingException("Failure: trying to analyze genotypes of non-genotype data");
|
||||
|
||||
DiploidGenotype g = DiploidGenotype.valueOf(Utils.dupString(ref, 2));
|
||||
int truthIndex, callIndex;
|
||||
if ( chip == null )
|
||||
if (chip == null)
|
||||
truthIndex = UNKNOWN;
|
||||
else if ( chip.isReference() && Utils.countOccurrences(ref, chip.getGenotype().get(0)) == chip.getGenotype().get(0).length() )
|
||||
else if (chip.getAlternateBases().equals(g.toString()))
|
||||
truthIndex = REF;
|
||||
else if ( GenotypeUtils.isHet(chip) )
|
||||
else if (chip.getAlternateBases().charAt(0) != chip.getAlternateBases().charAt(1))
|
||||
truthIndex = VAR_HET;
|
||||
else
|
||||
truthIndex = VAR_HOM;
|
||||
|
||||
// todo -- FIXME on countOccurences
|
||||
if ( eval == null )
|
||||
if (eval == null)
|
||||
callIndex = NO_CALL;
|
||||
else if ( eval.isReference() && Utils.countOccurrences(ref, eval.getGenotype().get(0)) == eval.getGenotype().get(0).length() )
|
||||
else if (eval.getAlternateBases().equals(g.toString()))
|
||||
callIndex = REF;
|
||||
else if ( GenotypeUtils.isHet(eval) )
|
||||
else if (eval.getAlternateBases().charAt(0) != eval.getAlternateBases().charAt(1))
|
||||
callIndex = VAR_HET;
|
||||
else
|
||||
callIndex = VAR_HOM;
|
||||
|
||||
if ( chip != null || eval != null ) {
|
||||
if (chip != null || eval != null) {
|
||||
//System.out.printf("TEST: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval);
|
||||
table[truthIndex][callIndex]++;
|
||||
truth_totals[truthIndex]++;
|
||||
if ( callIndex != NO_CALL ) calls_totals[callIndex]++;
|
||||
if (callIndex != NO_CALL) calls_totals[callIndex]++;
|
||||
}
|
||||
}
|
||||
|
||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
AllelicVariant chip = (AllelicVariant)tracker.lookup(dbName, null);
|
||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
Variation chip = (Variation) tracker.lookup(dbName, null);
|
||||
inc(chip, eval, ref);
|
||||
return null;
|
||||
}
|
||||
|
|
@ -84,24 +87,24 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
|||
private void addCalledGenotypeConcordance(List<String> s) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("CALLED_GENOTYPE_CONCORDANCE\t");
|
||||
for ( int i = 0; i < 4; i++ ) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
int nConcordantCallsI = table[i][i];
|
||||
String value = "N/A";
|
||||
if ( i != UNKNOWN )
|
||||
value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i]-table[UNKNOWN][i]));
|
||||
if (i != UNKNOWN)
|
||||
value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i] - table[UNKNOWN][i]));
|
||||
sb.append(value);
|
||||
}
|
||||
s.add(sb.toString());
|
||||
}
|
||||
s.add(sb.toString());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* How many overall calls where made that aren't NO_CALLS or UNKNOWNS?
|
||||
*/
|
||||
private int getNCalled() {
|
||||
int n = 0;
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
for ( int j = 0; j < 4; j++ )
|
||||
if ( i != NO_CALL && j != NO_CALL ) n += table[i][j];
|
||||
for (int i = 0; i < 4; i++)
|
||||
for (int j = 0; j < 4; j++)
|
||||
if (i != NO_CALL && j != NO_CALL) n += table[i][j];
|
||||
return n;
|
||||
}
|
||||
|
||||
|
|
@ -109,30 +112,30 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
|||
int nConcordantRefCalls = table[REF][REF];
|
||||
int nConcordantHetCalls = table[VAR_HET][VAR_HET];
|
||||
int nConcordantVarHomCalls = table[VAR_HOM][VAR_HOM];
|
||||
int nVarCalls = table[VAR_HOM][VAR_HET] + table[VAR_HOM][VAR_HOM] + table[VAR_HET][VAR_HET] + table[VAR_HET][VAR_HOM];
|
||||
int nVarCalls = table[VAR_HOM][VAR_HET] + table[VAR_HOM][VAR_HOM] + table[VAR_HET][VAR_HET] + table[VAR_HET][VAR_HOM];
|
||||
int nConcordantVarCalls = nConcordantHetCalls + nConcordantVarHomCalls;
|
||||
int nConcordantCalls = nConcordantRefCalls + nConcordantVarCalls;
|
||||
int nTrueVar = truth_totals[VAR_HET] + truth_totals[VAR_HOM];
|
||||
int nCalled = getNCalled();
|
||||
s.add(String.format("VARIANT_SENSITIVITY %s", cellPercent(nVarCalls, nTrueVar)));
|
||||
s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls)));
|
||||
s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled)));
|
||||
s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls)));
|
||||
s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled)));
|
||||
}
|
||||
|
||||
public List<String> done() {
|
||||
List<String> s = new ArrayList<String>();
|
||||
s.add(String.format("name %s", dbName));
|
||||
s.add(String.format("TRUTH_STATE\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CALL\t\tTOTALS\tTRUE_GENOTYPE_CONCORDANCE\tGENOTYPE_SENSITIVITY"));
|
||||
for (int i=0; i < 4; i++) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append(String.format("%15s ", TRUTH_NAMES[i]));
|
||||
for (int j=0; j < 4; j++)
|
||||
for (int j = 0; j < 4; j++)
|
||||
sb.append(String.format("%9d ", table[i][j]));
|
||||
sb.append(String.format("%9d ", truth_totals[i]));
|
||||
if ( i == VAR_HET || i == VAR_HOM ) {
|
||||
sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF]+table[i][VAR_HET]+table[i][VAR_HOM])));
|
||||
sb.append(String.format("%s", cellPercent(truth_totals[i]-table[i][NO_CALL], truth_totals[i])));
|
||||
} else {
|
||||
if (i == VAR_HET || i == VAR_HOM) {
|
||||
sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM])));
|
||||
sb.append(String.format("%s", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i])));
|
||||
} else {
|
||||
sb.append("\tN/A\t\t\tN/A");
|
||||
}
|
||||
s.add(sb.toString());
|
||||
|
|
@ -140,16 +143,16 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
|||
|
||||
addCalledGenotypeConcordance(s);
|
||||
addOverallStats(s);
|
||||
|
||||
for (int i=0; i < 4; i++) {
|
||||
for (int j=0; j < 4; j++) {
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int j = 0; j < 4; j++) {
|
||||
s.add(String.format("%s_%s_%s %d", TRUTH_NAMES[i], CALL_NAMES[j], "NO_SITES", table[i][j]));
|
||||
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_TRUTH", cellPercent(table[i][j], truth_totals[i])));
|
||||
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_CALLS", cellPercent(table[i][j], calls_totals[j])));
|
||||
}
|
||||
if ( i == VAR_HET || i == VAR_HOM ) {
|
||||
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF]+table[i][VAR_HET]+table[i][VAR_HOM])));
|
||||
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i]-table[i][NO_CALL], truth_totals[i])));
|
||||
if (i == VAR_HET || i == VAR_HOM) {
|
||||
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM])));
|
||||
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i])));
|
||||
}
|
||||
}
|
||||
return s;
|
||||
|
|
@ -158,7 +161,7 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
|||
private static String cellPercent(int count, int total) {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
total = Math.max(total, 0);
|
||||
sb.append(String.format("%.2f", (100.0*count)/total));
|
||||
sb.append(String.format("%.2f", (100.0 * count) / total));
|
||||
sb.append("%");
|
||||
return sb.toString();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,17 +1,16 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
import cern.jet.math.Arithmetic;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.SNPCallFromGenotypes;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import cern.jet.math.Arithmetic;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
|
|
@ -32,7 +31,7 @@ public class HardyWeinbergEquilibrium extends BasicVariantAnalysis implements Po
|
|||
this.threshold = threshold;
|
||||
}
|
||||
|
||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
String r = null;
|
||||
|
||||
if ( eval != null &&
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
|
|
@ -29,19 +29,19 @@ public class IndelMetricsAnalysis extends BasicVariantAnalysis implements Genoty
|
|||
sizes[0][i] = sizes[1][i] = 0;
|
||||
}
|
||||
|
||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
if ( eval != null && eval.isIndel() ) {
|
||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
if ( eval != null && eval.isInsertion() ) {
|
||||
if ( eval.isInsertion() )
|
||||
insertions++;
|
||||
else if ( eval.isDeletion() )
|
||||
deletions++;
|
||||
else
|
||||
throw new RuntimeException("Variant is indel, but isn't insertion or deletion!");
|
||||
throw new RuntimeException("Variation is indel, but isn't insertion or deletion!");
|
||||
|
||||
if ( eval.length() < 100 ) {
|
||||
sizes[eval.isDeletion() ? 0 : 1][eval.length()]++;
|
||||
if ( eval.length() > maxSize )
|
||||
maxSize = eval.length();
|
||||
if ( eval.getAlternateBases().length() < 100 ) {
|
||||
sizes[eval.isDeletion() ? 0 : 1][eval.getAlternateBases().length()]++;
|
||||
if ( eval.getAlternateBases().length() > maxSize )
|
||||
maxSize = eval.getAlternateBases().length();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,15 +1,15 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.io.PrintStream;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
|
|
@ -26,7 +26,7 @@ public class NeighborDistanceAnalysis extends BasicVariantAnalysis implements Ge
|
|||
int minDistanceForFlagging = 5;
|
||||
int[] neighborWiseBoundries = {1, 2, 5, 10, 20, 50, 100, 1000, 10000};
|
||||
|
||||
AllelicVariant lastVariant = null;
|
||||
Variation lastVariation = null;
|
||||
GenomeLoc lastVariantInterval = null;
|
||||
PrintStream violationsOut = null;
|
||||
|
||||
|
|
@ -35,16 +35,16 @@ public class NeighborDistanceAnalysis extends BasicVariantAnalysis implements Ge
|
|||
neighborWiseDistances = new ArrayList<Long>();
|
||||
}
|
||||
|
||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
String r = null;
|
||||
|
||||
if ( eval != null && eval.isSNP() ) {
|
||||
IntervalRod intervalROD = (IntervalRod)tracker.lookup("interval", null);
|
||||
GenomeLoc interval = intervalROD == null ? null : intervalROD.getLocation();
|
||||
|
||||
if (lastVariant != null) {
|
||||
if (lastVariation != null) {
|
||||
GenomeLoc eL = eval.getLocation();
|
||||
GenomeLoc lvL = lastVariant.getLocation();
|
||||
GenomeLoc lvL = lastVariation.getLocation();
|
||||
if (eL.getContigIndex() == lvL.getContigIndex()) {
|
||||
long d = eL.distance(lvL);
|
||||
if ( lastVariantInterval != null && lastVariantInterval.compareTo(interval) != 0) {
|
||||
|
|
@ -57,7 +57,7 @@ public class NeighborDistanceAnalysis extends BasicVariantAnalysis implements Ge
|
|||
}
|
||||
}
|
||||
|
||||
lastVariant = eval;
|
||||
lastVariation = eval;
|
||||
lastVariantInterval = interval;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
|
@ -13,10 +14,9 @@ import java.util.List;
|
|||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
public class TransitionTranversionAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis {
|
||||
long nTransitions = 0, nTransversions = 0;
|
||||
|
|
@ -25,13 +25,18 @@ public class TransitionTranversionAnalysis extends BasicVariantAnalysis implemen
|
|||
super("transitions_transversions");
|
||||
}
|
||||
|
||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
if ( eval != null && eval.isSNP() ) {
|
||||
char refBase = eval.getRefSnpFWD();
|
||||
char altBase = eval.getAltSnpFWD();
|
||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
if (eval != null && eval.isSNP()) {
|
||||
if (eval.getAlternateBases().length() != 2) {
|
||||
throw new StingException("TransitionTranversionAnalysis works only with biallelic variants");
|
||||
}
|
||||
|
||||
|
||||
char refBase = eval.getReference();
|
||||
char altBase = (eval.getAlternateBases().charAt(0) == refBase) ? eval.getAlternateBases().charAt(1) : eval.getAlternateBases().charAt(0);
|
||||
|
||||
BaseUtils.BaseSubstitutionType subType = BaseUtils.SNPSubstitutionType(refBase, altBase);
|
||||
if ( subType == BaseUtils.BaseSubstitutionType.TRANSITION )
|
||||
if (subType == BaseUtils.BaseSubstitutionType.TRANSITION)
|
||||
nTransitions++;
|
||||
else
|
||||
nTransversions++;
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.List;
|
||||
|
|
@ -23,7 +23,7 @@ public interface VariantAnalysis {
|
|||
public PrintStream getCallPrintStream();
|
||||
public List<String> getParams();
|
||||
public void initialize(VariantEvalWalker master, PrintStream out, PrintStream callOut, String filename);
|
||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context);
|
||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context);
|
||||
public void finalize(long nSites);
|
||||
public List<String> done();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,12 +1,13 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.GenotypeUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
|
|
@ -27,10 +28,10 @@ public class VariantCounter extends BasicVariantAnalysis implements GenotypeAnal
|
|||
super("variant_counts");
|
||||
}
|
||||
|
||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
nSNPs += eval == null ? 0 : 1;
|
||||
|
||||
if ( this.getMaster().evalContainsGenotypes && eval != null && eval.isSNP() && GenotypeUtils.isHet(eval) )
|
||||
if ( this.getMaster().evalContainsGenotypes && eval != null && eval.isSNP() && ((VariantBackedByGenotype)eval).getGenotype( DiploidGenotype.valueOf(eval.getAlternateBases())).isHet() )
|
||||
nHets++;
|
||||
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -1,21 +1,20 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis {
|
||||
private String dbName;
|
||||
|
|
@ -31,45 +30,81 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
|
|||
dbName = name;
|
||||
}
|
||||
|
||||
public void inc(AllelicVariant dbSNP, AllelicVariant eval) {
|
||||
public void inc(Variation dbSNP, Variation eval) {
|
||||
boolean inDB = dbSNP != null;
|
||||
boolean inEval = eval != null;
|
||||
|
||||
if (inDB ) {
|
||||
if ( dbSNP.isSNP() ) nDBSNPs++;
|
||||
if ( dbSNP.isIndel() ) nDBIndels++;
|
||||
|
||||
//System.out.printf("snp=%b ins=%b del=%b indel=%b %s%n", dbSNP.isSNP(), dbSNP.isInsertion(), dbSNP.isDeletion(), dbSNP.isIndel(), dbSNP);
|
||||
if (inDB) {
|
||||
if (dbSNP.isSNP()) nDBSNPs++;
|
||||
if (dbSNP.isIndel()) nDBIndels++;
|
||||
}
|
||||
|
||||
if (inEval) nEvalObs++;
|
||||
if (inDB && inEval) {
|
||||
if ( dbSNP.isSNP() ) { // changes the calculation a bit
|
||||
if (dbSNP.isSNP()) { // changes the calculation a bit
|
||||
nOverlapping++;
|
||||
|
||||
if ( ! discordantP(dbSNP, eval) )
|
||||
if (!discordantP(dbSNP, eval))
|
||||
nConcordant++;
|
||||
}
|
||||
|
||||
if ( dbSNP.isIndel() && eval.isSNP() )
|
||||
if (dbSNP.isIndel() && eval.isSNP())
|
||||
nSNPsCalledAtIndels++;
|
||||
}
|
||||
}
|
||||
|
||||
public int nDBSNPs() { return nDBSNPs; }
|
||||
public int nDBIndels() { return nDBIndels; }
|
||||
public int nEvalSites() { return nEvalObs; }
|
||||
public int nOverlappingSites() { return nOverlapping; }
|
||||
public int nConcordant() { return nConcordant; }
|
||||
public int nNovelSites() { return Math.abs(nEvalSites() - nOverlappingSites()); }
|
||||
public int nSNPsAtIndels() { return nSNPsCalledAtIndels; }
|
||||
public int callCount = 0;
|
||||
|
||||
public boolean discordantP(AllelicVariant dbSNP, AllelicVariant eval) {
|
||||
if (dbSNP != null && dbSNP.isSNP() && eval != null ) {
|
||||
return ! (dbSNP.getAltSnpFWD() == eval.getAltSnpFWD() || dbSNP.getRefSnpFWD() == eval.getAltSnpFWD());
|
||||
} else {
|
||||
return false;
|
||||
public int nDBSNPs() {
|
||||
return nDBSNPs;
|
||||
}
|
||||
|
||||
public int nDBIndels() {
|
||||
return nDBIndels;
|
||||
}
|
||||
|
||||
public int nEvalSites() {
|
||||
return nEvalObs;
|
||||
}
|
||||
|
||||
public int nOverlappingSites() {
|
||||
return nOverlapping;
|
||||
}
|
||||
|
||||
public int nConcordant() {
|
||||
return nConcordant;
|
||||
}
|
||||
|
||||
public int nNovelSites() {
|
||||
return Math.abs(nEvalSites() - nOverlappingSites());
|
||||
}
|
||||
|
||||
public int nSNPsAtIndels() {
|
||||
return nSNPsCalledAtIndels;
|
||||
}
|
||||
public static int count = 0;
|
||||
public boolean discordantP(Variation dbSNP, Variation eval) {
|
||||
if (eval != null) {
|
||||
char alt = (eval.isSNP()) ? eval.getAlternativeBaseForSNP() : eval.getReference();
|
||||
|
||||
if (dbSNP != null && dbSNP.isSNP()) {
|
||||
|
||||
boolean val = !dbSNP.getAlternateBases().contains(String.valueOf(alt));
|
||||
|
||||
if (val) {
|
||||
//System.err.print(eval.getLocation());
|
||||
//System.err.print(" eval.alt = " + alt);
|
||||
//System.err.println(" dbsnp.bases = " + dbSNP.getAlternateBases());
|
||||
//System.err.println(val);
|
||||
|
||||
//System.err.println(count);
|
||||
}
|
||||
//else System.err.println("not count == " + count++);
|
||||
|
||||
return val;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -86,17 +121,19 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
|
|||
return nConcordant() / (1.0 * nOverlappingSites());
|
||||
}
|
||||
|
||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
|
||||
// There are four cases here:
|
||||
AllelicVariant dbsnp = (AllelicVariant)tracker.lookup(dbName, null);
|
||||
Variation dbsnp = (Variation) tracker.lookup(dbName, null);
|
||||
boolean isSNP = dbsnp != null && dbsnp.isSNP();
|
||||
inc(dbsnp, eval);
|
||||
|
||||
if ( dbsnp != null && eval != null ) {
|
||||
if ( dbsnp.isSNP() && eval.isSNP() && discordantP(dbsnp, eval) ) {
|
||||
if (dbsnp != null && eval != null) {
|
||||
|
||||
if (dbsnp.isSNP() && eval.isSNP() && discordantP(dbsnp, eval)) {
|
||||
return String.format("Discordant [DBSNP %s] [EVAL %s]", dbsnp, eval);
|
||||
} else if ( dbsnp.isIndel() && eval.isSNP() ) {
|
||||
return String.format("SNP-at-indel DBSNP=%s %s", dbsnp.getGenotype().get(0), eval);
|
||||
} else if (dbsnp.isIndel() && eval.isSNP()) {
|
||||
return String.format("SNP-at-indel DBSNP=%s %s", dbsnp.getAlternateBases(), eval);
|
||||
} else {
|
||||
return null;
|
||||
// return "Novel " + eval;
|
||||
|
|
@ -128,10 +165,10 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
|
|||
s.add(String.format("n_concordant %d", nConcordant()));
|
||||
s.add(String.format("n_novel_sites %d", nNovelSites()));
|
||||
|
||||
s.add(String.format("percent_eval_sites_in_db %.2f", 100*fractionEvalSitesCoveredByDB()));
|
||||
s.add(String.format("concordance_rate %.2f", 100*concordanceRate()));
|
||||
s.add(String.format("percent_eval_sites_in_db %.2f", 100 * fractionEvalSitesCoveredByDB()));
|
||||
s.add(String.format("concordance_rate %.2f", 100 * concordanceRate()));
|
||||
|
||||
s.add(String.format("percent_db_sites_in_eval %.2f", 100*fractionDBSitesDiscoveredInEval()));
|
||||
s.add(String.format("percent_db_sites_in_eval %.2f", 100 * fractionDBSitesDiscoveredInEval()));
|
||||
|
||||
s.add(String.format("n_snp_calls_at_indels %d", nSNPsAtIndels()));
|
||||
s.add(String.format("percent_calls_at_indels %.2f", nSNPsAtIndels() / (0.01 * nEvalSites())));
|
||||
|
|
|
|||
|
|
@ -2,14 +2,12 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
|||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.SNPCallFromGenotypes;
|
||||
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
|
@ -28,8 +26,8 @@ import java.util.*;
|
|||
*
|
||||
*/
|
||||
@By(DataSource.REFERENCE)
|
||||
@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="eval",type=AllelicVariant.class)})
|
||||
@Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=AllelicVariant.class), @RMD(name="dbsnp",type=AllelicVariant.class),@RMD(name="hapmap-chip",type=AllelicVariant.class), @RMD(name="interval",type=IntervalRod.class)})
|
||||
@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="eval",type=RodGeliText.class)}) // right now we have no base variant class for rods, this should change
|
||||
@Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=RodGeliText.class), @RMD(name="dbsnp",type=rodDbSNP.class),@RMD(name="hapmap-chip",type=RodGenotypeChipAsGFF.class), @RMD(name="interval",type=IntervalRod.class)})
|
||||
public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
||||
@Argument(shortName="minConfidenceScore", doc="Minimum confidence score to consider an evaluation SNP a variant", required=false)
|
||||
public int minConfidenceScore = -1;
|
||||
|
|
@ -181,19 +179,18 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
|||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
nSites++;
|
||||
|
||||
// Iterate over each analysis, and update it
|
||||
AllelicVariant eval = (AllelicVariant)tracker.lookup("eval", null);
|
||||
Variation eval = (Variation)tracker.lookup("eval", null);
|
||||
|
||||
//if ( eval!=null ) System.out.printf("Eval: %f %d %b%n", eval.getVariationConfidence(), minDiscoveryQ, eval.getVariationConfidence() < minDiscoveryQ);
|
||||
if ( eval != null ) {
|
||||
// comment to disable variant filtering by confidence score
|
||||
if ( evalContainsGenotypes ) {
|
||||
// Genotyping - use best vs. next best lod
|
||||
if ( eval.getConsensusConfidence() < minConfidenceScore ) eval = null;
|
||||
if ( eval.getNegLog10PError() < minConfidenceScore ) eval = null;
|
||||
} else {
|
||||
// Variant discovery - use best vs. reference lod
|
||||
if ( Math.abs(eval.getVariationConfidence()) < minConfidenceScore ) eval = null;
|
||||
// Variation discovery - use best vs. reference lod
|
||||
if ( Math.abs(eval.getNegLog10PError()) < minConfidenceScore ) eval = null;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -202,7 +199,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
|||
|
||||
// update the known / novel set by checking whether the knownSNPDBName track has an entry here
|
||||
if ( eval != null ) {
|
||||
AllelicVariant dbsnp = (AllelicVariant)tracker.lookup(knownSNPDBName, null);
|
||||
Variation dbsnp = (Variation)tracker.lookup(knownSNPDBName, null);
|
||||
String noveltySet = dbsnp == null ? NOVEL_SNPS : KNOWN_SNPS;
|
||||
updateAnalysisSet(noveltySet, eval, tracker, ref.getBase(), context);
|
||||
}
|
||||
|
|
@ -219,7 +216,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
|
||||
public void updateAnalysisSet(final String analysisSetName, AllelicVariant eval,
|
||||
public void updateAnalysisSet(final String analysisSetName, Variation eval,
|
||||
RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
// Iterate over each analysis, and update it
|
||||
if ( getAnalysisSet(analysisSetName) != null ) {
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
|
|
@ -22,7 +23,7 @@ public class VariantMatcher extends BasicVariantAnalysis implements GenotypeAnal
|
|||
dbName = name;
|
||||
}
|
||||
|
||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
String r = null;
|
||||
AllelicVariant db = (AllelicVariant)tracker.lookup(dbName, null);
|
||||
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ public class GenotypeUtils {
|
|||
// VariantType() {}
|
||||
}
|
||||
|
||||
/** This method accepts rods that implement either Genotype or GenotypeList interface (all others will result in an exception). Variant
|
||||
/** This method accepts rods that implement either Genotype or GenotypeList interface (all others will result in an exception). Variant
|
||||
* (Genotype object) of the specified type (point mutation or indel) will be extracted from GenotypeList rod if such variant exists, or the rod itself
|
||||
* will be typecasted and returned back if it implements Genotype and represents the specified variant type. If the last argument is false, then
|
||||
* null will be returned in all other cases. If the last argument is true and either a) rod is a GenotypeList that lacks a call of the specified type, but call
|
||||
|
|
|
|||
|
|
@ -0,0 +1,143 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class BasicGenotype
|
||||
*
|
||||
* represents a basic genotype object
|
||||
*/
|
||||
public class BasicGenotype implements Genotype {
|
||||
// the genotype string
|
||||
private String mGenotype;
|
||||
private GenomeLoc mLocation;
|
||||
private char mRef;
|
||||
private double mNetLog10PError;
|
||||
|
||||
/**
|
||||
* create a basic genotype
|
||||
* @param genotype
|
||||
*/
|
||||
public BasicGenotype(GenomeLoc location, String genotype, char ref, double netLog10PError) {
|
||||
mNetLog10PError = netLog10PError;
|
||||
mGenotype = genotype;
|
||||
mLocation = location;
|
||||
mRef = ref;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the -1 * (log 10 of the error value)
|
||||
*
|
||||
* @return the log based error estimate
|
||||
*/
|
||||
@Override
|
||||
public double getNegLog10PError() {
|
||||
return mNetLog10PError;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the bases that represent this
|
||||
*
|
||||
* @return the bases, as a string
|
||||
*/
|
||||
@Override
|
||||
public String getBases() {
|
||||
return mGenotype;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the ploidy
|
||||
*
|
||||
* @return the ploidy value
|
||||
*/
|
||||
@Override
|
||||
public int getPloidy() {
|
||||
return mGenotype.length();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if both observed alleles are the same (regardless of whether they are ref or alt)
|
||||
*
|
||||
* @return true if we're homozygous, false otherwise
|
||||
*/
|
||||
@Override
|
||||
public boolean isHom() {
|
||||
if (mGenotype.length() < 1)
|
||||
return false;
|
||||
|
||||
char base = mGenotype.charAt(0);
|
||||
for (char cur : mGenotype.toCharArray()) {
|
||||
if (base != cur) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if observed alleles differ (regardless of whether they are ref or alt)
|
||||
*
|
||||
* @return true if we're het, false otherwise
|
||||
*/
|
||||
@Override
|
||||
public boolean isHet() {
|
||||
return !isHom();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the genotype's location
|
||||
*
|
||||
* @return a GenomeLoc representing the location
|
||||
*/
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return mLocation;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true if the genotype is a point genotype, false if it's a indel / deletion
|
||||
*
|
||||
* @return true is a SNP
|
||||
*/
|
||||
@Override
|
||||
public boolean isPointGenotype() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* given the reference, are we a variant? (non-ref)
|
||||
*
|
||||
* @param ref the reference base or bases
|
||||
*
|
||||
* @return true if we're a variant
|
||||
*/
|
||||
@Override
|
||||
public boolean isVariant(char ref) {
|
||||
return !(mGenotype.charAt(0) == ref && isHom());
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference base.
|
||||
*
|
||||
* @return a character, representing the reference base
|
||||
*/
|
||||
@Override
|
||||
public char getReference() {
|
||||
return mRef;
|
||||
}
|
||||
|
||||
/**
|
||||
* return this genotype as a variant
|
||||
*
|
||||
* @return the variant
|
||||
*/
|
||||
@Override
|
||||
public Variation toVariation() {
|
||||
if (!isVariant(this.mRef)) throw new IllegalStateException("this genotype is not a variant");
|
||||
return new BasicVariation(this.getBases(),mRef,this.getBases().length(),mLocation,mNetLog10PError);
|
||||
}
|
||||
}
|
||||
|
|
@ -8,7 +8,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
* User: aaronmckenna
|
||||
* Date: Sep 9, 2009
|
||||
* Time: 9:32:34 PM
|
||||
*
|
||||
* <p/>
|
||||
* a basic implementation of variant
|
||||
*/
|
||||
public class BasicVariation implements Variation {
|
||||
|
|
@ -18,12 +18,14 @@ public class BasicVariation implements Variation {
|
|||
protected final int mLength;
|
||||
protected final GenomeLoc mLocation;
|
||||
protected final double mConfidence;
|
||||
|
||||
/**
|
||||
* the constructor
|
||||
* @param bases the bases that this variant represents
|
||||
*
|
||||
* @param bases the bases that this variant represents
|
||||
* @param reference the reference base
|
||||
* @param length are we a single base variant, or a indel/deletion? length is negitive for an indel,
|
||||
* positive for a indel, and 0 for a substitution
|
||||
* @param length are we a single base variant, or a indel/deletion? length is negitive for an indel,
|
||||
* positive for a indel, and 0 for a substitution
|
||||
*/
|
||||
public BasicVariation(String bases, char reference, int length, GenomeLoc location, double confidence) {
|
||||
mBases = bases;
|
||||
|
|
@ -40,14 +42,14 @@ public class BasicVariation implements Variation {
|
|||
|
||||
@Override
|
||||
public VARIANT_TYPE getType() {
|
||||
if (mLength >0) return VARIANT_TYPE.INDEL;
|
||||
if (mLength <0) return VARIANT_TYPE.DELETION;
|
||||
if (mLength > 0) return VARIANT_TYPE.INDEL;
|
||||
if (mLength < 0) return VARIANT_TYPE.DELETION;
|
||||
return (isSNP()) ? VARIANT_TYPE.SNP : VARIANT_TYPE.REFERENCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isSNP() {
|
||||
if (mLength == 0 && Utils.dupString(mRef,2).equals(mBases)) return true;
|
||||
if (mLength == 0 && Utils.dupString(mRef, 2).equals(mBases)) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -62,7 +64,7 @@ public class BasicVariation implements Variation {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String getBases() {
|
||||
public String getAlternateBases() {
|
||||
return mBases;
|
||||
}
|
||||
|
||||
|
|
@ -72,19 +74,8 @@ public class BasicVariation implements Variation {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String getReference() {
|
||||
return String.valueOf(mRef);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isHet() {
|
||||
if (mLength == 0 && mBases.charAt(0) != mBases.charAt(1)) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isHom() {
|
||||
return !isHet();
|
||||
public char getReference() {
|
||||
return (mRef);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -98,12 +89,29 @@ public class BasicVariation implements Variation {
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* are we an insertion or a deletion? yes, then return true. No? Well, false it is.
|
||||
*
|
||||
* @return true if we're an insertion or deletion
|
||||
*/
|
||||
@Override
|
||||
public char getAlternateBase() {
|
||||
if (mLength != 0) {
|
||||
throw new UnsupportedOperationException("Unable to get alternate base for indel / deletion");
|
||||
}
|
||||
if (mBases.charAt(0) == mRef) return mBases.charAt(1);
|
||||
else return mBases.charAt(0);
|
||||
public boolean isIndel() {
|
||||
return (isDeletion() || isInsertion());
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
|
||||
* of
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
@Override
|
||||
public char getAlternativeBaseForSNP() {
|
||||
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||
if (getAlternateBases().charAt(0) == this.getReference())
|
||||
return getAlternateBases().charAt(1);
|
||||
return getAlternateBases().charAt(0);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,18 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Interface VariantBackedByGenotype
|
||||
* <p/>
|
||||
* this variant is backed by genotypic information
|
||||
*/
|
||||
public interface VariantBackedByGenotype {
|
||||
/**
|
||||
* get the likelihoods
|
||||
* @return an array in lexigraphical order of the likelihoods
|
||||
*/
|
||||
public Genotype getGenotype(DiploidGenotype x);
|
||||
}
|
||||
|
|
@ -46,13 +46,6 @@ public interface Variation {
|
|||
*/
|
||||
public boolean isDeletion();
|
||||
|
||||
/**
|
||||
* get the base representation of this Variant
|
||||
*
|
||||
* @return a string, of ploidy
|
||||
*/
|
||||
public String getBases();
|
||||
|
||||
/**
|
||||
* get the location that this Variant represents
|
||||
*
|
||||
|
|
@ -65,13 +58,7 @@ public interface Variation {
|
|||
*
|
||||
* @return the reference base or bases, as a string
|
||||
*/
|
||||
public String getReference();
|
||||
|
||||
/** is our base representation heterozygous */
|
||||
public boolean isHet();
|
||||
|
||||
/** is our base representation homozygous */
|
||||
public boolean isHom();
|
||||
public char getReference();
|
||||
|
||||
/**
|
||||
* get the -1 * (log 10 of the error value)
|
||||
|
|
@ -87,9 +74,22 @@ public interface Variation {
|
|||
public boolean isReference();
|
||||
|
||||
/**
|
||||
* gets the alternate base. If this is homref, throws an UnsupportedOperationException
|
||||
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
|
||||
* @return
|
||||
*/
|
||||
public char getAlternateBase();
|
||||
public String getAlternateBases();
|
||||
|
||||
/**
|
||||
* are we an insertion or a deletion? yes, then return true. No? Well, false it is.
|
||||
* @return true if we're an insertion or deletion
|
||||
*/
|
||||
public boolean isIndel();
|
||||
|
||||
/**
|
||||
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
|
||||
* of
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
public char getAlternativeBaseForSNP();
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,14 +8,12 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||
import org.junit.Assert;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.fail;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -60,38 +58,7 @@ public class RodGLFTest extends BaseTest {
|
|||
assertEquals(finalRecordCount, counter);
|
||||
}
|
||||
|
||||
/** make sure we're returning true to biallelic */
|
||||
@Test
|
||||
public void testIsBiallelic() {
|
||||
RodGLF glf = iter.next();
|
||||
Assert.assertFalse(iter.isBiallelic());
|
||||
}
|
||||
|
||||
// best vs next-best
|
||||
@Test
|
||||
public void testGetConsensusConfidence() {
|
||||
RodGLF glf = iter.next();
|
||||
assertEquals(120.0, iter.getConsensusConfidence(), 0.005);
|
||||
glf = iter.next();
|
||||
assertEquals(120.0, iter.getConsensusConfidence(), 0.005);
|
||||
}
|
||||
|
||||
// best vs. ref
|
||||
@Test
|
||||
public void testGetVariationConfidence() {
|
||||
RodGLF glf = iter.next();
|
||||
assertEquals(0.0, iter.getVariationConfidence(), 0.005);
|
||||
glf = iter.next();
|
||||
assertEquals(250.0, iter.getVariationConfidence(), 0.005);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testGetGenotype() {
|
||||
RodGLF glf = iter.next();
|
||||
List<String> str = iter.getGenotype();
|
||||
Assert.assertTrue(str.get(0).equals("AA"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIsSNP() {
|
||||
|
|
@ -112,7 +79,7 @@ public class RodGLFTest extends BaseTest {
|
|||
glf = iter.next();
|
||||
Assert.assertFalse(iter.isReference());
|
||||
}
|
||||
|
||||
/*
|
||||
@Test(expected = IllegalStateException.class)
|
||||
public void testGetAltSnpFWDIllegalException() {
|
||||
RodGLF glf = iter.next();
|
||||
|
|
@ -159,7 +126,7 @@ public class RodGLFTest extends BaseTest {
|
|||
/**
|
||||
* move to the second and third bases, and check that the
|
||||
* alternate bases are correct.
|
||||
*/
|
||||
*
|
||||
@Test
|
||||
public void testGetAltBasesFWD() {
|
||||
RodGLF glf = iter.next();
|
||||
|
|
@ -169,7 +136,7 @@ public class RodGLFTest extends BaseTest {
|
|||
Assert.assertTrue("CT".equals(iter.getAltBasesFWD()));
|
||||
|
||||
}
|
||||
|
||||
*/
|
||||
@Test
|
||||
public void testRodLocations() {
|
||||
GenomeLoc loc = null;
|
||||
|
|
@ -177,7 +144,7 @@ public class RodGLFTest extends BaseTest {
|
|||
RodGLF glf = iter.next();
|
||||
if (loc != null) {
|
||||
if (iter.getLocation().isBefore(loc)) {
|
||||
fail("locations in the GLF came out of order loc = " + loc.toString() + " new loc = " + iter.getLocation().toString());
|
||||
Assert.fail("locations in the GLF came out of order loc = " + loc.toString() + " new loc = " + iter.getLocation().toString());
|
||||
}
|
||||
}
|
||||
loc = iter.getLocation();
|
||||
|
|
|
|||
|
|
@ -0,0 +1,62 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.junit.Assert;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class BasicGenotypeTest
|
||||
*
|
||||
* tests the basic genotype class
|
||||
*/
|
||||
public class BasicGenotypeTest extends BaseTest {
|
||||
private static IndexedFastaSequenceFile seq;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeTests() {
|
||||
try {
|
||||
seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("unable to load the sequence dictionary");
|
||||
}
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBasicGenotypeIsHom() {
|
||||
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0);
|
||||
Assert.assertTrue(gt.isHom());
|
||||
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0);
|
||||
Assert.assertTrue(!gt2.isHom());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBasicGenotypeIsHet() {
|
||||
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0);
|
||||
Assert.assertTrue(!gt.isHet());
|
||||
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0);
|
||||
Assert.assertTrue(gt2.isHet());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBasicGenotypeIsVariant() {
|
||||
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0);
|
||||
Assert.assertTrue(!gt.isVariant('A'));
|
||||
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0);
|
||||
Assert.assertTrue(gt2.isVariant('A'));
|
||||
BasicGenotype gt3 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"TT",'A',0);
|
||||
Assert.assertTrue(gt3.isVariant('A'));
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue