Changes to switch Variant Eval over to the new Variation system.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1611 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-09-14 05:34:33 +00:00
parent 0feee9cdfd
commit e03fccb223
26 changed files with 825 additions and 416 deletions

View File

@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.glf.GLFReader;
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall;
@ -13,9 +14,7 @@ import org.broadinstitute.sting.utils.genotype.glf.VariableLengthCall;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
@ -25,7 +24,7 @@ import java.util.List;
* <p/>
* the rod class for GLF data.
*/
public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<RodGLF> {
public class RodGLF implements ReferenceOrderedDatum, Variation, Iterator<RodGLF> {
static int count = 0;
public GLFReader mReader;
private final String mName;
@ -38,6 +37,7 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
/**
* get the name
*
* @return the name
*/
@Override
@ -64,27 +64,25 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
return toString();
}
/**
* @return a string representation of the ROD GLF object
*/
/** @return a string representation of the ROD GLF object */
public String toString() {
return String.format("%s\t%d\t%s\t%d\t%d\t%4.4f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f",
mLoc.getContig(),
mLoc.getStart(),
mRecord.getRefBase(),
mRecord.getReadDepth(),
mRecord.getRmsMapQ(),
getBestGenotypeValue(1),
((SinglePointCall)mRecord).getLikelihoods()[0],
((SinglePointCall)mRecord).getLikelihoods()[1],
((SinglePointCall)mRecord).getLikelihoods()[2],
((SinglePointCall)mRecord).getLikelihoods()[3],
((SinglePointCall)mRecord).getLikelihoods()[4],
((SinglePointCall)mRecord).getLikelihoods()[5],
((SinglePointCall)mRecord).getLikelihoods()[6],
((SinglePointCall)mRecord).getLikelihoods()[7],
((SinglePointCall)mRecord).getLikelihoods()[8],
((SinglePointCall)mRecord).getLikelihoods()[9]
mLoc.getContig(),
mLoc.getStart(),
mRecord.getRefBase(),
mRecord.getReadDepth(),
mRecord.getRmsMapQ(),
getBestGenotypeValue(1),
((SinglePointCall) mRecord).getLikelihoods()[0],
((SinglePointCall) mRecord).getLikelihoods()[1],
((SinglePointCall) mRecord).getLikelihoods()[2],
((SinglePointCall) mRecord).getLikelihoods()[3],
((SinglePointCall) mRecord).getLikelihoods()[4],
((SinglePointCall) mRecord).getLikelihoods()[5],
((SinglePointCall) mRecord).getLikelihoods()[6],
((SinglePointCall) mRecord).getLikelihoods()[7],
((SinglePointCall) mRecord).getLikelihoods()[8],
((SinglePointCall) mRecord).getLikelihoods()[9]
);
@ -107,6 +105,7 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
/**
* return a genome loc representing the current location
*
* @return the geonome loc
*/
@Override
@ -115,62 +114,15 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
}
/**
* Returns bases in the reference allele as a String. String can be empty (as in insertion into
* the reference), can contain a single character (as in SNP or one-base deletion), or multiple characters
* (for longer indels).
* get the reference base(s) at this position
*
* @return reference allele, forward strand
* @return the reference base or bases, as a string
*/
@Override
public String getRefBasesFWD() {
if (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) return "";
return String.valueOf(mRecord.getRefBase());
}
/**
* Returns reference (major) allele base for a SNP variant as a character; should throw IllegalStateException
* if variant is not a SNP.
*
* This doesn't make much sense to me, what if the best genotype is hom non-ref?
*
* @return reference base on the forward strand
*/
@Override
public char getRefSnpFWD() throws IllegalStateException {
if (!isSNP()) throw new IllegalStateException("Current GLF Record is not a SNP");
public char getReference() {
return mRecord.getRefBase().toChar();
}
/**
* Returns bases in the alternative allele as a String. String can be empty (as in deletion from
* the reference), can contain a single character (as in SNP or one-base insertion), or multiple characters
* (for longer indels).
*
* @return alternative allele, forward strand
*/
@Override
public String getAltBasesFWD() {
return getBestGenotype(2).toString();
}
/**
* Returns alternative (minor) allele base for a SNP variant as a character; should throw IllegalStateException
* if variant is not a SNP.
*
* @return alternative allele base on the forward starnd
*/
@Override
public char getAltSnpFWD() throws IllegalStateException {
if (!isSNP()) {
throw new IllegalStateException("Not a SNP");
}
String str = getBestGenotype(1).toString();
if (String.valueOf(str.charAt(0)).equals(mRecord.getRefBase().toString())) {
return str.charAt(1);
}
return str.charAt(0);
}
/**
* Returns true if all observed alleles are reference alleles. All is<Variant> methods (where Variant=SNP,Insertion, etc) should
* return false at such site to ensure consistency. This method is included for use with genotyping calls (isGenotype()==true), it makes
@ -183,6 +135,31 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
return (!isSNP());
}
/**
* are we an insertion or a deletion? yes, then return true. No? Well, false it is.
*
* @return true if we're an insertion or deletion
*/
@Override
public boolean isIndel() {
return (isDeletion() || isInsertion());
}
/**
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
* of
*
* @return a char, representing the alternate base
*/
@Override
public char getAlternativeBaseForSNP() {
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
if (getAlternateBases().charAt(0) == this.getReference())
return getAlternateBases().charAt(1);
return getAlternateBases().charAt(0);
}
/**
* Is this variant a SNP?
*
@ -196,7 +173,9 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
/**
* return a string representing the reference
*
* @param ref the reference character
*
* @return a string for the homozygous ref in a diploid
*/
private static String refString(char ref) {
@ -213,7 +192,7 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
*/
public LikelihoodObject.GENOTYPE getBestGenotype(int nthBest) {
Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods());
return LikelihoodObject.GENOTYPE.values()[sorted[nthBest-1]];
return LikelihoodObject.GENOTYPE.values()[sorted[nthBest - 1]];
}
/**
@ -226,7 +205,7 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
*/
public double getBestGenotypeValue(int nthBest) {
Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods());
return (((SinglePointCall) mRecord).getLikelihoods())[sorted[nthBest-1]];
return (((SinglePointCall) mRecord).getLikelihoods())[sorted[nthBest - 1]];
}
/**
@ -254,16 +233,13 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
}
/**
* Is this variant an insertion or a deletion? The contract requires
* this to be true if either isInsertion() or isDeletion() returns true. However,
* this method is currently allowed to return true even if neither of isInsertion()
* and isDeletion() does.
* get the base representation of this Variant
*
* @return
* @return a string, of ploidy
*/
@Override
public boolean isIndel() {
return (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE);
public String getAlternateBases() {
return this.getBestGenotype(0).toString();
}
/**
@ -272,31 +248,19 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
* @return
*/
@Override
public double getMAF() {
public double getNonRefAlleleFrequency() {
return 0;
}
/**
* Returns heterozygosity, a more accessible general feature of a variant.
*
* @return
*/
/** @return the VARIANT_TYPE of the current variant */
@Override
public double getHeterozygosity() {
return 0;
public VARIANT_TYPE getType() {
if (this.isSNP()) return VARIANT_TYPE.SNP;
else if (this.isInsertion()) return VARIANT_TYPE.INDEL;
else if (this.isDeletion()) return VARIANT_TYPE.DELETION;
else return VARIANT_TYPE.REFERENCE;
}
/**
* Is this variant an actual genotype (such as individual call from sequencing, HapMap chip etc), or
* population allelic variant (call from pooled sequencing, dbSNP site etc). Only if variant is a genotype, there
* is a meaningful question of, e.g., whether it is a het, or homogeneous non-ref.
*
* @return true if this variant is an actual genotype.
*/
@Override
public boolean isGenotype() {
return true;
}
/**
* Returns phred-mapped confidence in variation event (e.g. MAQ's SNP confidence, or AlleleCaller's best vs. ref).
@ -304,66 +268,20 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
* @return
*/
@Override
public double getVariationConfidence() {
public double getNegLog10PError() {
String ref = new String() + mRecord.getRefBase() + mRecord.getRefBase();
int index = 0;
for (LikelihoodObject.GENOTYPE g: LikelihoodObject.GENOTYPE.values()) {
for (LikelihoodObject.GENOTYPE g : LikelihoodObject.GENOTYPE.values()) {
if (g.toString().equals(ref)) break;
index++;
}
return Math.abs(getBestGenotypeValue(1) - ((SinglePointCall)mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
return Math.abs(getBestGenotypeValue(1) - ((SinglePointCall) mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
}
/**
* Returns phred-mapped confidence in called genotype (e.g. MAQ's consensus confidence, or AlleleCaller's
* best vs next-best.
*
* @return
*/
@Override
public double getConsensusConfidence() {
return Math.abs(getBestGenotypeValue(1) - getBestGenotypeValue(2)) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
public int length() {
return 1;
}
/**
* Returns actual observed genotype. Allowed to return more than two alleles (@see #getPloidy()). If this variant
* is not a genotype, should throw an IllegalStateException.
*
* @return
*/
@Override
public List<String> getGenotype() throws IllegalStateException {
List<String> ret = new ArrayList<String>();
ret.add(this.getBestGenotype(1).toString());
return ret;
}
/**
* Return actual number of observed alleles (chromosomes) in the genotype. If this variant is not a genotype,
* should throw IllegalStateException.
*
* @return
*/
@Override
public int getPloidy() throws IllegalStateException {
return 2; // we're so haploid it hurts
}
/**
* Returns true if the site has at most two known or observed alleles (ref and non-ref), or false if there are > 2 allelic variants known or observed. When
* the implementing class is a genotype, alleles should be always counted including the reference allele whether it was observed in the particular
* individual or not: i.e. if the reference is 'C', then both 'CA' and 'AA' genotypes must be reported as bi-allelic, while 'AT' is <i>not</i> bi-allelic (since there are
* two allelic variants, 'A' and 'T' <i>in addition</i> to the (known) reference variant 'C').
*
* @return
*/
@Override
public boolean isBiallelic() {
return false;
}
public int length() { return 1; }
@Override
public int compareTo(ReferenceOrderedDatum that) {
return this.mLoc.compareTo(that.getLocation());
@ -371,8 +289,10 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
/**
* the parse line, which is not used by the GLF rod
*
* @param header the header to pass in
* @param parts the string object
* @param parts the string object
*
* @return false, alwayss
* @throws java.io.IOException
*/

View File

@ -25,14 +25,19 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.io.IOException;
import java.util.List;
import java.util.Arrays;
import java.util.List;
public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVariant {
public class RodGeliText extends BasicReferenceOrderedDatum implements Variation, VariantBackedByGenotype, AllelicVariant {
public enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
public GenomeLoc loc;
public char refBase = 'N';
@ -96,6 +101,27 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVa
public GenomeLoc getLocation() { return loc; }
/**
* get the reference base(s) at this position
*
* @return the reference base or bases, as a string
*/
@Override
public char getReference() {
return this.refBase;
}
/**
* get the -1 * (log 10 of the error value)
*
* @return the log based error estimate
*/
@Override
public double getNegLog10PError() {
return Math.abs(lodBtr);
}
public String getRefBasesFWD() {
return String.format("%c", getRefSnpFWD());
}
@ -118,8 +144,24 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVa
return refBase == bestGenotype.charAt(0) && refBase == bestGenotype.charAt(1);
}
/**
* get the frequency of this variant
*
* @return VariantFrequency with the stored frequency
*/
@Override
public double getNonRefAlleleFrequency() {
return 1.0;
}
/** @return the VARIANT_TYPE of the current variant */
@Override
public VARIANT_TYPE getType() {
return VARIANT_TYPE.SNP;
}
public boolean isSNP() {
return !isReference();
return (!bestGenotype.equals(Utils.dupString(this.getReference(),2)));
}
public boolean isInsertion() {
@ -130,10 +172,35 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVa
return false;
}
/**
* get the base representation of this Variant
*
* @return a string, of ploidy
*/
@Override
public String getAlternateBases() {
return this.bestGenotype;
}
public boolean isIndel() {
return false;
}
/**
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
* of
*
* @return a char, representing the alternate base
*/
@Override
public char getAlternativeBaseForSNP() {
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
if(this.bestGenotype.toString().charAt(0) == this.getReference())
return this.bestGenotype.toString().charAt(1);
return this.bestGenotype.toString().charAt(0);
}
public double getMAF() {
return 0;
}
@ -253,4 +320,15 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVa
public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; }
*/
/**
* get the likelihoods
*
* @return an array in lexigraphical order of the likelihoods
*/
@Override
public org.broadinstitute.sting.utils.genotype.Genotype getGenotype(DiploidGenotype x) {
if (x.toString().equals(this.getAltBasesFWD())) throw new IllegalStateException("Unable to retrieve genotype");
return new BasicGenotype(this.getLocation(),this.bestGenotype,this.getRefSnpFWD(),this.getConsensusConfidence());
}
}

View File

@ -3,7 +3,9 @@ package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import java.util.*;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
@ -16,7 +18,7 @@ import java.util.regex.Pattern;
* Time: 10:47:14 AM
* To change this template use File | Settings | File Templates.
*/
public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements AllelicVariant {
public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements AllelicVariant, Variation, VariantBackedByGenotype {
private String contig, source, feature, strand, frame;
private long start, stop;
private double score;
@ -74,6 +76,26 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
return GenomeLocParser.parseGenomeLoc(contig, start, stop);
}
/**
* get the reference base(s) at this position
*
* @return the reference base or bases, as a string
*/
@Override
public char getReference() {
return 'N';
}
/**
* get the -1 * (log 10 of the error value)
*
* @return the log based error estimate
*/
@Override
public double getNegLog10PError() {
return 4; // 1/10000 error
}
public String getAttribute(final String key) {
return attributes.get(key);
}
@ -158,10 +180,49 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
public String getAltBasesFWD() { return null; }
public char getAltSnpFWD() throws IllegalStateException { return 0; }
public boolean isReference() { return ! isSNP(); }
/**
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
*
* @return
*/
@Override
public String getAlternateBases() {
return this.feature;
}
/**
* get the frequency of this variant
*
* @return VariantFrequency with the stored frequency
*/
@Override
public double getNonRefAlleleFrequency() {
return this.getMAF();
}
/** @return the VARIANT_TYPE of the current variant */
@Override
public VARIANT_TYPE getType() {
return VARIANT_TYPE.SNP;
}
public boolean isSNP() { return false; }
public boolean isInsertion() { return false; }
public boolean isDeletion() { return false; }
public boolean isIndel() { return false; }
/**
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
* of
*
* @return a char, representing the alternate base
*/
@Override
public char getAlternativeBaseForSNP() {
return this.getAltSnpFWD();
}
public double getMAF() { return 0; }
public double getHeterozygosity() { return 0; }
public boolean isGenotype() { return true; }
@ -175,4 +236,15 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; }
public int length() { return 1; }
/**
* get the likelihoods
*
* @return an array in lexigraphical order of the likelihoods
*/
@Override
public Genotype getGenotype(DiploidGenotype x) {
if (!x.toString().equals(this.getAltBasesFWD())) throw new IllegalStateException("Unable to retrieve genotype");
return new BasicGenotype(this.getLocation(),this.feature,this.getRefSnpFWD(),this.getConsensusConfidence());
}
}

View File

@ -7,12 +7,15 @@ import java.util.List;
* chr1:1104840 A N 0.000000 -85.341265 -85.341265 0.000000 0.000000 324.000000 162 0 0
* chr1:1104841 C N 0.000000 -69.937928 -69.937928 0.000000 0.000000 324.000000 162 0 0
* chr1:1104842 A N 0.000000 -84.816002 -84.816002 0.000000 0.000000 324.000000 162 0 0
*
*/
public interface SNPCallFromGenotypes extends AllelicVariant {
public int nIndividuals();
public int nHomRefGenotypes();
public int nHetGenotypes();
public int nHomVarGenotypes();
public List<Genotype> getGenotypes();
}

View File

@ -1,10 +1,12 @@
package org.broadinstitute.sting.gatk.refdata;
import net.sf.picard.util.SequenceUtil;
import java.util.*;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import java.util.Arrays;
import java.util.List;
/**
* Example format:
@ -16,7 +18,7 @@ import org.broadinstitute.sting.utils.*;
* Time: 10:47:14 AM
* To change this template use File | Settings | File Templates.
*/
public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVariant {
public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, VariantBackedByGenotype, AllelicVariant {
public GenomeLoc loc; // genome location of SNP
// Reference sequence chromosome or scaffold
// Start and stop positions in chrom
@ -60,6 +62,26 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
// ----------------------------------------------------------------------
public GenomeLoc getLocation() { return loc; }
/**
* get the reference base(s) at this position
*
* @return the reference base or bases, as a string
*/
@Override
public char getReference() {
return getRefSnpFWD();
}
/**
* get the -1 * (log 10 of the error value)
*
* @return the log based error estimate
*/
@Override
public double getNegLog10PError() {
return 4; // -log10(0.0001)
}
public boolean onFwdStrand() {
return strand.equals("+");
}
@ -108,10 +130,24 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
}
public String getAllelesFWDString() {
return Utils.join("/", getAllelesFWD());
return Utils.join("", getAllelesFWD());
}
// ----------------------------------------------------------------------
/**
* get the frequency of this variant
*
* @return VariantFrequency with the stored frequency
*/
@Override
public double getNonRefAlleleFrequency() {
return 0; //To change body of implemented methods use File | Settings | File Templates.
}
/** @return the VARIANT_TYPE of the current variant */
@Override
public VARIANT_TYPE getType() {
return VARIANT_TYPE.SNP;
}// ----------------------------------------------------------------------
//
// What kind of variant are we?
//
@ -119,7 +155,34 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
public boolean isSNP() { return varType.contains("single"); }
public boolean isInsertion() { return varType.contains("insertion"); }
public boolean isDeletion() { return varType.contains("deletion"); }
/**
* get the base representation of this Variant
*
* @return a string, of ploidy
*/
@Override
public String getAlternateBases() {
return getAllelesFWDString();
}
public boolean isIndel() { return isInsertion() || isDeletion() || varType.contains("in-del"); }
/**
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
* of
*
* @return a char, representing the alternate base
*/
@Override
public char getAlternativeBaseForSNP() {
return getAltSnpFWD(); /*
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
if (getAlternateBases().charAt(0) == this.getReference())
return getAlternateBases().charAt(1);
return getAlternateBases().charAt(0); */
}
public boolean isReference() { return false; } // snp locations are never "reference", there's always a variant
public boolean isHapmap() { return validationStatus.contains("by-hapmap"); }
@ -147,7 +210,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
if ( isIndel() ) s += ":Indel";
if ( isHapmap() ) s += ":Hapmap";
if ( is2Hit2Allele() ) s += ":2Hit";
return s;
return s;
}
public String repl() {
@ -213,7 +276,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
}
public double getMAF() {
// Fixme: update to actually get MAF
// Fixme: update to actually get MAF
//return avHet;
return -1;
}
@ -244,4 +307,14 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
public int length() { return (int)(loc.getStop() - loc.getStart() + 1); }
/**
* get the likelihoods
*
* @return an array in lexigraphical order of the likelihoods
*/
@Override
public org.broadinstitute.sting.utils.genotype.Genotype getGenotype(DiploidGenotype x) {
if (x.toString().equals(this.getAltBasesFWD())) throw new IllegalStateException("Unable to retrieve genotype");
return new BasicGenotype(this.getLocation(),this.getAltBasesFWD(),this.getRefSnpFWD(),this.getConsensusConfidence());
}
}

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.filters;
import org.broadinstitute.sting.gatk.contexts.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RodGeliText;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.utils.*;
import org.apache.log4j.Logger;
@ -50,7 +51,7 @@ public abstract class RatioFilter implements VariantExclusionCriterion {
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
boolean highGenotypeConfidence = variant.getConsensusConfidence() > minGenotypeConfidenceToTest;
boolean excludable = !excludeHetsOnly() || GenotypeUtils.isHet(variant);
boolean excludable = !excludeHetsOnly() || GenotypeUtils.isHet((AllelicVariant)variant);
exclude = excludable && highGenotypeConfidence && pointEstimateExclude(counts);
//
// for printing only

View File

@ -1,12 +1,12 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.io.PrintStream;
import java.util.List;
import java.util.ArrayList;
import java.util.List;
/**
* The Broad Institute
@ -65,5 +65,5 @@ public abstract class BasicVariantAnalysis implements VariantAnalysis {
}
public abstract String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context);
public abstract String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context);
}

View File

@ -1,27 +1,30 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.Utils;
import java.util.List;
import java.util.ArrayList;
import java.util.List;
/**
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* <p/>
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
public class CallableBasesAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis {
long all_bases = 0;
long all_calls = 0;
//final static double[] Qthresholds = { 10, 20, 30, 40, 50, 100, 200, 500, 1000 };
final static double[] thresholds = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 50, 100 };
final static double[] thresholds = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 50, 100};
long[] discoverable_bases = new long[thresholds.length];
long[] genotypable_bases = new long[thresholds.length];
@ -29,36 +32,54 @@ public class CallableBasesAnalysis extends BasicVariantAnalysis implements Genot
super("callable_bases");
}
public long nSites() { return all_bases; }
public long nCalls() { return all_calls; }
public long nDiscoverable(int index) { return discoverable_bases[index]; }
public double percentDiscoverable(int index) { return (100.0*nDiscoverable(index)) / nSites(); }
public long nGenotypable(int index) { return genotypable_bases[index]; }
public double percentGenotypable(int index) { return (100.0*nGenotypable(index)) / nSites(); }
public long nSites() {
return all_bases;
}
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
public long nCalls() {
return all_calls;
}
public long nDiscoverable(int index) {
return discoverable_bases[index];
}
public double percentDiscoverable(int index) {
return (100.0 * nDiscoverable(index)) / nSites();
}
public long nGenotypable(int index) {
return genotypable_bases[index];
}
public double percentGenotypable(int index) {
return (100.0 * nGenotypable(index)) / nSites();
}
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
all_bases++;
if ( eval == null ) // no data here!
if (eval == null) // no data here!
return null;
// we actually have a record here
if ( ! eval.isGenotype() ) { // evaluation record isn't a genotype, die!
if (!(eval instanceof VariantBackedByGenotype)) { // evaluation record isn't a genotype, die!
throw new RuntimeException("Evaluation track isn't an Genotype!");
}
all_calls++;
// For every threshold, updated discoverable and callable
for ( int i = 0; i < thresholds.length; i++ ) {
for (int i = 0; i < thresholds.length; i++) {
double threshold = thresholds[i];
DiploidGenotype g = DiploidGenotype.valueOf(Utils.dupString(ref, 2));
Genotype genotype = ((VariantBackedByGenotype) eval).getGenotype(g);
// update discoverable
if ( eval.isSNP() && eval.getVariationConfidence() >= threshold )
if (eval.isSNP() && eval.getNegLog10PError() >= threshold)
discoverable_bases[i]++;
if ( eval.isReference() && eval.getConsensusConfidence() >= threshold )
if (!eval.isSNP() && genotype.getNegLog10PError() >= threshold)
discoverable_bases[i]++;
if ( eval.getConsensusConfidence() >= threshold )
if (genotype.getNegLog10PError() >= threshold)
genotypable_bases[i]++;
//System.out.printf("Updating %s SNP=%b, REF=%b VarConf=%f ConConf=%f where threshold=%f: discoverable = %d, genotypable = %d%n",
@ -76,7 +97,7 @@ public class CallableBasesAnalysis extends BasicVariantAnalysis implements Genot
s.add(String.format(""));
s.add(String.format("confidence_threshold\tdiscoverable_bases\tdiscoverable_bases_percent\tgenotypable_bases\tgenotypable_bases_percent"));
for ( int i = 0; i < thresholds.length; i++ ) {
for (int i = 0; i < thresholds.length; i++) {
double threshold = thresholds[i];
s.add(String.format("%6.2f\t%d\t%.6f\t%d\t%.6f", threshold, nDiscoverable(i), percentDiscoverable(i), nGenotypable(i), percentGenotypable(i)));
}

View File

@ -1,14 +1,14 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.ArrayList;
import java.util.List;
import java.util.HashSet;
import java.util.List;
/**
* The Broad Institute
@ -24,7 +24,7 @@ public class ClusterCounterAnalysis extends BasicVariantAnalysis implements Geno
ArrayList<HashSet<GenomeLoc>> variantsWithClusters;
int minDistanceForFlagging = 5;
int[] neighborWiseBoundries = {1, 2, 5, 10, 20, 50, 100};
AllelicVariant lastVariant = null;
Variation lastVariation = null;
GenomeLoc lastVariantInterval = null;
int nSeen = 0;
@ -36,16 +36,16 @@ public class ClusterCounterAnalysis extends BasicVariantAnalysis implements Geno
variantsWithClusters.add(new HashSet<GenomeLoc>());
}
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
String r = null;
if ( eval != null && eval.isSNP() ) {
IntervalRod intervalROD = (IntervalRod)tracker.lookup("interval", null);
GenomeLoc interval = intervalROD == null ? null : intervalROD.getLocation();
if (lastVariant != null) {
if (lastVariation != null) {
GenomeLoc eL = eval.getLocation();
GenomeLoc lvL = lastVariant.getLocation();
GenomeLoc lvL = lastVariation.getLocation();
if (eL.getContigIndex() == lvL.getContigIndex()) {
long d = eL.distance(lvL);
if ( lastVariantInterval != null && lastVariantInterval.compareTo(interval) != 0) {
@ -66,7 +66,7 @@ public class ClusterCounterAnalysis extends BasicVariantAnalysis implements Geno
}
}
lastVariant = eval;
lastVariation = eval;
lastVariantInterval = interval;
}

View File

@ -1,21 +1,24 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.List;
import java.util.ArrayList;
import java.util.List;
/**
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* <p/>
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
public class GenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis {
private String dbName;
@ -35,48 +38,48 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
public GenotypeConcordance(final String name) {
super("genotype_concordance");
dbName = name;
for ( int i = 0; i < 4; i++ ) {
for (int i = 0; i < 4; i++) {
truth_totals[i] = 0;
calls_totals[i] = 0;
for ( int j = 0; j < 4; j++ )
for (int j = 0; j < 4; j++)
table[i][j] = 0;
}
}
public void inc(AllelicVariant chip, AllelicVariant eval, char ref) {
if ( (chip != null && !chip.isGenotype()) || (eval != null && !eval.isGenotype()) )
public void inc(Variation chip, Variation eval, char ref) {
if ((chip != null && !(chip instanceof VariantBackedByGenotype) || (eval != null && !(eval instanceof VariantBackedByGenotype))))
throw new StingException("Failure: trying to analyze genotypes of non-genotype data");
DiploidGenotype g = DiploidGenotype.valueOf(Utils.dupString(ref, 2));
int truthIndex, callIndex;
if ( chip == null )
if (chip == null)
truthIndex = UNKNOWN;
else if ( chip.isReference() && Utils.countOccurrences(ref, chip.getGenotype().get(0)) == chip.getGenotype().get(0).length() )
else if (chip.getAlternateBases().equals(g.toString()))
truthIndex = REF;
else if ( GenotypeUtils.isHet(chip) )
else if (chip.getAlternateBases().charAt(0) != chip.getAlternateBases().charAt(1))
truthIndex = VAR_HET;
else
truthIndex = VAR_HOM;
// todo -- FIXME on countOccurences
if ( eval == null )
if (eval == null)
callIndex = NO_CALL;
else if ( eval.isReference() && Utils.countOccurrences(ref, eval.getGenotype().get(0)) == eval.getGenotype().get(0).length() )
else if (eval.getAlternateBases().equals(g.toString()))
callIndex = REF;
else if ( GenotypeUtils.isHet(eval) )
else if (eval.getAlternateBases().charAt(0) != eval.getAlternateBases().charAt(1))
callIndex = VAR_HET;
else
callIndex = VAR_HOM;
if ( chip != null || eval != null ) {
if (chip != null || eval != null) {
//System.out.printf("TEST: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval);
table[truthIndex][callIndex]++;
truth_totals[truthIndex]++;
if ( callIndex != NO_CALL ) calls_totals[callIndex]++;
if (callIndex != NO_CALL) calls_totals[callIndex]++;
}
}
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
AllelicVariant chip = (AllelicVariant)tracker.lookup(dbName, null);
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
Variation chip = (Variation) tracker.lookup(dbName, null);
inc(chip, eval, ref);
return null;
}
@ -84,24 +87,24 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
private void addCalledGenotypeConcordance(List<String> s) {
StringBuilder sb = new StringBuilder();
sb.append("CALLED_GENOTYPE_CONCORDANCE\t");
for ( int i = 0; i < 4; i++ ) {
for (int i = 0; i < 4; i++) {
int nConcordantCallsI = table[i][i];
String value = "N/A";
if ( i != UNKNOWN )
value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i]-table[UNKNOWN][i]));
if (i != UNKNOWN)
value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i] - table[UNKNOWN][i]));
sb.append(value);
}
s.add(sb.toString());
}
s.add(sb.toString());
}
/**
* How many overall calls where made that aren't NO_CALLS or UNKNOWNS?
*/
private int getNCalled() {
int n = 0;
for ( int i = 0; i < 4; i++ )
for ( int j = 0; j < 4; j++ )
if ( i != NO_CALL && j != NO_CALL ) n += table[i][j];
for (int i = 0; i < 4; i++)
for (int j = 0; j < 4; j++)
if (i != NO_CALL && j != NO_CALL) n += table[i][j];
return n;
}
@ -109,30 +112,30 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
int nConcordantRefCalls = table[REF][REF];
int nConcordantHetCalls = table[VAR_HET][VAR_HET];
int nConcordantVarHomCalls = table[VAR_HOM][VAR_HOM];
int nVarCalls = table[VAR_HOM][VAR_HET] + table[VAR_HOM][VAR_HOM] + table[VAR_HET][VAR_HET] + table[VAR_HET][VAR_HOM];
int nVarCalls = table[VAR_HOM][VAR_HET] + table[VAR_HOM][VAR_HOM] + table[VAR_HET][VAR_HET] + table[VAR_HET][VAR_HOM];
int nConcordantVarCalls = nConcordantHetCalls + nConcordantVarHomCalls;
int nConcordantCalls = nConcordantRefCalls + nConcordantVarCalls;
int nTrueVar = truth_totals[VAR_HET] + truth_totals[VAR_HOM];
int nCalled = getNCalled();
s.add(String.format("VARIANT_SENSITIVITY %s", cellPercent(nVarCalls, nTrueVar)));
s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls)));
s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled)));
s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls)));
s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled)));
}
public List<String> done() {
List<String> s = new ArrayList<String>();
s.add(String.format("name %s", dbName));
s.add(String.format("TRUTH_STATE\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CALL\t\tTOTALS\tTRUE_GENOTYPE_CONCORDANCE\tGENOTYPE_SENSITIVITY"));
for (int i=0; i < 4; i++) {
for (int i = 0; i < 4; i++) {
StringBuffer sb = new StringBuffer();
sb.append(String.format("%15s ", TRUTH_NAMES[i]));
for (int j=0; j < 4; j++)
for (int j = 0; j < 4; j++)
sb.append(String.format("%9d ", table[i][j]));
sb.append(String.format("%9d ", truth_totals[i]));
if ( i == VAR_HET || i == VAR_HOM ) {
sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF]+table[i][VAR_HET]+table[i][VAR_HOM])));
sb.append(String.format("%s", cellPercent(truth_totals[i]-table[i][NO_CALL], truth_totals[i])));
} else {
if (i == VAR_HET || i == VAR_HOM) {
sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM])));
sb.append(String.format("%s", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i])));
} else {
sb.append("\tN/A\t\t\tN/A");
}
s.add(sb.toString());
@ -140,16 +143,16 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
addCalledGenotypeConcordance(s);
addOverallStats(s);
for (int i=0; i < 4; i++) {
for (int j=0; j < 4; j++) {
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
s.add(String.format("%s_%s_%s %d", TRUTH_NAMES[i], CALL_NAMES[j], "NO_SITES", table[i][j]));
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_TRUTH", cellPercent(table[i][j], truth_totals[i])));
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_CALLS", cellPercent(table[i][j], calls_totals[j])));
}
if ( i == VAR_HET || i == VAR_HOM ) {
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF]+table[i][VAR_HET]+table[i][VAR_HOM])));
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i]-table[i][NO_CALL], truth_totals[i])));
if (i == VAR_HET || i == VAR_HOM) {
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM])));
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i])));
}
}
return s;
@ -158,7 +161,7 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
private static String cellPercent(int count, int total) {
StringBuffer sb = new StringBuffer();
total = Math.max(total, 0);
sb.append(String.format("%.2f", (100.0*count)/total));
sb.append(String.format("%.2f", (100.0 * count) / total));
sb.append("%");
return sb.toString();
}

View File

@ -1,17 +1,16 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import cern.jet.math.Arithmetic;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.SNPCallFromGenotypes;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.ArrayList;
import java.util.List;
import cern.jet.math.Arithmetic;
/**
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
@ -32,7 +31,7 @@ public class HardyWeinbergEquilibrium extends BasicVariantAnalysis implements Po
this.threshold = threshold;
}
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
String r = null;
if ( eval != null &&

View File

@ -1,11 +1,11 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.List;
import java.util.ArrayList;
import java.util.List;
/**
* The Broad Institute
@ -29,19 +29,19 @@ public class IndelMetricsAnalysis extends BasicVariantAnalysis implements Genoty
sizes[0][i] = sizes[1][i] = 0;
}
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
if ( eval != null && eval.isIndel() ) {
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
if ( eval != null && eval.isInsertion() ) {
if ( eval.isInsertion() )
insertions++;
else if ( eval.isDeletion() )
deletions++;
else
throw new RuntimeException("Variant is indel, but isn't insertion or deletion!");
throw new RuntimeException("Variation is indel, but isn't insertion or deletion!");
if ( eval.length() < 100 ) {
sizes[eval.isDeletion() ? 0 : 1][eval.length()]++;
if ( eval.length() > maxSize )
maxSize = eval.length();
if ( eval.getAlternateBases().length() < 100 ) {
sizes[eval.isDeletion() ? 0 : 1][eval.getAlternateBases().length()]++;
if ( eval.getAlternateBases().length() > maxSize )
maxSize = eval.getAlternateBases().length();
}
}

View File

@ -1,15 +1,15 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.io.PrintStream;
/**
* The Broad Institute
@ -26,7 +26,7 @@ public class NeighborDistanceAnalysis extends BasicVariantAnalysis implements Ge
int minDistanceForFlagging = 5;
int[] neighborWiseBoundries = {1, 2, 5, 10, 20, 50, 100, 1000, 10000};
AllelicVariant lastVariant = null;
Variation lastVariation = null;
GenomeLoc lastVariantInterval = null;
PrintStream violationsOut = null;
@ -35,16 +35,16 @@ public class NeighborDistanceAnalysis extends BasicVariantAnalysis implements Ge
neighborWiseDistances = new ArrayList<Long>();
}
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
String r = null;
if ( eval != null && eval.isSNP() ) {
IntervalRod intervalROD = (IntervalRod)tracker.lookup("interval", null);
GenomeLoc interval = intervalROD == null ? null : intervalROD.getLocation();
if (lastVariant != null) {
if (lastVariation != null) {
GenomeLoc eL = eval.getLocation();
GenomeLoc lvL = lastVariant.getLocation();
GenomeLoc lvL = lastVariation.getLocation();
if (eL.getContigIndex() == lvL.getContigIndex()) {
long d = eL.distance(lvL);
if ( lastVariantInterval != null && lastVariantInterval.compareTo(interval) != 0) {
@ -57,7 +57,7 @@ public class NeighborDistanceAnalysis extends BasicVariantAnalysis implements Ge
}
}
lastVariant = eval;
lastVariation = eval;
lastVariantInterval = interval;
}

View File

@ -1,9 +1,10 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.ArrayList;
import java.util.List;
@ -13,10 +14,9 @@ import java.util.List;
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* <p/>
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
public class TransitionTranversionAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis {
long nTransitions = 0, nTransversions = 0;
@ -25,13 +25,18 @@ public class TransitionTranversionAnalysis extends BasicVariantAnalysis implemen
super("transitions_transversions");
}
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
if ( eval != null && eval.isSNP() ) {
char refBase = eval.getRefSnpFWD();
char altBase = eval.getAltSnpFWD();
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
if (eval != null && eval.isSNP()) {
if (eval.getAlternateBases().length() != 2) {
throw new StingException("TransitionTranversionAnalysis works only with biallelic variants");
}
char refBase = eval.getReference();
char altBase = (eval.getAlternateBases().charAt(0) == refBase) ? eval.getAlternateBases().charAt(1) : eval.getAlternateBases().charAt(0);
BaseUtils.BaseSubstitutionType subType = BaseUtils.SNPSubstitutionType(refBase, altBase);
if ( subType == BaseUtils.BaseSubstitutionType.TRANSITION )
if (subType == BaseUtils.BaseSubstitutionType.TRANSITION)
nTransitions++;
else
nTransversions++;

View File

@ -1,8 +1,8 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.io.PrintStream;
import java.util.List;
@ -23,7 +23,7 @@ public interface VariantAnalysis {
public PrintStream getCallPrintStream();
public List<String> getParams();
public void initialize(VariantEvalWalker master, PrintStream out, PrintStream callOut, String filename);
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context);
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context);
public void finalize(long nSites);
public List<String> done();
}

View File

@ -1,12 +1,13 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenotypeUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.List;
import java.util.ArrayList;
import java.util.List;
/**
* The Broad Institute
@ -27,10 +28,10 @@ public class VariantCounter extends BasicVariantAnalysis implements GenotypeAnal
super("variant_counts");
}
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
nSNPs += eval == null ? 0 : 1;
if ( this.getMaster().evalContainsGenotypes && eval != null && eval.isSNP() && GenotypeUtils.isHet(eval) )
if ( this.getMaster().evalContainsGenotypes && eval != null && eval.isSNP() && ((VariantBackedByGenotype)eval).getGenotype( DiploidGenotype.valueOf(eval.getAlternateBases())).isHet() )
nHets++;
return null;

View File

@ -1,21 +1,20 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.List;
import java.util.ArrayList;
import java.util.List;
/**
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* <p/>
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis {
private String dbName;
@ -31,45 +30,81 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
dbName = name;
}
public void inc(AllelicVariant dbSNP, AllelicVariant eval) {
public void inc(Variation dbSNP, Variation eval) {
boolean inDB = dbSNP != null;
boolean inEval = eval != null;
if (inDB ) {
if ( dbSNP.isSNP() ) nDBSNPs++;
if ( dbSNP.isIndel() ) nDBIndels++;
//System.out.printf("snp=%b ins=%b del=%b indel=%b %s%n", dbSNP.isSNP(), dbSNP.isInsertion(), dbSNP.isDeletion(), dbSNP.isIndel(), dbSNP);
if (inDB) {
if (dbSNP.isSNP()) nDBSNPs++;
if (dbSNP.isIndel()) nDBIndels++;
}
if (inEval) nEvalObs++;
if (inDB && inEval) {
if ( dbSNP.isSNP() ) { // changes the calculation a bit
if (dbSNP.isSNP()) { // changes the calculation a bit
nOverlapping++;
if ( ! discordantP(dbSNP, eval) )
if (!discordantP(dbSNP, eval))
nConcordant++;
}
if ( dbSNP.isIndel() && eval.isSNP() )
if (dbSNP.isIndel() && eval.isSNP())
nSNPsCalledAtIndels++;
}
}
public int nDBSNPs() { return nDBSNPs; }
public int nDBIndels() { return nDBIndels; }
public int nEvalSites() { return nEvalObs; }
public int nOverlappingSites() { return nOverlapping; }
public int nConcordant() { return nConcordant; }
public int nNovelSites() { return Math.abs(nEvalSites() - nOverlappingSites()); }
public int nSNPsAtIndels() { return nSNPsCalledAtIndels; }
public int callCount = 0;
public boolean discordantP(AllelicVariant dbSNP, AllelicVariant eval) {
if (dbSNP != null && dbSNP.isSNP() && eval != null ) {
return ! (dbSNP.getAltSnpFWD() == eval.getAltSnpFWD() || dbSNP.getRefSnpFWD() == eval.getAltSnpFWD());
} else {
return false;
public int nDBSNPs() {
return nDBSNPs;
}
public int nDBIndels() {
return nDBIndels;
}
public int nEvalSites() {
return nEvalObs;
}
public int nOverlappingSites() {
return nOverlapping;
}
public int nConcordant() {
return nConcordant;
}
public int nNovelSites() {
return Math.abs(nEvalSites() - nOverlappingSites());
}
public int nSNPsAtIndels() {
return nSNPsCalledAtIndels;
}
public static int count = 0;
public boolean discordantP(Variation dbSNP, Variation eval) {
if (eval != null) {
char alt = (eval.isSNP()) ? eval.getAlternativeBaseForSNP() : eval.getReference();
if (dbSNP != null && dbSNP.isSNP()) {
boolean val = !dbSNP.getAlternateBases().contains(String.valueOf(alt));
if (val) {
//System.err.print(eval.getLocation());
//System.err.print(" eval.alt = " + alt);
//System.err.println(" dbsnp.bases = " + dbSNP.getAlternateBases());
//System.err.println(val);
//System.err.println(count);
}
//else System.err.println("not count == " + count++);
return val;
}
}
return false;
}
@ -86,17 +121,19 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
return nConcordant() / (1.0 * nOverlappingSites());
}
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
// There are four cases here:
AllelicVariant dbsnp = (AllelicVariant)tracker.lookup(dbName, null);
Variation dbsnp = (Variation) tracker.lookup(dbName, null);
boolean isSNP = dbsnp != null && dbsnp.isSNP();
inc(dbsnp, eval);
if ( dbsnp != null && eval != null ) {
if ( dbsnp.isSNP() && eval.isSNP() && discordantP(dbsnp, eval) ) {
if (dbsnp != null && eval != null) {
if (dbsnp.isSNP() && eval.isSNP() && discordantP(dbsnp, eval)) {
return String.format("Discordant [DBSNP %s] [EVAL %s]", dbsnp, eval);
} else if ( dbsnp.isIndel() && eval.isSNP() ) {
return String.format("SNP-at-indel DBSNP=%s %s", dbsnp.getGenotype().get(0), eval);
} else if (dbsnp.isIndel() && eval.isSNP()) {
return String.format("SNP-at-indel DBSNP=%s %s", dbsnp.getAlternateBases(), eval);
} else {
return null;
// return "Novel " + eval;
@ -128,10 +165,10 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
s.add(String.format("n_concordant %d", nConcordant()));
s.add(String.format("n_novel_sites %d", nNovelSites()));
s.add(String.format("percent_eval_sites_in_db %.2f", 100*fractionEvalSitesCoveredByDB()));
s.add(String.format("concordance_rate %.2f", 100*concordanceRate()));
s.add(String.format("percent_eval_sites_in_db %.2f", 100 * fractionEvalSitesCoveredByDB()));
s.add(String.format("concordance_rate %.2f", 100 * concordanceRate()));
s.add(String.format("percent_db_sites_in_eval %.2f", 100*fractionDBSitesDiscoveredInEval()));
s.add(String.format("percent_db_sites_in_eval %.2f", 100 * fractionDBSitesDiscoveredInEval()));
s.add(String.format("n_snp_calls_at_indels %d", nSNPsAtIndels()));
s.add(String.format("percent_calls_at_indels %.2f", nSNPsAtIndels() / (0.01 * nEvalSites())));

View File

@ -2,14 +2,12 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.SNPCallFromGenotypes;
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.io.File;
import java.io.FileNotFoundException;
@ -28,8 +26,8 @@ import java.util.*;
*
*/
@By(DataSource.REFERENCE)
@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="eval",type=AllelicVariant.class)})
@Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=AllelicVariant.class), @RMD(name="dbsnp",type=AllelicVariant.class),@RMD(name="hapmap-chip",type=AllelicVariant.class), @RMD(name="interval",type=IntervalRod.class)})
@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="eval",type=RodGeliText.class)}) // right now we have no base variant class for rods, this should change
@Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=RodGeliText.class), @RMD(name="dbsnp",type=rodDbSNP.class),@RMD(name="hapmap-chip",type=RodGenotypeChipAsGFF.class), @RMD(name="interval",type=IntervalRod.class)})
public class VariantEvalWalker extends RefWalker<Integer, Integer> {
@Argument(shortName="minConfidenceScore", doc="Minimum confidence score to consider an evaluation SNP a variant", required=false)
public int minConfidenceScore = -1;
@ -181,19 +179,18 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
nSites++;
// Iterate over each analysis, and update it
AllelicVariant eval = (AllelicVariant)tracker.lookup("eval", null);
Variation eval = (Variation)tracker.lookup("eval", null);
//if ( eval!=null ) System.out.printf("Eval: %f %d %b%n", eval.getVariationConfidence(), minDiscoveryQ, eval.getVariationConfidence() < minDiscoveryQ);
if ( eval != null ) {
// comment to disable variant filtering by confidence score
if ( evalContainsGenotypes ) {
// Genotyping - use best vs. next best lod
if ( eval.getConsensusConfidence() < minConfidenceScore ) eval = null;
if ( eval.getNegLog10PError() < minConfidenceScore ) eval = null;
} else {
// Variant discovery - use best vs. reference lod
if ( Math.abs(eval.getVariationConfidence()) < minConfidenceScore ) eval = null;
// Variation discovery - use best vs. reference lod
if ( Math.abs(eval.getNegLog10PError()) < minConfidenceScore ) eval = null;
}
}
@ -202,7 +199,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
// update the known / novel set by checking whether the knownSNPDBName track has an entry here
if ( eval != null ) {
AllelicVariant dbsnp = (AllelicVariant)tracker.lookup(knownSNPDBName, null);
Variation dbsnp = (Variation)tracker.lookup(knownSNPDBName, null);
String noveltySet = dbsnp == null ? NOVEL_SNPS : KNOWN_SNPS;
updateAnalysisSet(noveltySet, eval, tracker, ref.getBase(), context);
}
@ -219,7 +216,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
}
public void updateAnalysisSet(final String analysisSetName, AllelicVariant eval,
public void updateAnalysisSet(final String analysisSetName, Variation eval,
RefMetaDataTracker tracker, char ref, AlignmentContext context) {
// Iterate over each analysis, and update it
if ( getAnalysisSet(analysisSetName) != null ) {

View File

@ -1,8 +1,9 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.genotype.Variation;
/**
* The Broad Institute
@ -22,7 +23,7 @@ public class VariantMatcher extends BasicVariantAnalysis implements GenotypeAnal
dbName = name;
}
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
String r = null;
AllelicVariant db = (AllelicVariant)tracker.lookup(dbName, null);

View File

@ -20,7 +20,7 @@ public class GenotypeUtils {
// VariantType() {}
}
/** This method accepts rods that implement either Genotype or GenotypeList interface (all others will result in an exception). Variant
/** This method accepts rods that implement either Genotype or GenotypeList interface (all others will result in an exception). Variant
* (Genotype object) of the specified type (point mutation or indel) will be extracted from GenotypeList rod if such variant exists, or the rod itself
* will be typecasted and returned back if it implements Genotype and represents the specified variant type. If the last argument is false, then
* null will be returned in all other cases. If the last argument is true and either a) rod is a GenotypeList that lacks a call of the specified type, but call

View File

@ -0,0 +1,143 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.GenomeLoc;
/**
*
* @author aaron
*
* Class BasicGenotype
*
* represents a basic genotype object
*/
public class BasicGenotype implements Genotype {
// the genotype string
private String mGenotype;
private GenomeLoc mLocation;
private char mRef;
private double mNetLog10PError;
/**
* create a basic genotype
* @param genotype
*/
public BasicGenotype(GenomeLoc location, String genotype, char ref, double netLog10PError) {
mNetLog10PError = netLog10PError;
mGenotype = genotype;
mLocation = location;
mRef = ref;
}
/**
* get the -1 * (log 10 of the error value)
*
* @return the log based error estimate
*/
@Override
public double getNegLog10PError() {
return mNetLog10PError;
}
/**
* get the bases that represent this
*
* @return the bases, as a string
*/
@Override
public String getBases() {
return mGenotype;
}
/**
* get the ploidy
*
* @return the ploidy value
*/
@Override
public int getPloidy() {
return mGenotype.length();
}
/**
* Returns true if both observed alleles are the same (regardless of whether they are ref or alt)
*
* @return true if we're homozygous, false otherwise
*/
@Override
public boolean isHom() {
if (mGenotype.length() < 1)
return false;
char base = mGenotype.charAt(0);
for (char cur : mGenotype.toCharArray()) {
if (base != cur) {
return false;
}
}
return true;
}
/**
* Returns true if observed alleles differ (regardless of whether they are ref or alt)
*
* @return true if we're het, false otherwise
*/
@Override
public boolean isHet() {
return !isHom();
}
/**
* get the genotype's location
*
* @return a GenomeLoc representing the location
*/
@Override
public GenomeLoc getLocation() {
return mLocation;
}
/**
* returns true if the genotype is a point genotype, false if it's a indel / deletion
*
* @return true is a SNP
*/
@Override
public boolean isPointGenotype() {
return true;
}
/**
* given the reference, are we a variant? (non-ref)
*
* @param ref the reference base or bases
*
* @return true if we're a variant
*/
@Override
public boolean isVariant(char ref) {
return !(mGenotype.charAt(0) == ref && isHom());
}
/**
* get the reference base.
*
* @return a character, representing the reference base
*/
@Override
public char getReference() {
return mRef;
}
/**
* return this genotype as a variant
*
* @return the variant
*/
@Override
public Variation toVariation() {
if (!isVariant(this.mRef)) throw new IllegalStateException("this genotype is not a variant");
return new BasicVariation(this.getBases(),mRef,this.getBases().length(),mLocation,mNetLog10PError);
}
}

View File

@ -8,7 +8,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
* User: aaronmckenna
* Date: Sep 9, 2009
* Time: 9:32:34 PM
*
* <p/>
* a basic implementation of variant
*/
public class BasicVariation implements Variation {
@ -18,12 +18,14 @@ public class BasicVariation implements Variation {
protected final int mLength;
protected final GenomeLoc mLocation;
protected final double mConfidence;
/**
* the constructor
* @param bases the bases that this variant represents
*
* @param bases the bases that this variant represents
* @param reference the reference base
* @param length are we a single base variant, or a indel/deletion? length is negitive for an indel,
* positive for a indel, and 0 for a substitution
* @param length are we a single base variant, or a indel/deletion? length is negitive for an indel,
* positive for a indel, and 0 for a substitution
*/
public BasicVariation(String bases, char reference, int length, GenomeLoc location, double confidence) {
mBases = bases;
@ -40,14 +42,14 @@ public class BasicVariation implements Variation {
@Override
public VARIANT_TYPE getType() {
if (mLength >0) return VARIANT_TYPE.INDEL;
if (mLength <0) return VARIANT_TYPE.DELETION;
if (mLength > 0) return VARIANT_TYPE.INDEL;
if (mLength < 0) return VARIANT_TYPE.DELETION;
return (isSNP()) ? VARIANT_TYPE.SNP : VARIANT_TYPE.REFERENCE;
}
@Override
public boolean isSNP() {
if (mLength == 0 && Utils.dupString(mRef,2).equals(mBases)) return true;
if (mLength == 0 && Utils.dupString(mRef, 2).equals(mBases)) return true;
return false;
}
@ -62,7 +64,7 @@ public class BasicVariation implements Variation {
}
@Override
public String getBases() {
public String getAlternateBases() {
return mBases;
}
@ -72,19 +74,8 @@ public class BasicVariation implements Variation {
}
@Override
public String getReference() {
return String.valueOf(mRef);
}
@Override
public boolean isHet() {
if (mLength == 0 && mBases.charAt(0) != mBases.charAt(1)) return true;
return false;
}
@Override
public boolean isHom() {
return !isHet();
public char getReference() {
return (mRef);
}
@Override
@ -98,12 +89,29 @@ public class BasicVariation implements Variation {
return false;
}
/**
* are we an insertion or a deletion? yes, then return true. No? Well, false it is.
*
* @return true if we're an insertion or deletion
*/
@Override
public char getAlternateBase() {
if (mLength != 0) {
throw new UnsupportedOperationException("Unable to get alternate base for indel / deletion");
}
if (mBases.charAt(0) == mRef) return mBases.charAt(1);
else return mBases.charAt(0);
public boolean isIndel() {
return (isDeletion() || isInsertion());
}
/**
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
* of
*
* @return a char, representing the alternate base
*/
@Override
public char getAlternativeBaseForSNP() {
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
if (getAlternateBases().charAt(0) == this.getReference())
return getAlternateBases().charAt(1);
return getAlternateBases().charAt(0);
}
}

View File

@ -0,0 +1,18 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
/**
* @author aaron
* <p/>
* Interface VariantBackedByGenotype
* <p/>
* this variant is backed by genotypic information
*/
public interface VariantBackedByGenotype {
/**
* get the likelihoods
* @return an array in lexigraphical order of the likelihoods
*/
public Genotype getGenotype(DiploidGenotype x);
}

View File

@ -46,13 +46,6 @@ public interface Variation {
*/
public boolean isDeletion();
/**
* get the base representation of this Variant
*
* @return a string, of ploidy
*/
public String getBases();
/**
* get the location that this Variant represents
*
@ -65,13 +58,7 @@ public interface Variation {
*
* @return the reference base or bases, as a string
*/
public String getReference();
/** is our base representation heterozygous */
public boolean isHet();
/** is our base representation homozygous */
public boolean isHom();
public char getReference();
/**
* get the -1 * (log 10 of the error value)
@ -87,9 +74,22 @@ public interface Variation {
public boolean isReference();
/**
* gets the alternate base. If this is homref, throws an UnsupportedOperationException
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
* @return
*/
public char getAlternateBase();
public String getAlternateBases();
/**
* are we an insertion or a deletion? yes, then return true. No? Well, false it is.
* @return true if we're an insertion or deletion
*/
public boolean isIndel();
/**
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
* of
* @return a char, representing the alternate base
*/
public char getAlternativeBaseForSNP();
}

View File

@ -8,14 +8,12 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.junit.Assert;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
@ -60,38 +58,7 @@ public class RodGLFTest extends BaseTest {
assertEquals(finalRecordCount, counter);
}
/** make sure we're returning true to biallelic */
@Test
public void testIsBiallelic() {
RodGLF glf = iter.next();
Assert.assertFalse(iter.isBiallelic());
}
// best vs next-best
@Test
public void testGetConsensusConfidence() {
RodGLF glf = iter.next();
assertEquals(120.0, iter.getConsensusConfidence(), 0.005);
glf = iter.next();
assertEquals(120.0, iter.getConsensusConfidence(), 0.005);
}
// best vs. ref
@Test
public void testGetVariationConfidence() {
RodGLF glf = iter.next();
assertEquals(0.0, iter.getVariationConfidence(), 0.005);
glf = iter.next();
assertEquals(250.0, iter.getVariationConfidence(), 0.005);
}
@Test
public void testGetGenotype() {
RodGLF glf = iter.next();
List<String> str = iter.getGenotype();
Assert.assertTrue(str.get(0).equals("AA"));
}
@Test
public void testIsSNP() {
@ -112,7 +79,7 @@ public class RodGLFTest extends BaseTest {
glf = iter.next();
Assert.assertFalse(iter.isReference());
}
/*
@Test(expected = IllegalStateException.class)
public void testGetAltSnpFWDIllegalException() {
RodGLF glf = iter.next();
@ -159,7 +126,7 @@ public class RodGLFTest extends BaseTest {
/**
* move to the second and third bases, and check that the
* alternate bases are correct.
*/
*
@Test
public void testGetAltBasesFWD() {
RodGLF glf = iter.next();
@ -169,7 +136,7 @@ public class RodGLFTest extends BaseTest {
Assert.assertTrue("CT".equals(iter.getAltBasesFWD()));
}
*/
@Test
public void testRodLocations() {
GenomeLoc loc = null;
@ -177,7 +144,7 @@ public class RodGLFTest extends BaseTest {
RodGLF glf = iter.next();
if (loc != null) {
if (iter.getLocation().isBefore(loc)) {
fail("locations in the GLF came out of order loc = " + loc.toString() + " new loc = " + iter.getLocation().toString());
Assert.fail("locations in the GLF came out of order loc = " + loc.toString() + " new loc = " + iter.getLocation().toString());
}
}
loc = iter.getLocation();

View File

@ -0,0 +1,62 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.File;
import java.io.FileNotFoundException;
/**
*
* @author aaron
*
* Class BasicGenotypeTest
*
* tests the basic genotype class
*/
public class BasicGenotypeTest extends BaseTest {
private static IndexedFastaSequenceFile seq;
@BeforeClass
public static void beforeTests() {
try {
seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
} catch (FileNotFoundException e) {
throw new StingException("unable to load the sequence dictionary");
}
GenomeLocParser.setupRefContigOrdering(seq);
}
@Test
public void testBasicGenotypeIsHom() {
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0);
Assert.assertTrue(gt.isHom());
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0);
Assert.assertTrue(!gt2.isHom());
}
@Test
public void testBasicGenotypeIsHet() {
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0);
Assert.assertTrue(!gt.isHet());
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0);
Assert.assertTrue(gt2.isHet());
}
@Test
public void testBasicGenotypeIsVariant() {
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0);
Assert.assertTrue(!gt.isVariant('A'));
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0);
Assert.assertTrue(gt2.isVariant('A'));
BasicGenotype gt3 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"TT",'A',0);
Assert.assertTrue(gt3.isVariant('A'));
}
}