Changes to switch Variant Eval over to the new Variation system.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1611 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-09-14 05:34:33 +00:00
parent 0feee9cdfd
commit e03fccb223
26 changed files with 825 additions and 416 deletions

View File

@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject; import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.glf.GLFReader; import org.broadinstitute.sting.utils.genotype.glf.GLFReader;
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord; import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall; import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall;
@ -13,9 +14,7 @@ import org.broadinstitute.sting.utils.genotype.glf.VariableLengthCall;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
import java.util.List;
/** /**
@ -25,7 +24,7 @@ import java.util.List;
* <p/> * <p/>
* the rod class for GLF data. * the rod class for GLF data.
*/ */
public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<RodGLF> { public class RodGLF implements ReferenceOrderedDatum, Variation, Iterator<RodGLF> {
static int count = 0; static int count = 0;
public GLFReader mReader; public GLFReader mReader;
private final String mName; private final String mName;
@ -38,6 +37,7 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
/** /**
* get the name * get the name
*
* @return the name * @return the name
*/ */
@Override @Override
@ -64,27 +64,25 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
return toString(); return toString();
} }
/** /** @return a string representation of the ROD GLF object */
* @return a string representation of the ROD GLF object
*/
public String toString() { public String toString() {
return String.format("%s\t%d\t%s\t%d\t%d\t%4.4f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f", return String.format("%s\t%d\t%s\t%d\t%d\t%4.4f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f",
mLoc.getContig(), mLoc.getContig(),
mLoc.getStart(), mLoc.getStart(),
mRecord.getRefBase(), mRecord.getRefBase(),
mRecord.getReadDepth(), mRecord.getReadDepth(),
mRecord.getRmsMapQ(), mRecord.getRmsMapQ(),
getBestGenotypeValue(1), getBestGenotypeValue(1),
((SinglePointCall)mRecord).getLikelihoods()[0], ((SinglePointCall) mRecord).getLikelihoods()[0],
((SinglePointCall)mRecord).getLikelihoods()[1], ((SinglePointCall) mRecord).getLikelihoods()[1],
((SinglePointCall)mRecord).getLikelihoods()[2], ((SinglePointCall) mRecord).getLikelihoods()[2],
((SinglePointCall)mRecord).getLikelihoods()[3], ((SinglePointCall) mRecord).getLikelihoods()[3],
((SinglePointCall)mRecord).getLikelihoods()[4], ((SinglePointCall) mRecord).getLikelihoods()[4],
((SinglePointCall)mRecord).getLikelihoods()[5], ((SinglePointCall) mRecord).getLikelihoods()[5],
((SinglePointCall)mRecord).getLikelihoods()[6], ((SinglePointCall) mRecord).getLikelihoods()[6],
((SinglePointCall)mRecord).getLikelihoods()[7], ((SinglePointCall) mRecord).getLikelihoods()[7],
((SinglePointCall)mRecord).getLikelihoods()[8], ((SinglePointCall) mRecord).getLikelihoods()[8],
((SinglePointCall)mRecord).getLikelihoods()[9] ((SinglePointCall) mRecord).getLikelihoods()[9]
); );
@ -107,6 +105,7 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
/** /**
* return a genome loc representing the current location * return a genome loc representing the current location
*
* @return the geonome loc * @return the geonome loc
*/ */
@Override @Override
@ -115,62 +114,15 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
} }
/** /**
* Returns bases in the reference allele as a String. String can be empty (as in insertion into * get the reference base(s) at this position
* the reference), can contain a single character (as in SNP or one-base deletion), or multiple characters
* (for longer indels).
* *
* @return reference allele, forward strand * @return the reference base or bases, as a string
*/ */
@Override @Override
public String getRefBasesFWD() { public char getReference() {
if (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) return "";
return String.valueOf(mRecord.getRefBase());
}
/**
* Returns reference (major) allele base for a SNP variant as a character; should throw IllegalStateException
* if variant is not a SNP.
*
* This doesn't make much sense to me, what if the best genotype is hom non-ref?
*
* @return reference base on the forward strand
*/
@Override
public char getRefSnpFWD() throws IllegalStateException {
if (!isSNP()) throw new IllegalStateException("Current GLF Record is not a SNP");
return mRecord.getRefBase().toChar(); return mRecord.getRefBase().toChar();
} }
/**
* Returns bases in the alternative allele as a String. String can be empty (as in deletion from
* the reference), can contain a single character (as in SNP or one-base insertion), or multiple characters
* (for longer indels).
*
* @return alternative allele, forward strand
*/
@Override
public String getAltBasesFWD() {
return getBestGenotype(2).toString();
}
/**
* Returns alternative (minor) allele base for a SNP variant as a character; should throw IllegalStateException
* if variant is not a SNP.
*
* @return alternative allele base on the forward starnd
*/
@Override
public char getAltSnpFWD() throws IllegalStateException {
if (!isSNP()) {
throw new IllegalStateException("Not a SNP");
}
String str = getBestGenotype(1).toString();
if (String.valueOf(str.charAt(0)).equals(mRecord.getRefBase().toString())) {
return str.charAt(1);
}
return str.charAt(0);
}
/** /**
* Returns true if all observed alleles are reference alleles. All is<Variant> methods (where Variant=SNP,Insertion, etc) should * Returns true if all observed alleles are reference alleles. All is<Variant> methods (where Variant=SNP,Insertion, etc) should
* return false at such site to ensure consistency. This method is included for use with genotyping calls (isGenotype()==true), it makes * return false at such site to ensure consistency. This method is included for use with genotyping calls (isGenotype()==true), it makes
@ -183,6 +135,31 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
return (!isSNP()); return (!isSNP());
} }
/**
* are we an insertion or a deletion? yes, then return true. No? Well, false it is.
*
* @return true if we're an insertion or deletion
*/
@Override
public boolean isIndel() {
return (isDeletion() || isInsertion());
}
/**
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
* of
*
* @return a char, representing the alternate base
*/
@Override
public char getAlternativeBaseForSNP() {
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
if (getAlternateBases().charAt(0) == this.getReference())
return getAlternateBases().charAt(1);
return getAlternateBases().charAt(0);
}
/** /**
* Is this variant a SNP? * Is this variant a SNP?
* *
@ -196,7 +173,9 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
/** /**
* return a string representing the reference * return a string representing the reference
*
* @param ref the reference character * @param ref the reference character
*
* @return a string for the homozygous ref in a diploid * @return a string for the homozygous ref in a diploid
*/ */
private static String refString(char ref) { private static String refString(char ref) {
@ -213,7 +192,7 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
*/ */
public LikelihoodObject.GENOTYPE getBestGenotype(int nthBest) { public LikelihoodObject.GENOTYPE getBestGenotype(int nthBest) {
Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods()); Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods());
return LikelihoodObject.GENOTYPE.values()[sorted[nthBest-1]]; return LikelihoodObject.GENOTYPE.values()[sorted[nthBest - 1]];
} }
/** /**
@ -226,7 +205,7 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
*/ */
public double getBestGenotypeValue(int nthBest) { public double getBestGenotypeValue(int nthBest) {
Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods()); Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods());
return (((SinglePointCall) mRecord).getLikelihoods())[sorted[nthBest-1]]; return (((SinglePointCall) mRecord).getLikelihoods())[sorted[nthBest - 1]];
} }
/** /**
@ -254,16 +233,13 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
} }
/** /**
* Is this variant an insertion or a deletion? The contract requires * get the base representation of this Variant
* this to be true if either isInsertion() or isDeletion() returns true. However,
* this method is currently allowed to return true even if neither of isInsertion()
* and isDeletion() does.
* *
* @return * @return a string, of ploidy
*/ */
@Override @Override
public boolean isIndel() { public String getAlternateBases() {
return (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE); return this.getBestGenotype(0).toString();
} }
/** /**
@ -272,31 +248,19 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
* @return * @return
*/ */
@Override @Override
public double getMAF() { public double getNonRefAlleleFrequency() {
return 0; return 0;
} }
/** /** @return the VARIANT_TYPE of the current variant */
* Returns heterozygosity, a more accessible general feature of a variant.
*
* @return
*/
@Override @Override
public double getHeterozygosity() { public VARIANT_TYPE getType() {
return 0; if (this.isSNP()) return VARIANT_TYPE.SNP;
else if (this.isInsertion()) return VARIANT_TYPE.INDEL;
else if (this.isDeletion()) return VARIANT_TYPE.DELETION;
else return VARIANT_TYPE.REFERENCE;
} }
/**
* Is this variant an actual genotype (such as individual call from sequencing, HapMap chip etc), or
* population allelic variant (call from pooled sequencing, dbSNP site etc). Only if variant is a genotype, there
* is a meaningful question of, e.g., whether it is a het, or homogeneous non-ref.
*
* @return true if this variant is an actual genotype.
*/
@Override
public boolean isGenotype() {
return true;
}
/** /**
* Returns phred-mapped confidence in variation event (e.g. MAQ's SNP confidence, or AlleleCaller's best vs. ref). * Returns phred-mapped confidence in variation event (e.g. MAQ's SNP confidence, or AlleleCaller's best vs. ref).
@ -304,66 +268,20 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
* @return * @return
*/ */
@Override @Override
public double getVariationConfidence() { public double getNegLog10PError() {
String ref = new String() + mRecord.getRefBase() + mRecord.getRefBase(); String ref = new String() + mRecord.getRefBase() + mRecord.getRefBase();
int index = 0; int index = 0;
for (LikelihoodObject.GENOTYPE g: LikelihoodObject.GENOTYPE.values()) { for (LikelihoodObject.GENOTYPE g : LikelihoodObject.GENOTYPE.values()) {
if (g.toString().equals(ref)) break; if (g.toString().equals(ref)) break;
index++; index++;
} }
return Math.abs(getBestGenotypeValue(1) - ((SinglePointCall)mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR; return Math.abs(getBestGenotypeValue(1) - ((SinglePointCall) mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
} }
/** public int length() {
* Returns phred-mapped confidence in called genotype (e.g. MAQ's consensus confidence, or AlleleCaller's return 1;
* best vs next-best.
*
* @return
*/
@Override
public double getConsensusConfidence() {
return Math.abs(getBestGenotypeValue(1) - getBestGenotypeValue(2)) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
} }
/**
* Returns actual observed genotype. Allowed to return more than two alleles (@see #getPloidy()). If this variant
* is not a genotype, should throw an IllegalStateException.
*
* @return
*/
@Override
public List<String> getGenotype() throws IllegalStateException {
List<String> ret = new ArrayList<String>();
ret.add(this.getBestGenotype(1).toString());
return ret;
}
/**
* Return actual number of observed alleles (chromosomes) in the genotype. If this variant is not a genotype,
* should throw IllegalStateException.
*
* @return
*/
@Override
public int getPloidy() throws IllegalStateException {
return 2; // we're so haploid it hurts
}
/**
* Returns true if the site has at most two known or observed alleles (ref and non-ref), or false if there are > 2 allelic variants known or observed. When
* the implementing class is a genotype, alleles should be always counted including the reference allele whether it was observed in the particular
* individual or not: i.e. if the reference is 'C', then both 'CA' and 'AA' genotypes must be reported as bi-allelic, while 'AT' is <i>not</i> bi-allelic (since there are
* two allelic variants, 'A' and 'T' <i>in addition</i> to the (known) reference variant 'C').
*
* @return
*/
@Override
public boolean isBiallelic() {
return false;
}
public int length() { return 1; }
@Override @Override
public int compareTo(ReferenceOrderedDatum that) { public int compareTo(ReferenceOrderedDatum that) {
return this.mLoc.compareTo(that.getLocation()); return this.mLoc.compareTo(that.getLocation());
@ -371,8 +289,10 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
/** /**
* the parse line, which is not used by the GLF rod * the parse line, which is not used by the GLF rod
*
* @param header the header to pass in * @param header the header to pass in
* @param parts the string object * @param parts the string object
*
* @return false, alwayss * @return false, alwayss
* @throws java.io.IOException * @throws java.io.IOException
*/ */

View File

@ -25,14 +25,19 @@
package org.broadinstitute.sting.gatk.refdata; package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.io.IOException; import java.io.IOException;
import java.util.List;
import java.util.Arrays; import java.util.Arrays;
import java.util.List;
public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVariant { public class RodGeliText extends BasicReferenceOrderedDatum implements Variation, VariantBackedByGenotype, AllelicVariant {
public enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT } public enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
public GenomeLoc loc; public GenomeLoc loc;
public char refBase = 'N'; public char refBase = 'N';
@ -96,6 +101,27 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVa
public GenomeLoc getLocation() { return loc; } public GenomeLoc getLocation() { return loc; }
/**
* get the reference base(s) at this position
*
* @return the reference base or bases, as a string
*/
@Override
public char getReference() {
return this.refBase;
}
/**
* get the -1 * (log 10 of the error value)
*
* @return the log based error estimate
*/
@Override
public double getNegLog10PError() {
return Math.abs(lodBtr);
}
public String getRefBasesFWD() { public String getRefBasesFWD() {
return String.format("%c", getRefSnpFWD()); return String.format("%c", getRefSnpFWD());
} }
@ -118,8 +144,24 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVa
return refBase == bestGenotype.charAt(0) && refBase == bestGenotype.charAt(1); return refBase == bestGenotype.charAt(0) && refBase == bestGenotype.charAt(1);
} }
/**
* get the frequency of this variant
*
* @return VariantFrequency with the stored frequency
*/
@Override
public double getNonRefAlleleFrequency() {
return 1.0;
}
/** @return the VARIANT_TYPE of the current variant */
@Override
public VARIANT_TYPE getType() {
return VARIANT_TYPE.SNP;
}
public boolean isSNP() { public boolean isSNP() {
return !isReference(); return (!bestGenotype.equals(Utils.dupString(this.getReference(),2)));
} }
public boolean isInsertion() { public boolean isInsertion() {
@ -130,10 +172,35 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVa
return false; return false;
} }
/**
* get the base representation of this Variant
*
* @return a string, of ploidy
*/
@Override
public String getAlternateBases() {
return this.bestGenotype;
}
public boolean isIndel() { public boolean isIndel() {
return false; return false;
} }
/**
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
* of
*
* @return a char, representing the alternate base
*/
@Override
public char getAlternativeBaseForSNP() {
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
if(this.bestGenotype.toString().charAt(0) == this.getReference())
return this.bestGenotype.toString().charAt(1);
return this.bestGenotype.toString().charAt(0);
}
public double getMAF() { public double getMAF() {
return 0; return 0;
} }
@ -253,4 +320,15 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements AllelicVa
public int getPloidy() throws IllegalStateException { return 2; } public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; } public boolean isBiallelic() { return true; }
*/ */
/**
* get the likelihoods
*
* @return an array in lexigraphical order of the likelihoods
*/
@Override
public org.broadinstitute.sting.utils.genotype.Genotype getGenotype(DiploidGenotype x) {
if (x.toString().equals(this.getAltBasesFWD())) throw new IllegalStateException("Unable to retrieve genotype");
return new BasicGenotype(this.getLocation(),this.bestGenotype,this.getRefSnpFWD(),this.getConsensusConfidence());
}
} }

View File

@ -3,7 +3,9 @@ package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import java.util.*; import java.util.*;
import java.util.regex.MatchResult; import java.util.regex.MatchResult;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -16,7 +18,7 @@ import java.util.regex.Pattern;
* Time: 10:47:14 AM * Time: 10:47:14 AM
* To change this template use File | Settings | File Templates. * To change this template use File | Settings | File Templates.
*/ */
public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements AllelicVariant { public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements AllelicVariant, Variation, VariantBackedByGenotype {
private String contig, source, feature, strand, frame; private String contig, source, feature, strand, frame;
private long start, stop; private long start, stop;
private double score; private double score;
@ -74,6 +76,26 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
return GenomeLocParser.parseGenomeLoc(contig, start, stop); return GenomeLocParser.parseGenomeLoc(contig, start, stop);
} }
/**
* get the reference base(s) at this position
*
* @return the reference base or bases, as a string
*/
@Override
public char getReference() {
return 'N';
}
/**
* get the -1 * (log 10 of the error value)
*
* @return the log based error estimate
*/
@Override
public double getNegLog10PError() {
return 4; // 1/10000 error
}
public String getAttribute(final String key) { public String getAttribute(final String key) {
return attributes.get(key); return attributes.get(key);
} }
@ -158,10 +180,49 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
public String getAltBasesFWD() { return null; } public String getAltBasesFWD() { return null; }
public char getAltSnpFWD() throws IllegalStateException { return 0; } public char getAltSnpFWD() throws IllegalStateException { return 0; }
public boolean isReference() { return ! isSNP(); } public boolean isReference() { return ! isSNP(); }
/**
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
*
* @return
*/
@Override
public String getAlternateBases() {
return this.feature;
}
/**
* get the frequency of this variant
*
* @return VariantFrequency with the stored frequency
*/
@Override
public double getNonRefAlleleFrequency() {
return this.getMAF();
}
/** @return the VARIANT_TYPE of the current variant */
@Override
public VARIANT_TYPE getType() {
return VARIANT_TYPE.SNP;
}
public boolean isSNP() { return false; } public boolean isSNP() { return false; }
public boolean isInsertion() { return false; } public boolean isInsertion() { return false; }
public boolean isDeletion() { return false; } public boolean isDeletion() { return false; }
public boolean isIndel() { return false; } public boolean isIndel() { return false; }
/**
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
* of
*
* @return a char, representing the alternate base
*/
@Override
public char getAlternativeBaseForSNP() {
return this.getAltSnpFWD();
}
public double getMAF() { return 0; } public double getMAF() { return 0; }
public double getHeterozygosity() { return 0; } public double getHeterozygosity() { return 0; }
public boolean isGenotype() { return true; } public boolean isGenotype() { return true; }
@ -175,4 +236,15 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
public int getPloidy() throws IllegalStateException { return 2; } public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; } public boolean isBiallelic() { return true; }
public int length() { return 1; } public int length() { return 1; }
/**
* get the likelihoods
*
* @return an array in lexigraphical order of the likelihoods
*/
@Override
public Genotype getGenotype(DiploidGenotype x) {
if (!x.toString().equals(this.getAltBasesFWD())) throw new IllegalStateException("Unable to retrieve genotype");
return new BasicGenotype(this.getLocation(),this.feature,this.getRefSnpFWD(),this.getConsensusConfidence());
}
} }

View File

@ -7,12 +7,15 @@ import java.util.List;
* chr1:1104840 A N 0.000000 -85.341265 -85.341265 0.000000 0.000000 324.000000 162 0 0 * chr1:1104840 A N 0.000000 -85.341265 -85.341265 0.000000 0.000000 324.000000 162 0 0
* chr1:1104841 C N 0.000000 -69.937928 -69.937928 0.000000 0.000000 324.000000 162 0 0 * chr1:1104841 C N 0.000000 -69.937928 -69.937928 0.000000 0.000000 324.000000 162 0 0
* chr1:1104842 A N 0.000000 -84.816002 -84.816002 0.000000 0.000000 324.000000 162 0 0 * chr1:1104842 A N 0.000000 -84.816002 -84.816002 0.000000 0.000000 324.000000 162 0 0
*
*/ */
public interface SNPCallFromGenotypes extends AllelicVariant { public interface SNPCallFromGenotypes extends AllelicVariant {
public int nIndividuals(); public int nIndividuals();
public int nHomRefGenotypes(); public int nHomRefGenotypes();
public int nHetGenotypes(); public int nHetGenotypes();
public int nHomVarGenotypes(); public int nHomVarGenotypes();
public List<Genotype> getGenotypes(); public List<Genotype> getGenotypes();
} }

View File

@ -1,10 +1,12 @@
package org.broadinstitute.sting.gatk.refdata; package org.broadinstitute.sting.gatk.refdata;
import net.sf.picard.util.SequenceUtil; import net.sf.picard.util.SequenceUtil;
import java.util.*;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import java.util.Arrays;
import java.util.List;
/** /**
* Example format: * Example format:
@ -16,7 +18,7 @@ import org.broadinstitute.sting.utils.*;
* Time: 10:47:14 AM * Time: 10:47:14 AM
* To change this template use File | Settings | File Templates. * To change this template use File | Settings | File Templates.
*/ */
public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVariant { public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, VariantBackedByGenotype, AllelicVariant {
public GenomeLoc loc; // genome location of SNP public GenomeLoc loc; // genome location of SNP
// Reference sequence chromosome or scaffold // Reference sequence chromosome or scaffold
// Start and stop positions in chrom // Start and stop positions in chrom
@ -60,6 +62,26 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
// ---------------------------------------------------------------------- // ----------------------------------------------------------------------
public GenomeLoc getLocation() { return loc; } public GenomeLoc getLocation() { return loc; }
/**
* get the reference base(s) at this position
*
* @return the reference base or bases, as a string
*/
@Override
public char getReference() {
return getRefSnpFWD();
}
/**
* get the -1 * (log 10 of the error value)
*
* @return the log based error estimate
*/
@Override
public double getNegLog10PError() {
return 4; // -log10(0.0001)
}
public boolean onFwdStrand() { public boolean onFwdStrand() {
return strand.equals("+"); return strand.equals("+");
} }
@ -108,10 +130,24 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
} }
public String getAllelesFWDString() { public String getAllelesFWDString() {
return Utils.join("/", getAllelesFWD()); return Utils.join("", getAllelesFWD());
} }
// ---------------------------------------------------------------------- /**
* get the frequency of this variant
*
* @return VariantFrequency with the stored frequency
*/
@Override
public double getNonRefAlleleFrequency() {
return 0; //To change body of implemented methods use File | Settings | File Templates.
}
/** @return the VARIANT_TYPE of the current variant */
@Override
public VARIANT_TYPE getType() {
return VARIANT_TYPE.SNP;
}// ----------------------------------------------------------------------
// //
// What kind of variant are we? // What kind of variant are we?
// //
@ -119,7 +155,34 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
public boolean isSNP() { return varType.contains("single"); } public boolean isSNP() { return varType.contains("single"); }
public boolean isInsertion() { return varType.contains("insertion"); } public boolean isInsertion() { return varType.contains("insertion"); }
public boolean isDeletion() { return varType.contains("deletion"); } public boolean isDeletion() { return varType.contains("deletion"); }
/**
* get the base representation of this Variant
*
* @return a string, of ploidy
*/
@Override
public String getAlternateBases() {
return getAllelesFWDString();
}
public boolean isIndel() { return isInsertion() || isDeletion() || varType.contains("in-del"); } public boolean isIndel() { return isInsertion() || isDeletion() || varType.contains("in-del"); }
/**
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
* of
*
* @return a char, representing the alternate base
*/
@Override
public char getAlternativeBaseForSNP() {
return getAltSnpFWD(); /*
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
if (getAlternateBases().charAt(0) == this.getReference())
return getAlternateBases().charAt(1);
return getAlternateBases().charAt(0); */
}
public boolean isReference() { return false; } // snp locations are never "reference", there's always a variant public boolean isReference() { return false; } // snp locations are never "reference", there's always a variant
public boolean isHapmap() { return validationStatus.contains("by-hapmap"); } public boolean isHapmap() { return validationStatus.contains("by-hapmap"); }
@ -147,7 +210,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
if ( isIndel() ) s += ":Indel"; if ( isIndel() ) s += ":Indel";
if ( isHapmap() ) s += ":Hapmap"; if ( isHapmap() ) s += ":Hapmap";
if ( is2Hit2Allele() ) s += ":2Hit"; if ( is2Hit2Allele() ) s += ":2Hit";
return s; return s;
} }
public String repl() { public String repl() {
@ -213,7 +276,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
} }
public double getMAF() { public double getMAF() {
// Fixme: update to actually get MAF // Fixme: update to actually get MAF
//return avHet; //return avHet;
return -1; return -1;
} }
@ -244,4 +307,14 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
public int length() { return (int)(loc.getStop() - loc.getStart() + 1); } public int length() { return (int)(loc.getStop() - loc.getStart() + 1); }
/**
* get the likelihoods
*
* @return an array in lexigraphical order of the likelihoods
*/
@Override
public org.broadinstitute.sting.utils.genotype.Genotype getGenotype(DiploidGenotype x) {
if (x.toString().equals(this.getAltBasesFWD())) throw new IllegalStateException("Unable to retrieve genotype");
return new BasicGenotype(this.getLocation(),this.getAltBasesFWD(),this.getRefSnpFWD(),this.getConsensusConfidence());
}
} }

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.filters;
import org.broadinstitute.sting.gatk.contexts.VariantContext; import org.broadinstitute.sting.gatk.contexts.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RodGeliText; import org.broadinstitute.sting.gatk.refdata.RodGeliText;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
@ -50,7 +51,7 @@ public abstract class RatioFilter implements VariantExclusionCriterion {
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant); Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
boolean highGenotypeConfidence = variant.getConsensusConfidence() > minGenotypeConfidenceToTest; boolean highGenotypeConfidence = variant.getConsensusConfidence() > minGenotypeConfidenceToTest;
boolean excludable = !excludeHetsOnly() || GenotypeUtils.isHet(variant); boolean excludable = !excludeHetsOnly() || GenotypeUtils.isHet((AllelicVariant)variant);
exclude = excludable && highGenotypeConfidence && pointEstimateExclude(counts); exclude = excludable && highGenotypeConfidence && pointEstimateExclude(counts);
// //
// for printing only // for printing only

View File

@ -1,12 +1,12 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval; package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.List;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List;
/** /**
* The Broad Institute * The Broad Institute
@ -65,5 +65,5 @@ public abstract class BasicVariantAnalysis implements VariantAnalysis {
} }
public abstract String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context); public abstract String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context);
} }

View File

@ -1,27 +1,30 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval; package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.Utils;
import java.util.List;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List;
/** /**
* The Broad Institute * The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT * SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the * This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
* * <p/>
* This software is supplied without any warranty or guaranteed support whatsoever. Neither * This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/ */
public class CallableBasesAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis { public class CallableBasesAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis {
long all_bases = 0; long all_bases = 0;
long all_calls = 0; long all_calls = 0;
//final static double[] Qthresholds = { 10, 20, 30, 40, 50, 100, 200, 500, 1000 }; //final static double[] Qthresholds = { 10, 20, 30, 40, 50, 100, 200, 500, 1000 };
final static double[] thresholds = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 50, 100 }; final static double[] thresholds = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 50, 100};
long[] discoverable_bases = new long[thresholds.length]; long[] discoverable_bases = new long[thresholds.length];
long[] genotypable_bases = new long[thresholds.length]; long[] genotypable_bases = new long[thresholds.length];
@ -29,36 +32,54 @@ public class CallableBasesAnalysis extends BasicVariantAnalysis implements Genot
super("callable_bases"); super("callable_bases");
} }
public long nSites() { return all_bases; } public long nSites() {
public long nCalls() { return all_calls; } return all_bases;
public long nDiscoverable(int index) { return discoverable_bases[index]; } }
public double percentDiscoverable(int index) { return (100.0*nDiscoverable(index)) / nSites(); }
public long nGenotypable(int index) { return genotypable_bases[index]; }
public double percentGenotypable(int index) { return (100.0*nGenotypable(index)) / nSites(); }
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { public long nCalls() {
return all_calls;
}
public long nDiscoverable(int index) {
return discoverable_bases[index];
}
public double percentDiscoverable(int index) {
return (100.0 * nDiscoverable(index)) / nSites();
}
public long nGenotypable(int index) {
return genotypable_bases[index];
}
public double percentGenotypable(int index) {
return (100.0 * nGenotypable(index)) / nSites();
}
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
all_bases++; all_bases++;
if ( eval == null ) // no data here! if (eval == null) // no data here!
return null; return null;
// we actually have a record here // we actually have a record here
if ( ! eval.isGenotype() ) { // evaluation record isn't a genotype, die! if (!(eval instanceof VariantBackedByGenotype)) { // evaluation record isn't a genotype, die!
throw new RuntimeException("Evaluation track isn't an Genotype!"); throw new RuntimeException("Evaluation track isn't an Genotype!");
} }
all_calls++; all_calls++;
// For every threshold, updated discoverable and callable // For every threshold, updated discoverable and callable
for ( int i = 0; i < thresholds.length; i++ ) { for (int i = 0; i < thresholds.length; i++) {
double threshold = thresholds[i]; double threshold = thresholds[i];
DiploidGenotype g = DiploidGenotype.valueOf(Utils.dupString(ref, 2));
Genotype genotype = ((VariantBackedByGenotype) eval).getGenotype(g);
// update discoverable // update discoverable
if ( eval.isSNP() && eval.getVariationConfidence() >= threshold ) if (eval.isSNP() && eval.getNegLog10PError() >= threshold)
discoverable_bases[i]++; discoverable_bases[i]++;
if ( eval.isReference() && eval.getConsensusConfidence() >= threshold ) if (!eval.isSNP() && genotype.getNegLog10PError() >= threshold)
discoverable_bases[i]++; discoverable_bases[i]++;
if ( eval.getConsensusConfidence() >= threshold ) if (genotype.getNegLog10PError() >= threshold)
genotypable_bases[i]++; genotypable_bases[i]++;
//System.out.printf("Updating %s SNP=%b, REF=%b VarConf=%f ConConf=%f where threshold=%f: discoverable = %d, genotypable = %d%n", //System.out.printf("Updating %s SNP=%b, REF=%b VarConf=%f ConConf=%f where threshold=%f: discoverable = %d, genotypable = %d%n",
@ -76,7 +97,7 @@ public class CallableBasesAnalysis extends BasicVariantAnalysis implements Genot
s.add(String.format("")); s.add(String.format(""));
s.add(String.format("confidence_threshold\tdiscoverable_bases\tdiscoverable_bases_percent\tgenotypable_bases\tgenotypable_bases_percent")); s.add(String.format("confidence_threshold\tdiscoverable_bases\tdiscoverable_bases_percent\tgenotypable_bases\tgenotypable_bases_percent"));
for ( int i = 0; i < thresholds.length; i++ ) { for (int i = 0; i < thresholds.length; i++) {
double threshold = thresholds[i]; double threshold = thresholds[i];
s.add(String.format("%6.2f\t%d\t%.6f\t%d\t%.6f", threshold, nDiscoverable(i), percentDiscoverable(i), nGenotypable(i), percentGenotypable(i))); s.add(String.format("%6.2f\t%d\t%.6f\t%d\t%.6f", threshold, nDiscoverable(i), percentDiscoverable(i), nGenotypable(i), percentGenotypable(i)));
} }

View File

@ -1,14 +1,14 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval; package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List;
import java.util.HashSet; import java.util.HashSet;
import java.util.List;
/** /**
* The Broad Institute * The Broad Institute
@ -24,7 +24,7 @@ public class ClusterCounterAnalysis extends BasicVariantAnalysis implements Geno
ArrayList<HashSet<GenomeLoc>> variantsWithClusters; ArrayList<HashSet<GenomeLoc>> variantsWithClusters;
int minDistanceForFlagging = 5; int minDistanceForFlagging = 5;
int[] neighborWiseBoundries = {1, 2, 5, 10, 20, 50, 100}; int[] neighborWiseBoundries = {1, 2, 5, 10, 20, 50, 100};
AllelicVariant lastVariant = null; Variation lastVariation = null;
GenomeLoc lastVariantInterval = null; GenomeLoc lastVariantInterval = null;
int nSeen = 0; int nSeen = 0;
@ -36,16 +36,16 @@ public class ClusterCounterAnalysis extends BasicVariantAnalysis implements Geno
variantsWithClusters.add(new HashSet<GenomeLoc>()); variantsWithClusters.add(new HashSet<GenomeLoc>());
} }
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
String r = null; String r = null;
if ( eval != null && eval.isSNP() ) { if ( eval != null && eval.isSNP() ) {
IntervalRod intervalROD = (IntervalRod)tracker.lookup("interval", null); IntervalRod intervalROD = (IntervalRod)tracker.lookup("interval", null);
GenomeLoc interval = intervalROD == null ? null : intervalROD.getLocation(); GenomeLoc interval = intervalROD == null ? null : intervalROD.getLocation();
if (lastVariant != null) { if (lastVariation != null) {
GenomeLoc eL = eval.getLocation(); GenomeLoc eL = eval.getLocation();
GenomeLoc lvL = lastVariant.getLocation(); GenomeLoc lvL = lastVariation.getLocation();
if (eL.getContigIndex() == lvL.getContigIndex()) { if (eL.getContigIndex() == lvL.getContigIndex()) {
long d = eL.distance(lvL); long d = eL.distance(lvL);
if ( lastVariantInterval != null && lastVariantInterval.compareTo(interval) != 0) { if ( lastVariantInterval != null && lastVariantInterval.compareTo(interval) != 0) {
@ -66,7 +66,7 @@ public class ClusterCounterAnalysis extends BasicVariantAnalysis implements Geno
} }
} }
lastVariant = eval; lastVariation = eval;
lastVariantInterval = interval; lastVariantInterval = interval;
} }

View File

@ -1,21 +1,24 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval; package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.List;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List;
/** /**
* The Broad Institute * The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT * SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the * This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
* * <p/>
* This software is supplied without any warranty or guaranteed support whatsoever. Neither * This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/ */
public class GenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis { public class GenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis {
private String dbName; private String dbName;
@ -35,48 +38,48 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
public GenotypeConcordance(final String name) { public GenotypeConcordance(final String name) {
super("genotype_concordance"); super("genotype_concordance");
dbName = name; dbName = name;
for ( int i = 0; i < 4; i++ ) { for (int i = 0; i < 4; i++) {
truth_totals[i] = 0; truth_totals[i] = 0;
calls_totals[i] = 0; calls_totals[i] = 0;
for ( int j = 0; j < 4; j++ ) for (int j = 0; j < 4; j++)
table[i][j] = 0; table[i][j] = 0;
} }
} }
public void inc(AllelicVariant chip, AllelicVariant eval, char ref) { public void inc(Variation chip, Variation eval, char ref) {
if ( (chip != null && !chip.isGenotype()) || (eval != null && !eval.isGenotype()) ) if ((chip != null && !(chip instanceof VariantBackedByGenotype) || (eval != null && !(eval instanceof VariantBackedByGenotype))))
throw new StingException("Failure: trying to analyze genotypes of non-genotype data"); throw new StingException("Failure: trying to analyze genotypes of non-genotype data");
DiploidGenotype g = DiploidGenotype.valueOf(Utils.dupString(ref, 2));
int truthIndex, callIndex; int truthIndex, callIndex;
if ( chip == null ) if (chip == null)
truthIndex = UNKNOWN; truthIndex = UNKNOWN;
else if ( chip.isReference() && Utils.countOccurrences(ref, chip.getGenotype().get(0)) == chip.getGenotype().get(0).length() ) else if (chip.getAlternateBases().equals(g.toString()))
truthIndex = REF; truthIndex = REF;
else if ( GenotypeUtils.isHet(chip) ) else if (chip.getAlternateBases().charAt(0) != chip.getAlternateBases().charAt(1))
truthIndex = VAR_HET; truthIndex = VAR_HET;
else else
truthIndex = VAR_HOM; truthIndex = VAR_HOM;
// todo -- FIXME on countOccurences if (eval == null)
if ( eval == null )
callIndex = NO_CALL; callIndex = NO_CALL;
else if ( eval.isReference() && Utils.countOccurrences(ref, eval.getGenotype().get(0)) == eval.getGenotype().get(0).length() ) else if (eval.getAlternateBases().equals(g.toString()))
callIndex = REF; callIndex = REF;
else if ( GenotypeUtils.isHet(eval) ) else if (eval.getAlternateBases().charAt(0) != eval.getAlternateBases().charAt(1))
callIndex = VAR_HET; callIndex = VAR_HET;
else else
callIndex = VAR_HOM; callIndex = VAR_HOM;
if ( chip != null || eval != null ) { if (chip != null || eval != null) {
//System.out.printf("TEST: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval); //System.out.printf("TEST: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval);
table[truthIndex][callIndex]++; table[truthIndex][callIndex]++;
truth_totals[truthIndex]++; truth_totals[truthIndex]++;
if ( callIndex != NO_CALL ) calls_totals[callIndex]++; if (callIndex != NO_CALL) calls_totals[callIndex]++;
} }
} }
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
AllelicVariant chip = (AllelicVariant)tracker.lookup(dbName, null); Variation chip = (Variation) tracker.lookup(dbName, null);
inc(chip, eval, ref); inc(chip, eval, ref);
return null; return null;
} }
@ -84,24 +87,24 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
private void addCalledGenotypeConcordance(List<String> s) { private void addCalledGenotypeConcordance(List<String> s) {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append("CALLED_GENOTYPE_CONCORDANCE\t"); sb.append("CALLED_GENOTYPE_CONCORDANCE\t");
for ( int i = 0; i < 4; i++ ) { for (int i = 0; i < 4; i++) {
int nConcordantCallsI = table[i][i]; int nConcordantCallsI = table[i][i];
String value = "N/A"; String value = "N/A";
if ( i != UNKNOWN ) if (i != UNKNOWN)
value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i]-table[UNKNOWN][i])); value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i] - table[UNKNOWN][i]));
sb.append(value); sb.append(value);
} }
s.add(sb.toString()); s.add(sb.toString());
} }
/** /**
* How many overall calls where made that aren't NO_CALLS or UNKNOWNS? * How many overall calls where made that aren't NO_CALLS or UNKNOWNS?
*/ */
private int getNCalled() { private int getNCalled() {
int n = 0; int n = 0;
for ( int i = 0; i < 4; i++ ) for (int i = 0; i < 4; i++)
for ( int j = 0; j < 4; j++ ) for (int j = 0; j < 4; j++)
if ( i != NO_CALL && j != NO_CALL ) n += table[i][j]; if (i != NO_CALL && j != NO_CALL) n += table[i][j];
return n; return n;
} }
@ -109,30 +112,30 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
int nConcordantRefCalls = table[REF][REF]; int nConcordantRefCalls = table[REF][REF];
int nConcordantHetCalls = table[VAR_HET][VAR_HET]; int nConcordantHetCalls = table[VAR_HET][VAR_HET];
int nConcordantVarHomCalls = table[VAR_HOM][VAR_HOM]; int nConcordantVarHomCalls = table[VAR_HOM][VAR_HOM];
int nVarCalls = table[VAR_HOM][VAR_HET] + table[VAR_HOM][VAR_HOM] + table[VAR_HET][VAR_HET] + table[VAR_HET][VAR_HOM]; int nVarCalls = table[VAR_HOM][VAR_HET] + table[VAR_HOM][VAR_HOM] + table[VAR_HET][VAR_HET] + table[VAR_HET][VAR_HOM];
int nConcordantVarCalls = nConcordantHetCalls + nConcordantVarHomCalls; int nConcordantVarCalls = nConcordantHetCalls + nConcordantVarHomCalls;
int nConcordantCalls = nConcordantRefCalls + nConcordantVarCalls; int nConcordantCalls = nConcordantRefCalls + nConcordantVarCalls;
int nTrueVar = truth_totals[VAR_HET] + truth_totals[VAR_HOM]; int nTrueVar = truth_totals[VAR_HET] + truth_totals[VAR_HOM];
int nCalled = getNCalled(); int nCalled = getNCalled();
s.add(String.format("VARIANT_SENSITIVITY %s", cellPercent(nVarCalls, nTrueVar))); s.add(String.format("VARIANT_SENSITIVITY %s", cellPercent(nVarCalls, nTrueVar)));
s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls))); s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls)));
s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled))); s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled)));
} }
public List<String> done() { public List<String> done() {
List<String> s = new ArrayList<String>(); List<String> s = new ArrayList<String>();
s.add(String.format("name %s", dbName)); s.add(String.format("name %s", dbName));
s.add(String.format("TRUTH_STATE\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CALL\t\tTOTALS\tTRUE_GENOTYPE_CONCORDANCE\tGENOTYPE_SENSITIVITY")); s.add(String.format("TRUTH_STATE\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CALL\t\tTOTALS\tTRUE_GENOTYPE_CONCORDANCE\tGENOTYPE_SENSITIVITY"));
for (int i=0; i < 4; i++) { for (int i = 0; i < 4; i++) {
StringBuffer sb = new StringBuffer(); StringBuffer sb = new StringBuffer();
sb.append(String.format("%15s ", TRUTH_NAMES[i])); sb.append(String.format("%15s ", TRUTH_NAMES[i]));
for (int j=0; j < 4; j++) for (int j = 0; j < 4; j++)
sb.append(String.format("%9d ", table[i][j])); sb.append(String.format("%9d ", table[i][j]));
sb.append(String.format("%9d ", truth_totals[i])); sb.append(String.format("%9d ", truth_totals[i]));
if ( i == VAR_HET || i == VAR_HOM ) { if (i == VAR_HET || i == VAR_HOM) {
sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF]+table[i][VAR_HET]+table[i][VAR_HOM]))); sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM])));
sb.append(String.format("%s", cellPercent(truth_totals[i]-table[i][NO_CALL], truth_totals[i]))); sb.append(String.format("%s", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i])));
} else { } else {
sb.append("\tN/A\t\t\tN/A"); sb.append("\tN/A\t\t\tN/A");
} }
s.add(sb.toString()); s.add(sb.toString());
@ -140,16 +143,16 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
addCalledGenotypeConcordance(s); addCalledGenotypeConcordance(s);
addOverallStats(s); addOverallStats(s);
for (int i=0; i < 4; i++) { for (int i = 0; i < 4; i++) {
for (int j=0; j < 4; j++) { for (int j = 0; j < 4; j++) {
s.add(String.format("%s_%s_%s %d", TRUTH_NAMES[i], CALL_NAMES[j], "NO_SITES", table[i][j])); s.add(String.format("%s_%s_%s %d", TRUTH_NAMES[i], CALL_NAMES[j], "NO_SITES", table[i][j]));
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_TRUTH", cellPercent(table[i][j], truth_totals[i]))); s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_TRUTH", cellPercent(table[i][j], truth_totals[i])));
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_CALLS", cellPercent(table[i][j], calls_totals[j]))); s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_CALLS", cellPercent(table[i][j], calls_totals[j])));
} }
if ( i == VAR_HET || i == VAR_HOM ) { if (i == VAR_HET || i == VAR_HOM) {
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF]+table[i][VAR_HET]+table[i][VAR_HOM]))); s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM])));
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i]-table[i][NO_CALL], truth_totals[i]))); s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i])));
} }
} }
return s; return s;
@ -158,7 +161,7 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
private static String cellPercent(int count, int total) { private static String cellPercent(int count, int total) {
StringBuffer sb = new StringBuffer(); StringBuffer sb = new StringBuffer();
total = Math.max(total, 0); total = Math.max(total, 0);
sb.append(String.format("%.2f", (100.0*count)/total)); sb.append(String.format("%.2f", (100.0 * count) / total));
sb.append("%"); sb.append("%");
return sb.toString(); return sb.toString();
} }

View File

@ -1,17 +1,16 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval; package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import cern.jet.math.Arithmetic;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.SNPCallFromGenotypes; import org.broadinstitute.sting.gatk.refdata.SNPCallFromGenotypes;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation; import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import cern.jet.math.Arithmetic;
/** /**
* The Broad Institute * The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT * SOFTWARE COPYRIGHT NOTICE AGREEMENT
@ -32,7 +31,7 @@ public class HardyWeinbergEquilibrium extends BasicVariantAnalysis implements Po
this.threshold = threshold; this.threshold = threshold;
} }
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
String r = null; String r = null;
if ( eval != null && if ( eval != null &&

View File

@ -1,11 +1,11 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval; package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.List;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List;
/** /**
* The Broad Institute * The Broad Institute
@ -29,19 +29,19 @@ public class IndelMetricsAnalysis extends BasicVariantAnalysis implements Genoty
sizes[0][i] = sizes[1][i] = 0; sizes[0][i] = sizes[1][i] = 0;
} }
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
if ( eval != null && eval.isIndel() ) { if ( eval != null && eval.isInsertion() ) {
if ( eval.isInsertion() ) if ( eval.isInsertion() )
insertions++; insertions++;
else if ( eval.isDeletion() ) else if ( eval.isDeletion() )
deletions++; deletions++;
else else
throw new RuntimeException("Variant is indel, but isn't insertion or deletion!"); throw new RuntimeException("Variation is indel, but isn't insertion or deletion!");
if ( eval.length() < 100 ) { if ( eval.getAlternateBases().length() < 100 ) {
sizes[eval.isDeletion() ? 0 : 1][eval.length()]++; sizes[eval.isDeletion() ? 0 : 1][eval.getAlternateBases().length()]++;
if ( eval.length() > maxSize ) if ( eval.getAlternateBases().length() > maxSize )
maxSize = eval.length(); maxSize = eval.getAlternateBases().length();
} }
} }

View File

@ -1,15 +1,15 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval; package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.io.PrintStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.io.PrintStream;
/** /**
* The Broad Institute * The Broad Institute
@ -26,7 +26,7 @@ public class NeighborDistanceAnalysis extends BasicVariantAnalysis implements Ge
int minDistanceForFlagging = 5; int minDistanceForFlagging = 5;
int[] neighborWiseBoundries = {1, 2, 5, 10, 20, 50, 100, 1000, 10000}; int[] neighborWiseBoundries = {1, 2, 5, 10, 20, 50, 100, 1000, 10000};
AllelicVariant lastVariant = null; Variation lastVariation = null;
GenomeLoc lastVariantInterval = null; GenomeLoc lastVariantInterval = null;
PrintStream violationsOut = null; PrintStream violationsOut = null;
@ -35,16 +35,16 @@ public class NeighborDistanceAnalysis extends BasicVariantAnalysis implements Ge
neighborWiseDistances = new ArrayList<Long>(); neighborWiseDistances = new ArrayList<Long>();
} }
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
String r = null; String r = null;
if ( eval != null && eval.isSNP() ) { if ( eval != null && eval.isSNP() ) {
IntervalRod intervalROD = (IntervalRod)tracker.lookup("interval", null); IntervalRod intervalROD = (IntervalRod)tracker.lookup("interval", null);
GenomeLoc interval = intervalROD == null ? null : intervalROD.getLocation(); GenomeLoc interval = intervalROD == null ? null : intervalROD.getLocation();
if (lastVariant != null) { if (lastVariation != null) {
GenomeLoc eL = eval.getLocation(); GenomeLoc eL = eval.getLocation();
GenomeLoc lvL = lastVariant.getLocation(); GenomeLoc lvL = lastVariation.getLocation();
if (eL.getContigIndex() == lvL.getContigIndex()) { if (eL.getContigIndex() == lvL.getContigIndex()) {
long d = eL.distance(lvL); long d = eL.distance(lvL);
if ( lastVariantInterval != null && lastVariantInterval.compareTo(interval) != 0) { if ( lastVariantInterval != null && lastVariantInterval.compareTo(interval) != 0) {
@ -57,7 +57,7 @@ public class NeighborDistanceAnalysis extends BasicVariantAnalysis implements Ge
} }
} }
lastVariant = eval; lastVariation = eval;
lastVariantInterval = interval; lastVariantInterval = interval;
} }

View File

@ -1,9 +1,10 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval; package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
@ -13,10 +14,9 @@ import java.util.List;
* SOFTWARE COPYRIGHT NOTICE AGREEMENT * SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the * This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
* * <p/>
* This software is supplied without any warranty or guaranteed support whatsoever. Neither * This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/ */
public class TransitionTranversionAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis { public class TransitionTranversionAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis {
long nTransitions = 0, nTransversions = 0; long nTransitions = 0, nTransversions = 0;
@ -25,13 +25,18 @@ public class TransitionTranversionAnalysis extends BasicVariantAnalysis implemen
super("transitions_transversions"); super("transitions_transversions");
} }
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
if ( eval != null && eval.isSNP() ) { if (eval != null && eval.isSNP()) {
char refBase = eval.getRefSnpFWD(); if (eval.getAlternateBases().length() != 2) {
char altBase = eval.getAltSnpFWD(); throw new StingException("TransitionTranversionAnalysis works only with biallelic variants");
}
char refBase = eval.getReference();
char altBase = (eval.getAlternateBases().charAt(0) == refBase) ? eval.getAlternateBases().charAt(1) : eval.getAlternateBases().charAt(0);
BaseUtils.BaseSubstitutionType subType = BaseUtils.SNPSubstitutionType(refBase, altBase); BaseUtils.BaseSubstitutionType subType = BaseUtils.SNPSubstitutionType(refBase, altBase);
if ( subType == BaseUtils.BaseSubstitutionType.TRANSITION ) if (subType == BaseUtils.BaseSubstitutionType.TRANSITION)
nTransitions++; nTransitions++;
else else
nTransversions++; nTransversions++;

View File

@ -1,8 +1,8 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval; package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.List; import java.util.List;
@ -23,7 +23,7 @@ public interface VariantAnalysis {
public PrintStream getCallPrintStream(); public PrintStream getCallPrintStream();
public List<String> getParams(); public List<String> getParams();
public void initialize(VariantEvalWalker master, PrintStream out, PrintStream callOut, String filename); public void initialize(VariantEvalWalker master, PrintStream out, PrintStream callOut, String filename);
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context); public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context);
public void finalize(long nSites); public void finalize(long nSites);
public List<String> done(); public List<String> done();
} }

View File

@ -1,12 +1,13 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval; package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenotypeUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.List;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List;
/** /**
* The Broad Institute * The Broad Institute
@ -27,10 +28,10 @@ public class VariantCounter extends BasicVariantAnalysis implements GenotypeAnal
super("variant_counts"); super("variant_counts");
} }
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
nSNPs += eval == null ? 0 : 1; nSNPs += eval == null ? 0 : 1;
if ( this.getMaster().evalContainsGenotypes && eval != null && eval.isSNP() && GenotypeUtils.isHet(eval) ) if ( this.getMaster().evalContainsGenotypes && eval != null && eval.isSNP() && ((VariantBackedByGenotype)eval).getGenotype( DiploidGenotype.valueOf(eval.getAlternateBases())).isHet() )
nHets++; nHets++;
return null; return null;

View File

@ -1,21 +1,20 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval; package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.List;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List;
/** /**
* The Broad Institute * The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT * SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the * This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
* * <p/>
* This software is supplied without any warranty or guaranteed support whatsoever. Neither * This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/ */
public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis { public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis {
private String dbName; private String dbName;
@ -31,45 +30,81 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
dbName = name; dbName = name;
} }
public void inc(AllelicVariant dbSNP, AllelicVariant eval) { public void inc(Variation dbSNP, Variation eval) {
boolean inDB = dbSNP != null; boolean inDB = dbSNP != null;
boolean inEval = eval != null; boolean inEval = eval != null;
if (inDB ) { if (inDB) {
if ( dbSNP.isSNP() ) nDBSNPs++; if (dbSNP.isSNP()) nDBSNPs++;
if ( dbSNP.isIndel() ) nDBIndels++; if (dbSNP.isIndel()) nDBIndels++;
//System.out.printf("snp=%b ins=%b del=%b indel=%b %s%n", dbSNP.isSNP(), dbSNP.isInsertion(), dbSNP.isDeletion(), dbSNP.isIndel(), dbSNP);
} }
if (inEval) nEvalObs++; if (inEval) nEvalObs++;
if (inDB && inEval) { if (inDB && inEval) {
if ( dbSNP.isSNP() ) { // changes the calculation a bit if (dbSNP.isSNP()) { // changes the calculation a bit
nOverlapping++; nOverlapping++;
if ( ! discordantP(dbSNP, eval) ) if (!discordantP(dbSNP, eval))
nConcordant++; nConcordant++;
} }
if ( dbSNP.isIndel() && eval.isSNP() ) if (dbSNP.isIndel() && eval.isSNP())
nSNPsCalledAtIndels++; nSNPsCalledAtIndels++;
} }
} }
public int nDBSNPs() { return nDBSNPs; } public int callCount = 0;
public int nDBIndels() { return nDBIndels; }
public int nEvalSites() { return nEvalObs; }
public int nOverlappingSites() { return nOverlapping; }
public int nConcordant() { return nConcordant; }
public int nNovelSites() { return Math.abs(nEvalSites() - nOverlappingSites()); }
public int nSNPsAtIndels() { return nSNPsCalledAtIndels; }
public boolean discordantP(AllelicVariant dbSNP, AllelicVariant eval) { public int nDBSNPs() {
if (dbSNP != null && dbSNP.isSNP() && eval != null ) { return nDBSNPs;
return ! (dbSNP.getAltSnpFWD() == eval.getAltSnpFWD() || dbSNP.getRefSnpFWD() == eval.getAltSnpFWD()); }
} else {
return false; public int nDBIndels() {
return nDBIndels;
}
public int nEvalSites() {
return nEvalObs;
}
public int nOverlappingSites() {
return nOverlapping;
}
public int nConcordant() {
return nConcordant;
}
public int nNovelSites() {
return Math.abs(nEvalSites() - nOverlappingSites());
}
public int nSNPsAtIndels() {
return nSNPsCalledAtIndels;
}
public static int count = 0;
public boolean discordantP(Variation dbSNP, Variation eval) {
if (eval != null) {
char alt = (eval.isSNP()) ? eval.getAlternativeBaseForSNP() : eval.getReference();
if (dbSNP != null && dbSNP.isSNP()) {
boolean val = !dbSNP.getAlternateBases().contains(String.valueOf(alt));
if (val) {
//System.err.print(eval.getLocation());
//System.err.print(" eval.alt = " + alt);
//System.err.println(" dbsnp.bases = " + dbSNP.getAlternateBases());
//System.err.println(val);
//System.err.println(count);
}
//else System.err.println("not count == " + count++);
return val;
}
} }
return false;
} }
@ -86,17 +121,19 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
return nConcordant() / (1.0 * nOverlappingSites()); return nConcordant() / (1.0 * nOverlappingSites());
} }
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
// There are four cases here: // There are four cases here:
AllelicVariant dbsnp = (AllelicVariant)tracker.lookup(dbName, null); Variation dbsnp = (Variation) tracker.lookup(dbName, null);
boolean isSNP = dbsnp != null && dbsnp.isSNP(); boolean isSNP = dbsnp != null && dbsnp.isSNP();
inc(dbsnp, eval); inc(dbsnp, eval);
if ( dbsnp != null && eval != null ) { if (dbsnp != null && eval != null) {
if ( dbsnp.isSNP() && eval.isSNP() && discordantP(dbsnp, eval) ) {
if (dbsnp.isSNP() && eval.isSNP() && discordantP(dbsnp, eval)) {
return String.format("Discordant [DBSNP %s] [EVAL %s]", dbsnp, eval); return String.format("Discordant [DBSNP %s] [EVAL %s]", dbsnp, eval);
} else if ( dbsnp.isIndel() && eval.isSNP() ) { } else if (dbsnp.isIndel() && eval.isSNP()) {
return String.format("SNP-at-indel DBSNP=%s %s", dbsnp.getGenotype().get(0), eval); return String.format("SNP-at-indel DBSNP=%s %s", dbsnp.getAlternateBases(), eval);
} else { } else {
return null; return null;
// return "Novel " + eval; // return "Novel " + eval;
@ -128,10 +165,10 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
s.add(String.format("n_concordant %d", nConcordant())); s.add(String.format("n_concordant %d", nConcordant()));
s.add(String.format("n_novel_sites %d", nNovelSites())); s.add(String.format("n_novel_sites %d", nNovelSites()));
s.add(String.format("percent_eval_sites_in_db %.2f", 100*fractionEvalSitesCoveredByDB())); s.add(String.format("percent_eval_sites_in_db %.2f", 100 * fractionEvalSitesCoveredByDB()));
s.add(String.format("concordance_rate %.2f", 100*concordanceRate())); s.add(String.format("concordance_rate %.2f", 100 * concordanceRate()));
s.add(String.format("percent_db_sites_in_eval %.2f", 100*fractionDBSitesDiscoveredInEval())); s.add(String.format("percent_db_sites_in_eval %.2f", 100 * fractionDBSitesDiscoveredInEval()));
s.add(String.format("n_snp_calls_at_indels %d", nSNPsAtIndels())); s.add(String.format("n_snp_calls_at_indels %d", nSNPsAtIndels()));
s.add(String.format("percent_calls_at_indels %.2f", nSNPsAtIndels() / (0.01 * nEvalSites()))); s.add(String.format("percent_calls_at_indels %.2f", nSNPsAtIndels() / (0.01 * nEvalSites())));

View File

@ -2,14 +2,12 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant; import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.SNPCallFromGenotypes;
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
@ -28,8 +26,8 @@ import java.util.*;
* *
*/ */
@By(DataSource.REFERENCE) @By(DataSource.REFERENCE)
@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="eval",type=AllelicVariant.class)}) @Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="eval",type=RodGeliText.class)}) // right now we have no base variant class for rods, this should change
@Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=AllelicVariant.class), @RMD(name="dbsnp",type=AllelicVariant.class),@RMD(name="hapmap-chip",type=AllelicVariant.class), @RMD(name="interval",type=IntervalRod.class)}) @Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=RodGeliText.class), @RMD(name="dbsnp",type=rodDbSNP.class),@RMD(name="hapmap-chip",type=RodGenotypeChipAsGFF.class), @RMD(name="interval",type=IntervalRod.class)})
public class VariantEvalWalker extends RefWalker<Integer, Integer> { public class VariantEvalWalker extends RefWalker<Integer, Integer> {
@Argument(shortName="minConfidenceScore", doc="Minimum confidence score to consider an evaluation SNP a variant", required=false) @Argument(shortName="minConfidenceScore", doc="Minimum confidence score to consider an evaluation SNP a variant", required=false)
public int minConfidenceScore = -1; public int minConfidenceScore = -1;
@ -181,19 +179,18 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
nSites++; nSites++;
// Iterate over each analysis, and update it // Iterate over each analysis, and update it
AllelicVariant eval = (AllelicVariant)tracker.lookup("eval", null); Variation eval = (Variation)tracker.lookup("eval", null);
//if ( eval!=null ) System.out.printf("Eval: %f %d %b%n", eval.getVariationConfidence(), minDiscoveryQ, eval.getVariationConfidence() < minDiscoveryQ); //if ( eval!=null ) System.out.printf("Eval: %f %d %b%n", eval.getVariationConfidence(), minDiscoveryQ, eval.getVariationConfidence() < minDiscoveryQ);
if ( eval != null ) { if ( eval != null ) {
// comment to disable variant filtering by confidence score // comment to disable variant filtering by confidence score
if ( evalContainsGenotypes ) { if ( evalContainsGenotypes ) {
// Genotyping - use best vs. next best lod // Genotyping - use best vs. next best lod
if ( eval.getConsensusConfidence() < minConfidenceScore ) eval = null; if ( eval.getNegLog10PError() < minConfidenceScore ) eval = null;
} else { } else {
// Variant discovery - use best vs. reference lod // Variation discovery - use best vs. reference lod
if ( Math.abs(eval.getVariationConfidence()) < minConfidenceScore ) eval = null; if ( Math.abs(eval.getNegLog10PError()) < minConfidenceScore ) eval = null;
} }
} }
@ -202,7 +199,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
// update the known / novel set by checking whether the knownSNPDBName track has an entry here // update the known / novel set by checking whether the knownSNPDBName track has an entry here
if ( eval != null ) { if ( eval != null ) {
AllelicVariant dbsnp = (AllelicVariant)tracker.lookup(knownSNPDBName, null); Variation dbsnp = (Variation)tracker.lookup(knownSNPDBName, null);
String noveltySet = dbsnp == null ? NOVEL_SNPS : KNOWN_SNPS; String noveltySet = dbsnp == null ? NOVEL_SNPS : KNOWN_SNPS;
updateAnalysisSet(noveltySet, eval, tracker, ref.getBase(), context); updateAnalysisSet(noveltySet, eval, tracker, ref.getBase(), context);
} }
@ -219,7 +216,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
} }
public void updateAnalysisSet(final String analysisSetName, AllelicVariant eval, public void updateAnalysisSet(final String analysisSetName, Variation eval,
RefMetaDataTracker tracker, char ref, AlignmentContext context) { RefMetaDataTracker tracker, char ref, AlignmentContext context) {
// Iterate over each analysis, and update it // Iterate over each analysis, and update it
if ( getAnalysisSet(analysisSetName) != null ) { if ( getAnalysisSet(analysisSetName) != null ) {

View File

@ -1,8 +1,9 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval; package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant; import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.genotype.Variation;
/** /**
* The Broad Institute * The Broad Institute
@ -22,7 +23,7 @@ public class VariantMatcher extends BasicVariantAnalysis implements GenotypeAnal
dbName = name; dbName = name;
} }
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
String r = null; String r = null;
AllelicVariant db = (AllelicVariant)tracker.lookup(dbName, null); AllelicVariant db = (AllelicVariant)tracker.lookup(dbName, null);

View File

@ -20,7 +20,7 @@ public class GenotypeUtils {
// VariantType() {} // VariantType() {}
} }
/** This method accepts rods that implement either Genotype or GenotypeList interface (all others will result in an exception). Variant /** This method accepts rods that implement either Genotype or GenotypeList interface (all others will result in an exception). Variant
* (Genotype object) of the specified type (point mutation or indel) will be extracted from GenotypeList rod if such variant exists, or the rod itself * (Genotype object) of the specified type (point mutation or indel) will be extracted from GenotypeList rod if such variant exists, or the rod itself
* will be typecasted and returned back if it implements Genotype and represents the specified variant type. If the last argument is false, then * will be typecasted and returned back if it implements Genotype and represents the specified variant type. If the last argument is false, then
* null will be returned in all other cases. If the last argument is true and either a) rod is a GenotypeList that lacks a call of the specified type, but call * null will be returned in all other cases. If the last argument is true and either a) rod is a GenotypeList that lacks a call of the specified type, but call

View File

@ -0,0 +1,143 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.GenomeLoc;
/**
*
* @author aaron
*
* Class BasicGenotype
*
* represents a basic genotype object
*/
public class BasicGenotype implements Genotype {
// the genotype string
private String mGenotype;
private GenomeLoc mLocation;
private char mRef;
private double mNetLog10PError;
/**
* create a basic genotype
* @param genotype
*/
public BasicGenotype(GenomeLoc location, String genotype, char ref, double netLog10PError) {
mNetLog10PError = netLog10PError;
mGenotype = genotype;
mLocation = location;
mRef = ref;
}
/**
* get the -1 * (log 10 of the error value)
*
* @return the log based error estimate
*/
@Override
public double getNegLog10PError() {
return mNetLog10PError;
}
/**
* get the bases that represent this
*
* @return the bases, as a string
*/
@Override
public String getBases() {
return mGenotype;
}
/**
* get the ploidy
*
* @return the ploidy value
*/
@Override
public int getPloidy() {
return mGenotype.length();
}
/**
* Returns true if both observed alleles are the same (regardless of whether they are ref or alt)
*
* @return true if we're homozygous, false otherwise
*/
@Override
public boolean isHom() {
if (mGenotype.length() < 1)
return false;
char base = mGenotype.charAt(0);
for (char cur : mGenotype.toCharArray()) {
if (base != cur) {
return false;
}
}
return true;
}
/**
* Returns true if observed alleles differ (regardless of whether they are ref or alt)
*
* @return true if we're het, false otherwise
*/
@Override
public boolean isHet() {
return !isHom();
}
/**
* get the genotype's location
*
* @return a GenomeLoc representing the location
*/
@Override
public GenomeLoc getLocation() {
return mLocation;
}
/**
* returns true if the genotype is a point genotype, false if it's a indel / deletion
*
* @return true is a SNP
*/
@Override
public boolean isPointGenotype() {
return true;
}
/**
* given the reference, are we a variant? (non-ref)
*
* @param ref the reference base or bases
*
* @return true if we're a variant
*/
@Override
public boolean isVariant(char ref) {
return !(mGenotype.charAt(0) == ref && isHom());
}
/**
* get the reference base.
*
* @return a character, representing the reference base
*/
@Override
public char getReference() {
return mRef;
}
/**
* return this genotype as a variant
*
* @return the variant
*/
@Override
public Variation toVariation() {
if (!isVariant(this.mRef)) throw new IllegalStateException("this genotype is not a variant");
return new BasicVariation(this.getBases(),mRef,this.getBases().length(),mLocation,mNetLog10PError);
}
}

View File

@ -8,7 +8,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
* User: aaronmckenna * User: aaronmckenna
* Date: Sep 9, 2009 * Date: Sep 9, 2009
* Time: 9:32:34 PM * Time: 9:32:34 PM
* * <p/>
* a basic implementation of variant * a basic implementation of variant
*/ */
public class BasicVariation implements Variation { public class BasicVariation implements Variation {
@ -18,12 +18,14 @@ public class BasicVariation implements Variation {
protected final int mLength; protected final int mLength;
protected final GenomeLoc mLocation; protected final GenomeLoc mLocation;
protected final double mConfidence; protected final double mConfidence;
/** /**
* the constructor * the constructor
* @param bases the bases that this variant represents *
* @param bases the bases that this variant represents
* @param reference the reference base * @param reference the reference base
* @param length are we a single base variant, or a indel/deletion? length is negitive for an indel, * @param length are we a single base variant, or a indel/deletion? length is negitive for an indel,
* positive for a indel, and 0 for a substitution * positive for a indel, and 0 for a substitution
*/ */
public BasicVariation(String bases, char reference, int length, GenomeLoc location, double confidence) { public BasicVariation(String bases, char reference, int length, GenomeLoc location, double confidence) {
mBases = bases; mBases = bases;
@ -40,14 +42,14 @@ public class BasicVariation implements Variation {
@Override @Override
public VARIANT_TYPE getType() { public VARIANT_TYPE getType() {
if (mLength >0) return VARIANT_TYPE.INDEL; if (mLength > 0) return VARIANT_TYPE.INDEL;
if (mLength <0) return VARIANT_TYPE.DELETION; if (mLength < 0) return VARIANT_TYPE.DELETION;
return (isSNP()) ? VARIANT_TYPE.SNP : VARIANT_TYPE.REFERENCE; return (isSNP()) ? VARIANT_TYPE.SNP : VARIANT_TYPE.REFERENCE;
} }
@Override @Override
public boolean isSNP() { public boolean isSNP() {
if (mLength == 0 && Utils.dupString(mRef,2).equals(mBases)) return true; if (mLength == 0 && Utils.dupString(mRef, 2).equals(mBases)) return true;
return false; return false;
} }
@ -62,7 +64,7 @@ public class BasicVariation implements Variation {
} }
@Override @Override
public String getBases() { public String getAlternateBases() {
return mBases; return mBases;
} }
@ -72,19 +74,8 @@ public class BasicVariation implements Variation {
} }
@Override @Override
public String getReference() { public char getReference() {
return String.valueOf(mRef); return (mRef);
}
@Override
public boolean isHet() {
if (mLength == 0 && mBases.charAt(0) != mBases.charAt(1)) return true;
return false;
}
@Override
public boolean isHom() {
return !isHet();
} }
@Override @Override
@ -98,12 +89,29 @@ public class BasicVariation implements Variation {
return false; return false;
} }
/**
* are we an insertion or a deletion? yes, then return true. No? Well, false it is.
*
* @return true if we're an insertion or deletion
*/
@Override @Override
public char getAlternateBase() { public boolean isIndel() {
if (mLength != 0) { return (isDeletion() || isInsertion());
throw new UnsupportedOperationException("Unable to get alternate base for indel / deletion");
}
if (mBases.charAt(0) == mRef) return mBases.charAt(1);
else return mBases.charAt(0);
} }
/**
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
* of
*
* @return a char, representing the alternate base
*/
@Override
public char getAlternativeBaseForSNP() {
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
if (getAlternateBases().charAt(0) == this.getReference())
return getAlternateBases().charAt(1);
return getAlternateBases().charAt(0);
}
} }

View File

@ -0,0 +1,18 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
/**
* @author aaron
* <p/>
* Interface VariantBackedByGenotype
* <p/>
* this variant is backed by genotypic information
*/
public interface VariantBackedByGenotype {
/**
* get the likelihoods
* @return an array in lexigraphical order of the likelihoods
*/
public Genotype getGenotype(DiploidGenotype x);
}

View File

@ -46,13 +46,6 @@ public interface Variation {
*/ */
public boolean isDeletion(); public boolean isDeletion();
/**
* get the base representation of this Variant
*
* @return a string, of ploidy
*/
public String getBases();
/** /**
* get the location that this Variant represents * get the location that this Variant represents
* *
@ -65,13 +58,7 @@ public interface Variation {
* *
* @return the reference base or bases, as a string * @return the reference base or bases, as a string
*/ */
public String getReference(); public char getReference();
/** is our base representation heterozygous */
public boolean isHet();
/** is our base representation homozygous */
public boolean isHom();
/** /**
* get the -1 * (log 10 of the error value) * get the -1 * (log 10 of the error value)
@ -87,9 +74,22 @@ public interface Variation {
public boolean isReference(); public boolean isReference();
/** /**
* gets the alternate base. If this is homref, throws an UnsupportedOperationException * gets the alternate bases. If this is homref, throws an UnsupportedOperationException
* @return * @return
*/ */
public char getAlternateBase(); public String getAlternateBases();
/**
* are we an insertion or a deletion? yes, then return true. No? Well, false it is.
* @return true if we're an insertion or deletion
*/
public boolean isIndel();
/**
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
* of
* @return a char, representing the alternate base
*/
public char getAlternativeBaseForSNP();
} }

View File

@ -8,14 +8,12 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject; import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.junit.Assert; import org.junit.Assert;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import org.junit.Before; import org.junit.Before;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
import java.io.File; import java.io.File;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
@ -60,38 +58,7 @@ public class RodGLFTest extends BaseTest {
assertEquals(finalRecordCount, counter); assertEquals(finalRecordCount, counter);
} }
/** make sure we're returning true to biallelic */
@Test
public void testIsBiallelic() {
RodGLF glf = iter.next();
Assert.assertFalse(iter.isBiallelic());
}
// best vs next-best
@Test
public void testGetConsensusConfidence() {
RodGLF glf = iter.next();
assertEquals(120.0, iter.getConsensusConfidence(), 0.005);
glf = iter.next();
assertEquals(120.0, iter.getConsensusConfidence(), 0.005);
}
// best vs. ref
@Test
public void testGetVariationConfidence() {
RodGLF glf = iter.next();
assertEquals(0.0, iter.getVariationConfidence(), 0.005);
glf = iter.next();
assertEquals(250.0, iter.getVariationConfidence(), 0.005);
}
@Test
public void testGetGenotype() {
RodGLF glf = iter.next();
List<String> str = iter.getGenotype();
Assert.assertTrue(str.get(0).equals("AA"));
}
@Test @Test
public void testIsSNP() { public void testIsSNP() {
@ -112,7 +79,7 @@ public class RodGLFTest extends BaseTest {
glf = iter.next(); glf = iter.next();
Assert.assertFalse(iter.isReference()); Assert.assertFalse(iter.isReference());
} }
/*
@Test(expected = IllegalStateException.class) @Test(expected = IllegalStateException.class)
public void testGetAltSnpFWDIllegalException() { public void testGetAltSnpFWDIllegalException() {
RodGLF glf = iter.next(); RodGLF glf = iter.next();
@ -159,7 +126,7 @@ public class RodGLFTest extends BaseTest {
/** /**
* move to the second and third bases, and check that the * move to the second and third bases, and check that the
* alternate bases are correct. * alternate bases are correct.
*/ *
@Test @Test
public void testGetAltBasesFWD() { public void testGetAltBasesFWD() {
RodGLF glf = iter.next(); RodGLF glf = iter.next();
@ -169,7 +136,7 @@ public class RodGLFTest extends BaseTest {
Assert.assertTrue("CT".equals(iter.getAltBasesFWD())); Assert.assertTrue("CT".equals(iter.getAltBasesFWD()));
} }
*/
@Test @Test
public void testRodLocations() { public void testRodLocations() {
GenomeLoc loc = null; GenomeLoc loc = null;
@ -177,7 +144,7 @@ public class RodGLFTest extends BaseTest {
RodGLF glf = iter.next(); RodGLF glf = iter.next();
if (loc != null) { if (loc != null) {
if (iter.getLocation().isBefore(loc)) { if (iter.getLocation().isBefore(loc)) {
fail("locations in the GLF came out of order loc = " + loc.toString() + " new loc = " + iter.getLocation().toString()); Assert.fail("locations in the GLF came out of order loc = " + loc.toString() + " new loc = " + iter.getLocation().toString());
} }
} }
loc = iter.getLocation(); loc = iter.getLocation();

View File

@ -0,0 +1,62 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.File;
import java.io.FileNotFoundException;
/**
*
* @author aaron
*
* Class BasicGenotypeTest
*
* tests the basic genotype class
*/
public class BasicGenotypeTest extends BaseTest {
private static IndexedFastaSequenceFile seq;
@BeforeClass
public static void beforeTests() {
try {
seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
} catch (FileNotFoundException e) {
throw new StingException("unable to load the sequence dictionary");
}
GenomeLocParser.setupRefContigOrdering(seq);
}
@Test
public void testBasicGenotypeIsHom() {
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0);
Assert.assertTrue(gt.isHom());
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0);
Assert.assertTrue(!gt2.isHom());
}
@Test
public void testBasicGenotypeIsHet() {
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0);
Assert.assertTrue(!gt.isHet());
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0);
Assert.assertTrue(gt2.isHet());
}
@Test
public void testBasicGenotypeIsVariant() {
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0);
Assert.assertTrue(!gt.isVariant('A'));
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0);
Assert.assertTrue(gt2.isVariant('A'));
BasicGenotype gt3 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"TT",'A',0);
Assert.assertTrue(gt3.isVariant('A'));
}
}