From e03fccb223217b7acb0949d48fce02e7e2b7c1f6 Mon Sep 17 00:00:00 2001 From: aaron Date: Mon, 14 Sep 2009 05:34:33 +0000 Subject: [PATCH] Changes to switch Variant Eval over to the new Variation system. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1611 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/refdata/RodGLF.java | 224 ++++++------------ .../sting/gatk/refdata/RodGeliText.java | 84 ++++++- .../gatk/refdata/RodGenotypeChipAsGFF.java | 76 +++++- .../gatk/refdata/SNPCallFromGenotypes.java | 5 +- .../sting/gatk/refdata/rodDbSNP.java | 89 ++++++- .../gatk/walkers/filters/RatioFilter.java | 3 +- .../varianteval/BasicVariantAnalysis.java | 6 +- .../varianteval/CallableBasesAnalysis.java | 63 +++-- .../varianteval/ClusterCounterAnalysis.java | 18 +- .../varianteval/GenotypeConcordance.java | 93 ++++---- .../varianteval/HardyWeinbergEquilibrium.java | 7 +- .../varianteval/IndelMetricsAnalysis.java | 18 +- .../varianteval/NeighborDistanceAnalysis.java | 18 +- .../TransitionTranversionAnalysis.java | 23 +- .../walkers/varianteval/VariantAnalysis.java | 6 +- .../walkers/varianteval/VariantCounter.java | 13 +- .../varianteval/VariantDBCoverage.java | 107 ++++++--- .../varianteval/VariantEvalWalker.java | 23 +- .../walkers/varianteval/VariantMatcher.java | 5 +- .../sting/utils/GenotypeUtils.java | 2 +- .../sting/utils/genotype/BasicGenotype.java | 143 +++++++++++ .../sting/utils/genotype/BasicVariation.java | 62 ++--- .../genotype/VariantBackedByGenotype.java | 18 ++ .../sting/utils/genotype/Variation.java | 32 +-- .../sting/gatk/refdata/RodGLFTest.java | 41 +--- .../utils/genotype/BasicGenotypeTest.java | 62 +++++ 26 files changed, 825 insertions(+), 416 deletions(-) create mode 100644 java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java create mode 100644 java/src/org/broadinstitute/sting/utils/genotype/VariantBackedByGenotype.java create mode 100644 java/test/org/broadinstitute/sting/utils/genotype/BasicGenotypeTest.java diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java index 1f6cc3120..c0757c348 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java @@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.genotype.LikelihoodObject; +import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.glf.GLFReader; import org.broadinstitute.sting.utils.genotype.glf.GLFRecord; import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall; @@ -13,9 +14,7 @@ import org.broadinstitute.sting.utils.genotype.glf.VariableLengthCall; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; -import java.util.ArrayList; import java.util.Iterator; -import java.util.List; /** @@ -25,7 +24,7 @@ import java.util.List; *

* the rod class for GLF data. */ -public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator { +public class RodGLF implements ReferenceOrderedDatum, Variation, Iterator { static int count = 0; public GLFReader mReader; private final String mName; @@ -38,6 +37,7 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator methods (where Variant=SNP,Insertion, etc) should * return false at such site to ensure consistency. This method is included for use with genotyping calls (isGenotype()==true), it makes @@ -183,6 +135,31 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator getGenotype() throws IllegalStateException { - List ret = new ArrayList(); - ret.add(this.getBestGenotype(1).toString()); - return ret; - } - - /** - * Return actual number of observed alleles (chromosomes) in the genotype. If this variant is not a genotype, - * should throw IllegalStateException. - * - * @return - */ - @Override - public int getPloidy() throws IllegalStateException { - return 2; // we're so haploid it hurts - } - - /** - * Returns true if the site has at most two known or observed alleles (ref and non-ref), or false if there are > 2 allelic variants known or observed. When - * the implementing class is a genotype, alleles should be always counted including the reference allele whether it was observed in the particular - * individual or not: i.e. if the reference is 'C', then both 'CA' and 'AA' genotypes must be reported as bi-allelic, while 'AT' is not bi-allelic (since there are - * two allelic variants, 'A' and 'T' in addition to the (known) reference variant 'C'). - * - * @return - */ - @Override - public boolean isBiallelic() { - return false; - } - - public int length() { return 1; } - @Override public int compareTo(ReferenceOrderedDatum that) { return this.mLoc.compareTo(that.getLocation()); @@ -371,8 +289,10 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator getGenotypes(); } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java index 46a7a1972..fc5abcc34 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java @@ -1,10 +1,12 @@ package org.broadinstitute.sting.gatk.refdata; import net.sf.picard.util.SequenceUtil; - -import java.util.*; - import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.genotype.*; +import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype; + +import java.util.Arrays; +import java.util.List; /** * Example format: @@ -16,7 +18,7 @@ import org.broadinstitute.sting.utils.*; * Time: 10:47:14 AM * To change this template use File | Settings | File Templates. */ -public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVariant { +public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, VariantBackedByGenotype, AllelicVariant { public GenomeLoc loc; // genome location of SNP // Reference sequence chromosome or scaffold // Start and stop positions in chrom @@ -60,6 +62,26 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria // ---------------------------------------------------------------------- public GenomeLoc getLocation() { return loc; } + /** + * get the reference base(s) at this position + * + * @return the reference base or bases, as a string + */ + @Override + public char getReference() { + return getRefSnpFWD(); + } + + /** + * get the -1 * (log 10 of the error value) + * + * @return the log based error estimate + */ + @Override + public double getNegLog10PError() { + return 4; // -log10(0.0001) + } + public boolean onFwdStrand() { return strand.equals("+"); } @@ -108,10 +130,24 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria } public String getAllelesFWDString() { - return Utils.join("/", getAllelesFWD()); + return Utils.join("", getAllelesFWD()); } - // ---------------------------------------------------------------------- + /** + * get the frequency of this variant + * + * @return VariantFrequency with the stored frequency + */ + @Override + public double getNonRefAlleleFrequency() { + return 0; //To change body of implemented methods use File | Settings | File Templates. + } + + /** @return the VARIANT_TYPE of the current variant */ + @Override + public VARIANT_TYPE getType() { + return VARIANT_TYPE.SNP; + }// ---------------------------------------------------------------------- // // What kind of variant are we? // @@ -119,7 +155,34 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria public boolean isSNP() { return varType.contains("single"); } public boolean isInsertion() { return varType.contains("insertion"); } public boolean isDeletion() { return varType.contains("deletion"); } + + /** + * get the base representation of this Variant + * + * @return a string, of ploidy + */ + @Override + public String getAlternateBases() { + return getAllelesFWDString(); + } + public boolean isIndel() { return isInsertion() || isDeletion() || varType.contains("in-del"); } + + /** + * gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case + * of + * + * @return a char, representing the alternate base + */ + @Override + public char getAlternativeBaseForSNP() { + return getAltSnpFWD(); /* + if (!this.isSNP()) throw new IllegalStateException("we're not a SNP"); + if (getAlternateBases().charAt(0) == this.getReference()) + return getAlternateBases().charAt(1); + return getAlternateBases().charAt(0); */ + } + public boolean isReference() { return false; } // snp locations are never "reference", there's always a variant public boolean isHapmap() { return validationStatus.contains("by-hapmap"); } @@ -147,7 +210,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria if ( isIndel() ) s += ":Indel"; if ( isHapmap() ) s += ":Hapmap"; if ( is2Hit2Allele() ) s += ":2Hit"; - return s; + return s; } public String repl() { @@ -213,7 +276,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria } public double getMAF() { - // Fixme: update to actually get MAF + // Fixme: update to actually get MAF //return avHet; return -1; } @@ -244,4 +307,14 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria public int length() { return (int)(loc.getStop() - loc.getStart() + 1); } + /** + * get the likelihoods + * + * @return an array in lexigraphical order of the likelihoods + */ + @Override + public org.broadinstitute.sting.utils.genotype.Genotype getGenotype(DiploidGenotype x) { + if (x.toString().equals(this.getAltBasesFWD())) throw new IllegalStateException("Unable to retrieve genotype"); + return new BasicGenotype(this.getLocation(),this.getAltBasesFWD(),this.getRefSnpFWD(),this.getConsensusConfidence()); + } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/RatioFilter.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/RatioFilter.java index 470b069ed..61bc820ec 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/RatioFilter.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/RatioFilter.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.filters; import org.broadinstitute.sting.gatk.contexts.VariantContext; import org.broadinstitute.sting.gatk.refdata.RodGeliText; +import org.broadinstitute.sting.gatk.refdata.AllelicVariant; import org.broadinstitute.sting.utils.*; import org.apache.log4j.Logger; @@ -50,7 +51,7 @@ public abstract class RatioFilter implements VariantExclusionCriterion { Pair counts = scoreVariant(ref, pileup, variant); boolean highGenotypeConfidence = variant.getConsensusConfidence() > minGenotypeConfidenceToTest; - boolean excludable = !excludeHetsOnly() || GenotypeUtils.isHet(variant); + boolean excludable = !excludeHetsOnly() || GenotypeUtils.isHet((AllelicVariant)variant); exclude = excludable && highGenotypeConfidence && pointEstimateExclude(counts); // // for printing only diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/BasicVariantAnalysis.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/BasicVariantAnalysis.java index a84a4c3f5..2435a9f83 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/BasicVariantAnalysis.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/BasicVariantAnalysis.java @@ -1,12 +1,12 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.AllelicVariant; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.genotype.Variation; import java.io.PrintStream; -import java.util.List; import java.util.ArrayList; +import java.util.List; /** * The Broad Institute @@ -65,5 +65,5 @@ public abstract class BasicVariantAnalysis implements VariantAnalysis { } - public abstract String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context); + public abstract String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context); } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/CallableBasesAnalysis.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/CallableBasesAnalysis.java index e7c80fde5..5fd2acfba 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/CallableBasesAnalysis.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/CallableBasesAnalysis.java @@ -1,27 +1,30 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; -import org.broadinstitute.sting.gatk.refdata.AllelicVariant; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype; +import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; +import org.broadinstitute.sting.utils.genotype.Variation; +import org.broadinstitute.sting.utils.genotype.Genotype; +import org.broadinstitute.sting.utils.Utils; -import java.util.List; import java.util.ArrayList; +import java.util.List; /** * The Broad Institute * SOFTWARE COPYRIGHT NOTICE AGREEMENT * This software and its documentation are copyright 2009 by the * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * + *

* This software is supplied without any warranty or guaranteed support whatsoever. Neither * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * */ public class CallableBasesAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis { long all_bases = 0; long all_calls = 0; //final static double[] Qthresholds = { 10, 20, 30, 40, 50, 100, 200, 500, 1000 }; - final static double[] thresholds = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 50, 100 }; + final static double[] thresholds = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 50, 100}; long[] discoverable_bases = new long[thresholds.length]; long[] genotypable_bases = new long[thresholds.length]; @@ -29,36 +32,54 @@ public class CallableBasesAnalysis extends BasicVariantAnalysis implements Genot super("callable_bases"); } - public long nSites() { return all_bases; } - public long nCalls() { return all_calls; } - public long nDiscoverable(int index) { return discoverable_bases[index]; } - public double percentDiscoverable(int index) { return (100.0*nDiscoverable(index)) / nSites(); } - public long nGenotypable(int index) { return genotypable_bases[index]; } - public double percentGenotypable(int index) { return (100.0*nGenotypable(index)) / nSites(); } + public long nSites() { + return all_bases; + } - public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { + public long nCalls() { + return all_calls; + } + + public long nDiscoverable(int index) { + return discoverable_bases[index]; + } + + public double percentDiscoverable(int index) { + return (100.0 * nDiscoverable(index)) / nSites(); + } + + public long nGenotypable(int index) { + return genotypable_bases[index]; + } + + public double percentGenotypable(int index) { + return (100.0 * nGenotypable(index)) / nSites(); + } + + public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { all_bases++; - if ( eval == null ) // no data here! + if (eval == null) // no data here! return null; // we actually have a record here - if ( ! eval.isGenotype() ) { // evaluation record isn't a genotype, die! + if (!(eval instanceof VariantBackedByGenotype)) { // evaluation record isn't a genotype, die! throw new RuntimeException("Evaluation track isn't an Genotype!"); } all_calls++; // For every threshold, updated discoverable and callable - for ( int i = 0; i < thresholds.length; i++ ) { + for (int i = 0; i < thresholds.length; i++) { double threshold = thresholds[i]; - + DiploidGenotype g = DiploidGenotype.valueOf(Utils.dupString(ref, 2)); + Genotype genotype = ((VariantBackedByGenotype) eval).getGenotype(g); // update discoverable - if ( eval.isSNP() && eval.getVariationConfidence() >= threshold ) + if (eval.isSNP() && eval.getNegLog10PError() >= threshold) discoverable_bases[i]++; - if ( eval.isReference() && eval.getConsensusConfidence() >= threshold ) + if (!eval.isSNP() && genotype.getNegLog10PError() >= threshold) discoverable_bases[i]++; - if ( eval.getConsensusConfidence() >= threshold ) + if (genotype.getNegLog10PError() >= threshold) genotypable_bases[i]++; //System.out.printf("Updating %s SNP=%b, REF=%b VarConf=%f ConConf=%f where threshold=%f: discoverable = %d, genotypable = %d%n", @@ -76,7 +97,7 @@ public class CallableBasesAnalysis extends BasicVariantAnalysis implements Genot s.add(String.format("")); s.add(String.format("confidence_threshold\tdiscoverable_bases\tdiscoverable_bases_percent\tgenotypable_bases\tgenotypable_bases_percent")); - for ( int i = 0; i < thresholds.length; i++ ) { + for (int i = 0; i < thresholds.length; i++) { double threshold = thresholds[i]; s.add(String.format("%6.2f\t%d\t%.6f\t%d\t%.6f", threshold, nDiscoverable(i), percentDiscoverable(i), nGenotypable(i), percentGenotypable(i))); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ClusterCounterAnalysis.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ClusterCounterAnalysis.java index 47c273127..bf965a813 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ClusterCounterAnalysis.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ClusterCounterAnalysis.java @@ -1,14 +1,14 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; -import org.broadinstitute.sting.gatk.refdata.AllelicVariant; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.IntervalRod; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.refdata.IntervalRod; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.genotype.Variation; import java.util.ArrayList; -import java.util.List; import java.util.HashSet; +import java.util.List; /** * The Broad Institute @@ -24,7 +24,7 @@ public class ClusterCounterAnalysis extends BasicVariantAnalysis implements Geno ArrayList> variantsWithClusters; int minDistanceForFlagging = 5; int[] neighborWiseBoundries = {1, 2, 5, 10, 20, 50, 100}; - AllelicVariant lastVariant = null; + Variation lastVariation = null; GenomeLoc lastVariantInterval = null; int nSeen = 0; @@ -36,16 +36,16 @@ public class ClusterCounterAnalysis extends BasicVariantAnalysis implements Geno variantsWithClusters.add(new HashSet()); } - public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { + public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { String r = null; if ( eval != null && eval.isSNP() ) { IntervalRod intervalROD = (IntervalRod)tracker.lookup("interval", null); GenomeLoc interval = intervalROD == null ? null : intervalROD.getLocation(); - if (lastVariant != null) { + if (lastVariation != null) { GenomeLoc eL = eval.getLocation(); - GenomeLoc lvL = lastVariant.getLocation(); + GenomeLoc lvL = lastVariation.getLocation(); if (eL.getContigIndex() == lvL.getContigIndex()) { long d = eL.distance(lvL); if ( lastVariantInterval != null && lastVariantInterval.compareTo(interval) != 0) { @@ -66,7 +66,7 @@ public class ClusterCounterAnalysis extends BasicVariantAnalysis implements Geno } } - lastVariant = eval; + lastVariation = eval; lastVariantInterval = interval; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java index 1473dd792..ed6d1c15a 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java @@ -1,21 +1,24 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; -import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; +import org.broadinstitute.sting.utils.genotype.Variation; -import java.util.List; import java.util.ArrayList; +import java.util.List; /** * The Broad Institute * SOFTWARE COPYRIGHT NOTICE AGREEMENT * This software and its documentation are copyright 2009 by the * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * + *

* This software is supplied without any warranty or guaranteed support whatsoever. Neither * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * */ public class GenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis { private String dbName; @@ -35,48 +38,48 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp public GenotypeConcordance(final String name) { super("genotype_concordance"); dbName = name; - for ( int i = 0; i < 4; i++ ) { + for (int i = 0; i < 4; i++) { truth_totals[i] = 0; calls_totals[i] = 0; - for ( int j = 0; j < 4; j++ ) + for (int j = 0; j < 4; j++) table[i][j] = 0; } } - public void inc(AllelicVariant chip, AllelicVariant eval, char ref) { - if ( (chip != null && !chip.isGenotype()) || (eval != null && !eval.isGenotype()) ) + public void inc(Variation chip, Variation eval, char ref) { + if ((chip != null && !(chip instanceof VariantBackedByGenotype) || (eval != null && !(eval instanceof VariantBackedByGenotype)))) throw new StingException("Failure: trying to analyze genotypes of non-genotype data"); + DiploidGenotype g = DiploidGenotype.valueOf(Utils.dupString(ref, 2)); int truthIndex, callIndex; - if ( chip == null ) + if (chip == null) truthIndex = UNKNOWN; - else if ( chip.isReference() && Utils.countOccurrences(ref, chip.getGenotype().get(0)) == chip.getGenotype().get(0).length() ) + else if (chip.getAlternateBases().equals(g.toString())) truthIndex = REF; - else if ( GenotypeUtils.isHet(chip) ) + else if (chip.getAlternateBases().charAt(0) != chip.getAlternateBases().charAt(1)) truthIndex = VAR_HET; else truthIndex = VAR_HOM; - // todo -- FIXME on countOccurences - if ( eval == null ) + if (eval == null) callIndex = NO_CALL; - else if ( eval.isReference() && Utils.countOccurrences(ref, eval.getGenotype().get(0)) == eval.getGenotype().get(0).length() ) + else if (eval.getAlternateBases().equals(g.toString())) callIndex = REF; - else if ( GenotypeUtils.isHet(eval) ) + else if (eval.getAlternateBases().charAt(0) != eval.getAlternateBases().charAt(1)) callIndex = VAR_HET; else callIndex = VAR_HOM; - if ( chip != null || eval != null ) { + if (chip != null || eval != null) { //System.out.printf("TEST: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval); table[truthIndex][callIndex]++; truth_totals[truthIndex]++; - if ( callIndex != NO_CALL ) calls_totals[callIndex]++; + if (callIndex != NO_CALL) calls_totals[callIndex]++; } } - public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { - AllelicVariant chip = (AllelicVariant)tracker.lookup(dbName, null); + public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { + Variation chip = (Variation) tracker.lookup(dbName, null); inc(chip, eval, ref); return null; } @@ -84,24 +87,24 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp private void addCalledGenotypeConcordance(List s) { StringBuilder sb = new StringBuilder(); sb.append("CALLED_GENOTYPE_CONCORDANCE\t"); - for ( int i = 0; i < 4; i++ ) { + for (int i = 0; i < 4; i++) { int nConcordantCallsI = table[i][i]; String value = "N/A"; - if ( i != UNKNOWN ) - value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i]-table[UNKNOWN][i])); + if (i != UNKNOWN) + value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i] - table[UNKNOWN][i])); sb.append(value); - } - s.add(sb.toString()); + } + s.add(sb.toString()); } - + /** * How many overall calls where made that aren't NO_CALLS or UNKNOWNS? */ private int getNCalled() { int n = 0; - for ( int i = 0; i < 4; i++ ) - for ( int j = 0; j < 4; j++ ) - if ( i != NO_CALL && j != NO_CALL ) n += table[i][j]; + for (int i = 0; i < 4; i++) + for (int j = 0; j < 4; j++) + if (i != NO_CALL && j != NO_CALL) n += table[i][j]; return n; } @@ -109,30 +112,30 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp int nConcordantRefCalls = table[REF][REF]; int nConcordantHetCalls = table[VAR_HET][VAR_HET]; int nConcordantVarHomCalls = table[VAR_HOM][VAR_HOM]; - int nVarCalls = table[VAR_HOM][VAR_HET] + table[VAR_HOM][VAR_HOM] + table[VAR_HET][VAR_HET] + table[VAR_HET][VAR_HOM]; + int nVarCalls = table[VAR_HOM][VAR_HET] + table[VAR_HOM][VAR_HOM] + table[VAR_HET][VAR_HET] + table[VAR_HET][VAR_HOM]; int nConcordantVarCalls = nConcordantHetCalls + nConcordantVarHomCalls; int nConcordantCalls = nConcordantRefCalls + nConcordantVarCalls; int nTrueVar = truth_totals[VAR_HET] + truth_totals[VAR_HOM]; int nCalled = getNCalled(); s.add(String.format("VARIANT_SENSITIVITY %s", cellPercent(nVarCalls, nTrueVar))); - s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls))); - s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled))); + s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls))); + s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled))); } public List done() { List s = new ArrayList(); s.add(String.format("name %s", dbName)); s.add(String.format("TRUTH_STATE\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CALL\t\tTOTALS\tTRUE_GENOTYPE_CONCORDANCE\tGENOTYPE_SENSITIVITY")); - for (int i=0; i < 4; i++) { + for (int i = 0; i < 4; i++) { StringBuffer sb = new StringBuffer(); sb.append(String.format("%15s ", TRUTH_NAMES[i])); - for (int j=0; j < 4; j++) + for (int j = 0; j < 4; j++) sb.append(String.format("%9d ", table[i][j])); sb.append(String.format("%9d ", truth_totals[i])); - if ( i == VAR_HET || i == VAR_HOM ) { - sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF]+table[i][VAR_HET]+table[i][VAR_HOM]))); - sb.append(String.format("%s", cellPercent(truth_totals[i]-table[i][NO_CALL], truth_totals[i]))); - } else { + if (i == VAR_HET || i == VAR_HOM) { + sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM]))); + sb.append(String.format("%s", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i]))); + } else { sb.append("\tN/A\t\t\tN/A"); } s.add(sb.toString()); @@ -140,16 +143,16 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp addCalledGenotypeConcordance(s); addOverallStats(s); - - for (int i=0; i < 4; i++) { - for (int j=0; j < 4; j++) { + + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { s.add(String.format("%s_%s_%s %d", TRUTH_NAMES[i], CALL_NAMES[j], "NO_SITES", table[i][j])); s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_TRUTH", cellPercent(table[i][j], truth_totals[i]))); s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_CALLS", cellPercent(table[i][j], calls_totals[j]))); } - if ( i == VAR_HET || i == VAR_HOM ) { - s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF]+table[i][VAR_HET]+table[i][VAR_HOM]))); - s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i]-table[i][NO_CALL], truth_totals[i]))); + if (i == VAR_HET || i == VAR_HOM) { + s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM]))); + s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i]))); } } return s; @@ -158,7 +161,7 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp private static String cellPercent(int count, int total) { StringBuffer sb = new StringBuffer(); total = Math.max(total, 0); - sb.append(String.format("%.2f", (100.0*count)/total)); + sb.append(String.format("%.2f", (100.0 * count) / total)); sb.append("%"); return sb.toString(); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/HardyWeinbergEquilibrium.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/HardyWeinbergEquilibrium.java index b7a5968f6..9ae4d38f0 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/HardyWeinbergEquilibrium.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/HardyWeinbergEquilibrium.java @@ -1,17 +1,16 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; +import cern.jet.math.Arithmetic; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.AllelicVariant; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.SNPCallFromGenotypes; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation; +import org.broadinstitute.sting.utils.genotype.Variation; import java.util.ArrayList; import java.util.List; -import cern.jet.math.Arithmetic; - /** * The Broad Institute * SOFTWARE COPYRIGHT NOTICE AGREEMENT @@ -32,7 +31,7 @@ public class HardyWeinbergEquilibrium extends BasicVariantAnalysis implements Po this.threshold = threshold; } - public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { + public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { String r = null; if ( eval != null && diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/IndelMetricsAnalysis.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/IndelMetricsAnalysis.java index 0c599bff8..6878d91fa 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/IndelMetricsAnalysis.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/IndelMetricsAnalysis.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.AllelicVariant; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.genotype.Variation; -import java.util.List; import java.util.ArrayList; +import java.util.List; /** * The Broad Institute @@ -29,19 +29,19 @@ public class IndelMetricsAnalysis extends BasicVariantAnalysis implements Genoty sizes[0][i] = sizes[1][i] = 0; } - public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { - if ( eval != null && eval.isIndel() ) { + public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { + if ( eval != null && eval.isInsertion() ) { if ( eval.isInsertion() ) insertions++; else if ( eval.isDeletion() ) deletions++; else - throw new RuntimeException("Variant is indel, but isn't insertion or deletion!"); + throw new RuntimeException("Variation is indel, but isn't insertion or deletion!"); - if ( eval.length() < 100 ) { - sizes[eval.isDeletion() ? 0 : 1][eval.length()]++; - if ( eval.length() > maxSize ) - maxSize = eval.length(); + if ( eval.getAlternateBases().length() < 100 ) { + sizes[eval.isDeletion() ? 0 : 1][eval.getAlternateBases().length()]++; + if ( eval.getAlternateBases().length() > maxSize ) + maxSize = eval.getAlternateBases().length(); } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/NeighborDistanceAnalysis.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/NeighborDistanceAnalysis.java index 5ba919403..f7fbc3723 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/NeighborDistanceAnalysis.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/NeighborDistanceAnalysis.java @@ -1,15 +1,15 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; -import org.broadinstitute.sting.gatk.refdata.AllelicVariant; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.IntervalRod; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.refdata.IntervalRod; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.genotype.Variation; +import java.io.PrintStream; import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.io.PrintStream; /** * The Broad Institute @@ -26,7 +26,7 @@ public class NeighborDistanceAnalysis extends BasicVariantAnalysis implements Ge int minDistanceForFlagging = 5; int[] neighborWiseBoundries = {1, 2, 5, 10, 20, 50, 100, 1000, 10000}; - AllelicVariant lastVariant = null; + Variation lastVariation = null; GenomeLoc lastVariantInterval = null; PrintStream violationsOut = null; @@ -35,16 +35,16 @@ public class NeighborDistanceAnalysis extends BasicVariantAnalysis implements Ge neighborWiseDistances = new ArrayList(); } - public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { + public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { String r = null; if ( eval != null && eval.isSNP() ) { IntervalRod intervalROD = (IntervalRod)tracker.lookup("interval", null); GenomeLoc interval = intervalROD == null ? null : intervalROD.getLocation(); - if (lastVariant != null) { + if (lastVariation != null) { GenomeLoc eL = eval.getLocation(); - GenomeLoc lvL = lastVariant.getLocation(); + GenomeLoc lvL = lastVariation.getLocation(); if (eL.getContigIndex() == lvL.getContigIndex()) { long d = eL.distance(lvL); if ( lastVariantInterval != null && lastVariantInterval.compareTo(interval) != 0) { @@ -57,7 +57,7 @@ public class NeighborDistanceAnalysis extends BasicVariantAnalysis implements Ge } } - lastVariant = eval; + lastVariation = eval; lastVariantInterval = interval; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/TransitionTranversionAnalysis.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/TransitionTranversionAnalysis.java index 09a338058..004946854 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/TransitionTranversionAnalysis.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/TransitionTranversionAnalysis.java @@ -1,9 +1,10 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; -import org.broadinstitute.sting.gatk.refdata.AllelicVariant; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.genotype.Variation; import java.util.ArrayList; import java.util.List; @@ -13,10 +14,9 @@ import java.util.List; * SOFTWARE COPYRIGHT NOTICE AGREEMENT * This software and its documentation are copyright 2009 by the * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * + *

* This software is supplied without any warranty or guaranteed support whatsoever. Neither * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * */ public class TransitionTranversionAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis { long nTransitions = 0, nTransversions = 0; @@ -25,13 +25,18 @@ public class TransitionTranversionAnalysis extends BasicVariantAnalysis implemen super("transitions_transversions"); } - public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { - if ( eval != null && eval.isSNP() ) { - char refBase = eval.getRefSnpFWD(); - char altBase = eval.getAltSnpFWD(); + public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { + if (eval != null && eval.isSNP()) { + if (eval.getAlternateBases().length() != 2) { + throw new StingException("TransitionTranversionAnalysis works only with biallelic variants"); + } + + + char refBase = eval.getReference(); + char altBase = (eval.getAlternateBases().charAt(0) == refBase) ? eval.getAlternateBases().charAt(1) : eval.getAlternateBases().charAt(0); BaseUtils.BaseSubstitutionType subType = BaseUtils.SNPSubstitutionType(refBase, altBase); - if ( subType == BaseUtils.BaseSubstitutionType.TRANSITION ) + if (subType == BaseUtils.BaseSubstitutionType.TRANSITION) nTransitions++; else nTransversions++; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantAnalysis.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantAnalysis.java index 6add48a04..4f5c31b58 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantAnalysis.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantAnalysis.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; -import org.broadinstitute.sting.gatk.refdata.AllelicVariant; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.genotype.Variation; import java.io.PrintStream; import java.util.List; @@ -23,7 +23,7 @@ public interface VariantAnalysis { public PrintStream getCallPrintStream(); public List getParams(); public void initialize(VariantEvalWalker master, PrintStream out, PrintStream callOut, String filename); - public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context); + public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context); public void finalize(long nSites); public List done(); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantCounter.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantCounter.java index b51754a47..232b2521c 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantCounter.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantCounter.java @@ -1,12 +1,13 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; -import org.broadinstitute.sting.gatk.refdata.AllelicVariant; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.utils.GenotypeUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype; +import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; +import org.broadinstitute.sting.utils.genotype.Variation; -import java.util.List; import java.util.ArrayList; +import java.util.List; /** * The Broad Institute @@ -27,10 +28,10 @@ public class VariantCounter extends BasicVariantAnalysis implements GenotypeAnal super("variant_counts"); } - public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { + public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { nSNPs += eval == null ? 0 : 1; - if ( this.getMaster().evalContainsGenotypes && eval != null && eval.isSNP() && GenotypeUtils.isHet(eval) ) + if ( this.getMaster().evalContainsGenotypes && eval != null && eval.isSNP() && ((VariantBackedByGenotype)eval).getGenotype( DiploidGenotype.valueOf(eval.getAlternateBases())).isHet() ) nHets++; return null; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantDBCoverage.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantDBCoverage.java index 39df49aa5..ef35537d7 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantDBCoverage.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantDBCoverage.java @@ -1,21 +1,20 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; -import org.broadinstitute.sting.gatk.refdata.AllelicVariant; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.genotype.Variation; -import java.util.List; import java.util.ArrayList; +import java.util.List; /** * The Broad Institute * SOFTWARE COPYRIGHT NOTICE AGREEMENT * This software and its documentation are copyright 2009 by the * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * + *

* This software is supplied without any warranty or guaranteed support whatsoever. Neither * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * */ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis { private String dbName; @@ -31,45 +30,81 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA dbName = name; } - public void inc(AllelicVariant dbSNP, AllelicVariant eval) { + public void inc(Variation dbSNP, Variation eval) { boolean inDB = dbSNP != null; boolean inEval = eval != null; - if (inDB ) { - if ( dbSNP.isSNP() ) nDBSNPs++; - if ( dbSNP.isIndel() ) nDBIndels++; - - //System.out.printf("snp=%b ins=%b del=%b indel=%b %s%n", dbSNP.isSNP(), dbSNP.isInsertion(), dbSNP.isDeletion(), dbSNP.isIndel(), dbSNP); + if (inDB) { + if (dbSNP.isSNP()) nDBSNPs++; + if (dbSNP.isIndel()) nDBIndels++; } if (inEval) nEvalObs++; if (inDB && inEval) { - if ( dbSNP.isSNP() ) { // changes the calculation a bit + if (dbSNP.isSNP()) { // changes the calculation a bit nOverlapping++; - if ( ! discordantP(dbSNP, eval) ) + if (!discordantP(dbSNP, eval)) nConcordant++; } - if ( dbSNP.isIndel() && eval.isSNP() ) + if (dbSNP.isIndel() && eval.isSNP()) nSNPsCalledAtIndels++; } } - public int nDBSNPs() { return nDBSNPs; } - public int nDBIndels() { return nDBIndels; } - public int nEvalSites() { return nEvalObs; } - public int nOverlappingSites() { return nOverlapping; } - public int nConcordant() { return nConcordant; } - public int nNovelSites() { return Math.abs(nEvalSites() - nOverlappingSites()); } - public int nSNPsAtIndels() { return nSNPsCalledAtIndels; } + public int callCount = 0; - public boolean discordantP(AllelicVariant dbSNP, AllelicVariant eval) { - if (dbSNP != null && dbSNP.isSNP() && eval != null ) { - return ! (dbSNP.getAltSnpFWD() == eval.getAltSnpFWD() || dbSNP.getRefSnpFWD() == eval.getAltSnpFWD()); - } else { - return false; + public int nDBSNPs() { + return nDBSNPs; + } + + public int nDBIndels() { + return nDBIndels; + } + + public int nEvalSites() { + return nEvalObs; + } + + public int nOverlappingSites() { + return nOverlapping; + } + + public int nConcordant() { + return nConcordant; + } + + public int nNovelSites() { + return Math.abs(nEvalSites() - nOverlappingSites()); + } + + public int nSNPsAtIndels() { + return nSNPsCalledAtIndels; + } + public static int count = 0; + public boolean discordantP(Variation dbSNP, Variation eval) { + if (eval != null) { + char alt = (eval.isSNP()) ? eval.getAlternativeBaseForSNP() : eval.getReference(); + + if (dbSNP != null && dbSNP.isSNP()) { + + boolean val = !dbSNP.getAlternateBases().contains(String.valueOf(alt)); + + if (val) { + //System.err.print(eval.getLocation()); + //System.err.print(" eval.alt = " + alt); + //System.err.println(" dbsnp.bases = " + dbSNP.getAlternateBases()); + //System.err.println(val); + + //System.err.println(count); + } + //else System.err.println("not count == " + count++); + + return val; + } } + return false; } @@ -86,17 +121,19 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA return nConcordant() / (1.0 * nOverlappingSites()); } - public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { + public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { + // There are four cases here: - AllelicVariant dbsnp = (AllelicVariant)tracker.lookup(dbName, null); + Variation dbsnp = (Variation) tracker.lookup(dbName, null); boolean isSNP = dbsnp != null && dbsnp.isSNP(); inc(dbsnp, eval); - if ( dbsnp != null && eval != null ) { - if ( dbsnp.isSNP() && eval.isSNP() && discordantP(dbsnp, eval) ) { + if (dbsnp != null && eval != null) { + + if (dbsnp.isSNP() && eval.isSNP() && discordantP(dbsnp, eval)) { return String.format("Discordant [DBSNP %s] [EVAL %s]", dbsnp, eval); - } else if ( dbsnp.isIndel() && eval.isSNP() ) { - return String.format("SNP-at-indel DBSNP=%s %s", dbsnp.getGenotype().get(0), eval); + } else if (dbsnp.isIndel() && eval.isSNP()) { + return String.format("SNP-at-indel DBSNP=%s %s", dbsnp.getAlternateBases(), eval); } else { return null; // return "Novel " + eval; @@ -128,10 +165,10 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA s.add(String.format("n_concordant %d", nConcordant())); s.add(String.format("n_novel_sites %d", nNovelSites())); - s.add(String.format("percent_eval_sites_in_db %.2f", 100*fractionEvalSitesCoveredByDB())); - s.add(String.format("concordance_rate %.2f", 100*concordanceRate())); + s.add(String.format("percent_eval_sites_in_db %.2f", 100 * fractionEvalSitesCoveredByDB())); + s.add(String.format("concordance_rate %.2f", 100 * concordanceRate())); - s.add(String.format("percent_db_sites_in_eval %.2f", 100*fractionDBSitesDiscoveredInEval())); + s.add(String.format("percent_db_sites_in_eval %.2f", 100 * fractionDBSitesDiscoveredInEval())); s.add(String.format("n_snp_calls_at_indels %d", nSNPsAtIndels())); s.add(String.format("percent_calls_at_indels %.2f", nSNPsAtIndels() / (0.01 * nEvalSites()))); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java index 8b3bd115e..20d3654d9 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java @@ -2,14 +2,12 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.AllelicVariant; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.SNPCallFromGenotypes; -import org.broadinstitute.sting.gatk.refdata.IntervalRod; +import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.genotype.Variation; import java.io.File; import java.io.FileNotFoundException; @@ -28,8 +26,8 @@ import java.util.*; * */ @By(DataSource.REFERENCE) -@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="eval",type=AllelicVariant.class)}) -@Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=AllelicVariant.class), @RMD(name="dbsnp",type=AllelicVariant.class),@RMD(name="hapmap-chip",type=AllelicVariant.class), @RMD(name="interval",type=IntervalRod.class)}) +@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="eval",type=RodGeliText.class)}) // right now we have no base variant class for rods, this should change +@Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=RodGeliText.class), @RMD(name="dbsnp",type=rodDbSNP.class),@RMD(name="hapmap-chip",type=RodGenotypeChipAsGFF.class), @RMD(name="interval",type=IntervalRod.class)}) public class VariantEvalWalker extends RefWalker { @Argument(shortName="minConfidenceScore", doc="Minimum confidence score to consider an evaluation SNP a variant", required=false) public int minConfidenceScore = -1; @@ -181,19 +179,18 @@ public class VariantEvalWalker extends RefWalker { public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { nSites++; - // Iterate over each analysis, and update it - AllelicVariant eval = (AllelicVariant)tracker.lookup("eval", null); + Variation eval = (Variation)tracker.lookup("eval", null); //if ( eval!=null ) System.out.printf("Eval: %f %d %b%n", eval.getVariationConfidence(), minDiscoveryQ, eval.getVariationConfidence() < minDiscoveryQ); if ( eval != null ) { // comment to disable variant filtering by confidence score if ( evalContainsGenotypes ) { // Genotyping - use best vs. next best lod - if ( eval.getConsensusConfidence() < minConfidenceScore ) eval = null; + if ( eval.getNegLog10PError() < minConfidenceScore ) eval = null; } else { - // Variant discovery - use best vs. reference lod - if ( Math.abs(eval.getVariationConfidence()) < minConfidenceScore ) eval = null; + // Variation discovery - use best vs. reference lod + if ( Math.abs(eval.getNegLog10PError()) < minConfidenceScore ) eval = null; } } @@ -202,7 +199,7 @@ public class VariantEvalWalker extends RefWalker { // update the known / novel set by checking whether the knownSNPDBName track has an entry here if ( eval != null ) { - AllelicVariant dbsnp = (AllelicVariant)tracker.lookup(knownSNPDBName, null); + Variation dbsnp = (Variation)tracker.lookup(knownSNPDBName, null); String noveltySet = dbsnp == null ? NOVEL_SNPS : KNOWN_SNPS; updateAnalysisSet(noveltySet, eval, tracker, ref.getBase(), context); } @@ -219,7 +216,7 @@ public class VariantEvalWalker extends RefWalker { } - public void updateAnalysisSet(final String analysisSetName, AllelicVariant eval, + public void updateAnalysisSet(final String analysisSetName, Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { // Iterate over each analysis, and update it if ( getAnalysisSet(analysisSetName) != null ) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantMatcher.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantMatcher.java index d1eab6eb8..14ef39fbe 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantMatcher.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantMatcher.java @@ -1,8 +1,9 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.refdata.AllelicVariant; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.utils.genotype.Variation; /** * The Broad Institute @@ -22,7 +23,7 @@ public class VariantMatcher extends BasicVariantAnalysis implements GenotypeAnal dbName = name; } - public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { + public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { String r = null; AllelicVariant db = (AllelicVariant)tracker.lookup(dbName, null); diff --git a/java/src/org/broadinstitute/sting/utils/GenotypeUtils.java b/java/src/org/broadinstitute/sting/utils/GenotypeUtils.java index bca4e0508..be62d6ced 100644 --- a/java/src/org/broadinstitute/sting/utils/GenotypeUtils.java +++ b/java/src/org/broadinstitute/sting/utils/GenotypeUtils.java @@ -20,7 +20,7 @@ public class GenotypeUtils { // VariantType() {} } - /** This method accepts rods that implement either Genotype or GenotypeList interface (all others will result in an exception). Variant + /** This method accepts rods that implement either Genotype or GenotypeList interface (all others will result in an exception). Variant * (Genotype object) of the specified type (point mutation or indel) will be extracted from GenotypeList rod if such variant exists, or the rod itself * will be typecasted and returned back if it implements Genotype and represents the specified variant type. If the last argument is false, then * null will be returned in all other cases. If the last argument is true and either a) rod is a GenotypeList that lacks a call of the specified type, but call diff --git a/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java b/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java new file mode 100644 index 000000000..ac295533c --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java @@ -0,0 +1,143 @@ +package org.broadinstitute.sting.utils.genotype; + +import org.broadinstitute.sting.utils.GenomeLoc; + + +/** + * + * @author aaron + * + * Class BasicGenotype + * + * represents a basic genotype object + */ +public class BasicGenotype implements Genotype { + // the genotype string + private String mGenotype; + private GenomeLoc mLocation; + private char mRef; + private double mNetLog10PError; + + /** + * create a basic genotype + * @param genotype + */ + public BasicGenotype(GenomeLoc location, String genotype, char ref, double netLog10PError) { + mNetLog10PError = netLog10PError; + mGenotype = genotype; + mLocation = location; + mRef = ref; + } + + /** + * get the -1 * (log 10 of the error value) + * + * @return the log based error estimate + */ + @Override + public double getNegLog10PError() { + return mNetLog10PError; + } + + /** + * get the bases that represent this + * + * @return the bases, as a string + */ + @Override + public String getBases() { + return mGenotype; + } + + /** + * get the ploidy + * + * @return the ploidy value + */ + @Override + public int getPloidy() { + return mGenotype.length(); + } + + /** + * Returns true if both observed alleles are the same (regardless of whether they are ref or alt) + * + * @return true if we're homozygous, false otherwise + */ + @Override + public boolean isHom() { + if (mGenotype.length() < 1) + return false; + + char base = mGenotype.charAt(0); + for (char cur : mGenotype.toCharArray()) { + if (base != cur) { + return false; + } + } + return true; + } + + /** + * Returns true if observed alleles differ (regardless of whether they are ref or alt) + * + * @return true if we're het, false otherwise + */ + @Override + public boolean isHet() { + return !isHom(); + } + + /** + * get the genotype's location + * + * @return a GenomeLoc representing the location + */ + @Override + public GenomeLoc getLocation() { + return mLocation; + } + + /** + * returns true if the genotype is a point genotype, false if it's a indel / deletion + * + * @return true is a SNP + */ + @Override + public boolean isPointGenotype() { + return true; + } + + /** + * given the reference, are we a variant? (non-ref) + * + * @param ref the reference base or bases + * + * @return true if we're a variant + */ + @Override + public boolean isVariant(char ref) { + return !(mGenotype.charAt(0) == ref && isHom()); + } + + /** + * get the reference base. + * + * @return a character, representing the reference base + */ + @Override + public char getReference() { + return mRef; + } + + /** + * return this genotype as a variant + * + * @return the variant + */ + @Override + public Variation toVariation() { + if (!isVariant(this.mRef)) throw new IllegalStateException("this genotype is not a variant"); + return new BasicVariation(this.getBases(),mRef,this.getBases().length(),mLocation,mNetLog10PError); + } +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/BasicVariation.java b/java/src/org/broadinstitute/sting/utils/genotype/BasicVariation.java index 6ae281e31..d258da444 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/BasicVariation.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/BasicVariation.java @@ -8,7 +8,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; * User: aaronmckenna * Date: Sep 9, 2009 * Time: 9:32:34 PM - * + *

* a basic implementation of variant */ public class BasicVariation implements Variation { @@ -18,12 +18,14 @@ public class BasicVariation implements Variation { protected final int mLength; protected final GenomeLoc mLocation; protected final double mConfidence; + /** * the constructor - * @param bases the bases that this variant represents + * + * @param bases the bases that this variant represents * @param reference the reference base - * @param length are we a single base variant, or a indel/deletion? length is negitive for an indel, - * positive for a indel, and 0 for a substitution + * @param length are we a single base variant, or a indel/deletion? length is negitive for an indel, + * positive for a indel, and 0 for a substitution */ public BasicVariation(String bases, char reference, int length, GenomeLoc location, double confidence) { mBases = bases; @@ -40,14 +42,14 @@ public class BasicVariation implements Variation { @Override public VARIANT_TYPE getType() { - if (mLength >0) return VARIANT_TYPE.INDEL; - if (mLength <0) return VARIANT_TYPE.DELETION; + if (mLength > 0) return VARIANT_TYPE.INDEL; + if (mLength < 0) return VARIANT_TYPE.DELETION; return (isSNP()) ? VARIANT_TYPE.SNP : VARIANT_TYPE.REFERENCE; } @Override public boolean isSNP() { - if (mLength == 0 && Utils.dupString(mRef,2).equals(mBases)) return true; + if (mLength == 0 && Utils.dupString(mRef, 2).equals(mBases)) return true; return false; } @@ -62,7 +64,7 @@ public class BasicVariation implements Variation { } @Override - public String getBases() { + public String getAlternateBases() { return mBases; } @@ -72,19 +74,8 @@ public class BasicVariation implements Variation { } @Override - public String getReference() { - return String.valueOf(mRef); - } - - @Override - public boolean isHet() { - if (mLength == 0 && mBases.charAt(0) != mBases.charAt(1)) return true; - return false; - } - - @Override - public boolean isHom() { - return !isHet(); + public char getReference() { + return (mRef); } @Override @@ -98,12 +89,29 @@ public class BasicVariation implements Variation { return false; } + /** + * are we an insertion or a deletion? yes, then return true. No? Well, false it is. + * + * @return true if we're an insertion or deletion + */ @Override - public char getAlternateBase() { - if (mLength != 0) { - throw new UnsupportedOperationException("Unable to get alternate base for indel / deletion"); - } - if (mBases.charAt(0) == mRef) return mBases.charAt(1); - else return mBases.charAt(0); + public boolean isIndel() { + return (isDeletion() || isInsertion()); } + + /** + * gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case + * of + * + * @return a char, representing the alternate base + */ + @Override + public char getAlternativeBaseForSNP() { + if (!this.isSNP()) throw new IllegalStateException("we're not a SNP"); + if (getAlternateBases().charAt(0) == this.getReference()) + return getAlternateBases().charAt(1); + return getAlternateBases().charAt(0); + } + + } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/VariantBackedByGenotype.java b/java/src/org/broadinstitute/sting/utils/genotype/VariantBackedByGenotype.java new file mode 100644 index 000000000..964dc9521 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/VariantBackedByGenotype.java @@ -0,0 +1,18 @@ +package org.broadinstitute.sting.utils.genotype; + +import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype; + +/** + * @author aaron + *

+ * Interface VariantBackedByGenotype + *

+ * this variant is backed by genotypic information + */ +public interface VariantBackedByGenotype { + /** + * get the likelihoods + * @return an array in lexigraphical order of the likelihoods + */ + public Genotype getGenotype(DiploidGenotype x); +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Variation.java b/java/src/org/broadinstitute/sting/utils/genotype/Variation.java index 03637d256..5954f661a 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/Variation.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/Variation.java @@ -46,13 +46,6 @@ public interface Variation { */ public boolean isDeletion(); - /** - * get the base representation of this Variant - * - * @return a string, of ploidy - */ - public String getBases(); - /** * get the location that this Variant represents * @@ -65,13 +58,7 @@ public interface Variation { * * @return the reference base or bases, as a string */ - public String getReference(); - - /** is our base representation heterozygous */ - public boolean isHet(); - - /** is our base representation homozygous */ - public boolean isHom(); + public char getReference(); /** * get the -1 * (log 10 of the error value) @@ -87,9 +74,22 @@ public interface Variation { public boolean isReference(); /** - * gets the alternate base. If this is homref, throws an UnsupportedOperationException + * gets the alternate bases. If this is homref, throws an UnsupportedOperationException * @return */ - public char getAlternateBase(); + public String getAlternateBases(); + + /** + * are we an insertion or a deletion? yes, then return true. No? Well, false it is. + * @return true if we're an insertion or deletion + */ + public boolean isIndel(); + + /** + * gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case + * of + * @return a char, representing the alternate base + */ + public char getAlternativeBaseForSNP(); } diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java index e66000fd0..886ab7747 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java @@ -8,14 +8,12 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.genotype.LikelihoodObject; import org.junit.Assert; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import java.io.File; import java.util.ArrayList; -import java.util.List; /** * Created by IntelliJ IDEA. @@ -60,38 +58,7 @@ public class RodGLFTest extends BaseTest { assertEquals(finalRecordCount, counter); } - /** make sure we're returning true to biallelic */ - @Test - public void testIsBiallelic() { - RodGLF glf = iter.next(); - Assert.assertFalse(iter.isBiallelic()); - } - // best vs next-best - @Test - public void testGetConsensusConfidence() { - RodGLF glf = iter.next(); - assertEquals(120.0, iter.getConsensusConfidence(), 0.005); - glf = iter.next(); - assertEquals(120.0, iter.getConsensusConfidence(), 0.005); - } - - // best vs. ref - @Test - public void testGetVariationConfidence() { - RodGLF glf = iter.next(); - assertEquals(0.0, iter.getVariationConfidence(), 0.005); - glf = iter.next(); - assertEquals(250.0, iter.getVariationConfidence(), 0.005); - } - - - @Test - public void testGetGenotype() { - RodGLF glf = iter.next(); - List str = iter.getGenotype(); - Assert.assertTrue(str.get(0).equals("AA")); - } @Test public void testIsSNP() { @@ -112,7 +79,7 @@ public class RodGLFTest extends BaseTest { glf = iter.next(); Assert.assertFalse(iter.isReference()); } - + /* @Test(expected = IllegalStateException.class) public void testGetAltSnpFWDIllegalException() { RodGLF glf = iter.next(); @@ -159,7 +126,7 @@ public class RodGLFTest extends BaseTest { /** * move to the second and third bases, and check that the * alternate bases are correct. - */ + * @Test public void testGetAltBasesFWD() { RodGLF glf = iter.next(); @@ -169,7 +136,7 @@ public class RodGLFTest extends BaseTest { Assert.assertTrue("CT".equals(iter.getAltBasesFWD())); } - +*/ @Test public void testRodLocations() { GenomeLoc loc = null; @@ -177,7 +144,7 @@ public class RodGLFTest extends BaseTest { RodGLF glf = iter.next(); if (loc != null) { if (iter.getLocation().isBefore(loc)) { - fail("locations in the GLF came out of order loc = " + loc.toString() + " new loc = " + iter.getLocation().toString()); + Assert.fail("locations in the GLF came out of order loc = " + loc.toString() + " new loc = " + iter.getLocation().toString()); } } loc = iter.getLocation(); diff --git a/java/test/org/broadinstitute/sting/utils/genotype/BasicGenotypeTest.java b/java/test/org/broadinstitute/sting/utils/genotype/BasicGenotypeTest.java new file mode 100644 index 000000000..c80ac3cc7 --- /dev/null +++ b/java/test/org/broadinstitute/sting/utils/genotype/BasicGenotypeTest.java @@ -0,0 +1,62 @@ +package org.broadinstitute.sting.utils.genotype; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.File; +import java.io.FileNotFoundException; + + +/** + * + * @author aaron + * + * Class BasicGenotypeTest + * + * tests the basic genotype class + */ +public class BasicGenotypeTest extends BaseTest { + private static IndexedFastaSequenceFile seq; + + @BeforeClass + public static void beforeTests() { + try { + seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); + } catch (FileNotFoundException e) { + throw new StingException("unable to load the sequence dictionary"); + } + GenomeLocParser.setupRefContigOrdering(seq); + + } + + @Test + public void testBasicGenotypeIsHom() { + BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0); + Assert.assertTrue(gt.isHom()); + BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0); + Assert.assertTrue(!gt2.isHom()); + } + + @Test + public void testBasicGenotypeIsHet() { + BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0); + Assert.assertTrue(!gt.isHet()); + BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0); + Assert.assertTrue(gt2.isHet()); + } + + @Test + public void testBasicGenotypeIsVariant() { + BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0); + Assert.assertTrue(!gt.isVariant('A')); + BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0); + Assert.assertTrue(gt2.isVariant('A')); + BasicGenotype gt3 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"TT",'A',0); + Assert.assertTrue(gt3.isVariant('A')); + } +}