From f783cb30e0082f923fc9b72144671b8537b07ca4 Mon Sep 17 00:00:00 2001 From: aaron Date: Wed, 23 Sep 2009 18:24:05 +0000 Subject: [PATCH] adding an interface so that the current @Requires with ROD annotations work in walkers like VariantEval git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1700 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/refdata/RodGLF.java | 5 +- .../sting/gatk/refdata/RodGeliText.java | 5 +- .../gatk/refdata/RodGenotypeChipAsGFF.java | 2 +- .../sting/gatk/refdata/RodVCF.java | 2 +- .../sting/gatk/refdata/SimpleIndelROD.java | 86 ++++++++++++++++++- .../sting/gatk/refdata/VariationRod.java | 15 ++++ .../sting/gatk/refdata/rodDbSNP.java | 2 +- .../varianteval/VariantEvalWalker.java | 4 +- 8 files changed, 108 insertions(+), 13 deletions(-) create mode 100644 java/src/org/broadinstitute/sting/gatk/refdata/VariationRod.java diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java index 4977b195e..d203c948a 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java @@ -5,7 +5,6 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.genotype.LikelihoodObject; -import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.glf.GLFReader; import org.broadinstitute.sting.utils.genotype.glf.GLFRecord; import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall; @@ -14,9 +13,9 @@ import org.broadinstitute.sting.utils.genotype.glf.VariableLengthCall; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; +import java.util.ArrayList; import java.util.Iterator; import java.util.List; -import java.util.ArrayList; /** @@ -26,7 +25,7 @@ import 
java.util.ArrayList; *

* the rod class for GLF data. */ -public class RodGLF implements ReferenceOrderedDatum, Variation, Iterator { +public class RodGLF implements VariationRod, Iterator { static int count = 0; public GLFReader mReader; private final String mName; diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodGeliText.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodGeliText.java index c426c8720..cc8d6c603 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodGeliText.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodGeliText.java @@ -29,16 +29,15 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.genotype.BasicGenotype; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; -import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; -import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.Genotype; +import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; -public class RodGeliText extends BasicReferenceOrderedDatum implements Variation, VariantBackedByGenotype, AllelicVariant { +public class RodGeliText extends BasicReferenceOrderedDatum implements VariationRod, VariantBackedByGenotype, AllelicVariant { public enum Genotype_Strings { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodGenotypeChipAsGFF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodGenotypeChipAsGFF.java index 97f79db9e..dd67cde3e 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodGenotypeChipAsGFF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodGenotypeChipAsGFF.java @@ -18,7 +18,7 @@ import java.util.regex.Pattern; * Time: 10:47:14 AM * To change this template use File | Settings | File Templates. 
*/ -public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements AllelicVariant, Variation, VariantBackedByGenotype { +public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements AllelicVariant, VariationRod, VariantBackedByGenotype { private String contig, source, feature, strand, frame; private long start, stop; private double score; diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java index 17766e430..bbbc831ab 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java @@ -25,7 +25,7 @@ import java.util.List; *

* An implementation of the ROD for VCF. */ -public class RodVCF extends BasicReferenceOrderedDatum implements Variation, VariantBackedByGenotype, Iterator { +public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, VariantBackedByGenotype, Iterator { // our VCF related information private VCFReader mReader; public VCFRecord mCurrentRecord; diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/SimpleIndelROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/SimpleIndelROD.java index 85579645d..a21493719 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/SimpleIndelROD.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/SimpleIndelROD.java @@ -3,9 +3,10 @@ package org.broadinstitute.sting.gatk.refdata; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import java.util.*; +import java.util.Arrays; +import java.util.List; -public class SimpleIndelROD extends TabularROD implements Genotype, AllelicVariant { +public class SimpleIndelROD extends TabularROD implements Genotype, AllelicVariant, VariationRod { private boolean KGENOMES_FORMAT = false, checkedFormat = false; @@ -18,6 +19,16 @@ public class SimpleIndelROD extends TabularROD implements Genotype, AllelicVaria return GenomeLocParser.createGenomeLoc(this.get("0"), pos, (isDeletion() ? 
pos+length() : pos+1)); } + /** + * get the reference base(s) at this position + * + * @return the reference base or bases, as a string + */ + @Override + public String getReference() { + return String.valueOf(getRef()); + } + public List getFWDAlleles() { if ( is1KGFormat() ) return Arrays.asList(this.get("4")); @@ -36,8 +47,46 @@ public class SimpleIndelROD extends TabularROD implements Genotype, AllelicVaria public boolean isGenotype() { return false; } public boolean isPointGenotype() { return false; } public boolean isIndelGenotype() { return true; } + + /** + * get the frequency of this variant + * + * @return the non-reference allele frequency (always 0.0 for this ROD) + */ + @Override + public double getNonRefAlleleFrequency() { + return 0.0; + } + + /** @return the VARIANT_TYPE of the current variant */ + @Override + public VARIANT_TYPE getType() { + return VARIANT_TYPE.INDEL; + } + public boolean isSNP() { return false; } public boolean isReference() { return false; } + + /** + * gets the alternate base. Use this method if we're biallelic + * + * @return the alternate base as a string (always empty for this ROD) + */ + @Override + public String getAlternateBase() { + return ""; + } + + /** + * gets the alternate bases. Use this method if the allele count is greater than 2 + * + * @return the list of alternate bases (a single empty string for this ROD) + */ + @Override + public List getAlternateBases() { + return Arrays.asList(new String[]{""}); + } + public boolean isInsertion() { if ( is1KGFormat() ) return this.get("3").equals("I"); @@ -49,10 +98,43 @@ public class SimpleIndelROD extends TabularROD implements Genotype, AllelicVaria return this.get("3").charAt(0) == '-'; } public boolean isIndel() { return true; } + + /** + * gets the alternate base in the case of a SNP. Throws an IllegalStateException if we're not a SNP + * of + * + * @return a char, representing the alternate base + */ + @Override + public char getAlternativeBaseForSNP() { + return getAltSnpFWD(); + } + + /** + * gets the reference base in the case of a SNP.
Throws an IllegalStateException if we're not a SNP + * + * @return a char, representing the reference base + */ + @Override + public char getReferenceForSNP() { + return getRefSnpFWD(); + } + public double getVariantConfidence() { return 0.0; } public double getVariationConfidence() { return 0.0; } public double getConsensusConfidence() { return 0.0; } public boolean isBiallelic() { return true; } + + /** + * get the -1 * (log 10 of the error value) + * + * @return the log based error estimate + */ + @Override + public double getNegLog10PError() { + return getVariationConfidence(); + } + public boolean isHom() { return false; } public boolean isHet() { return false; } public double getHeterozygosity() { return 0.0; } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/VariationRod.java b/java/src/org/broadinstitute/sting/gatk/refdata/VariationRod.java new file mode 100644 index 000000000..474914ae2 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/VariationRod.java @@ -0,0 +1,15 @@ +package org.broadinstitute.sting.gatk.refdata; + +import org.broadinstitute.sting.utils.genotype.Variation; + +/** + * @author aaron + *

+ * Interface VariationRod + *

+ * This interface combines two interfaces: Variation and ReferenceOrderedDatum. This + * was required so that the reference ordered data require attribute would have an interface + * that both specified variation and ROD compliance. + */ +public interface VariationRod extends Variation, ReferenceOrderedDatum { +} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java index c74ff0335..f10504dc5 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java @@ -18,7 +18,7 @@ import java.util.List; * Time: 10:47:14 AM * To change this template use File | Settings | File Templates. */ -public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, VariantBackedByGenotype, AllelicVariant { +public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod, VariantBackedByGenotype, AllelicVariant { public GenomeLoc loc; // genome location of SNP // Reference sequence chromosome or scaffold // Start and stop positions in chrom diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java index 44ea6f7aa..91b82807e 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java @@ -26,8 +26,8 @@ import java.util.*; * */ @By(DataSource.REFERENCE) -@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="eval",type=RodGeliText.class)}) // right now we have no base variant class for rods, this should change -@Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=RodGeliText.class), @RMD(name="dbsnp",type=rodDbSNP.class),@RMD(name="hapmap-chip",type=RodGenotypeChipAsGFF.class), 
@RMD(name="interval",type=IntervalRod.class)}) +@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="eval",type=VariationRod.class)}) // VariationRod is the common interface for rods that are both a Variation and a ReferenceOrderedDatum +@Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=VariationRod.class), @RMD(name="dbsnp",type=rodDbSNP.class),@RMD(name="hapmap-chip",type=RodGenotypeChipAsGFF.class), @RMD(name="interval",type=IntervalRod.class)}) public class VariantEvalWalker extends RefWalker { @Argument(shortName="minConfidenceScore", doc="Minimum confidence score to consider an evaluation SNP a variant", required=false) public int minConfidenceScore = -1;