diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/HapMapAlleleFrequenciesROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/HapMapAlleleFrequenciesROD.java deleted file mode 100644 index 0464d40a7..000000000 --- a/java/src/org/broadinstitute/sting/gatk/refdata/HapMapAlleleFrequenciesROD.java +++ /dev/null @@ -1,109 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata; - -import net.sf.samtools.util.SequenceUtil; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.Utils; - -import java.util.Arrays; -import java.util.List; - -/** - * ReferenceOrderedDatum class to hold HapMap AlleleFrequency Data - */ -public class HapMapAlleleFrequenciesROD extends BasicReferenceOrderedDatum { - public GenomeLoc loc; // genome location of SNP - // Reference sequence chromosome or scaffold - // Start and stop positions in chrom - - public String rsNumber; // dbsnp rsNumber for this site - - public String hgBuild; - - public char Strand; // strand of the supplied alleles - - public char refAllele; - public char varAllele; - - public double refFreq; - public double varFreq; - - - public String strand; // maybe we don't need these? - public String alleles; // maybe we don't need these? - public Integer refCounts; // maybe we don't need these? - public Integer varCounts; // maybe we don't need these? - public Integer totalCounts; // maybe we don't need these? - - public HapMapAlleleFrequenciesROD(String name) { - super(name); - } - - public GenomeLoc getLocation() { return loc; } - - public String toString() { - //rs11511647 HG18 chr10 62765 + T/C T C 21 97 0.178 0.822 118 - - return String.format( - "%s\t%s\t%s\t%d\t%s\t%s\t%s\t%s\t%d\t%d\t%1.3f\t%1.3f\t%d", - rsNumber, hgBuild, getLocation().getContig(), getLocation().getStart(), strand, alleles, refAllele, varAllele, - refCounts, varCounts, refFreq, varFreq, totalCounts); - } - - public String toSimpleString() { - return String.format("%s:%s:%s:%1.3f", rsNumber, alleles, strand, varFreq); - } - - public String repl() { - return toString(); - } - - public boolean parseLine(final Object header, final String[] parts) { - try { - // rs11511647 <=> HG18 <=> chr10 <=> 62765 <=> + <=> T/C <=> T <=> C <=> 21 <=> 97 <=> 0.178 <=> 0.822 <=> 118 - - rsNumber = parts[0]; //rs# - hgBuild = parts[1]; // build - - String contig = parts[2]; // chrom - long start = Long.parseLong(parts[3]); // The final is 1 based - long stop = start; - - strand = parts[4]; // strand - alleles = parts[5]; //alleles - refAllele = parts[6].charAt(0); // ref_allele - varAllele = parts[7].charAt(0); // var_allele - refCounts = Integer.parseInt(parts[8]); // CEU_ref - varCounts = Integer.parseInt(parts[9]); // CEU_var - refFreq = Double.parseDouble(parts[10]); // CEU_ref_freq - varFreq = Double.parseDouble(parts[11]); // CEU_var_freq - totalCounts = Integer.parseInt(parts[12]); // CEU_var - - loc = GenomeLocParser.parseGenomeLoc(contig, start, stop); - - } catch ( RuntimeException e ) { - System.out.printf(" Exception caught during parsing HapMap Allele Freq %s%n", Utils.join(" <=> ", parts)); - throw e; - } - return true; - } - - public double getVarAlleleFreq() { return this.varFreq; } - - public List getAllelesFWD() { - List alleleList; - if ( onFwdStrand() ) - alleleList = Arrays.asList(alleles.split("/")); - else - alleleList = Arrays.asList(SequenceUtil.reverseComplement(alleles).split("/")); - - return alleleList; - } - - public boolean onFwdStrand() { - return strand.equals("+"); - } - - - -} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/PooledEMSNPROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/PooledEMSNPROD.java deleted file mode 100755 index 9ad2b8ac9..000000000 --- a/java/src/org/broadinstitute/sting/gatk/refdata/PooledEMSNPROD.java +++ /dev/null @@ -1,137 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata; - -import org.broadinstitute.sting.utils.genotype.Genotype; - -import java.util.Arrays; -import java.util.List; - -/** - * loc ref alt EM_alt_freq discovery_likelihood discovery_null discovery_prior discovery_lod EM_N n_ref n_het n_hom - * chr1:1104840 A N 0.000000 -85.341265 -85.341265 0.000000 0.000000 324.000000 162 0 0 - * chr1:1104841 C N 0.000000 -69.937928 -69.937928 0.000000 0.000000 324.000000 162 0 0 - * chr1:1104842 A N 0.000000 -84.816002 -84.816002 0.000000 0.000000 324.000000 162 0 0 - * - */ -public class PooledEMSNPROD extends TabularROD implements SNPCallFromGenotypes, VariationRod { - public PooledEMSNPROD(final String name) { - super(name); - } - - //GenomeLoc getLocation(); - public String getRefBasesFWD() { return this.get("ref"); } - public char getRefSnpFWD() throws IllegalStateException { return getRefBasesFWD().charAt(0); } - public String getAltBasesFWD() { return this.get("alt"); } - public char getAltSnpFWD() throws IllegalStateException { return getAltBasesFWD().charAt(0); } - public boolean isReference() { return getVariationConfidence() < 0.01; } - - /** - * get the frequency of this variant - * - * @return VariantFrequency with the stored frequency - */ - @Override - public double getNonRefAlleleFrequency() { - return this.getMAF(); - } - - /** @return the VARIANT_TYPE of the current variant */ - @Override - public VARIANT_TYPE getType() { - if (isSNP()) { - return VARIANT_TYPE.SNP; - } - return VARIANT_TYPE.REFERENCE; - } - - public boolean isSNP() { return ! isReference(); } - public boolean isInsertion() { return false; } - public boolean isDeletion() { return false; } - - /** - * get the reference base(s) at this position - * - * @return the reference base or bases, as a string - */ - @Override - public String getReference() { - return this.get("ref"); - } - - public boolean isIndel() { return false; } - - /** - * gets the alternate base is the case of a SNP. Throws an IllegalStateException if we're not a SNP - * of - * - * @return a char, representing the alternate base - */ - @Override - public char getAlternativeBaseForSNP() { - return this.getAltSnpFWD(); - } - - /** - * gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP - * - * @return a char, representing the alternate base - */ - @Override - public char getReferenceForSNP() { - return this.getRefSnpFWD(); - } - - public double getMAF() { return Double.parseDouble(this.get("EM_alt_freq")); } - public double getHeterozygosity() { return 2 * getMAF() * (1 - getMAF()); } - public boolean isGenotype() { return false; } - public double getVariationConfidence() { return Double.parseDouble(this.get("lod")); } - public double getConsensusConfidence() { return -1; } - public List getGenotype() throws IllegalStateException { throw new IllegalStateException(); } - public int getPloidy() throws IllegalStateException { return 2; } - public boolean isBiallelic() { return true; } - - /** - * get the -1 * (log 10 of the error value) - * - * @return the log based error estimate - */ - @Override - public double getNegLog10PError() { - return this.getVariationConfidence(); - } - - /** - * gets the alternate alleles. This method should return all the alleles present at the location, - * NOT including the reference base. This is returned as a string list with no guarantee ordering - * of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest - * frequency). - * - * @return an alternate allele list - */ - @Override - public List getAlternateAlleleList() { - return Arrays.asList(getAltBasesFWD()); - } - - /** - * gets the alleles. This method should return all the alleles present at the location, - * including the reference base. The first allele should always be the reference allele, followed - * by an unordered list of alternate alleles. - * - * @return an alternate allele list - */ - @Override - public List getAlleleList() { - List alleles = Arrays.asList(this.getReference()); - alleles.addAll(getAlternateAlleleList()); - return alleles; - } - - public int length() { return 1; } - - // SNPCallFromGenotypes interface - public int nIndividuals() { return Integer.parseInt(this.get("EM_N")); } - public int nHomRefGenotypes() { return Integer.parseInt(this.get("n_ref")); } - public int nHetGenotypes() { return Integer.parseInt(this.get("n_het")); } - public int nHomVarGenotypes() { return Integer.parseInt(this.get("n_hom")); } - public List getGenotypes() { return null; } -} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodGenotypeChipAsGFF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodGenotypeChipAsGFF.java index 5a313e5d1..491924669 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodGenotypeChipAsGFF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodGenotypeChipAsGFF.java @@ -7,7 +7,6 @@ import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.genotype.BasicGenotype; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import org.broadinstitute.sting.utils.genotype.Genotype; -import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; import java.util.*; import java.util.regex.MatchResult; @@ -21,7 +20,7 @@ import java.util.regex.Pattern; * Time: 10:47:14 AM * To change this template use File | Settings | File Templates. */ -public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements VariationRod, VariantBackedByGenotype { +public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements VariationRod { private String contig, source, feature, strand, frame; private long start, stop; private double score; @@ -273,7 +272,6 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements * * @return a map in lexigraphical order of the genotypes */ - @Override public Genotype getCalledGenotype() { return new BasicGenotype(this.getLocation(),this.feature,Character.toString(this.getRefSnpFWD()),this.getConsensusConfidence()); } @@ -283,7 +281,6 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements * * @return an array in lexigraphical order of the likelihoods */ - @Override public List getGenotypes() { List ret = new ArrayList(); ret.add(new BasicGenotype(this.getLocation(),this.feature,Character.toString(this.getRefSnpFWD()),this.getConsensusConfidence())); @@ -298,7 +295,6 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements * * @return true if available, false otherwise */ - @Override public boolean hasGenotype(DiploidGenotype x) { if (!x.toString().equals(this.getAltBasesFWD())) return false; return true; diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/SangerSNPROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/SangerSNPROD.java deleted file mode 100755 index c4eff9ac9..000000000 --- a/java/src/org/broadinstitute/sting/gatk/refdata/SangerSNPROD.java +++ /dev/null @@ -1,150 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.genotype.Genotype; - -import java.util.ArrayList; -import java.util.List; - -public class SangerSNPROD extends TabularROD implements SNPCallFromGenotypes { - public SangerSNPROD(final String name) { - super(name); - } - - public GenomeLoc getLocation() { - loc = GenomeLocParser.createGenomeLoc(this.get("0"), Long.parseLong(this.get("1"))); - return loc; - } - - /** - * get the reference base(s) at this position - * - * @return the reference base or bases, as a string - */ - @Override - public String getReference() { - return String.valueOf(getRefBasesFWD().charAt(0)); - } - - public String getRefBasesFWD() { return this.get("2"); } - public char getRefSnpFWD() throws IllegalStateException { return getRefBasesFWD().charAt(0); } - public String getAltBasesFWD() { return this.get("3"); } - public char getAltSnpFWD() throws IllegalStateException { return getAltBasesFWD().charAt(0); } - public boolean isReference() { return getVariationConfidence() < 0.01; } - - /** - * get the frequency of this variant, if we're a variant. If we're reference this method - * should return 0. - * - * @return double with the stored frequency - */ - @Override - public double getNonRefAlleleFrequency() { - return this.getMAF(); - } - - /** - * A convenience method, for switching over the variation type - * - * @return the VARIANT_TYPE of the current variant - */ - @Override - public VARIANT_TYPE getType() { - if (isReference()) return VARIANT_TYPE.REFERENCE; - else return VARIANT_TYPE.SNP; - } - - public boolean isSNP() { return ! isReference(); } - public boolean isInsertion() { return false; } - public boolean isDeletion() { return false; } - public boolean isIndel() { return false; } - - /** - * gets the alternate base is the case of a SNP. Throws an IllegalStateException if we're not a SNP - * of - * - * @return a char, representing the alternate base - */ - @Override - public char getAlternativeBaseForSNP() { - return this.getAltSnpFWD(); - } - - /** - * gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP - * - * @return a char, representing the alternate base - */ - @Override - public char getReferenceForSNP() { - return this.getRefSnpFWD(); - } - - public double getMAF() { return -1; } - public double getHeterozygosity() { return -1; } - public boolean isGenotype() { return false; } - public double getVariationConfidence() { return -1; } - public double getConsensusConfidence() { return -1; } - public List getGenotype() throws IllegalStateException { throw new IllegalStateException(); } - public int getPloidy() throws IllegalStateException { return 2; } - public boolean isBiallelic() { return true; } - - /** - * get the -1 * (log 10 of the error value) - * - * @return the postive number space log based error estimate - */ - @Override - public double getNegLog10PError() { - return this.getVariationConfidence(); - } - - /** - * gets the alternate alleles. This method should return all the alleles present at the location, - * NOT including the reference base. This is returned as a string list with no guarantee ordering - * of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest - * frequency). - * - * @return an alternate allele list - */ - @Override - public List getAlternateAlleleList() { - List ret = new ArrayList(); - for (char c : get("3").toCharArray()) - ret.add(String.valueOf(c)); - return ret; - } - - /** - * gets the alleles. This method should return all the alleles present at the location, - * including the reference base. The first allele should always be the reference allele, followed - * by an unordered list of alternate alleles. - * - * @return an alternate allele list - */ - @Override - public List getAlleleList() { - List ret = new ArrayList(); - ret.add(this.getReference()); - for (char c : get("3").toCharArray()) - ret.add(String.valueOf(c)); - return ret; - } - - public int length() { return 1; } - - // SNPCallFromGenotypes interface - public int nIndividuals() { return -1; } - public int nHomRefGenotypes() { return -1; } - public int nHetGenotypes() { return -1; } - public int nHomVarGenotypes() { return -1; } - public List getGenotypes() { return null; } - - public String toString() { - StringBuffer sb = new StringBuffer(); - sb.append(getLocation().getContig() + "\t" + getLocation().getStart() + "\t"); - sb.append(getRefBasesFWD() + "\t" + getAltBasesFWD()); - return sb.toString(); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java index 1e270b7f3..13d95ad08 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java @@ -56,15 +56,12 @@ public class RODTrackBuilder implements RMDTrackBuilder { // All known ROD types Types.put("GFF", RodGenotypeChipAsGFF.class); Types.put("dbSNP", rodDbSNP.class); - Types.put("HapMapAlleleFrequencies", HapMapAlleleFrequenciesROD.class); Types.put("SAMPileup", rodSAMPileup.class); Types.put("GELI", rodGELI.class); Types.put("RefSeq", rodRefSeq.class); Types.put("Table", TabularROD.class); Types.put("AnnotatorInputTable", AnnotatorROD.class); - Types.put("PooledEM", PooledEMSNPROD.class); Types.put("CleanedOutSNP", CleanedOutSNPROD.class); - Types.put("SangerSNP", SangerSNPROD.class); Types.put("SimpleIndel", SimpleIndelROD.class); Types.put("PointIndel", PointIndelROD.class); Types.put("HapMap", HapMapROD.class); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/refdata/HapmapVCFROD.java b/java/src/org/broadinstitute/sting/oneoffprojects/refdata/HapmapVCFROD.java index 3b20574df..8109d3c77 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/refdata/HapmapVCFROD.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/refdata/HapmapVCFROD.java @@ -7,7 +7,6 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import org.broadinstitute.sting.utils.genotype.Genotype; -import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader; import org.broadinstitute.sting.utils.genotype.vcf.VCFReader; @@ -23,7 +22,7 @@ import java.util.List; * User: chartl * Date: Jan 29, 2010 */ -public class HapmapVCFROD extends BasicReferenceOrderedDatum implements VariationRod, VariantBackedByGenotype, Iterator { +public class HapmapVCFROD extends BasicReferenceOrderedDatum implements VariationRod, Iterator { // This is a (hopefully temporary) wrapper class for certain VCF files that we want to protect from // utilities that grab genotypes or sample names across all VCF files diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java index a89f16d0c..ec1d5ecb1 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java @@ -2,14 +2,14 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RodGLF; import org.broadinstitute.sting.gatk.refdata.RodVCF; +import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; import java.util.HashMap; @@ -112,23 +112,12 @@ public class AnalyzeAnnotationsWalker extends RodWalker { Object rod = feature.getUnderlyingObject(); if( rod != null && feature.getName().toUpperCase().startsWith("TRUTH") ) { isInTruthSet = true; - - // Next see if the truth sets say this site is variant or reference - if( rod instanceof RodVCF ) { - if( ((RodVCF) rod).isSNP() ) { - isTrueVariant = true; - } - } else if( rod instanceof RodGLF ) { - if( ((RodGLF) rod).isSNP() ) { - isTrueVariant = true; - } - } else if( rod instanceof VariantBackedByGenotype ) { - if( ((VariantBackedByGenotype) rod).getCalledGenotype().isVariant(ref.getBase()) ) { - isTrueVariant = true; - } - } else { - throw new StingException( "Truth ROD is of unknown ROD type: " + feature.getName() ); - } + VariantContext variantContext = VariantContextAdaptors.toVariantContext(feature.getName(),rod); + // First check that the conversion to VC worked correctly; next see if the truth sets say this site is variant or reference + if (variantContext == null) + throw new StingException("Truth ROD is of type that can't be converted to a VariantContext ( type = " + feature.getName() + ")"); + else if (variantContext.isSNP()) + isTrueVariant = true; } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/VariantBackedByGenotype.java b/java/src/org/broadinstitute/sting/utils/genotype/VariantBackedByGenotype.java deleted file mode 100644 index 3cb98e9f7..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/VariantBackedByGenotype.java +++ /dev/null @@ -1,35 +0,0 @@ -package org.broadinstitute.sting.utils.genotype; - -import java.util.List; - -/** - * @author aaron - *

- * Interface VariantBackedByGenotype - *

- * this variant is backed by genotypic information - */ -public interface VariantBackedByGenotype { - /** - * get the genotype - * - * @return a specific genotype that represents the called genotype - */ - public Genotype getCalledGenotype(); - - /** - * get the genotype - * - * @return a map in lexigraphical order of the genotypes - */ - public List getGenotypes(); - /** - * do we have the specified genotype? not all backedByGenotypes - * have all the genotype data. - * - * @param x the genotype - * - * @return true if available, false otherwise - */ - public boolean hasGenotype(DiploidGenotype x); -}