Removing some outdated ROD classes (HapMapAlleleFrequenciesROD, PooledEMSNPROD and SangerSNPROD), removing an out-of-date interface (VariantBackedByGenotype), and moving AnalyzeAnnotationsWalker over to VariantContext.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3188 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-04-16 18:59:29 +00:00
parent d5e5589b8f
commit e11ca74eb5
8 changed files with 10 additions and 460 deletions

View File

@ -1,109 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import net.sf.samtools.util.SequenceUtil;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import java.util.Arrays;
import java.util.List;
/**
 * Reference-ordered datum holding one record of HapMap allele-frequency data.
 *
 * A record is populated from a pre-split line by {@link #parseLine(Object, String[])}.
 * Example record (tab separated):
 * <pre>
 * rs11511647 HG18 chr10 62765 + T/C T C 21 97 0.178 0.822 118
 * </pre>
 */
public class HapMapAlleleFrequenciesROD extends BasicReferenceOrderedDatum {
    /** Genome location of the SNP; start and stop are the same position. */
    public GenomeLoc loc;
    /** dbSNP rs number for this site (column 0). */
    public String rsNumber;
    /** Genome build identifier (column 1). */
    public String hgBuild;
    /** NOTE(review): never assigned in this class; the strand actually parsed is {@link #strand}. */
    public char Strand;
    /** First character of the reference-allele column (column 6). */
    public char refAllele;
    /** First character of the variant-allele column (column 7). */
    public char varAllele;
    /** Reference allele frequency (column 10). */
    public double refFreq;
    /** Variant allele frequency (column 11). */
    public double varFreq;
    /** Strand of the supplied alleles, as read from column 4. */
    public String strand;
    /** Allele string as read from column 5, e.g. "T/C". */
    public String alleles;
    /** Reference allele count (column 8). */
    public Integer refCounts;
    /** Variant allele count (column 9). */
    public Integer varCounts;
    /** Total count (column 12). */
    public Integer totalCounts;

    /**
     * Creates an empty record for the named track.
     *
     * @param name the track name, passed through to the superclass
     */
    public HapMapAlleleFrequenciesROD(String name) {
        super(name);
    }

    /** @return the location assigned by the last successful {@link #parseLine(Object, String[])} */
    public GenomeLoc getLocation() {
        return loc;
    }

    /**
     * Renders the record as a tab-separated line using the same column order as the input.
     *
     * @return the tab-separated representation of this record
     */
    public String toString() {
        // Column order mirrors the input: rs11511647 HG18 chr10 62765 + T/C T C 21 97 0.178 0.822 118
        final GenomeLoc where = getLocation();
        final String contigName = where.getContig();
        return String.format(
                "%s\t%s\t%s\t%d\t%s\t%s\t%s\t%s\t%d\t%d\t%1.3f\t%1.3f\t%d",
                rsNumber, hgBuild, contigName, getLocation().getStart(), strand, alleles, refAllele, varAllele,
                refCounts, varCounts, refFreq, varFreq, totalCounts);
    }

    /** @return a compact "rsNumber:alleles:strand:varFreq" summary */
    public String toSimpleString() {
        return String.format("%s:%s:%s:%1.3f", rsNumber, alleles, strand, varFreq);
    }

    /** @return the same text as {@link #toString()} */
    public String repl() {
        return toString();
    }

    /**
     * Fills this record from the columns of one input line.
     *
     * @param header unused by this implementation
     * @param parts  the columns of the line; indices 0 through 12 are read
     * @return always {@code true}; failures surface as the rethrown exception
     * @throws RuntimeException any runtime failure while reading or converting a column, rethrown
     *                          after the offending line has been printed to standard output
     */
    public boolean parseLine(final Object header, final String[] parts) {
        try {
            // rs11511647 <=> HG18 <=> chr10 <=> 62765 <=> + <=> T/C <=> T <=> C <=> 21 <=> 97 <=> 0.178 <=> 0.822 <=> 118
            rsNumber = parts[0];                                  // rs#
            hgBuild = parts[1];                                   // build
            final String chromosome = parts[2];                   // chrom
            final long position = Long.parseLong(parts[3]);       // noted as 1-based by the original author
            strand = parts[4];                                    // strand
            alleles = parts[5];                                   // alleles
            refAllele = parts[6].charAt(0);                       // ref_allele
            varAllele = parts[7].charAt(0);                       // var_allele
            refCounts = Integer.parseInt(parts[8]);               // CEU_ref
            varCounts = Integer.parseInt(parts[9]);               // CEU_var
            refFreq = Double.parseDouble(parts[10]);              // CEU_ref_freq
            varFreq = Double.parseDouble(parts[11]);              // CEU_var_freq
            totalCounts = Integer.parseInt(parts[12]);            // total count
            // A SNP occupies a single base, so start and stop coincide.
            loc = GenomeLocParser.parseGenomeLoc(chromosome, position, position);
        } catch ( RuntimeException e ) {
            System.out.printf("  Exception caught during parsing HapMap Allele Freq %s%n", Utils.join(" <=> ", parts));
            throw e;
        }
        return true;
    }

    /** @return the variant allele frequency */
    public double getVarAlleleFreq() {
        return this.varFreq;
    }

    /**
     * Splits the allele string on "/" after converting it to the forward strand.
     *
     * NOTE(review): for reverse-strand records the whole "A/B" string is reverse-complemented
     * before splitting, which presumably also swaps the order of the two alleles; confirm
     * that callers do not depend on allele order.
     *
     * @return the alleles as a fixed-size list
     */
    public List<String> getAllelesFWD() {
        final String forwardAlleles = onFwdStrand() ? alleles : SequenceUtil.reverseComplement(alleles);
        return Arrays.asList(forwardAlleles.split("/"));
    }

    /** @return {@code true} when the parsed strand column is exactly "+" */
    public boolean onFwdStrand() {
        return strand.equals("+");
    }
}

View File

@ -1,137 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.genotype.Genotype;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Tabular ROD exposing pooled-EM SNP calls as variations. Values are looked up by column name.
 *
 * Example input (header line followed by records):
 * <pre>
 * loc ref alt EM_alt_freq discovery_likelihood discovery_null discovery_prior discovery_lod EM_N n_ref n_het n_hom
 * chr1:1104840 A N 0.000000 -85.341265 -85.341265 0.000000 0.000000 324.000000 162 0 0
 * chr1:1104841 C N 0.000000 -69.937928 -69.937928 0.000000 0.000000 324.000000 162 0 0
 * chr1:1104842 A N 0.000000 -84.816002 -84.816002 0.000000 0.000000 324.000000 162 0 0
 * </pre>
 */
public class PooledEMSNPROD extends TabularROD implements SNPCallFromGenotypes, VariationRod {
    /**
     * Creates an empty record for the named track.
     *
     * @param name the track name, passed through to the superclass
     */
    public PooledEMSNPROD(final String name) {
        super(name);
    }

    //GenomeLoc getLocation();

    /** @return the contents of the "ref" column */
    public String getRefBasesFWD() { return this.get("ref"); }

    /** @return the first character of the "ref" column */
    public char getRefSnpFWD() throws IllegalStateException { return getRefBasesFWD().charAt(0); }

    /** @return the contents of the "alt" column */
    public String getAltBasesFWD() { return this.get("alt"); }

    /** @return the first character of the "alt" column */
    public char getAltSnpFWD() throws IllegalStateException { return getAltBasesFWD().charAt(0); }

    /** @return {@code true} when the variation confidence is below 0.01 */
    public boolean isReference() { return getVariationConfidence() < 0.01; }

    /**
     * Get the frequency of the non-reference allele.
     *
     * @return the value of {@link #getMAF()}
     */
    @Override
    public double getNonRefAlleleFrequency() {
        return this.getMAF();
    }

    /** @return {@code VARIANT_TYPE.SNP} when {@link #isSNP()} holds, otherwise {@code VARIANT_TYPE.REFERENCE} */
    @Override
    public VARIANT_TYPE getType() {
        if (isSNP()) {
            return VARIANT_TYPE.SNP;
        }
        return VARIANT_TYPE.REFERENCE;
    }

    /** @return the negation of {@link #isReference()} */
    public boolean isSNP() { return ! isReference(); }

    /** @return always {@code false}; this ROD only describes SNPs */
    public boolean isInsertion() { return false; }

    /** @return always {@code false}; this ROD only describes SNPs */
    public boolean isDeletion() { return false; }

    /**
     * Get the reference base(s) at this position.
     *
     * @return the contents of the "ref" column
     */
    @Override
    public String getReference() {
        return this.get("ref");
    }

    /** @return always {@code false}; this ROD only describes SNPs */
    public boolean isIndel() { return false; }

    /**
     * Gets the alternate base in the case of a SNP. This implementation does not check
     * {@link #isSNP()} first; it simply returns the first character of the "alt" column.
     *
     * @return a char, representing the alternate base
     */
    @Override
    public char getAlternativeBaseForSNP() {
        return this.getAltSnpFWD();
    }

    /**
     * Gets the reference base in the case of a SNP. This implementation does not check
     * {@link #isSNP()} first; it simply returns the first character of the "ref" column.
     *
     * @return a char, representing the reference base
     */
    @Override
    public char getReferenceForSNP() {
        return this.getRefSnpFWD();
    }

    /** @return the "EM_alt_freq" column parsed as a double */
    public double getMAF() { return Double.parseDouble(this.get("EM_alt_freq")); }

    /** @return {@code 2 * f * (1 - f)} where {@code f} is {@link #getMAF()} */
    public double getHeterozygosity() {
        // Parse the frequency column once instead of twice.
        final double altFreq = getMAF();
        return 2 * altFreq * (1 - altFreq);
    }

    /** @return always {@code false} */
    public boolean isGenotype() { return false; }

    /**
     * NOTE(review): this reads a column named "lod", while the sample header in the class
     * comment names the column "discovery_lod"; confirm which name the input files use.
     *
     * @return the "lod" column parsed as a double
     */
    public double getVariationConfidence() { return Double.parseDouble(this.get("lod")); }

    /** @return always -1; no consensus confidence is available */
    public double getConsensusConfidence() { return -1; }

    /**
     * Not supported by this ROD.
     *
     * @throws IllegalStateException always
     */
    public List<String> getGenotype() throws IllegalStateException { throw new IllegalStateException(); }

    /** @return always 2 */
    public int getPloidy() throws IllegalStateException { return 2; }

    /** @return always {@code true} */
    public boolean isBiallelic() { return true; }

    /**
     * Get the -1 * (log 10 of the error value).
     *
     * @return the value of {@link #getVariationConfidence()}
     */
    @Override
    public double getNegLog10PError() {
        return this.getVariationConfidence();
    }

    /**
     * Gets the alternate alleles, NOT including the reference base.
     *
     * @return a fixed-size, single-element list holding the "alt" column
     */
    @Override
    public List<String> getAlternateAlleleList() {
        return Arrays.asList(getAltBasesFWD());
    }

    /**
     * Gets all alleles at this location: the reference allele first, followed by the
     * alternate alleles.
     *
     * @return a new mutable list of alleles, reference first
     */
    @Override
    public List<String> getAlleleList() {
        // Arrays.asList returns a fixed-size view, so calling addAll on it throws
        // UnsupportedOperationException; collect into a growable list instead.
        final List<String> alleles = new ArrayList<String>();
        alleles.add(this.getReference());
        alleles.addAll(getAlternateAlleleList());
        return alleles;
    }

    /** @return always 1, the length of a SNP */
    public int length() { return 1; }

    // SNPCallFromGenotypes interface

    /**
     * The sample records in the class comment write EM_N as a decimal ("324.000000"), which
     * Integer.parseInt rejects, so the column is parsed as a double and truncated. Plain
     * integer text is still accepted.
     *
     * @return the "EM_N" column as an int
     */
    public int nIndividuals() { return (int) Double.parseDouble(this.get("EM_N")); }

    /** @return the "n_ref" column parsed as an int */
    public int nHomRefGenotypes() { return Integer.parseInt(this.get("n_ref")); }

    /** @return the "n_het" column parsed as an int */
    public int nHetGenotypes() { return Integer.parseInt(this.get("n_het")); }

    /** @return the "n_hom" column parsed as an int */
    public int nHomVarGenotypes() { return Integer.parseInt(this.get("n_hom")); }

    /** @return always {@code null}; per-sample genotypes are not available from this ROD */
    public List<Genotype> getGenotypes() { return null; }
}

View File

@ -7,7 +7,6 @@ import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import java.util.*;
import java.util.regex.MatchResult;
@ -21,7 +20,7 @@ import java.util.regex.Pattern;
* Time: 10:47:14 AM
* To change this template use File | Settings | File Templates.
*/
public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements VariationRod, VariantBackedByGenotype {
public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements VariationRod {
private String contig, source, feature, strand, frame;
private long start, stop;
private double score;
@ -273,7 +272,6 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
*
* @return a map in lexigraphical order of the genotypes
*/
@Override
public Genotype getCalledGenotype() {
return new BasicGenotype(this.getLocation(),this.feature,Character.toString(this.getRefSnpFWD()),this.getConsensusConfidence());
}
@ -283,7 +281,6 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
*
* @return an array in lexigraphical order of the likelihoods
*/
@Override
public List<Genotype> getGenotypes() {
List<Genotype> ret = new ArrayList<Genotype>();
ret.add(new BasicGenotype(this.getLocation(),this.feature,Character.toString(this.getRefSnpFWD()),this.getConsensusConfidence()));
@ -298,7 +295,6 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
*
* @return true if available, false otherwise
*/
@Override
public boolean hasGenotype(DiploidGenotype x) {
if (!x.toString().equals(this.getAltBasesFWD())) return false;
return true;

View File

@ -1,150 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.genotype.Genotype;
import java.util.ArrayList;
import java.util.List;
/**
 * Tabular ROD for Sanger SNP calls. Columns are addressed by position: "0" is the contig,
 * "1" the position, "2" the reference bases and "3" the alternate bases.
 */
public class SangerSNPROD extends TabularROD implements SNPCallFromGenotypes {
    /**
     * Creates an empty record for the named track.
     *
     * @param name the track name, passed through to the superclass
     */
    public SangerSNPROD(final String name) {
        super(name);
    }

    /**
     * Builds the location from columns "0" and "1", stores it in {@code loc} and returns it.
     *
     * @return the location of this record
     */
    public GenomeLoc getLocation() {
        final String contig = this.get("0");
        final long position = Long.parseLong(this.get("1"));
        loc = GenomeLocParser.createGenomeLoc(contig, position);
        return loc;
    }

    /**
     * Get the reference base at this position.
     *
     * @return the first character of the reference column, as a string
     */
    @Override
    public String getReference() {
        final char firstRefBase = getRefBasesFWD().charAt(0);
        return String.valueOf(firstRefBase);
    }

    /** @return the contents of column "2" */
    public String getRefBasesFWD() {
        return this.get("2");
    }

    /** @return the first character of column "2" */
    public char getRefSnpFWD() throws IllegalStateException {
        return getRefBasesFWD().charAt(0);
    }

    /** @return the contents of column "3" */
    public String getAltBasesFWD() {
        return this.get("3");
    }

    /** @return the first character of column "3" */
    public char getAltSnpFWD() throws IllegalStateException {
        return getAltBasesFWD().charAt(0);
    }

    /**
     * NOTE(review): {@link #getVariationConfidence()} is hard-coded to -1 in this class, so
     * this comparison is always true here unless a subclass overrides it.
     *
     * @return {@code true} when the variation confidence is below 0.01
     */
    public boolean isReference() {
        return getVariationConfidence() < 0.01;
    }

    /**
     * Get the frequency of the non-reference allele.
     *
     * @return the value of {@link #getMAF()}
     */
    @Override
    public double getNonRefAlleleFrequency() {
        return this.getMAF();
    }

    /**
     * A convenience method for switching over the variation type.
     *
     * @return {@code VARIANT_TYPE.REFERENCE} when {@link #isReference()} holds, otherwise {@code VARIANT_TYPE.SNP}
     */
    @Override
    public VARIANT_TYPE getType() {
        return isReference() ? VARIANT_TYPE.REFERENCE : VARIANT_TYPE.SNP;
    }

    /** @return the negation of {@link #isReference()} */
    public boolean isSNP() {
        return ! isReference();
    }

    /** @return always {@code false} */
    public boolean isInsertion() {
        return false;
    }

    /** @return always {@code false} */
    public boolean isDeletion() {
        return false;
    }

    /** @return always {@code false} */
    public boolean isIndel() {
        return false;
    }

    /**
     * Gets the alternate base; no SNP check is performed by this implementation.
     *
     * @return the first character of the alternate column
     */
    @Override
    public char getAlternativeBaseForSNP() {
        return this.getAltSnpFWD();
    }

    /**
     * Gets the reference base; no SNP check is performed by this implementation.
     *
     * @return the first character of the reference column
     */
    @Override
    public char getReferenceForSNP() {
        return this.getRefSnpFWD();
    }

    /** @return always -1 */
    public double getMAF() {
        return -1;
    }

    /** @return always -1 */
    public double getHeterozygosity() {
        return -1;
    }

    /** @return always {@code false} */
    public boolean isGenotype() {
        return false;
    }

    /** @return always -1 */
    public double getVariationConfidence() {
        return -1;
    }

    /** @return always -1 */
    public double getConsensusConfidence() {
        return -1;
    }

    /**
     * Not supported by this ROD.
     *
     * @throws IllegalStateException always
     */
    public List<String> getGenotype() throws IllegalStateException {
        throw new IllegalStateException();
    }

    /** @return always 2 */
    public int getPloidy() throws IllegalStateException {
        return 2;
    }

    /** @return always {@code true} */
    public boolean isBiallelic() {
        return true;
    }

    /**
     * Get the -1 * (log 10 of the error value).
     *
     * @return the value of {@link #getVariationConfidence()}
     */
    @Override
    public double getNegLog10PError() {
        return this.getVariationConfidence();
    }

    /**
     * Gets the alternate alleles, NOT including the reference base. Every character of
     * column "3" becomes its own single-character allele.
     *
     * @return a new list of alternate alleles, in column order
     */
    @Override
    public List<String> getAlternateAlleleList() {
        final List<String> alternates = new ArrayList<String>();
        final char[] altBases = get("3").toCharArray();
        for (int i = 0; i < altBases.length; i++) {
            alternates.add(String.valueOf(altBases[i]));
        }
        return alternates;
    }

    /**
     * Gets all alleles at this location: the reference allele first, followed by one entry
     * per character of column "3".
     *
     * @return a new list of alleles, reference first
     */
    @Override
    public List<String> getAlleleList() {
        final List<String> everyAllele = new ArrayList<String>();
        everyAllele.add(this.getReference());
        final char[] altBases = get("3").toCharArray();
        for (int i = 0; i < altBases.length; i++) {
            everyAllele.add(String.valueOf(altBases[i]));
        }
        return everyAllele;
    }

    /** @return always 1, the length of a SNP */
    public int length() {
        return 1;
    }

    // SNPCallFromGenotypes interface

    /** @return always -1 */
    public int nIndividuals() {
        return -1;
    }

    /** @return always -1 */
    public int nHomRefGenotypes() {
        return -1;
    }

    /** @return always -1 */
    public int nHetGenotypes() {
        return -1;
    }

    /** @return always -1 */
    public int nHomVarGenotypes() {
        return -1;
    }

    /** @return always {@code null} */
    public List<Genotype> getGenotypes() {
        return null;
    }

    /** @return "contig", "start", reference bases and alternate bases joined by tabs */
    public String toString() {
        return getLocation().getContig() + "\t" + getLocation().getStart() + "\t"
                + getRefBasesFWD() + "\t" + getAltBasesFWD();
    }
}

View File

@ -56,15 +56,12 @@ public class RODTrackBuilder implements RMDTrackBuilder {
// All known ROD types
Types.put("GFF", RodGenotypeChipAsGFF.class);
Types.put("dbSNP", rodDbSNP.class);
Types.put("HapMapAlleleFrequencies", HapMapAlleleFrequenciesROD.class);
Types.put("SAMPileup", rodSAMPileup.class);
Types.put("GELI", rodGELI.class);
Types.put("RefSeq", rodRefSeq.class);
Types.put("Table", TabularROD.class);
Types.put("AnnotatorInputTable", AnnotatorROD.class);
Types.put("PooledEM", PooledEMSNPROD.class);
Types.put("CleanedOutSNP", CleanedOutSNPROD.class);
Types.put("SangerSNP", SangerSNPROD.class);
Types.put("SimpleIndel", SimpleIndelROD.class);
Types.put("PointIndel", PointIndelROD.class);
Types.put("HapMap", HapMapROD.class);

View File

@ -7,7 +7,6 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
@ -23,7 +22,7 @@ import java.util.List;
* User: chartl
* Date: Jan 29, 2010
*/
public class HapmapVCFROD extends BasicReferenceOrderedDatum implements VariationRod, VariantBackedByGenotype, Iterator<HapmapVCFROD> {
public class HapmapVCFROD extends BasicReferenceOrderedDatum implements VariationRod, Iterator<HapmapVCFROD> {
// This is a (hopefully temporary) wrapper class for certain VCF files that we want to protect from
// utilities that grab genotypes or sample names across all VCF files

View File

@ -2,14 +2,14 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodGLF;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import java.util.HashMap;
@ -112,23 +112,12 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
Object rod = feature.getUnderlyingObject();
if( rod != null && feature.getName().toUpperCase().startsWith("TRUTH") ) {
isInTruthSet = true;
// Next see if the truth sets say this site is variant or reference
if( rod instanceof RodVCF ) {
if( ((RodVCF) rod).isSNP() ) {
isTrueVariant = true;
}
} else if( rod instanceof RodGLF ) {
if( ((RodGLF) rod).isSNP() ) {
isTrueVariant = true;
}
} else if( rod instanceof VariantBackedByGenotype ) {
if( ((VariantBackedByGenotype) rod).getCalledGenotype().isVariant(ref.getBase()) ) {
isTrueVariant = true;
}
} else {
throw new StingException( "Truth ROD is of unknown ROD type: " + feature.getName() );
}
VariantContext variantContext = VariantContextAdaptors.toVariantContext(feature.getName(),rod);
// First check that the conversion to VC worked correctly; next see if the truth sets say this site is variant or reference
if (variantContext == null)
throw new StingException("Truth ROD is of type that can't be converted to a VariantContext ( type = " + feature.getName() + ")");
else if (variantContext.isSNP())
isTrueVariant = true;
}
}

View File

@ -1,35 +0,0 @@
package org.broadinstitute.sting.utils.genotype;
import java.util.List;
/**
 * Contract for a variant that is backed by genotypic information.
 *
 * @author aaron
 */
public interface VariantBackedByGenotype {
    /**
     * Returns the genotype that was called for this variant.
     *
     * @return a specific genotype that represents the called genotype
     */
    Genotype getCalledGenotype();

    /**
     * Returns the genotypes backing this variant.
     *
     * @return a list of genotypes (the original documentation describes them as being in
     *         lexicographical order)
     */
    List<Genotype> getGenotypes();

    /**
     * Reports whether the specified genotype is available; not every implementation
     * carries data for all genotypes.
     *
     * @param x the genotype to look for
     *
     * @return true if available, false otherwise
     */
    boolean hasGenotype(DiploidGenotype x);
}