removing some out-of-date RODs and some unused genotype writer formats

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3304 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-05-05 19:07:13 +00:00
parent c998c48a23
commit 6bbcc47b5d
6 changed files with 1 additions and 493 deletions

View File

@ -1,24 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
/**
 * Tabular ROD whose records carry a locus string under key "0" and a
 * SNP-status flag under key "1".
 */
public class CleanedOutSNPROD extends TabularROD {

    /** Flag value that marks a record as a real SNP. */
    private static final String REAL_SNP_STRING = "SAME_SNP";

    /** Counterpart flag value; not referenced by any method of this class. */
    private static final String FALSE_SNP_STRING = "NOT_SNP";

    /**
     * @param name the name handed to the {@code TabularROD} constructor
     */
    public CleanedOutSNPROD(String name) {
        super(name);
    }

    /**
     * Parses the value stored under key "0" into a genome location.
     *
     * @return the location produced by {@code GenomeLocParser.parseGenomeLoc}
     */
    public GenomeLoc getLocation() {
        final String locusText = this.get("0");
        return GenomeLocParser.parseGenomeLoc(locusText);
    }

    /**
     * Reports whether the value stored under key "1" equals {@code "SAME_SNP"}.
     *
     * @return true only when the flag is present and matches; false when it is
     *         missing (null) or holds any other value
     */
    public boolean isRealSNP() {
        // Constant-first equals() yields false for a null flag without an explicit check.
        return REAL_SNP_STRING.equals(this.get("1"));
    }
}

View File

@ -1,15 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
/**
 * Variant of {@link SimpleIndelROD} whose location is built from the value
 * under key "0" and the single position parsed from key "1".
 */
public class PointIndelROD extends SimpleIndelROD {

    /**
     * @param name the name handed to the {@code SimpleIndelROD} constructor
     */
    public PointIndelROD(String name) {
        super(name);
    }

    /**
     * Builds the location from keys "0" and "1" of this record.
     *
     * @return the location produced by the two-argument
     *         {@code GenomeLocParser.createGenomeLoc}
     * @throws NumberFormatException if the value under key "1" is not a parsable long
     */
    public GenomeLoc getLocation() {
        final String contigName = this.get("0");
        final long position = Long.parseLong(this.get("1"));
        return GenomeLocParser.createGenomeLoc(contigName, position);
    }
}

View File

@ -1,275 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import java.util.*;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
/**
 * Reference-ordered datum populated from a GFF-style line, in which the
 * feature column is reported as the chip genotype for the record.
 *
 * Most variation-related accessors return fixed placeholder values; only the
 * columns parsed by {@link #parseLine(Object, String[])} carry real data.
 */
public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum {

    /** Separator between attribute entries: whitespace, a semicolon, then optional whitespace. */
    private static final Pattern GFF_DELIM = Pattern.compile("\\s+;\\s*");

    /** One attribute entry: an identifier-like key followed by one or more whitespace-separated tokens. */
    private static final Pattern GFF_ATTRIBUTE_PATTERN = Pattern.compile("([A-Za-z][A-Za-z0-9_]*)((?:\\s+\\S+)+)");

    private String contig;
    private String source;
    private String feature;
    private String strand;
    private String frame;
    private long start;
    private long stop;
    private double score;
    private HashMap<String, String> attributes;

    // ----------------------------------------------------------------------
    //
    // Construction
    //
    // ----------------------------------------------------------------------

    /**
     * @param name the name handed to the {@code BasicReferenceOrderedDatum} constructor
     */
    public RodGenotypeChipAsGFF(final String name) {
        super(name);
    }

    /**
     * Stores every field of the record in one call. The values are kept as
     * given; the attribute map is stored by reference, not copied.
     */
    public void setValues(final String contig, final String source, final String feature,
                          final long start, final long stop, final double score,
                          final String strand, final String frame, HashMap<String, String> attributes) {
        this.contig = contig;
        this.start = start;
        this.stop = stop;
        this.source = source;
        this.feature = feature;
        this.score = score;
        this.strand = strand;
        this.frame = frame;
        this.attributes = attributes;
    }

    // ----------------------------------------------------------------------
    //
    // Simple accessors
    //
    // ----------------------------------------------------------------------

    public String getSource() {
        return source;
    }

    public String getFeature() {
        return feature;
    }

    public String getStrand() {
        return strand;
    }

    public String getFrame() {
        return frame;
    }

    public double getScore() {
        return score;
    }

    /**
     * @return the location built by {@code GenomeLocParser.parseGenomeLoc}
     *         from the stored contig, start and stop
     */
    public GenomeLoc getLocation() {
        return GenomeLocParser.parseGenomeLoc(contig, start, stop);
    }

    /**
     * Not supported by this class.
     *
     * @return never returns normally
     * @throws IllegalStateException always
     */
    public String getReference() {
        throw new IllegalStateException("Chip data is unable to determine the reference");
    }

    /**
     * @return the constant 4, i.e. -log10 of a 1/10000 error rate
     */
    public double getNegLog10PError() {
        return 4; // 1/10000 error
    }

    /**
     * Not supported by this class.
     *
     * @return never returns normally
     * @throws StingException always
     */
    public List<String> getAlternateAlleleList() {
        throw new StingException("Hapmap is unable to provide an alternate allele list; the reference is unknown");
    }

    /**
     * Splits the feature string into its individual characters.
     *
     * @return a new list holding one single-character string per character of
     *         the feature, in order
     */
    public List<String> getAlleleList() {
        final List<String> alleles = new ArrayList<String>();
        for (int i = 0; i < feature.length(); i++) {
            alleles.add(String.valueOf(feature.charAt(i)));
        }
        return alleles;
    }

    /**
     * @param key attribute name
     * @return the stored value for the key, or null when the map has none
     */
    public String getAttribute(final String key) {
        return attributes.get(key);
    }

    public boolean containsAttribute(final String key) {
        return attributes.containsKey(key);
    }

    /**
     * @return the internal attribute map itself (not a copy)
     */
    public HashMap<String, String> getAttributes() {
        return attributes;
    }

    /**
     * Renders the attributes as "key value" entries joined with " ; ", in the
     * iteration order of the underlying map.
     *
     * @return the joined attribute string
     */
    public String getAttributeString() {
        final String[] rendered = new String[attributes.size()];
        int slot = 0;
        for (final Map.Entry<String, String> entry : attributes.entrySet()) {
            rendered[slot] = entry.getKey() + " " + entry.getValue();
            slot++;
        }
        return Utils.join(" ; ", rendered);
    }

    // ----------------------------------------------------------------------
    //
    // Formatting
    //
    // ----------------------------------------------------------------------

    /**
     * Renders the record as nine tab-separated columns. The stop column is
     * written as {@code stop + 1}, mirroring the {@code - 1} applied when the
     * line is parsed.
     */
    public String toString() {
        final long printedStop = stop + 1;
        return String.format("%s\t%s\t%s\t%d\t%d\t%f\t%s\t%s\t%s",
                contig, source, feature, start, printedStop, score, strand, frame, getAttributeString());
    }

    public String repl() {
        return this.toString();
    }

    public String toSimpleString() {
        return String.format("chip-genotype: %s", feature);
    }

    // ----------------------------------------------------------------------
    //
    // Parsing
    //
    // ----------------------------------------------------------------------

    /**
     * Reads leading attribute entries from the attribute text. Scanning stops
     * at the first entry that does not match the attribute pattern.
     *
     * @param attributeLine the attribute portion of a line
     * @return a new map from attribute key to its value with double quotes
     *         removed and surrounding whitespace trimmed
     */
    private HashMap<String, String> parseAttributes(final String attributeLine) {
        final HashMap<String, String> parsed = new HashMap<String, String>();
        final Scanner entries = new Scanner(attributeLine).useDelimiter(GFF_DELIM);
        while (entries.hasNext(GFF_ATTRIBUTE_PATTERN)) {
            // hasNext(Pattern) leaves its match available through match().
            final MatchResult entry = entries.match();
            final String key = entry.group(1);
            final String unquoted = entry.group(2).replace("\"", "");
            parsed.put(key, unquoted.trim());
            // Consume the token that was just inspected so the loop advances.
            entries.next();
        }
        return parsed;
    }

    /**
     * Populates this record from the split columns of one line.
     *
     * Columns 0-2 are contig, source and feature; 3 and 4 are start and stop
     * (the stop is stored as the parsed value minus one); 5 is the score, with
     * "." stored as NaN; 6 and 7 are strand and frame; everything from column 8
     * on is re-joined with single spaces and parsed as attributes.
     *
     * @param header not used by this implementation
     * @param parts  the columns of the line
     * @return always true
     */
    public boolean parseLine(final Object header, final String[] parts) {
        final String lineContig = parts[0];
        final String lineSource = parts[1];
        final String lineFeature = parts[2];
        final long lineStart = Long.parseLong(parts[3]);
        final long lineStop = Long.parseLong(parts[4]) - 1;
        final double lineScore = parts[5].equals(".") ? Double.NaN : Double.parseDouble(parts[5]);
        final String lineStrand = parts[6];
        final String lineFrame = parts[7];
        final String attributeText = Utils.join(" ", parts, 8, parts.length);
        setValues(lineContig, lineSource, lineFeature, lineStart, lineStop, lineScore,
                lineStrand, lineFrame, parseAttributes(attributeText));
        return true;
    }

    // ----------------------------------------------------------------------
    //
    // Variation / genotype view (mostly fixed placeholder values)
    //
    // ----------------------------------------------------------------------

    public String getRefBasesFWD() {
        return null;
    }

    public char getRefSnpFWD() throws IllegalStateException {
        return 0;
    }

    public String getAltBasesFWD() {
        return null;
    }

    public char getAltSnpFWD() throws IllegalStateException {
        return 0;
    }

    /**
     * @return the negation of {@link #isSNP()}
     */
    public boolean isReference() {
        return !isSNP();
    }

    /**
     * @return the value of {@link #getMAF()}
     */
    public double getNonRefAlleleFrequency() {
        return this.getMAF();
    }

    public boolean isSNP() {
        return false;
    }

    public boolean isInsertion() {
        return false;
    }

    public boolean isDeletion() {
        return false;
    }

    public boolean isIndel() {
        return false;
    }

    /**
     * @return the value of {@link #getAltSnpFWD()}
     */
    public char getAlternativeBaseForSNP() {
        return this.getAltSnpFWD();
    }

    /**
     * @return the value of {@link #getRefSnpFWD()}
     */
    public char getReferenceForSNP() {
        return this.getRefSnpFWD();
    }

    public double getMAF() {
        return 0;
    }

    public double getHeterozygosity() {
        return 0;
    }

    public boolean isGenotype() {
        return true;
    }

    /**
     * @return the stored score
     */
    public double getVariationConfidence() {
        return score;
    }

    /**
     * @return the stored score
     */
    public double getConsensusConfidence() {
        return score;
    }

    /**
     * @return a one-element list holding the feature string
     */
    public List<String> getGenotype() throws IllegalStateException {
        return Arrays.asList(feature);
    }

    public int getPloidy() throws IllegalStateException {
        return 2;
    }

    public boolean isBiallelic() {
        return true;
    }

    public int length() {
        return 1;
    }

    /**
     * Compares the string form of the given genotype with
     * {@link #getAltBasesFWD()}.
     *
     * NOTE(review): getAltBasesFWD() returns null in this class, so the
     * comparison is false here unless a subclass overrides that accessor.
     *
     * @param x the genotype
     * @return true when {@code x.toString()} equals the alternate-bases string
     */
    public boolean hasGenotype(DiploidGenotype x) {
        return x.toString().equals(this.getAltBasesFWD());
    }
}

View File

@ -1,174 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Tabular reference-ordered datum describing a single indel.
 *
 * Two column layouts are recognised, selected by the value under key "3":
 * <ul>
 *   <li>if it is exactly "D" or "I" (the "1KG" layout in this class), key "2"
 *       holds the length, key "4" the bases and key "5" an optional samples
 *       string;</li>
 *   <li>otherwise key "3" itself holds a '+' or '-' sign followed by the
 *       bases, optionally followed by ':' and further text that is ignored.</li>
 * </ul>
 * The layout is detected once, on first use, and cached per instance.
 */
public class SimpleIndelROD extends TabularROD {

    /** Cached result of the layout detection; meaningful only once {@link #checkedFormat} is true. */
    private boolean kgenomesFormat = false;

    /** Whether the layout detection has already run for this record. */
    private boolean checkedFormat = false;

    /**
     * @param name the name handed to the {@code TabularROD} constructor
     */
    public SimpleIndelROD(String name) {
        super(name);
    }

    /**
     * Builds the location from key "0" and the position under key "1". The
     * stop is {@code pos + length()} for a deletion and {@code pos + 1}
     * otherwise.
     *
     * @return the location produced by {@code GenomeLocParser.createGenomeLoc}
     * @throws NumberFormatException if the value under key "1" is not a parsable long
     */
    public GenomeLoc getLocation() {
        final long pos = Long.parseLong(this.get("1"));
        final long stop = isDeletion() ? pos + length() : pos + 1;
        return GenomeLocParser.createGenomeLoc(this.get("0"), pos, stop);
    }

    /**
     * get the reference base(s) at this position
     *
     * @return the string form of {@link #getRef()}
     */
    public String getReference() {
        return String.valueOf(getRef());
    }

    /**
     * Extracts the indel bases for the detected layout.
     *
     * @return a one-element fixed-size list: the value under key "4" in the
     *         1KG layout; otherwise the value under key "3" without its first
     *         character and without anything from the first ':' onward
     */
    public List<String> getFWDAlleles() {
        if (is1KGFormat()) {
            return Arrays.asList(this.get("4"));
        }

        final String str = this.get("3");
        final int colon = str.indexOf(":");
        // Index 0 holds the +/- sign, so the bases start at index 1.
        final String bases = (colon == -1) ? str.substring(1) : str.substring(1, colon);
        return Arrays.asList(bases);
    }

    public String getFWDRefBases() { return ""; }

    /** @return the first (only) entry of {@link #getFWDAlleles()} */
    public String getAltBasesFWD() { return getFWDAlleles().get(0); }

    public String getRefBasesFWD() { return ""; }

    /** @throws IllegalStateException always; an indel has no SNP reference base */
    public char getRefSnpFWD() { throw new IllegalStateException("I'm an indel, not a SNP"); }

    /** @throws IllegalStateException always; an indel has no SNP alternate base */
    public char getAltSnpFWD() { throw new IllegalStateException("I'm an indel, not a SNP"); }

    /** @return the constant 'N' */
    public char getRef() { return 'N'; }

    /** @return the result of {@link #getFWDAlleles()} */
    public List<String> getGenotype() { return getFWDAlleles(); }

    public boolean isGenotype() { return false; }
    public boolean isPointGenotype() { return false; }
    public boolean isIndelGenotype() { return true; }

    /**
     * get the frequency of this variant
     *
     * @return always 0.0
     */
    public double getNonRefAlleleFrequency() {
        return 0.0;
    }

    public boolean isSNP() { return false; }
    public boolean isReference() { return false; }

    /**
     * @return true when key "3" equals "I" (1KG layout) or starts with '+'
     *         (other layout)
     */
    public boolean isInsertion() {
        if (is1KGFormat()) {
            return this.get("3").equals("I");
        }
        return this.get("3").charAt(0) == '+';
    }

    /**
     * @return true when key "3" equals "D" (1KG layout) or starts with '-'
     *         (other layout)
     */
    public boolean isDeletion() {
        if (is1KGFormat()) {
            return this.get("3").equals("D");
        }
        return this.get("3").charAt(0) == '-';
    }

    public boolean isIndel() { return true; }

    /**
     * gets the alternate base in the case of a SNP.
     *
     * @return never returns normally in this class
     * @throws IllegalStateException always, via {@link #getAltSnpFWD()}
     */
    public char getAlternativeBaseForSNP() {
        return getAltSnpFWD();
    }

    /**
     * gets the reference base in the case of a SNP.
     *
     * @return never returns normally in this class
     * @throws IllegalStateException always, via {@link #getRefSnpFWD()}
     */
    public char getReferenceForSNP() {
        return getRefSnpFWD();
    }

    public double getVariantConfidence() { return 0.0; }
    public double getVariationConfidence() { return 0.0; }
    public double getConsensusConfidence() { return 0.0; }
    public boolean isBiallelic() { return true; }

    /**
     * get the -1 * (log 10 of the error value)
     *
     * @return the value of {@link #getVariationConfidence()}
     */
    public double getNegLog10PError() {
        return getVariationConfidence();
    }

    /**
     * gets the alternate alleles: every entry of {@link #getAlleleList()} that
     * is not equal to {@link #getReference()}, in the same order.
     *
     * @return a new, independent list of the non-reference alleles
     */
    public List<String> getAlternateAlleleList() {
        final String reference = this.getReference();
        // Collect into a fresh list rather than removing in place: the allele
        // list may be a fixed-size Arrays.asList view (remove() is unsupported),
        // and removing from any list while iterating over it is unsafe.
        final List<String> alternates = new ArrayList<String>();
        for (String allele : getAlleleList()) {
            if (!allele.equals(reference)) {
                alternates.add(allele);
            }
        }
        return alternates;
    }

    /**
     * gets the alleles. In this class that is exactly the list returned by
     * {@link #getFWDAlleles()}; no reference allele is prepended.
     *
     * @return the allele list
     */
    public List<String> getAlleleList() {
        return this.getFWDAlleles();
    }

    public boolean isHom() { return false; }
    public boolean isHet() { return false; }
    public double getHeterozygosity() { return 0.0; }
    public double getMAF() { return 0.0; }
    public int getPloidy() { return 2; }

    /**
     * @return the integer under key "2" in the 1KG layout; otherwise the
     *         number of characters in the first forward allele
     */
    public int length() {
        if (is1KGFormat()) {
            return Integer.parseInt(this.get("2"));
        }
        return getFWDAlleles().get(0).length();
    }

    public boolean allowIncompleteRecords() {
        return true;
    }

    /**
     * @return the value under key "5" when the record is in the 1KG layout and
     *         that value is present; the empty string otherwise
     */
    public String getSamplesString() {
        return (is1KGFormat() && this.get("5") != null ? this.get("5") : "");
    }

    /**
     * Renders the record as tab-separated contig, start, length, "I" or "D",
     * and bases, followed by the samples string when it is non-empty.
     */
    public String toString() {
        final GenomeLoc loc = getLocation();
        final StringBuilder sb = new StringBuilder();
        sb.append(loc.getContig()).append("\t").append(loc.getStart()).append("\t");
        sb.append(length()).append("\t").append(isInsertion() ? "I" : "D");
        sb.append("\t").append(getFWDAlleles().get(0));
        final String samples = getSamplesString();
        if (samples.length() > 0) {
            sb.append("\t").append(samples);
        }
        return sb.toString();
    }

    /**
     * Detects the column layout on first call and caches the answer.
     *
     * @return true when the value under key "3" is exactly "D" or "I"
     */
    private boolean is1KGFormat() {
        if (!checkedFormat) {
            checkedFormat = true;
            kgenomesFormat = this.get("3").equals("D") || this.get("3").equals("I");
        }
        return kgenomesFormat;
    }
}

View File

@ -54,15 +54,11 @@ public class RODTrackBuilder implements RMDTrackBuilder {
static {
// All known ROD types
Types.put("GFF", RodGenotypeChipAsGFF.class);
Types.put("SAMPileup", rodSAMPileup.class);
Types.put("GELI", rodGELI.class);
Types.put("RefSeq", rodRefSeq.class);
Types.put("Table", TabularROD.class);
Types.put("AnnotatorInputTable", AnnotatorROD.class);
Types.put("CleanedOutSNP", CleanedOutSNPROD.class);
Types.put("SimpleIndel", SimpleIndelROD.class);
Types.put("PointIndel", PointIndelROD.class);
Types.put("HapMap", HapMapROD.class);
Types.put("Intervals", IntervalRod.class);
Types.put("GLF", RodGLF.class);

View File

@ -22,7 +22,7 @@ import java.util.Set;
public class GenotypeWriterFactory {
/** available genotype writers */
public enum GENOTYPE_FORMAT {
GELI, GLF, GFF, TABULAR, GELI_BINARY, VCF
GELI, GLF, GELI_BINARY, VCF
}
/**