removing some out-of-date RODs and some unused genotype writer formats
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3304 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c998c48a23
commit
6bbcc47b5d
|
|
@ -1,24 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
|
||||
public class CleanedOutSNPROD extends TabularROD {
|
||||
|
||||
private static final String REAL_SNP_STRING = "SAME_SNP";
|
||||
private static final String FALSE_SNP_STRING = "NOT_SNP";
|
||||
|
||||
public CleanedOutSNPROD(String name) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
public GenomeLoc getLocation() {
|
||||
return GenomeLocParser.parseGenomeLoc(this.get("0"));
|
||||
}
|
||||
|
||||
public boolean isRealSNP() {
|
||||
String s = this.get("1");
|
||||
return s != null && s.equals(REAL_SNP_STRING);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,15 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
public class PointIndelROD extends SimpleIndelROD {
|
||||
|
||||
public PointIndelROD(String name) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
public GenomeLoc getLocation() {
|
||||
return GenomeLocParser.createGenomeLoc(this.get("0"), Long.parseLong(this.get("1")));
|
||||
}
|
||||
}
|
||||
|
|
@ -1,275 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.regex.MatchResult;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Class for representing arbitrary reference ordered data sets
|
||||
*
|
||||
* User: mdepristo
|
||||
* Date: Feb 27, 2009
|
||||
* Time: 10:47:14 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum {
|
||||
private String contig, source, feature, strand, frame;
|
||||
private long start, stop;
|
||||
private double score;
|
||||
private HashMap<String, String> attributes;
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Constructors
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
public RodGenotypeChipAsGFF(final String name) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
public void setValues(final String contig, final String source, final String feature,
|
||||
final long start, final long stop, final double score,
|
||||
final String strand, final String frame, HashMap<String, String> attributes) {
|
||||
this.contig = contig;
|
||||
this.source = source;
|
||||
this.feature = feature;
|
||||
this.start = start;
|
||||
this.stop= stop;
|
||||
this.score = score;
|
||||
this.strand = strand;
|
||||
this.frame = frame;
|
||||
this.attributes = attributes;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Accessors
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
public String getSource() {
|
||||
return source;
|
||||
}
|
||||
|
||||
public String getFeature() {
|
||||
return feature;
|
||||
}
|
||||
|
||||
public String getStrand() {
|
||||
return strand;
|
||||
}
|
||||
|
||||
public String getFrame() {
|
||||
return frame;
|
||||
}
|
||||
|
||||
public double getScore() {
|
||||
return score;
|
||||
}
|
||||
|
||||
public GenomeLoc getLocation() {
|
||||
return GenomeLocParser.parseGenomeLoc(contig, start, stop);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference base(s) at this position
|
||||
*
|
||||
* @return the reference base or bases, as a string
|
||||
*/
|
||||
|
||||
public String getReference() {
|
||||
throw new IllegalStateException("Chip data is unable to determine the reference");
|
||||
}
|
||||
|
||||
/**
|
||||
* get the -1 * (log 10 of the error value)
|
||||
*
|
||||
* @return the log based error estimate
|
||||
*/
|
||||
|
||||
public double getNegLog10PError() {
|
||||
return 4; // 1/10000 error
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
|
||||
public List<String> getAlternateAlleleList() {
|
||||
throw new StingException("Hapmap is unable to provide an alternate allele list; the reference is unknown");
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles.
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
|
||||
public List<String> getAlleleList() {
|
||||
List<String> ret = new ArrayList<String>();
|
||||
for (char c : feature.toCharArray())
|
||||
ret.add(String.valueOf(c));
|
||||
return ret;
|
||||
}
|
||||
|
||||
public String getAttribute(final String key) {
|
||||
return attributes.get(key);
|
||||
}
|
||||
|
||||
public boolean containsAttribute(final String key) {
|
||||
return attributes.containsKey(key);
|
||||
}
|
||||
|
||||
public HashMap<String,String> getAttributes() {
|
||||
return attributes;
|
||||
}
|
||||
|
||||
public String getAttributeString() {
|
||||
String[] strings = new String[attributes.size()];
|
||||
int i = 0;
|
||||
for ( Map.Entry<String, String> pair : attributes.entrySet() ) {
|
||||
strings[i++] = pair.getKey() + " " + pair.getValue();
|
||||
//strings[i++] = "(" + pair.getKey() + ") (" + pair.getValue() + ")";
|
||||
}
|
||||
return Utils.join(" ; ", strings);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// formatting
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
public String toString() {
|
||||
return String.format("%s\t%s\t%s\t%d\t%d\t%f\t%s\t%s\t%s", contig, source, feature, start, stop+1, score, strand, frame, getAttributeString());
|
||||
}
|
||||
|
||||
public String repl() {
|
||||
return this.toString();
|
||||
}
|
||||
|
||||
public String toSimpleString() {
|
||||
return String.format("chip-genotype: %s", feature);
|
||||
}
|
||||
|
||||
|
||||
private static Pattern GFF_DELIM = Pattern.compile("\\s+;\\s*");
|
||||
private static Pattern GFF_ATTRIBUTE_PATTERN = Pattern.compile("([A-Za-z][A-Za-z0-9_]*)((?:\\s+\\S+)+)");
|
||||
final private HashMap<String, String> parseAttributes( final String attributeLine ) {
|
||||
HashMap<String, String> attributes = new HashMap<String, String>();
|
||||
Scanner scanner = new Scanner(attributeLine);
|
||||
scanner.useDelimiter(GFF_DELIM);
|
||||
while ( scanner.hasNext(GFF_ATTRIBUTE_PATTERN) ) {
|
||||
MatchResult result = scanner.match();
|
||||
String key = result.group(1);
|
||||
String value = result.group(2).replace("\"", "").trim();
|
||||
//System.out.printf(" Adding %s / %s (total %d)%n", key, value, result.groupCount());
|
||||
attributes.put(key, value);
|
||||
String n = scanner.next();
|
||||
//System.out.printf(" next is %s%n", n);
|
||||
}
|
||||
return attributes;
|
||||
}
|
||||
|
||||
public boolean parseLine(final Object header, final String[] parts) {
|
||||
//System.out.printf("Parsing GFFLine %s%n", Utils.join(" ", parts));
|
||||
|
||||
final String contig = parts[0];
|
||||
final String source = parts[1];
|
||||
final String feature = parts[2];
|
||||
final long start = Long.parseLong(parts[3]);
|
||||
final long stop = Long.parseLong(parts[4])-1;
|
||||
|
||||
double score = Double.NaN;
|
||||
if ( ! parts[5].equals(".") )
|
||||
score = Double.parseDouble(parts[5]);
|
||||
|
||||
final String strand = parts[6];
|
||||
final String frame = parts[7];
|
||||
final String attributeParts = Utils.join(" ", parts, 8, parts.length);
|
||||
HashMap<String, String> attributes = parseAttributes(attributeParts);
|
||||
setValues(contig, source, feature, start, stop, score, strand, frame, attributes);
|
||||
return true;
|
||||
}
|
||||
|
||||
public String getRefBasesFWD() { return null; }
|
||||
public char getRefSnpFWD() throws IllegalStateException { return 0; }
|
||||
public String getAltBasesFWD() { return null; }
|
||||
public char getAltSnpFWD() throws IllegalStateException { return 0; }
|
||||
public boolean isReference() { return ! isSNP(); }
|
||||
|
||||
/**
|
||||
* get the frequency of this variant
|
||||
*
|
||||
* @return VariantFrequency with the stored frequency
|
||||
*/
|
||||
|
||||
public double getNonRefAlleleFrequency() {
|
||||
return this.getMAF();
|
||||
}
|
||||
|
||||
public boolean isSNP() { return false; }
|
||||
public boolean isInsertion() { return false; }
|
||||
public boolean isDeletion() { return false; }
|
||||
public boolean isIndel() { return false; }
|
||||
|
||||
/**
|
||||
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
|
||||
* of
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
|
||||
public char getAlternativeBaseForSNP() {
|
||||
return this.getAltSnpFWD();
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
|
||||
public char getReferenceForSNP() {
|
||||
return this.getRefSnpFWD();
|
||||
}
|
||||
|
||||
public double getMAF() { return 0; }
|
||||
public double getHeterozygosity() { return 0; }
|
||||
public boolean isGenotype() { return true; }
|
||||
public double getVariationConfidence() { return score; }
|
||||
public double getConsensusConfidence() { return score; }
|
||||
public List<String> getGenotype() throws IllegalStateException {
|
||||
//System.out.printf("feature = %s%n", feature);
|
||||
return Arrays.asList(feature);
|
||||
}
|
||||
|
||||
public int getPloidy() throws IllegalStateException { return 2; }
|
||||
public boolean isBiallelic() { return true; }
|
||||
public int length() { return 1; }
|
||||
|
||||
|
||||
/**
|
||||
* do we have the specified genotype? not all backedByGenotypes
|
||||
* have all the genotype data.
|
||||
*
|
||||
* @param x the genotype
|
||||
*
|
||||
* @return true if available, false otherwise
|
||||
*/
|
||||
public boolean hasGenotype(DiploidGenotype x) {
|
||||
if (!x.toString().equals(this.getAltBasesFWD())) return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,174 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class SimpleIndelROD extends TabularROD {
|
||||
|
||||
private boolean KGENOMES_FORMAT = false, checkedFormat = false;
|
||||
|
||||
public SimpleIndelROD(String name) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
public GenomeLoc getLocation() {
|
||||
long pos = Long.parseLong(this.get("1"));
|
||||
return GenomeLocParser.createGenomeLoc(this.get("0"), pos, (isDeletion() ? pos+length() : pos+1));
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference base(s) at this position
|
||||
*
|
||||
* @return the reference base or bases, as a string
|
||||
*/
|
||||
public String getReference() {
|
||||
return String.valueOf(getRef());
|
||||
}
|
||||
|
||||
public List<String> getFWDAlleles() {
|
||||
if ( is1KGFormat() )
|
||||
return Arrays.asList(this.get("4"));
|
||||
|
||||
String str = this.get("3");
|
||||
int pos = str.indexOf(":");
|
||||
if ( pos == -1 ) {
|
||||
return Arrays.asList(str.substring(1));
|
||||
}
|
||||
return Arrays.asList(str.substring(1, pos));
|
||||
}
|
||||
|
||||
public String getFWDRefBases() { return ""; }
|
||||
public String getAltBasesFWD() { return getFWDAlleles().get(0); }
|
||||
public String getRefBasesFWD() { return ""; }
|
||||
public char getRefSnpFWD() { throw new IllegalStateException("I'm an indel, not a SNP"); }
|
||||
public char getAltSnpFWD() { throw new IllegalStateException("I'm an indel, not a SNP"); }
|
||||
public char getRef() { return 'N'; }
|
||||
public List<String> getGenotype() { return getFWDAlleles(); }
|
||||
public boolean isGenotype() { return false; }
|
||||
public boolean isPointGenotype() { return false; }
|
||||
public boolean isIndelGenotype() { return true; }
|
||||
|
||||
/**
|
||||
* get the frequency of this variant
|
||||
*
|
||||
* @return VariantFrequency with the stored frequency
|
||||
*/
|
||||
public double getNonRefAlleleFrequency() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
public boolean isSNP() { return false; }
|
||||
public boolean isReference() { return false; }
|
||||
|
||||
public boolean isInsertion() {
|
||||
if ( is1KGFormat() )
|
||||
return this.get("3").equals("I");
|
||||
return this.get("3").charAt(0) == '+';
|
||||
}
|
||||
public boolean isDeletion() {
|
||||
if ( is1KGFormat() )
|
||||
return this.get("3").equals("D");
|
||||
return this.get("3").charAt(0) == '-';
|
||||
}
|
||||
public boolean isIndel() { return true; }
|
||||
|
||||
/**
|
||||
* gets the alternate base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
|
||||
* of
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
public char getAlternativeBaseForSNP() {
|
||||
return getAltSnpFWD();
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
public char getReferenceForSNP() {
|
||||
return getRefSnpFWD();
|
||||
}
|
||||
|
||||
public double getVariantConfidence() { return 0.0; }
|
||||
public double getVariationConfidence() { return 0.0; }
|
||||
public double getConsensusConfidence() { return 0.0; }
|
||||
public boolean isBiallelic() { return true; }
|
||||
|
||||
/**
|
||||
* get the -1 * (log 10 of the error value)
|
||||
*
|
||||
* @return the log based error estimate
|
||||
*/
|
||||
public double getNegLog10PError() {
|
||||
return getVariationConfidence();
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public List<String> getAlternateAlleleList() {
|
||||
List<String> ret = getAlleleList();
|
||||
for (String val : ret) {
|
||||
if (val.equals(this.getReference())) ret.remove(val);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles.
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public List<String> getAlleleList() {
|
||||
return this.getFWDAlleles();
|
||||
}
|
||||
|
||||
public boolean isHom() { return false; }
|
||||
public boolean isHet() { return false; }
|
||||
public double getHeterozygosity() { return 0.0; }
|
||||
public double getMAF() { return 0.0; }
|
||||
public int getPloidy() { return 2; }
|
||||
public int length() {
|
||||
if ( is1KGFormat() )
|
||||
return Integer.parseInt(this.get("2"));
|
||||
return getFWDAlleles().get(0).length();
|
||||
}
|
||||
|
||||
public boolean allowIncompleteRecords() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public String getSamplesString() {
|
||||
return (is1KGFormat() && this.get("5") != null ? this.get("5") : "");
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append(getLocation().getContig() + "\t" + getLocation().getStart() + "\t");
|
||||
sb.append(length() + "\t" + (isInsertion() ? "I" : "D") + "\t" + getFWDAlleles().get(0));
|
||||
String samples = getSamplesString();
|
||||
if ( samples.length() > 0 )
|
||||
sb.append("\t" + samples);
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private boolean is1KGFormat() {
|
||||
if ( !checkedFormat ) {
|
||||
checkedFormat = true;
|
||||
KGENOMES_FORMAT = this.get("3").equals("D") || this.get("3").equals("I");
|
||||
}
|
||||
return KGENOMES_FORMAT;
|
||||
}
|
||||
}
|
||||
|
|
@ -54,15 +54,11 @@ public class RODTrackBuilder implements RMDTrackBuilder {
|
|||
|
||||
static {
|
||||
// All known ROD types
|
||||
Types.put("GFF", RodGenotypeChipAsGFF.class);
|
||||
Types.put("SAMPileup", rodSAMPileup.class);
|
||||
Types.put("GELI", rodGELI.class);
|
||||
Types.put("RefSeq", rodRefSeq.class);
|
||||
Types.put("Table", TabularROD.class);
|
||||
Types.put("AnnotatorInputTable", AnnotatorROD.class);
|
||||
Types.put("CleanedOutSNP", CleanedOutSNPROD.class);
|
||||
Types.put("SimpleIndel", SimpleIndelROD.class);
|
||||
Types.put("PointIndel", PointIndelROD.class);
|
||||
Types.put("HapMap", HapMapROD.class);
|
||||
Types.put("Intervals", IntervalRod.class);
|
||||
Types.put("GLF", RodGLF.class);
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ import java.util.Set;
|
|||
public class GenotypeWriterFactory {
|
||||
/** available genotype writers */
|
||||
public enum GENOTYPE_FORMAT {
|
||||
GELI, GLF, GFF, TABULAR, GELI_BINARY, VCF
|
||||
GELI, GLF, GELI_BINARY, VCF
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Reference in New Issue