From 6bbcc47b5d12ab3d721fb2652756429a9d8f1ea0 Mon Sep 17 00:00:00 2001 From: aaron Date: Wed, 5 May 2010 19:07:13 +0000 Subject: [PATCH] removing some out-of-date RODs and some unused genotype writer formats git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3304 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/refdata/CleanedOutSNPROD.java | 24 -- .../sting/gatk/refdata/PointIndelROD.java | 15 - .../gatk/refdata/RodGenotypeChipAsGFF.java | 275 ------------------ .../sting/gatk/refdata/SimpleIndelROD.java | 174 ----------- .../tracks/builders/RODTrackBuilder.java | 4 - .../utils/genotype/GenotypeWriterFactory.java | 2 +- 6 files changed, 1 insertion(+), 493 deletions(-) delete mode 100755 java/src/org/broadinstitute/sting/gatk/refdata/CleanedOutSNPROD.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/refdata/PointIndelROD.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/refdata/RodGenotypeChipAsGFF.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/refdata/SimpleIndelROD.java diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/CleanedOutSNPROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/CleanedOutSNPROD.java deleted file mode 100755 index 4b66e81e8..000000000 --- a/java/src/org/broadinstitute/sting/gatk/refdata/CleanedOutSNPROD.java +++ /dev/null @@ -1,24 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; - - -public class CleanedOutSNPROD extends TabularROD { - - private static final String REAL_SNP_STRING = "SAME_SNP"; - private static final String FALSE_SNP_STRING = "NOT_SNP"; - - public CleanedOutSNPROD(String name) { - super(name); - } - - public GenomeLoc getLocation() { - return GenomeLocParser.parseGenomeLoc(this.get("0")); - } - - public boolean isRealSNP() { - String s = this.get("1"); - return s != null && s.equals(REAL_SNP_STRING); - } - } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/PointIndelROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/PointIndelROD.java deleted file mode 100755 index c214b0c35..000000000 --- a/java/src/org/broadinstitute/sting/gatk/refdata/PointIndelROD.java +++ /dev/null @@ -1,15 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; - -public class PointIndelROD extends SimpleIndelROD { - - public PointIndelROD(String name) { - super(name); - } - - public GenomeLoc getLocation() { - return GenomeLocParser.createGenomeLoc(this.get("0"), Long.parseLong(this.get("1"))); - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodGenotypeChipAsGFF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodGenotypeChipAsGFF.java deleted file mode 100644 index ebe82a1d0..000000000 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodGenotypeChipAsGFF.java +++ /dev/null @@ -1,275 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.genotype.DiploidGenotype; - -import java.util.*; -import java.util.regex.MatchResult; -import java.util.regex.Pattern; - -/** - * Class for representing arbitrary reference ordered data sets - * - * User: mdepristo - * Date: Feb 27, 2009 - * Time: 10:47:14 AM - * To change this template use File | Settings | File Templates. - */ -public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum { - private String contig, source, feature, strand, frame; - private long start, stop; - private double score; - private HashMap attributes; - - // ---------------------------------------------------------------------- - // - // Constructors - // - // ---------------------------------------------------------------------- - public RodGenotypeChipAsGFF(final String name) { - super(name); - } - - public void setValues(final String contig, final String source, final String feature, - final long start, final long stop, final double score, - final String strand, final String frame, HashMap attributes) { - this.contig = contig; - this.source = source; - this.feature = feature; - this.start = start; - this.stop= stop; - this.score = score; - this.strand = strand; - this.frame = frame; - this.attributes = attributes; - } - - // ---------------------------------------------------------------------- - // - // Accessors - // - // ---------------------------------------------------------------------- - public String getSource() { - return source; - } - - public String getFeature() { - return feature; - } - - public String getStrand() { - return strand; - } - - public String getFrame() { - return frame; - } - - public double getScore() { - return score; - } - - public GenomeLoc getLocation() { - return GenomeLocParser.parseGenomeLoc(contig, start, stop); - } - - /** - * get the reference base(s) at this position - * - * @return the reference base or bases, as a string - */ - - public String getReference() { - throw new IllegalStateException("Chip data is unable to determine the reference"); - } - - /** - * get the -1 * (log 10 of the error value) - * - * @return the log based error estimate - */ - - public double getNegLog10PError() { - return 4; // 1/10000 error - } - - /** - * gets the alternate alleles. This method should return all the alleles present at the location, - * NOT including the reference base. This is returned as a string list with no guarantee ordering - * of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest - * frequency). - * - * @return an alternate allele list - */ - - public List getAlternateAlleleList() { - throw new StingException("Hapmap is unable to provide an alternate allele list; the reference is unknown"); - } - - /** - * gets the alleles. This method should return all the alleles present at the location, - * including the reference base. The first allele should always be the reference allele, followed - * by an unordered list of alternate alleles. - * - * @return an alternate allele list - */ - - public List getAlleleList() { - List ret = new ArrayList(); - for (char c : feature.toCharArray()) - ret.add(String.valueOf(c)); - return ret; - } - - public String getAttribute(final String key) { - return attributes.get(key); - } - - public boolean containsAttribute(final String key) { - return attributes.containsKey(key); - } - - public HashMap getAttributes() { - return attributes; - } - - public String getAttributeString() { - String[] strings = new String[attributes.size()]; - int i = 0; - for ( Map.Entry pair : attributes.entrySet() ) { - strings[i++] = pair.getKey() + " " + pair.getValue(); - //strings[i++] = "(" + pair.getKey() + ") (" + pair.getValue() + ")"; - } - return Utils.join(" ; ", strings); - } - - // ---------------------------------------------------------------------- - // - // formatting - // - // ---------------------------------------------------------------------- - public String toString() { - return String.format("%s\t%s\t%s\t%d\t%d\t%f\t%s\t%s\t%s", contig, source, feature, start, stop+1, score, strand, frame, getAttributeString()); - } - - public String repl() { - return this.toString(); - } - - public String toSimpleString() { - return String.format("chip-genotype: %s", feature); - } - - - private static Pattern GFF_DELIM = Pattern.compile("\\s+;\\s*"); - private static Pattern GFF_ATTRIBUTE_PATTERN = Pattern.compile("([A-Za-z][A-Za-z0-9_]*)((?:\\s+\\S+)+)"); - final private HashMap parseAttributes( final String attributeLine ) { - HashMap attributes = new HashMap(); - Scanner scanner = new Scanner(attributeLine); - scanner.useDelimiter(GFF_DELIM); - while ( scanner.hasNext(GFF_ATTRIBUTE_PATTERN) ) { - MatchResult result = scanner.match(); - String key = result.group(1); - String value = result.group(2).replace("\"", "").trim(); - //System.out.printf(" Adding %s / %s (total %d)%n", key, value, result.groupCount()); - attributes.put(key, value); - String n = scanner.next(); - //System.out.printf(" next is %s%n", n); - } - return attributes; - } - - public boolean parseLine(final Object header, final String[] parts) { - //System.out.printf("Parsing GFFLine %s%n", Utils.join(" ", parts)); - - final String contig = parts[0]; - final String source = parts[1]; - final String feature = parts[2]; - final long start = Long.parseLong(parts[3]); - final long stop = Long.parseLong(parts[4])-1; - - double score = Double.NaN; - if ( ! parts[5].equals(".") ) - score = Double.parseDouble(parts[5]); - - final String strand = parts[6]; - final String frame = parts[7]; - final String attributeParts = Utils.join(" ", parts, 8, parts.length); - HashMap attributes = parseAttributes(attributeParts); - setValues(contig, source, feature, start, stop, score, strand, frame, attributes); - return true; - } - - public String getRefBasesFWD() { return null; } - public char getRefSnpFWD() throws IllegalStateException { return 0; } - public String getAltBasesFWD() { return null; } - public char getAltSnpFWD() throws IllegalStateException { return 0; } - public boolean isReference() { return ! isSNP(); } - - /** - * get the frequency of this variant - * - * @return VariantFrequency with the stored frequency - */ - - public double getNonRefAlleleFrequency() { - return this.getMAF(); - } - - public boolean isSNP() { return false; } - public boolean isInsertion() { return false; } - public boolean isDeletion() { return false; } - public boolean isIndel() { return false; } - - /** - * gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case - * of - * - * @return a char, representing the alternate base - */ - - public char getAlternativeBaseForSNP() { - return this.getAltSnpFWD(); - } - - /** - * gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP - * - * @return a char, representing the alternate base - */ - - public char getReferenceForSNP() { - return this.getRefSnpFWD(); - } - - public double getMAF() { return 0; } - public double getHeterozygosity() { return 0; } - public boolean isGenotype() { return true; } - public double getVariationConfidence() { return score; } - public double getConsensusConfidence() { return score; } - public List getGenotype() throws IllegalStateException { - //System.out.printf("feature = %s%n", feature); - return Arrays.asList(feature); - } - - public int getPloidy() throws IllegalStateException { return 2; } - public boolean isBiallelic() { return true; } - public int length() { return 1; } - - - /** - * do we have the specified genotype? not all backedByGenotypes - * have all the genotype data. - * - * @param x the genotype - * - * @return true if available, false otherwise - */ - public boolean hasGenotype(DiploidGenotype x) { - if (!x.toString().equals(this.getAltBasesFWD())) return false; - return true; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/SimpleIndelROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/SimpleIndelROD.java deleted file mode 100755 index c2a4ce8bb..000000000 --- a/java/src/org/broadinstitute/sting/gatk/refdata/SimpleIndelROD.java +++ /dev/null @@ -1,174 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; - -import java.util.Arrays; -import java.util.List; - -public class SimpleIndelROD extends TabularROD { - - private boolean KGENOMES_FORMAT = false, checkedFormat = false; - - public SimpleIndelROD(String name) { - super(name); - } - - public GenomeLoc getLocation() { - long pos = Long.parseLong(this.get("1")); - return GenomeLocParser.createGenomeLoc(this.get("0"), pos, (isDeletion() ? pos+length() : pos+1)); - } - - /** - * get the reference base(s) at this position - * - * @return the reference base or bases, as a string - */ - public String getReference() { - return String.valueOf(getRef()); - } - - public List getFWDAlleles() { - if ( is1KGFormat() ) - return Arrays.asList(this.get("4")); - - String str = this.get("3"); - int pos = str.indexOf(":"); - if ( pos == -1 ) { - return Arrays.asList(str.substring(1)); - } - return Arrays.asList(str.substring(1, pos)); - } - - public String getFWDRefBases() { return ""; } - public String getAltBasesFWD() { return getFWDAlleles().get(0); } - public String getRefBasesFWD() { return ""; } - public char getRefSnpFWD() { throw new IllegalStateException("I'm an indel, not a SNP"); } - public char getAltSnpFWD() { throw new IllegalStateException("I'm an indel, not a SNP"); } - public char getRef() { return 'N'; } - public List getGenotype() { return getFWDAlleles(); } - public boolean isGenotype() { return false; } - public boolean isPointGenotype() { return false; } - public boolean isIndelGenotype() { return true; } - - /** - * get the frequency of this variant - * - * @return VariantFrequency with the stored frequency - */ - public double getNonRefAlleleFrequency() { - return 0.0; - } - - public boolean isSNP() { return false; } - public boolean isReference() { return false; } - - public boolean isInsertion() { - if ( is1KGFormat() ) - return this.get("3").equals("I"); - return this.get("3").charAt(0) == '+'; - } - public boolean isDeletion() { - if ( is1KGFormat() ) - return this.get("3").equals("D"); - return this.get("3").charAt(0) == '-'; - } - public boolean isIndel() { return true; } - - /** - * gets the alternate base is the case of a SNP. Throws an IllegalStateException if we're not a SNP - * of - * - * @return a char, representing the alternate base - */ - public char getAlternativeBaseForSNP() { - return getAltSnpFWD(); - } - - /** - * gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP - * - * @return a char, representing the alternate base - */ - public char getReferenceForSNP() { - return getRefSnpFWD(); - } - - public double getVariantConfidence() { return 0.0; } - public double getVariationConfidence() { return 0.0; } - public double getConsensusConfidence() { return 0.0; } - public boolean isBiallelic() { return true; } - - /** - * get the -1 * (log 10 of the error value) - * - * @return the log based error estimate - */ - public double getNegLog10PError() { - return getVariationConfidence(); - } - - /** - * gets the alternate alleles. This method should return all the alleles present at the location, - * NOT including the reference base. This is returned as a string list with no guarantee ordering - * of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest - * frequency). - * - * @return an alternate allele list - */ - public List getAlternateAlleleList() { - List ret = getAlleleList(); - for (String val : ret) { - if (val.equals(this.getReference())) ret.remove(val); - } - return ret; - } - - /** - * gets the alleles. This method should return all the alleles present at the location, - * including the reference base. The first allele should always be the reference allele, followed - * by an unordered list of alternate alleles. - * - * @return an alternate allele list - */ - public List getAlleleList() { - return this.getFWDAlleles(); - } - - public boolean isHom() { return false; } - public boolean isHet() { return false; } - public double getHeterozygosity() { return 0.0; } - public double getMAF() { return 0.0; } - public int getPloidy() { return 2; } - public int length() { - if ( is1KGFormat() ) - return Integer.parseInt(this.get("2")); - return getFWDAlleles().get(0).length(); - } - - public boolean allowIncompleteRecords() { - return true; - } - - public String getSamplesString() { - return (is1KGFormat() && this.get("5") != null ? this.get("5") : ""); - } - - public String toString() { - StringBuffer sb = new StringBuffer(); - sb.append(getLocation().getContig() + "\t" + getLocation().getStart() + "\t"); - sb.append(length() + "\t" + (isInsertion() ? "I" : "D") + "\t" + getFWDAlleles().get(0)); - String samples = getSamplesString(); - if ( samples.length() > 0 ) - sb.append("\t" + samples); - return sb.toString(); - } - - private boolean is1KGFormat() { - if ( !checkedFormat ) { - checkedFormat = true; - KGENOMES_FORMAT = this.get("3").equals("D") || this.get("3").equals("I"); - } - return KGENOMES_FORMAT; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java index 431d49ce7..9e9c898aa 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java @@ -54,15 +54,11 @@ public class RODTrackBuilder implements RMDTrackBuilder { static { // All known ROD types - Types.put("GFF", RodGenotypeChipAsGFF.class); Types.put("SAMPileup", rodSAMPileup.class); Types.put("GELI", rodGELI.class); Types.put("RefSeq", rodRefSeq.class); Types.put("Table", TabularROD.class); Types.put("AnnotatorInputTable", AnnotatorROD.class); - Types.put("CleanedOutSNP", CleanedOutSNPROD.class); - Types.put("SimpleIndel", SimpleIndelROD.class); - Types.put("PointIndel", PointIndelROD.class); Types.put("HapMap", HapMapROD.class); Types.put("Intervals", IntervalRod.class); Types.put("GLF", RodGLF.class); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java index ef2f91e08..d20d32a02 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java @@ -22,7 +22,7 @@ import java.util.Set; public class GenotypeWriterFactory { /** available genotype writers */ public enum GENOTYPE_FORMAT { - GELI, GLF, GFF, TABULAR, GELI_BINARY, VCF + GELI, GLF, GELI_BINARY, VCF } /**