diff --git a/java/src/edu/mit/broad/sting/atk/AnalysisTK.java b/java/src/edu/mit/broad/sting/atk/AnalysisTK.java index a449702d9..bb3b9114c 100644 --- a/java/src/edu/mit/broad/sting/atk/AnalysisTK.java +++ b/java/src/edu/mit/broad/sting/atk/AnalysisTK.java @@ -7,9 +7,13 @@ import edu.mit.broad.picard.cmdline.Option; import edu.mit.broad.sting.atk.modules.*; import edu.mit.broad.sting.utils.ReferenceOrderedData; +import edu.mit.broad.sting.utils.rodGFF; +import edu.mit.broad.sting.utils.rodDbSNP; import java.io.*; import java.util.HashMap; +import java.util.List; +import java.util.ArrayList; public class AnalysisTK extends CommandLineProgram { // Usage and parameters @@ -44,10 +48,23 @@ public class AnalysisTK extends CommandLineProgram { } protected int doWork() { - ReferenceOrderedData rod = new ReferenceOrderedData(new File("trunk/data/gFFTest.gff")); - rod.testMe(); + final boolean TEST_ROD = false; + ReferenceOrderedData[] rods = null; - this.engine = new TraversalEngine(INPUT_FILE, REF_FILE_ARG); + if ( TEST_ROD ) { + ReferenceOrderedData gff = new ReferenceOrderedData(new File("trunk/data/gFFTest.gff"), rodGFF.class ); + gff.testMe(); + + //ReferenceOrderedData dbsnp = new ReferenceOrderedData(new File("trunk/data/dbSNP_head.txt"), rodDbSNP.class ); + ReferenceOrderedData dbsnp = new ReferenceOrderedData(new File("/Volumes/Users/mdepristo/broad/ATK/exampleSAMs/dbSNP_chr20.txt"), rodDbSNP.class ); + //dbsnp.testMe(); + rods = new ReferenceOrderedData[] { dbsnp }; // { gff, dbsnp }; + } + else { + rods = new ReferenceOrderedData[] {}; // { gff, dbsnp }; + } + + this.engine = new TraversalEngine(INPUT_FILE, REF_FILE_ARG, rods); ValidationStringency strictness; if ( STRICTNESS_ARG == null ) { diff --git a/java/src/edu/mit/broad/sting/atk/LocusWalker.java b/java/src/edu/mit/broad/sting/atk/LocusWalker.java index e65891ba7..72faf4a4c 100755 --- a/java/src/edu/mit/broad/sting/atk/LocusWalker.java +++ b/java/src/edu/mit/broad/sting/atk/LocusWalker.java @@ -1,6 +1,9 @@ package edu.mit.broad.sting.atk; import edu.mit.broad.sting.atk.LocusIterator; +import edu.mit.broad.sting.utils.ReferenceOrderedDatum; + +import java.util.List; /** * Created by IntelliJ IDEA. @@ -14,10 +17,10 @@ public interface LocusWalker { public String walkerType(); // Do we actually want to operate on the context? - boolean filter(char ref, LocusIterator context); + boolean filter(List rodData, char ref, LocusIterator context); // Map over the edu.mit.broad.sting.atk.LocusContext - MapType map(char ref, LocusIterator context); + MapType map(List rodData, char ref, LocusIterator context); // Given result of map function ReduceType reduceInit(); diff --git a/java/src/edu/mit/broad/sting/atk/TraversalEngine.java b/java/src/edu/mit/broad/sting/atk/TraversalEngine.java index 4c1d60410..1cf2b016e 100755 --- a/java/src/edu/mit/broad/sting/atk/TraversalEngine.java +++ b/java/src/edu/mit/broad/sting/atk/TraversalEngine.java @@ -9,13 +9,21 @@ import edu.mit.broad.picard.filter.FilteringIterator; import edu.mit.broad.picard.reference.ReferenceSequenceFile; import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory; import edu.mit.broad.sting.utils.ReferenceIterator; +import edu.mit.broad.sting.utils.ReferenceOrderedData; +import edu.mit.broad.sting.utils.ReferenceOrderedDatum; import java.io.*; +import java.util.List; +import java.util.Iterator; +import java.util.ArrayList; +import java.util.Arrays; public class TraversalEngine { // Usage and parameters private File readsFile = null; private File refFileName = null; + private List rods = null; + private String regionStr = null; private String traversalType = null; private ValidationStringency strictness = ValidationStringency.STRICT; @@ -42,9 +50,10 @@ public class TraversalEngine { // Setting up the engine // // -------------------------------------------------------------------------------------------------------------- - public TraversalEngine(File reads, File ref) { + public TraversalEngine(File reads, File ref, ReferenceOrderedData[] rods ) { readsFile = reads; refFileName = ref; + this.rods = Arrays.asList(rods); } public void setRegion(final String reg) { regionStr = regionStr; } @@ -100,6 +109,30 @@ public class TraversalEngine { System.exit(1); } + // -------------------------------------------------------------------------------------------------------------- + // + // dealing with reference ordered data + // + // -------------------------------------------------------------------------------------------------------------- + + protected List initializeRODs() { + // set up reference ordered data + List rodIters = new ArrayList(); + for ( ReferenceOrderedData data : rods ) { + rodIters.add(data.iterator()); + } + return rodIters; + } + + protected List getReferenceOrderedDataAtLocus(List rodIters, + final String contig, final int pos) { + List data = new ArrayList(); + for ( ReferenceOrderedData.RODIterator iter : rodIters ) { + data.add(iter.seekForward(contig, pos)); + } + return data; + } + // -------------------------------------------------------------------------------------------------------------- // // traversal functions @@ -159,6 +192,8 @@ public class TraversalEngine { FilteringIterator filterIter = new FilteringIterator(readStream.iterator(), new locusStreamFilterFunc()); CloseableIterator iter = new LocusIterator(filterIter); + List rodIters = initializeRODs(); + T sum = walker.reduceInit(); while ( iter.hasNext() ) { this.nRecords++; @@ -167,13 +202,14 @@ public class TraversalEngine { final LocusIterator locus = iter.next(); final ReferenceIterator refSite = refIter.seekForward(locus.getContig(), locus.getPosition()); final char refBase = refSite.getBaseAsChar(); + final List rodData = getReferenceOrderedDataAtLocus(rodIters, locus.getContig(), locus.getPosition()); if ( DEBUGGING ) System.out.printf(" Reference: %s:%d %c%n", refSite.getCurrentContig().getName(), refSite.getPosition(), refBase); - final boolean keepMeP = walker.filter(refBase, locus); + final boolean keepMeP = walker.filter(rodData, refBase, locus); if ( keepMeP ) { - M x = walker.map(refBase, locus); + M x = walker.map(rodData, refBase, locus); sum = walker.reduce(x, sum); } diff --git a/java/src/edu/mit/broad/sting/atk/modules/EmptyLocusWalker.java b/java/src/edu/mit/broad/sting/atk/modules/EmptyLocusWalker.java index 6f7c24836..aefd722bd 100755 --- a/java/src/edu/mit/broad/sting/atk/modules/EmptyLocusWalker.java +++ b/java/src/edu/mit/broad/sting/atk/modules/EmptyLocusWalker.java @@ -2,8 +2,11 @@ package edu.mit.broad.sting.atk.modules; import edu.mit.broad.sting.atk.LocusWalker; import edu.mit.broad.sting.atk.LocusIterator; +import edu.mit.broad.sting.utils.ReferenceOrderedDatum; import edu.mit.broad.sam.SAMRecord; +import java.util.List; + /** * Created by IntelliJ IDEA. * User: mdepristo @@ -18,12 +21,12 @@ public class EmptyLocusWalker implements LocusWalker { public String walkerType() { return "ByLocus"; } // Do we actually want to operate on the context? - public boolean filter(char ref, LocusIterator context) { + public boolean filter(List rodData, char ref, LocusIterator context) { return true; // We are keeping all the reads } // Map over the edu.mit.broad.sting.atk.LocusContext - public Integer map(char ref, LocusIterator context) { + public Integer map(List rodData, char ref, LocusIterator context) { return 1; } diff --git a/java/src/edu/mit/broad/sting/atk/modules/PileupWalker.java b/java/src/edu/mit/broad/sting/atk/modules/PileupWalker.java index eef13ee27..4be761aec 100644 --- a/java/src/edu/mit/broad/sting/atk/modules/PileupWalker.java +++ b/java/src/edu/mit/broad/sting/atk/modules/PileupWalker.java @@ -2,6 +2,7 @@ package edu.mit.broad.sting.atk.modules; import edu.mit.broad.sting.atk.LocusWalker; import edu.mit.broad.sting.atk.LocusIterator; +import edu.mit.broad.sting.utils.ReferenceOrderedDatum; import edu.mit.broad.sam.SAMRecord; import java.util.List; @@ -20,12 +21,12 @@ public class PileupWalker implements LocusWalker { public String walkerType() { return "ByLocus"; } // Do we actually want to operate on the context? - public boolean filter(char ref, LocusIterator context) { + public boolean filter(List rodData, char ref, LocusIterator context) { return true; // We are keeping all the reads } // Map over the edu.mit.broad.sting.atk.LocusContext - public Integer map(char ref, LocusIterator context) { + public Integer map(List rodData, char ref, LocusIterator context) { //System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size()); //for ( SAMRecord read : context.getReads() ) { // System.out.println(" -> " + read.getReadName()); @@ -49,8 +50,18 @@ public class PileupWalker implements LocusWalker { //System.out.printf(" [%2d] [%s] %s%n", offset, read.getReadString().charAt(offset), read.getReadString()); } - if ( context.getPosition() % 10 == 0 ) - System.out.printf("%s:%d: %s %s %s%n", context.getContig(), context.getPosition(), ref, bases, quals); + String rodString = ""; + for ( ReferenceOrderedDatum datum : rodData ) { + if ( datum != null ) { + rodString += datum.toSimpleString(); + } + } + if ( rodString != "" ) + rodString = "[ROD: " + rodString + "]"; + + if ( context.getPosition() % 1 == 0 ) { + System.out.printf("%s:%d: %s %s %s %s%n", context.getContig(), context.getPosition(), ref, bases, quals, rodString); + } //for ( int offset : context.getOffsets() ) { // System.out.println(" -> " + read.getReadName()); diff --git a/java/src/edu/mit/broad/sting/utils/ReferenceOrderedData.java b/java/src/edu/mit/broad/sting/utils/ReferenceOrderedData.java index 34000289a..eeb7c54b5 100644 --- a/java/src/edu/mit/broad/sting/utils/ReferenceOrderedData.java +++ b/java/src/edu/mit/broad/sting/utils/ReferenceOrderedData.java @@ -1,15 +1,7 @@ package edu.mit.broad.sting.utils; -import edu.mit.broad.sam.SAMRecord; -import edu.mit.broad.sam.util.CloseableIterator; -import edu.mit.broad.picard.util.TabbedTextFileParser; - import java.io.File; -import java.io.InputStream; -import java.io.FileInputStream; -import java.io.BufferedInputStream; import java.util.Iterator; -import java.util.HashMap; /** * Class for representing arbitrary reference ordered data sets @@ -19,40 +11,17 @@ import java.util.HashMap; * Time: 10:47:14 AM * To change this template use File | Settings | File Templates. */ -public class ReferenceOrderedData implements Iterable { +public class ReferenceOrderedData implements Iterable { private File file = null; + private Class type = null; // runtime type information for object construction - public ReferenceOrderedData(File file) { + public ReferenceOrderedData(File file, Class type ) { this.file = file; - } - - // ---------------------------------------------------------------------- - // - // Iteration - // - // ---------------------------------------------------------------------- - private class RODIterator implements Iterator { - TabbedTextFileParser parser = null; - public RODIterator() { - parser = new TabbedTextFileParser(true, file); - } - - public boolean hasNext() { - return parser.hasNext(); - } - - public ReferenceOrderedDatum next() { - String parts[] = parser.next(); - return parseGFFLine(parts); - } - - public void remove () { - throw new UnsupportedOperationException(); - } + this.type = type; } public RODIterator iterator() { - return new RODIterator(); + return new RODIterator(new SimpleRODIterator()); } // ---------------------------------------------------------------------- @@ -66,28 +35,112 @@ public class ReferenceOrderedData implements Iterable { } } + // ---------------------------------------------------------------------- + // + // Iteration + // + // ---------------------------------------------------------------------- + private class SimpleRODIterator implements Iterator { + private WhitespaceTextFileParser parser = null; + + public SimpleRODIterator() { + parser = new WhitespaceTextFileParser(true, file); + } + + public boolean hasNext() { + return parser.hasNext(); + } + + public ROD next() { + String parts[] = parser.next(); + return parseLine(parts); + } + + public void remove() { + throw new UnsupportedOperationException(); + } + } + + public class RODIterator implements Iterator { + private PushbackIterator it; + private ROD prev = null; + + public RODIterator(SimpleRODIterator it) { + this.it = new PushbackIterator(it); + } + + public boolean hasNext() { return it.hasNext(); } + public ROD next() { + prev = it.next(); + return prev; + } + + // + // Seeks forward in the file until we reach (or cross) a record at contig / pos + // If we don't find anything and cross beyond contig / pos, we return null + // Otherwise we return the first object who's start is at pos + // + public ROD seekForward(final String contigName, final int pos) { + ROD result = null; + + //System.out.printf(" *** starting seek to %s %d %s%n", contigName, pos, prev); + while ( hasNext() ) { + ROD current = next(); + //System.out.printf(" -> Seeking to %s %d AT %s %d%n", contigName, pos, current.getContig(), current.getStart()); + int strCmp = contigName.compareTo( prev.getContig() ); + if ( strCmp == 0 ) { + // The contigs are equal + if ( current.getStart() > pos ) { + // There was nothing to find, push back next and return null + it.pushback(current); + break; + } + else if ( pos == current.getStart() ) { + // We found a record at contig / pos, return it + result = current; + break; + } + } + else if ( strCmp < 0 ) { + // We've gone past the desired contig, break + break; + } + } + + /* + if ( result == null ) + ; + //System.out.printf(" --- seek result to %s %d is NULL%n", contigName, pos); + else + System.out.printf(" ### Found %s %d%n", result.getContig(), result.getStart()); + */ + + // we ran out of elements or found something + return result; + } + + public void remove() { + throw new UnsupportedOperationException(); + } + } // ---------------------------------------------------------------------- // // Parsing // // ---------------------------------------------------------------------- - ReferenceOrderedDatum parseGFFLine(final String[] parts) { + ROD parseLine(final String[] parts) { //System.out.printf("Parsing GFFLine %s%n", Utils.join(" ", parts)); - - final String contig = parts[0]; - final String source = parts[1]; - final String feature = parts[2]; - final long start = Long.parseLong(parts[3]); - final long stop = Long.parseLong(parts[4]); - - double score = Double.NaN; - if ( ! parts[5].equals(".") ) - score = Double.parseDouble(parts[5]); - - final String strand = parts[6]; - final String frame = parts[7]; - HashMap attributes = null; - return new ReferenceOrderedDatum(contig, source, feature, start, stop, score, strand, frame, attributes); + try { + ROD obj = type.newInstance(); + obj.parseLine(parts); + return obj; + } catch ( java.lang.InstantiationException e ) { + System.out.println(e); + return null; // wow, unsafe! + } catch ( java.lang.IllegalAccessException e ) { + System.out.println(e); + return null; // wow, unsafe! + } } } diff --git a/java/src/edu/mit/broad/sting/utils/ReferenceOrderedDatum.java b/java/src/edu/mit/broad/sting/utils/ReferenceOrderedDatum.java index 3444d1397..ca1c88aaf 100644 --- a/java/src/edu/mit/broad/sting/utils/ReferenceOrderedDatum.java +++ b/java/src/edu/mit/broad/sting/utils/ReferenceOrderedDatum.java @@ -1,7 +1,5 @@ package edu.mit.broad.sting.utils; -import java.util.HashMap; - /** * Created by IntelliJ IDEA. * User: mdepristo @@ -9,85 +7,15 @@ import java.util.HashMap; * Time: 10:49:47 AM * To change this template use File | Settings | File Templates. */ -public class ReferenceOrderedDatum { - private String contig, source, feature, strand, frame; - private long start, stop; - private double score; - private HashMap attributes; +public abstract class ReferenceOrderedDatum { + public ReferenceOrderedDatum() { } - // ---------------------------------------------------------------------- - // - // Constructors - // - // ---------------------------------------------------------------------- - public ReferenceOrderedDatum(final String contig, final String source, final String feature, - final long start, final long stop, final double score, - final String strand, final String frame, HashMap attributes) { - this.contig = contig; - this.source = source; - this.feature = feature; - this.start = start; - this.stop= stop; - this.score = score; - this.strand = strand; - this.frame = frame; - this.attributes = attributes; - } + public abstract void parseLine(final String[] parts); -// public ReferenceOrderedDatum(final String contig, final String source, final String feature, -// final long start, final long stop, final double score, -// final String strand, final String frame) { -// ReferenceOrderedDatum(contig, source, feature, start, stop, score, strand, frame, null); -// } - - // ---------------------------------------------------------------------- - // - // Accessors - // - // ---------------------------------------------------------------------- - public String getContig() { - return this.contig; - } - - public String getSource() { - return source; - } - - public String getFeature() { - return feature; - } - - public String getStrand() { - return strand; - } - - public String getFrame() { - return frame; - } - - public long getStart() { - return start; - } - - public long getStop() { - return stop; - } - - public double getScore() { - return score; - } - - public String getAttribute(final String key) { - return attributes.get(key); - } - - // ---------------------------------------------------------------------- - // - // formatting - // - // ---------------------------------------------------------------------- - public String toString() { - return String.format("%s\t%s\t%s\t%d\t%d\t%f\t%s\t%s", contig, source, feature, start, stop, score, strand, frame); - } + public abstract String toString(); + public abstract String toSimpleString(); + public abstract String getContig(); + public abstract long getStart(); + public abstract long getStop(); } diff --git a/java/src/edu/mit/broad/sting/utils/WhitespaceTextFileParser.java b/java/src/edu/mit/broad/sting/utils/WhitespaceTextFileParser.java new file mode 100644 index 000000000..7c128b05e --- /dev/null +++ b/java/src/edu/mit/broad/sting/utils/WhitespaceTextFileParser.java @@ -0,0 +1,43 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.sting.utils; + +import edu.mit.broad.picard.util.BasicTextFileParser; + +import java.io.File; +import java.util.List; +import java.util.Arrays; + +/** + * Parser for tab-delimited files + * + * @author Kathleen Tibbetts + */ +public class WhitespaceTextFileParser extends BasicTextFileParser { + + /** + * Constructor + * + * @param file The file to parse + */ + public WhitespaceTextFileParser(boolean treatGroupedDelimitersAsOne, File... file) { + super(treatGroupedDelimitersAsOne, file); + } + + /** + * Determines whether a given character is a delimiter + * + * @param b the character to evaluate + * @return true if b is a delimiter; otherwise false + */ + protected boolean isDelimiter(byte b) { + return Character.isWhitespace((char)b); + } +} \ No newline at end of file diff --git a/java/src/edu/mit/broad/sting/utils/rodDbSNP.java b/java/src/edu/mit/broad/sting/utils/rodDbSNP.java new file mode 100644 index 000000000..695ec9c9d --- /dev/null +++ b/java/src/edu/mit/broad/sting/utils/rodDbSNP.java @@ -0,0 +1,102 @@ +package edu.mit.broad.sting.utils; + +import edu.mit.broad.sam.SAMRecord; +import edu.mit.broad.sam.util.CloseableIterator; +import edu.mit.broad.picard.util.TabbedTextFileParser; + +import java.io.File; +import java.io.InputStream; +import java.io.FileInputStream; +import java.io.BufferedInputStream; +import java.util.Iterator; +import java.util.HashMap; + +/** + * Example format: + * 585 chr1 433 433 rs56289060 0 + - - -/C genomic insertion unknown 0 0 unknown between 1 + * 585 chr1 491 492 rs55998931 0 + C C C/T genomic single unknown 0 0 unknown exact 1 + * + * User: mdepristo + * Date: Feb 27, 2009 + * Time: 10:47:14 AM + * To change this template use File | Settings | File Templates. + */ +public class rodDbSNP extends ReferenceOrderedDatum { + public String contig; // Reference sequence chromosome or scaffold + public long start, stop; // Start and stop positions in chrom + + public String name; // Reference SNP identifier or Affy SNP name + public String strand; // Which DNA strand contains the observed alleles + public String observed; // The sequences of the observed alleles from rs-fasta files + public char[] observedBases; // The sequences of the observed alleles from rs-fasta files + public String molType; // Sample type from exemplar ss + public String varType; // The class of variant (simple, insertion, deletion, range, etc.) + // Can be 'unknown','single','in-del','het','microsatellite','named','mixed','mnp','insertion','deletion' + public String validationStatus; // The validation status of the SNP + // one of set('unknown','by-cluster','by-frequency','by-submitter','by-2hit-2allele','by-hapmap') + + public double avHet; // The average heterozygosity from all observations + public double avHetSE; // The Standard Error for the average heterozygosity + + public String func; // The functional category of the SNP (coding-synon, coding-nonsynon, intron, etc.) + // set('unknown','coding-synon','intron','cds-reference','near-gene-3','near-gene-5', + // 'nonsense','missense','frameshift','untranslated-3','untranslated-5','splice-3','splice-5') + public String locType; // How the variant affects the reference sequence + // enum('range','exact','between','rangeInsertion','rangeSubstitution','rangeDeletion') + + public int weight; // The quality of the alignment + + // ---------------------------------------------------------------------- + // + // Constructors + // + // ---------------------------------------------------------------------- + public rodDbSNP() {} + + public String getContig() { return this.contig; } + public long getStart() { return start; } + public long getStop() { return stop; } + + // ---------------------------------------------------------------------- + // + // formatting + // + // ---------------------------------------------------------------------- + public String toString() { + return String.format("%s\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%f\t%f\t%s\t%s\t%d", + contig, start, stop, name, strand, observed, molType, + varType, validationStatus, avHet, avHetSE, func, locType, weight ); + } + + public String toSimpleString() { + return String.format("%s:%s", name, observed); + } + + public void parseLine(final String[] parts) { + //System.out.printf("Parsing GFFLine %s%n", Utils.join(" <=> ", parts)); + contig = parts[1]; + start = Long.parseLong(parts[2]) + 1; // The final is 0 based + stop = Long.parseLong(parts[3]) + 1; // The final is 0 based + name = parts[4]; + strand = parts[5]; + observed = parts[9]; + molType = parts[10]; + varType = parts[11]; + validationStatus = parts[12]; + avHet = Double.parseDouble(parts[13]); + avHetSE = Double.parseDouble(parts[14]); + func = parts[15]; + locType = parts[16]; + weight = Integer.parseInt(parts[17]); + + // Cut up the observed bases string into an array of individual bases + String[] bases = observed.split("/"); + observedBases = new char[bases.length]; + for ( String elt : bases ) { + observedBases[0] = (char)elt.getBytes()[0]; + //System.out.printf(" Bases %s %d %c%n", elt, elt.getBytes()[0], (char)elt.getBytes()[0]); + } + //System.out.printf(" => Observed bases are %s%n", Utils.join(" B ", bases)); + + } +} \ No newline at end of file diff --git a/java/src/edu/mit/broad/sting/utils/rodGFF.java b/java/src/edu/mit/broad/sting/utils/rodGFF.java new file mode 100644 index 000000000..86fdfa4fa --- /dev/null +++ b/java/src/edu/mit/broad/sting/utils/rodGFF.java @@ -0,0 +1,123 @@ +package edu.mit.broad.sting.utils; + +import edu.mit.broad.sam.SAMRecord; +import edu.mit.broad.sam.util.CloseableIterator; +import edu.mit.broad.picard.util.TabbedTextFileParser; + +import java.io.File; +import java.io.InputStream; +import java.io.FileInputStream; +import java.io.BufferedInputStream; +import java.util.Iterator; +import java.util.HashMap; + +/** + * Class for representing arbitrary reference ordered data sets + * + * User: mdepristo + * Date: Feb 27, 2009 + * Time: 10:47:14 AM + * To change this template use File | Settings | File Templates. + */ +public class rodGFF extends ReferenceOrderedDatum { + private String contig, source, feature, strand, frame; + private long start, stop; + private double score; + private HashMap attributes; + + // ---------------------------------------------------------------------- + // + // Constructors + // + // ---------------------------------------------------------------------- + public rodGFF() { + + } + + public void setValues(final String contig, final String source, final String feature, + final long start, final long stop, final double score, + final String strand, final String frame, HashMap attributes) { + this.contig = contig; + this.source = source; + this.feature = feature; + this.start = start; + this.stop= stop; + this.score = score; + this.strand = strand; + this.frame = frame; + this.attributes = attributes; + } + + // ---------------------------------------------------------------------- + // + // Accessors + // + // ---------------------------------------------------------------------- + public String getContig() { + return this.contig; + } + + public String getSource() { + return source; + } + + public String getFeature() { + return feature; + } + + public String getStrand() { + return strand; + } + + public String getFrame() { + return frame; + } + + public long getStart() { + return start; + } + + public long getStop() { + return stop; + } + + public double getScore() { + return score; + } + + public String getAttribute(final String key) { + return attributes.get(key); + } + + // ---------------------------------------------------------------------- + // + // formatting + // + // ---------------------------------------------------------------------- + public String toString() { + return String.format("%s\t%s\t%s\t%d\t%d\t%f\t%s\t%s", contig, source, feature, start, stop, score, strand, frame); + } + + public String toSimpleString() { + return String.format("%s", feature); + } + + public void parseLine(final String[] parts) { + //System.out.printf("Parsing GFFLine %s%n", Utils.join(" ", parts)); + + final String contig = parts[0]; + final String source = parts[1]; + final String feature = parts[2]; + final long start = Long.parseLong(parts[3]); + final long stop = Long.parseLong(parts[4]); + + double score = Double.NaN; + if ( ! parts[5].equals(".") ) + score = Double.parseDouble(parts[5]); + + final String strand = parts[6]; + final String frame = parts[7]; + HashMap attributes = null; + setValues(contig, source, feature, start, stop, score, strand, frame, attributes); + } +} \ No newline at end of file