Initial support for reference ordered data sets

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@7 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-02-27 17:07:57 +00:00
parent bb55947e2e
commit c9f0161e24
4 changed files with 201 additions and 0 deletions

View File

@ -6,6 +6,7 @@ import edu.mit.broad.picard.cmdline.Usage;
import edu.mit.broad.picard.cmdline.Option;
import edu.mit.broad.sting.atk.modules.*;
import edu.mit.broad.sting.utils.ReferenceOrderedData;
import java.io.*;
import java.util.HashMap;
@ -43,6 +44,9 @@ public class AnalysisTK extends CommandLineProgram {
}
protected int doWork() {
ReferenceOrderedData rod = new ReferenceOrderedData(new File("trunk/data/gFFTest.gff"));
rod.testMe();
this.engine = new TraversalEngine(INPUT_FILE, REF_FILE_ARG);
ValidationStringency strictness;

View File

@ -0,0 +1,93 @@
package edu.mit.broad.sting.utils;
import edu.mit.broad.sam.SAMRecord;
import edu.mit.broad.sam.util.CloseableIterator;
import edu.mit.broad.picard.util.TabbedTextFileParser;
import java.io.File;
import java.io.InputStream;
import java.io.FileInputStream;
import java.io.BufferedInputStream;
import java.util.Iterator;
import java.util.HashMap;
/**
 * A reference ordered data set backed by a tab-delimited, GFF-style file.
 *
 * <p>Iterating over an instance yields one {@link ReferenceOrderedDatum} per row returned by a
 * {@code TabbedTextFileParser} opened on the file. Every call to {@link #iterator()} creates a
 * new parser, so each iteration starts again from the beginning of the file.
 *
 * User: mdepristo
 * Date: Feb 27, 2009
 */
public class ReferenceOrderedData implements Iterable<ReferenceOrderedDatum> {
    /** Number of leading columns (contig .. frame) that every row must provide. */
    private static final int REQUIRED_GFF_COLUMNS = 8;

    /** The file the records are read from. */
    private final File file;

    /**
     * Creates a data set over the given file. The file is not opened until iteration starts.
     *
     * @param file the tab-delimited file holding the records
     */
    public ReferenceOrderedData(File file) {
        this.file = file;
    }

    // ----------------------------------------------------------------------
    //
    // Iteration
    //
    // ----------------------------------------------------------------------

    /**
     * Iterator that pulls rows from a tabbed-text parser and converts each one into a
     * {@link ReferenceOrderedDatum}.
     */
    private class RODIterator implements Iterator<ReferenceOrderedDatum> {
        TabbedTextFileParser parser = null;

        public RODIterator() {
            parser = new TabbedTextFileParser(true, file);
        }

        /** @return whether the underlying parser has another row */
        public boolean hasNext() {
            return parser.hasNext();
        }

        /**
         * @return the next row of the file, parsed into a datum
         * @throws IllegalArgumentException if the row is malformed (see {@code parseGFFLine})
         */
        public ReferenceOrderedDatum next() {
            final String[] parts = parser.next();
            return parseGFFLine(parts);
        }

        /** Removal is not supported; the data set is read-only. */
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * @return a fresh iterator over the records of the file
     */
    public RODIterator iterator() {
        return new RODIterator();
    }

    // ----------------------------------------------------------------------
    //
    // Testing
    //
    // ----------------------------------------------------------------------

    /**
     * Debugging aid: prints every record of the data set to standard output, one per line.
     */
    public void testMe() {
        for ( ReferenceOrderedDatum rec : this ) {
            System.out.println(rec.toString());
        }
    }

    // ----------------------------------------------------------------------
    //
    // Parsing
    //
    // ----------------------------------------------------------------------

    /**
     * Converts the fields of one row into a {@link ReferenceOrderedDatum}.
     *
     * <p>The first eight fields are read, in order, as contig, source, feature, start, stop,
     * score, strand and frame. A score of {@code "."} is stored as {@code Double.NaN}. Any
     * further fields are currently ignored, so the returned datum carries no attributes.
     *
     * @param parts the fields of a single row
     * @return the parsed record
     * @throws IllegalArgumentException if {@code parts} is null or has fewer than eight fields;
     *         a {@link NumberFormatException} (a subclass) is thrown if start, stop or score
     *         is not numeric
     */
    ReferenceOrderedDatum parseGFFLine(final String[] parts) {
        // Fail with a descriptive message instead of a bare ArrayIndexOutOfBoundsException.
        if ( parts == null || parts.length < REQUIRED_GFF_COLUMNS ) {
            throw new IllegalArgumentException(String.format(
                    "Malformed GFF line in %s: expected at least %d fields but found %d",
                    file, REQUIRED_GFF_COLUMNS, parts == null ? 0 : parts.length));
        }

        final String contig = parts[0];
        final String source = parts[1];
        final String feature = parts[2];
        final long start = Long.parseLong(parts[3]);
        final long stop = Long.parseLong(parts[4]);

        // "." marks a missing score
        double score = Double.NaN;
        if ( ! parts[5].equals(".") )
            score = Double.parseDouble(parts[5]);

        final String strand = parts[6];
        final String frame = parts[7];

        // The attribute column is not parsed yet. Hand over an empty map rather than null so
        // that attribute look-ups on the datum return null instead of failing on a null map.
        final HashMap<String, String> attributes = new HashMap<String, String>();

        return new ReferenceOrderedDatum(contig, source, feature, start, stop, score, strand, frame, attributes);
    }
}

View File

@ -0,0 +1,93 @@
package edu.mit.broad.sting.utils;
import java.util.HashMap;
/**
 * A single record of a reference ordered data set: the fixed GFF-style fields (contig, source,
 * feature, start, stop, score, strand, frame) plus an optional map of named attributes.
 *
 * User: mdepristo
 * Date: Feb 27, 2009
 */
public class ReferenceOrderedDatum {
    private final String contig, source, feature, strand, frame;
    private final long start, stop;
    private final double score;
    /** Named attributes of the record; may be null when the record has none. */
    private final HashMap<String, String> attributes;

    // ----------------------------------------------------------------------
    //
    // Constructors
    //
    // ----------------------------------------------------------------------

    /**
     * Creates a record from its individual fields.
     *
     * @param contig     name of the contig the record lies on
     * @param source     source field of the record
     * @param feature    feature field of the record
     * @param start      start position
     * @param stop       stop position
     * @param score      score of the record (may be {@code Double.NaN})
     * @param strand     strand field of the record
     * @param frame      frame field of the record
     * @param attributes named attributes, or null if there are none; the map is stored by
     *                   reference, not copied
     */
    public ReferenceOrderedDatum(final String contig, final String source, final String feature,
                                 final long start, final long stop, final double score,
                                 final String strand, final String frame, HashMap<String, String> attributes) {
        this.contig = contig;
        this.source = source;
        this.feature = feature;
        this.start = start;
        this.stop = stop;
        this.score = score;
        this.strand = strand;
        this.frame = frame;
        this.attributes = attributes;
    }

    /**
     * Creates a record without attributes; equivalent to passing null as the attribute map.
     */
    public ReferenceOrderedDatum(final String contig, final String source, final String feature,
                                 final long start, final long stop, final double score,
                                 final String strand, final String frame) {
        this(contig, source, feature, start, stop, score, strand, frame, null);
    }

    // ----------------------------------------------------------------------
    //
    // Accessors
    //
    // ----------------------------------------------------------------------

    /** @return the contig field */
    public String getContig() {
        return this.contig;
    }

    /** @return the source field */
    public String getSource() {
        return source;
    }

    /** @return the feature field */
    public String getFeature() {
        return feature;
    }

    /** @return the strand field */
    public String getStrand() {
        return strand;
    }

    /** @return the frame field */
    public String getFrame() {
        return frame;
    }

    /** @return the start position */
    public long getStart() {
        return start;
    }

    /** @return the stop position */
    public long getStop() {
        return stop;
    }

    /** @return the score */
    public double getScore() {
        return score;
    }

    /**
     * Looks up a named attribute of this record.
     *
     * @param key the attribute name
     * @return the attribute value, or null if the record has no such attribute
     *         (including when it was created without an attribute map)
     */
    public String getAttribute(final String key) {
        // Records may be built with a null attribute map; treat that as "no attributes"
        // rather than throwing a NullPointerException.
        if ( attributes == null ) {
            return null;
        }
        return attributes.get(key);
    }

    // ----------------------------------------------------------------------
    //
    // formatting
    //
    // ----------------------------------------------------------------------

    /**
     * @return the eight fixed fields joined by tabs, in the order contig, source, feature,
     *         start, stop, score, strand, frame; attributes are not included
     */
    @Override
    public String toString() {
        return String.format("%s\t%s\t%s\t%d\t%d\t%f\t%s\t%s", contig, source, feature, start, stop, score, strand, frame);
    }
}

View File

@ -52,4 +52,15 @@ public class Utils {
return flags;
}
/**
 * Joins the given strings into one, placing {@code separator} between consecutive elements.
 *
 * <p>Null elements are rendered as the text {@code "null"}, regardless of their position.
 *
 * @param separator text inserted between consecutive elements
 * @param strings   the elements to join; must not be null
 * @return the joined string, or the empty string when {@code strings} has no elements
 */
public static String join(String separator, String[] strings) {
    if (strings.length == 0) {
        return "";
    }
    // Start from an empty builder and append the first element: new StringBuilder(String)
    // throws NullPointerException for a null argument, whereas append() renders null as
    // "null" -- the same way null elements at later positions are handled below.
    StringBuilder ret = new StringBuilder();
    ret.append(strings[0]);
    for (int i = 1; i < strings.length; ++i) {
        ret.append(separator);
        ret.append(strings[i]);
    }
    return ret.toString();
}
}