diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CreateTiTvTrack.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CreateTiTvTrack.java new file mode 100755 index 000000000..145593c8c --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CreateTiTvTrack.java @@ -0,0 +1,105 @@ +package org.broadinstitute.sting.oneoffprojects.walkers; + +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.wiggle.WiggleHeader; +import org.broadinstitute.sting.utils.wiggle.WiggleWriter; + +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.HashSet; + +/** + * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl + * + * @Author chartl + * @Date Jul 21, 2010 + */ +public class CreateTiTvTrack extends RodWalker { + @Argument(shortName="size",doc="Size of the window",required = true) + int size = -1; + + private WiggleWriter writer; + + public TiTvWindow reduceInit() { + writer = new WiggleWriter(out); + writer.writeHeader(new WiggleHeader("TiTv",String.format("The Transition Transversion rate across the genome using variants windows of size %d",size))); + return new TiTvWindow(size); + } + + // public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + // return tracker == null || tracker.getVariantContext(ref, "variant", null, context.getLocation(), true).isFiltered(); + //} + + public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + VariantContext vc; + if ( tracker != null ) { + vc = tracker.getVariantContext(ref, "variant", null, context.getLocation(), true); + return vc; + } else { + return null; + } + } + + public TiTvWindow reduce(VariantContext vc, TiTvWindow window) { + if ( vc == null || ! vc.isSNP() || vc.getAlternateAlleles().size() > 1) { + return window; + } + + window.update(vc.isTransition()); + if ( window.getTiTv() != null ) { + writer.writeData(vc.getLocation(),window.getTiTv()); + } + + return window; + } + + public void onTraversalDone() { + + } + +} + +class TiTvWindow { + long nTi; + long nTv; + ArrayList variants; + int maxSize; + + public TiTvWindow(int size) { + maxSize = size; + variants = new ArrayList(size); + } + + public void update(Boolean isTi) { + if ( variants.size() == maxSize ) { + Boolean first = variants.remove(0); + if ( first ) { + nTi--; + } else { + nTv--; + } + } + + variants.add(isTi); + if ( isTi ) { + nTi++; + } else { + nTv++; + } + + //System.out.println(variants.size()); + } + + public Double getTiTv() { + if ( variants.size() == maxSize ) { + return ( nTi + 1.0 )/(nTv + 1.0); + } else { + return null; // window not full + } + } +} diff --git a/java/src/org/broadinstitute/sting/utils/wiggle/WiggleHeader.java b/java/src/org/broadinstitute/sting/utils/wiggle/WiggleHeader.java new file mode 100755 index 000000000..cafd2db72 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/wiggle/WiggleHeader.java @@ -0,0 +1,31 @@ +package org.broadinstitute.sting.utils.wiggle; + +/** + * A class for defining the header values for a wiggle graph file (see UCSC). The optional fields are: + * name, description, visibility, color, altColor, priority, autoscale, alwaysZero, gridDefault, + * maxHeightPixels,graphType,viewLimits,yLineMark,yLineOnOff,windowingFunction,smoothingWindow + * + * For now only support name, description + * + * @Author chartl + * @Date Jul 21, 2010 + */ +public class WiggleHeader { + static String type = "wiggle_0"; + // defines the type of the track (for IGV or UCSC), wiggle_0 is the 'only' type of wiggle + private String name; + // a label for the track + private String description; + // a description of what the track is + + public WiggleHeader(String name, String description) { + this.name = name; + this.description = description; + } + + public String toString() { + return String.format("track type=%s name=\"%s\" description=\"%s\"",type,name,description); + } + +} + diff --git a/java/src/org/broadinstitute/sting/utils/wiggle/WiggleWriter.java b/java/src/org/broadinstitute/sting/utils/wiggle/WiggleWriter.java new file mode 100755 index 000000000..ed657e0c1 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/wiggle/WiggleWriter.java @@ -0,0 +1,88 @@ +package org.broadinstitute.sting.utils.wiggle; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.StingException; + +import java.io.*; + +/** + * Manages the output of wiggle files. Due to the wiggle spec (each wiggle file must be one chromosome), this writer + * will throw exceptions (or output multiple files?) + * + * todo -- currently no support for fixed step (special case of variable step) + * todo -- currently no support for span, start, or step + * + * @Author chartl + * @Date Jul 21, 2010 + */ +public class WiggleWriter { + + enum StepType { + fixed("fixedStep"),variable("variableStep"); + + String repr; + + StepType(String repr) { + this.repr = repr; + } + + public String toString() { + return repr; + } + } + + private WiggleHeader wHeader = null; + // the header that we need to write prior to the file; and on future files (if multiple outputs ??) + private BufferedWriter wWriter = null; + // the file to which we are writing + private GenomeLoc firstLoc = null; + // the first genome loc the writer saw; need to cache this to compare contigs to preserve spec + private StepType type = StepType.variable; + // the type of step for the wiggle file, todo -- allow this to change + + public WiggleWriter(File outputFile) { + FileOutputStream outputStream; + try { + outputStream = new FileOutputStream(outputFile); + } catch ( FileNotFoundException e ) { + throw new StingException("Unable to create a wiggle file at location: %s"+outputFile.getAbsolutePath(),e); + } + + wWriter = new BufferedWriter(new OutputStreamWriter(outputStream)); + } + + public WiggleWriter(OutputStream out) { + wWriter = new BufferedWriter(new OutputStreamWriter(out)); + } + + public void writeHeader(WiggleHeader header) { + wHeader = header; + write(wWriter,header.toString()); + } + + public void writeData(GenomeLoc loc, Object dataPoint) { + if ( this.firstLoc == null ) { + firstLoc = loc; + write(wWriter,String.format("%n")); + write(wWriter,String.format("%s\tchrom=%s",type.toString(),firstLoc.getContig())); + write(wWriter,String.format("%n")); + write(wWriter,String.format("%d\t%s",loc.getStart(),dataPoint.toString())); + } else if ( loc.compareContigs(firstLoc) == 0 ) { + write(wWriter,String.format("%n")); + write(wWriter,String.format("%d\t%s",loc.getStart(),dataPoint.toString())); + } else { + // todo -- maybe allow this to open a new file for the new chromosome? + throw new StingException("Attempting to write multiple contigs into wiggle file, first contig was "+firstLoc.getContig()+" most recent "+loc.getContig()); + } + } + + private void write(BufferedWriter w, String s) { + try { + w.write(s); + w.flush(); + // flush required so writing to output stream will work + } catch (IOException e) { + throw new StingException(String.format("Error writing the wiggle line %s",s),e); + } + } +}