Support for generating (very basic) wiggle files for use with IGV (see UCSC for wiggle spec); and a walker to take in a variant track and create a transition transversion rate track for the whole genome (due to the wiggle spec, this has to be done by chromosome). It's interesting to see the effect of genes!
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3848 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3596b1529f
commit
365b42390d
|
|
@ -0,0 +1,105 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
|
import org.broadinstitute.sting.utils.wiggle.WiggleHeader;
|
||||||
|
import org.broadinstitute.sting.utils.wiggle.WiggleWriter;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.EnumSet;
|
||||||
|
import java.util.HashSet;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
|
||||||
|
*
|
||||||
|
* @Author chartl
|
||||||
|
* @Date Jul 21, 2010
|
||||||
|
*/
|
||||||
|
public class CreateTiTvTrack extends RodWalker<VariantContext,TiTvWindow> {
|
||||||
|
@Argument(shortName="size",doc="Size of the window",required = true)
|
||||||
|
int size = -1;
|
||||||
|
|
||||||
|
private WiggleWriter writer;
|
||||||
|
|
||||||
|
public TiTvWindow reduceInit() {
|
||||||
|
writer = new WiggleWriter(out);
|
||||||
|
writer.writeHeader(new WiggleHeader("TiTv",String.format("The Transition Transversion rate across the genome using variants windows of size %d",size)));
|
||||||
|
return new TiTvWindow(size);
|
||||||
|
}
|
||||||
|
|
||||||
|
// public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
// return tracker == null || tracker.getVariantContext(ref, "variant", null, context.getLocation(), true).isFiltered();
|
||||||
|
//}
|
||||||
|
|
||||||
|
public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
VariantContext vc;
|
||||||
|
if ( tracker != null ) {
|
||||||
|
vc = tracker.getVariantContext(ref, "variant", null, context.getLocation(), true);
|
||||||
|
return vc;
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public TiTvWindow reduce(VariantContext vc, TiTvWindow window) {
|
||||||
|
if ( vc == null || ! vc.isSNP() || vc.getAlternateAlleles().size() > 1) {
|
||||||
|
return window;
|
||||||
|
}
|
||||||
|
|
||||||
|
window.update(vc.isTransition());
|
||||||
|
if ( window.getTiTv() != null ) {
|
||||||
|
writer.writeData(vc.getLocation(),window.getTiTv());
|
||||||
|
}
|
||||||
|
|
||||||
|
return window;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void onTraversalDone() {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
class TiTvWindow {
|
||||||
|
long nTi;
|
||||||
|
long nTv;
|
||||||
|
ArrayList<Boolean> variants;
|
||||||
|
int maxSize;
|
||||||
|
|
||||||
|
public TiTvWindow(int size) {
|
||||||
|
maxSize = size;
|
||||||
|
variants = new ArrayList<Boolean>(size);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void update(Boolean isTi) {
|
||||||
|
if ( variants.size() == maxSize ) {
|
||||||
|
Boolean first = variants.remove(0);
|
||||||
|
if ( first ) {
|
||||||
|
nTi--;
|
||||||
|
} else {
|
||||||
|
nTv--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
variants.add(isTi);
|
||||||
|
if ( isTi ) {
|
||||||
|
nTi++;
|
||||||
|
} else {
|
||||||
|
nTv++;
|
||||||
|
}
|
||||||
|
|
||||||
|
//System.out.println(variants.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
public Double getTiTv() {
|
||||||
|
if ( variants.size() == maxSize ) {
|
||||||
|
return ( nTi + 1.0 )/(nTv + 1.0);
|
||||||
|
} else {
|
||||||
|
return null; // window not full
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,31 @@
|
||||||
|
package org.broadinstitute.sting.utils.wiggle;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A class for defining the header values for a wiggle graph file (see UCSC). The optional fields are:
|
||||||
|
* name, description, visibility, color, altColor, priority, autoscale, alwaysZero, gridDefault,
|
||||||
|
* maxHeightPixels,graphType,viewLimits,yLineMark,yLineOnOff,windowingFunction,smoothingWindow
|
||||||
|
*
|
||||||
|
* For now only support name, description
|
||||||
|
*
|
||||||
|
* @Author chartl
|
||||||
|
* @Date Jul 21, 2010
|
||||||
|
*/
|
||||||
|
public class WiggleHeader {
|
||||||
|
static String type = "wiggle_0";
|
||||||
|
// defines the type of the track (for IGV or UCSC), wiggle_0 is the 'only' type of wiggle
|
||||||
|
private String name;
|
||||||
|
// a label for the track
|
||||||
|
private String description;
|
||||||
|
// a description of what the track is
|
||||||
|
|
||||||
|
public WiggleHeader(String name, String description) {
|
||||||
|
this.name = name;
|
||||||
|
this.description = description;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("track type=%s name=\"%s\" description=\"%s\"",type,name,description);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -0,0 +1,88 @@
|
||||||
|
package org.broadinstitute.sting.utils.wiggle;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Manages the output of wiggle files. Due to the wiggle spec (each wiggle file must be one chromosome), this writer
|
||||||
|
* will throw exceptions (or output multiple files?)
|
||||||
|
*
|
||||||
|
* todo -- currently no support for fixed step (special case of variable step)
|
||||||
|
* todo -- currently no support for span, start, or step
|
||||||
|
*
|
||||||
|
* @Author chartl
|
||||||
|
* @Date Jul 21, 2010
|
||||||
|
*/
|
||||||
|
public class WiggleWriter {
|
||||||
|
|
||||||
|
enum StepType {
|
||||||
|
fixed("fixedStep"),variable("variableStep");
|
||||||
|
|
||||||
|
String repr;
|
||||||
|
|
||||||
|
StepType(String repr) {
|
||||||
|
this.repr = repr;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return repr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private WiggleHeader wHeader = null;
|
||||||
|
// the header that we need to write prior to the file; and on future files (if multiple outputs ??)
|
||||||
|
private BufferedWriter wWriter = null;
|
||||||
|
// the file to which we are writing
|
||||||
|
private GenomeLoc firstLoc = null;
|
||||||
|
// the first genome loc the writer saw; need to cache this to compare contigs to preserve spec
|
||||||
|
private StepType type = StepType.variable;
|
||||||
|
// the type of step for the wiggle file, todo -- allow this to change
|
||||||
|
|
||||||
|
public WiggleWriter(File outputFile) {
|
||||||
|
FileOutputStream outputStream;
|
||||||
|
try {
|
||||||
|
outputStream = new FileOutputStream(outputFile);
|
||||||
|
} catch ( FileNotFoundException e ) {
|
||||||
|
throw new StingException("Unable to create a wiggle file at location: %s"+outputFile.getAbsolutePath(),e);
|
||||||
|
}
|
||||||
|
|
||||||
|
wWriter = new BufferedWriter(new OutputStreamWriter(outputStream));
|
||||||
|
}
|
||||||
|
|
||||||
|
public WiggleWriter(OutputStream out) {
|
||||||
|
wWriter = new BufferedWriter(new OutputStreamWriter(out));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void writeHeader(WiggleHeader header) {
|
||||||
|
wHeader = header;
|
||||||
|
write(wWriter,header.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void writeData(GenomeLoc loc, Object dataPoint) {
|
||||||
|
if ( this.firstLoc == null ) {
|
||||||
|
firstLoc = loc;
|
||||||
|
write(wWriter,String.format("%n"));
|
||||||
|
write(wWriter,String.format("%s\tchrom=%s",type.toString(),firstLoc.getContig()));
|
||||||
|
write(wWriter,String.format("%n"));
|
||||||
|
write(wWriter,String.format("%d\t%s",loc.getStart(),dataPoint.toString()));
|
||||||
|
} else if ( loc.compareContigs(firstLoc) == 0 ) {
|
||||||
|
write(wWriter,String.format("%n"));
|
||||||
|
write(wWriter,String.format("%d\t%s",loc.getStart(),dataPoint.toString()));
|
||||||
|
} else {
|
||||||
|
// todo -- maybe allow this to open a new file for the new chromosome?
|
||||||
|
throw new StingException("Attempting to write multiple contigs into wiggle file, first contig was "+firstLoc.getContig()+" most recent "+loc.getContig());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void write(BufferedWriter w, String s) {
|
||||||
|
try {
|
||||||
|
w.write(s);
|
||||||
|
w.flush();
|
||||||
|
// flush required so writing to output stream will work
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new StingException(String.format("Error writing the wiggle line %s",s),e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue