Added -name argument to AnalyzeAnnotations that allows one to specify the name of the annotation to be used on the plots. Instead of seeing AB and DP, one can add -name AB,AlleleBalance -name DP,Depth
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2742 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
62a80f2b6f
commit
c6cc844e55
|
|
@ -24,7 +24,7 @@ xmax = max(d$value)
|
|||
# Plot TiTv ratio as a function of the annotation
|
||||
#
|
||||
|
||||
outfile = paste(input, ".TiTv.", annotationName, ".pdf", sep="")
|
||||
outfile = paste(input, ".TiTv.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv Ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14);
|
||||
|
|
@ -51,7 +51,7 @@ dev.off()
|
|||
# Plot TiTv ratio as a function of the annotation, log scale on the x-axis
|
||||
#
|
||||
|
||||
outfile = paste(input, ".TiTv_log.", annotationName, ".pdf", sep="")
|
||||
outfile = paste(input, ".TiTv_log.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
plot(all$value,all$titv,xlab=annotationName,log="x",ylab="Ti/Tv Ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14);
|
||||
|
|
@ -75,7 +75,7 @@ dev.off()
|
|||
|
||||
ymin = min(all$dbsnp)
|
||||
ymax = max(all$dbsnp)
|
||||
outfile = paste(input, ".truthRate.", annotationName, ".pdf", sep="")
|
||||
outfile = paste(input, ".truthRate.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
yLabel = "DBsnp Rate"
|
||||
|
|
@ -101,7 +101,7 @@ dev.off()
|
|||
# Plot dbsnp and true positive rate as a function of the annotation, log scale on the x-axis
|
||||
#
|
||||
|
||||
outfile = paste(input, ".truthRate_log.", annotationName, ".pdf", sep="")
|
||||
outfile = paste(input, ".truthRate_log.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
yLabel = "DBsnp Rate"
|
||||
|
|
@ -123,7 +123,7 @@ dev.off()
|
|||
# Plot histogram of the annotation's value
|
||||
#
|
||||
|
||||
outfile = paste(input, "annotationHistogram.", annotationName, ".pdf", sep="")
|
||||
outfile = paste(input, ".Histogram.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
plot(all$value,all$numVariants,xlab=annotationName,ylab="Num variants in bin",type="h",xaxt="n",ps=14);
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import net.sf.samtools.SAMRecord;
|
|||
* User: rpoplin
|
||||
* Date: Jan 29, 2010
|
||||
*
|
||||
* The number of previous N bases (along the direction of the read) that contain G's and C's.
|
||||
* The number of previous N bases (along the direction of the read) that contain G's and C's. The goal is to correct for dye slippage.
|
||||
* Only valid for Illumina reads. Otherwise return -1.
|
||||
*/
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,8 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
|
|
@ -61,12 +63,14 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
|
|||
private int MAX_VARIANTS_PER_BIN = 20000;
|
||||
@Argument(fullName = "sampleName", shortName = "sampleName", doc = "If supplied, only process variants found in this sample.", required = false)
|
||||
private String SAMPLE_NAME = null;
|
||||
@Argument(fullName = "name", shortName = "name", doc = "Labels for the annotations to make plots look nicer. Each name is a separate -name argument. For example, -name DP,Depth -name AB,AlleleBalance", required = false)
|
||||
private String[] ANNOTATION_NAMES = null;
|
||||
|
||||
|
||||
/////////////////////////////
|
||||
// Private Member Variables
|
||||
/////////////////////////////
|
||||
private final AnnotationDataManager dataManager = new AnnotationDataManager();
|
||||
private AnnotationDataManager dataManager;
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -76,6 +80,17 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
|
|||
|
||||
public void initialize() {
|
||||
|
||||
// Create a HashMap associating the names of the annotations to full Strings that can be used as labels on plots
|
||||
HashMap<String,String> nameMap = null;
|
||||
if( ANNOTATION_NAMES != null ) {
|
||||
nameMap = new HashMap<String,String>();
|
||||
for( String nameLine : ANNOTATION_NAMES ) {
|
||||
String[] vals = nameLine.split(",");
|
||||
nameMap.put(vals[0],vals[1]);
|
||||
}
|
||||
}
|
||||
dataManager = new AnnotationDataManager( nameMap );
|
||||
|
||||
if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; }
|
||||
}
|
||||
|
||||
|
|
@ -89,13 +104,14 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
|
|||
|
||||
if( tracker != null ) {
|
||||
|
||||
// First find out if this variant is in the truth set
|
||||
// First find out if this variant is in the truth sets
|
||||
boolean isInTruthSet = false;
|
||||
boolean isTrueVariant = false;
|
||||
for( ReferenceOrderedDatum rod : tracker.getAllRods() ) {
|
||||
if( rod != null && rod.getName().toUpperCase().startsWith("TRUTH") ) {
|
||||
isInTruthSet = true;
|
||||
|
||||
// Next see if the truth sets say this site is variant or reference
|
||||
if( rod instanceof RodVCF ) {
|
||||
if( ((RodVCF) rod).isSNP() ) {
|
||||
isTrueVariant = true;
|
||||
|
|
|
|||
|
|
@ -43,9 +43,11 @@ import java.io.FileNotFoundException;
|
|||
public class AnnotationDataManager {
|
||||
|
||||
public final HashMap<String, TreeSet<AnnotationDatum>> data;
|
||||
public final HashMap<String,String> nameMap;
|
||||
|
||||
public AnnotationDataManager() {
|
||||
public AnnotationDataManager( HashMap<String,String> _nameMap ) {
|
||||
data = new HashMap<String, TreeSet<AnnotationDatum>>();
|
||||
nameMap = _nameMap;
|
||||
}
|
||||
|
||||
public void addAnnotations( final RodVCF variant, final String sampleName, final boolean isInTruthSet, final boolean isTrueVariant ) {
|
||||
|
|
@ -136,10 +138,15 @@ public class AnnotationDataManager {
|
|||
|
||||
// Close the PrintStream
|
||||
output.close();
|
||||
|
||||
String annotationName = nameMap.get(annotationKey);
|
||||
if( annotationName == null ) { // name is not in the map so use the key
|
||||
annotationName = annotationKey;
|
||||
}
|
||||
|
||||
// Print out the command line to make it clear to the user what is being executed and how one might modify it
|
||||
final String rScriptCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTruthMetrics.R" + " " +
|
||||
OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationKey + " " + MIN_VARIANTS_PER_BIN;
|
||||
OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationName + " " + MIN_VARIANTS_PER_BIN;
|
||||
System.out.println( rScriptCommandLine );
|
||||
|
||||
// Execute the RScript command to plot the table of TiTv values
|
||||
|
|
|
|||
Loading…
Reference in New Issue