Added -name argument to AnalyzeAnnotations that allows one to specify the name of the annotation to be used on the plots. Instead of seeing AB and DP, one can add -name AB,AlleleBalance -name DP,Depth

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2742 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-01-29 20:48:53 +00:00
parent 62a80f2b6f
commit c6cc844e55
4 changed files with 33 additions and 10 deletions

View File

@ -24,7 +24,7 @@ xmax = max(d$value)
# Plot TiTv ratio as a function of the annotation
#
outfile = paste(input, ".TiTv.", annotationName, ".pdf", sep="")
outfile = paste(input, ".TiTv.pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv Ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14);
@ -51,7 +51,7 @@ dev.off()
# Plot TiTv ratio as a function of the annotation, log scale on the x-axis
#
outfile = paste(input, ".TiTv_log.", annotationName, ".pdf", sep="")
outfile = paste(input, ".TiTv_log.pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
plot(all$value,all$titv,xlab=annotationName,log="x",ylab="Ti/Tv Ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14);
@ -75,7 +75,7 @@ dev.off()
ymin = min(all$dbsnp)
ymax = max(all$dbsnp)
outfile = paste(input, ".truthRate.", annotationName, ".pdf", sep="")
outfile = paste(input, ".truthRate.pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
yLabel = "DBsnp Rate"
@ -101,7 +101,7 @@ dev.off()
# Plot dbsnp and true positive rate as a function of the annotation, log scale on the x-axis
#
outfile = paste(input, ".truthRate_log.", annotationName, ".pdf", sep="")
outfile = paste(input, ".truthRate_log.pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
yLabel = "DBsnp Rate"
@ -123,7 +123,7 @@ dev.off()
# Plot histogram of the annotation's value
#
outfile = paste(input, "annotationHistogram.", annotationName, ".pdf", sep="")
outfile = paste(input, ".Histogram.pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
plot(all$value,all$numVariants,xlab=annotationName,ylab="Num variants in bin",type="h",xaxt="n",ps=14);

View File

@ -32,7 +32,7 @@ import net.sf.samtools.SAMRecord;
* User: rpoplin
* Date: Jan 29, 2010
*
* The number of previous N bases (along the direction of the read) that contain G's and C's.
* The number of previous N bases (along the direction of the read) that contain G's and C's. The goal is to correct for dye slippage.
* Only valid for Illumina reads. Otherwise return -1.
*/

View File

@ -11,6 +11,8 @@ import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import java.util.HashMap;
/*
* Copyright (c) 2010 The Broad Institute
*
@ -61,12 +63,14 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
private int MAX_VARIANTS_PER_BIN = 20000;
@Argument(fullName = "sampleName", shortName = "sampleName", doc = "If supplied, only process variants found in this sample.", required = false)
private String SAMPLE_NAME = null;
@Argument(fullName = "name", shortName = "name", doc = "Labels for the annotations to make plots look nicer. Each name is a separate -name argument. For example, -name DP,Depth -name AB,AlleleBalance", required = false)
private String[] ANNOTATION_NAMES = null;
/////////////////////////////
// Private Member Variables
/////////////////////////////
private final AnnotationDataManager dataManager = new AnnotationDataManager();
private AnnotationDataManager dataManager;
//---------------------------------------------------------------------------------------------------------------
//
@ -76,6 +80,17 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
public void initialize() {
// Create a HashMap associating the names of the annotations to full Strings that can be used as labels on plots
HashMap<String,String> nameMap = null;
if( ANNOTATION_NAMES != null ) {
nameMap = new HashMap<String,String>();
for( String nameLine : ANNOTATION_NAMES ) {
String[] vals = nameLine.split(",");
nameMap.put(vals[0],vals[1]);
}
}
dataManager = new AnnotationDataManager( nameMap );
if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; }
}
@ -89,13 +104,14 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
if( tracker != null ) {
// First find out if this variant is in the truth set
// First find out if this variant is in the truth sets
boolean isInTruthSet = false;
boolean isTrueVariant = false;
for( ReferenceOrderedDatum rod : tracker.getAllRods() ) {
if( rod != null && rod.getName().toUpperCase().startsWith("TRUTH") ) {
isInTruthSet = true;
// Next see if the truth sets say this site is variant or reference
if( rod instanceof RodVCF ) {
if( ((RodVCF) rod).isSNP() ) {
isTrueVariant = true;

View File

@ -43,9 +43,11 @@ import java.io.FileNotFoundException;
public class AnnotationDataManager {
public final HashMap<String, TreeSet<AnnotationDatum>> data;
public final HashMap<String,String> nameMap;
public AnnotationDataManager() {
public AnnotationDataManager( HashMap<String,String> _nameMap ) {
data = new HashMap<String, TreeSet<AnnotationDatum>>();
nameMap = _nameMap;
}
public void addAnnotations( final RodVCF variant, final String sampleName, final boolean isInTruthSet, final boolean isTrueVariant ) {
@ -136,10 +138,15 @@ public class AnnotationDataManager {
// Close the PrintStream
output.close();
String annotationName = nameMap.get(annotationKey);
if( annotationName == null ) { // name is not in the map so use the key
annotationName = annotationKey;
}
// Print out the command line to make it clear to the user what is being executed and how one might modify it
final String rScriptCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTruthMetrics.R" + " " +
OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationKey + " " + MIN_VARIANTS_PER_BIN;
OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationName + " " + MIN_VARIANTS_PER_BIN;
System.out.println( rScriptCommandLine );
// Execute the RScript command to plot the table of TiTv values