Moving AnalyzeAnnotations into the archive because it has outlived its usefulness.

This commit is contained in:
Ryan Poplin 2011-07-02 10:39:53 -04:00
parent 43959e6780
commit 5faf40b79d
5 changed files with 0 additions and 684 deletions

View File

@ -1,190 +0,0 @@
#!/bin/env Rscript
args <- commandArgs(TRUE)
verbose = TRUE
input = args[1]
annotationName = args[2]
minBinCutoff = as.numeric(args[3])
medianNumVariants = args[4]
c <- read.table(input, header=T)
all = c[c$numVariants>minBinCutoff & c$category=="all",]
novel = c[c$numVariants>minBinCutoff & c$category=="novel",]
dbsnp = c[c$numVariants>minBinCutoff & c$category=="dbsnp",]
truth = c[c$numVariants>minBinCutoff & c$category=="truth",]
#
# Calculate min, max, medians
#
d = c[c$numVariants>minBinCutoff,]
ymin = min(d$titv)
ymax = max(d$titv)
xmin = min(d$value)
xmax = max(d$value)
m = weighted.mean(all$value,all$numVariants/sum(all$numVariants))
ma = all[all$value > m,]
mb = all[all$value < m,]
m75 = weighted.mean(ma$value,ma$numVariants/sum(ma$numVariants))
m25 = weighted.mean(mb$value,mb$numVariants/sum(mb$numVariants))
if(medianNumVariants == "true") {
vc = cumsum( all$numVariants/sum(all$numVariants) )
m10 = all$value[ max(which(vc<=0.10)) ]
m25 = all$value[ max(which(vc<=0.25)) ]
m = all$value[ max(which(vc<=0.5)) ]
m75 = all$value[ min(which(vc>=0.75)) ]
m90 = all$value[ min(which(vc>=0.90)) ]
}
#
# Plot TiTv ratio as a function of the annotation
#
outfile = paste(input, ".TiTv.pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv Ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14);
axis(1,axTicks(1), format(axTicks(1), scientific=F))
abline(v=m,lty=2,col="red")
abline(v=m75,lty=3)
abline(v=m25,lty=3)
text(m, ymin, "50", col="red", cex=0.6);
text(m75, ymin, "75", col="black", cex=0.6);
text(m25, ymin, "25", col="black", cex=0.6);
if(medianNumVariants == "true") {
abline(v=m90,lty=3)
abline(v=m10,lty=3)
text(m10, ymin, "10", col="black", cex=0.6);
text(m90, ymin, "90", col="black", cex=0.6);
}
points(novel$value,novel$titv,col="green",pch=20)
points(dbsnp$value,dbsnp$titv,col="blue",pch=20)
if( sum(all$truePositive==0) != length(all$truePositive) ) {
points(truth$value,truth$titv,col="magenta",pch=20)
legend("topleft", c("all","novel","dbsnp","truth"),col=c("black","green","blue","magenta"),pch=c(20,20,20,20))
} else {
legend("topleft", c("all","novel","dbsnp"),col=c("black","green","blue"),pch=c(20,20,20))
}
dev.off()
#
# Plot TiTv ratio as a function of the annotation, log scale on the x-axis
#
outfile = paste(input, ".TiTv_log.pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
plot(all$value,all$titv,xlab=annotationName,log="x",ylab="Ti/Tv Ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14);
axis(1,axTicks(1), format(axTicks(1), scientific=F))
abline(v=m,lty=2,col="red")
abline(v=m75,lty=3)
abline(v=m25,lty=3)
text(m, ymin, "50", col="red", cex=0.6);
text(m75, ymin, "75", col="black", cex=0.6);
text(m25, ymin, "25", col="black", cex=0.6);
if(medianNumVariants == "true") {
abline(v=m90,lty=3)
abline(v=m10,lty=3)
text(m10, ymin, "10", col="black", cex=0.6);
text(m90, ymin, "90", col="black", cex=0.6);
}
points(novel$value,novel$titv,col="green",pch=20)
points(dbsnp$value,dbsnp$titv,col="blue",pch=20)
if( sum(all$truePositive==0) != length(all$truePositive) ) {
points(truth$value,truth$titv,col="magenta",pch=20)
legend("topleft", c("all","novel","dbsnp","truth"),col=c("black","green","blue","magenta"),pch=c(20,20,20,20))
} else {
legend("topleft", c("all","novel","dbsnp"),col=c("black","green","blue"),pch=c(20,20,20))
}
dev.off()
#
# Plot dbsnp and true positive rate as a function of the annotation
#
ymin = min(all$dbsnp)
ymax = max(all$dbsnp)
outfile = paste(input, ".truthRate.pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
yLabel = "DBsnp Rate"
if( sum(all$truePositive==0) != length(all$truePositive) ) {
t = all[all$truePositive>0,]
yLabel = "DBsnp/True Positive Rate"
ymin = min(min(all$dbsnp),min(t$truePositive))
ymax = max(max(all$dbsnp),max(t$truePositive))
}
plot(all$value,all$dbsnp,xlab=annotationName,ylab=yLabel,pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14);
axis(1,axTicks(1), format(axTicks(1), scientific=F))
abline(v=m,lty=2,col="red")
abline(v=m75,lty=3)
abline(v=m25,lty=3)
text(m, ymin, "50", col="red", cex=0.6);
text(m75, ymin, "75", col="black", cex=0.6);
text(m25, ymin, "25", col="black", cex=0.6);
if(medianNumVariants == "true") {
abline(v=m90,lty=3)
abline(v=m10,lty=3)
text(m10, ymin, "10", col="black", cex=0.6);
text(m90, ymin, "90", col="black", cex=0.6);
}
if( sum(all$truePositive==0) != length(all$truePositive) ) {
points(t$value,t$truePositive,col="magenta",pch=20);
legend("topleft", c("dbsnp","truth"),col=c("black","magenta"),pch=c(20,20))
}
dev.off()
#
# Plot dbsnp and true positive rate as a function of the annotation, log scale on the x-axis
#
outfile = paste(input, ".truthRate_log.pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
yLabel = "DBsnp Rate"
if( sum(all$truePositive==0) != length(all$truePositive) ) {
yLabel = "DBsnp/Truth Rate"
}
plot(all$value,all$dbsnp,xlab=annotationName,log="x",ylab=yLabel,ylim=c(ymin,ymax),pch=20,xaxt="n",ps=14);
axis(1,axTicks(1), format(axTicks(1), scientific=F))
abline(v=m,lty=2,col="red")
abline(v=m75,lty=3)
abline(v=m25,lty=3)
text(m, ymin, "50", col="red", cex=0.6);
text(m75, ymin, "75", col="black", cex=0.6);
text(m25, ymin, "25", col="black", cex=0.6);
if(medianNumVariants == "true") {
abline(v=m90,lty=3)
abline(v=m10,lty=3)
text(m10, ymin, "10", col="black", cex=0.6);
text(m90, ymin, "90", col="black", cex=0.6);
}
if( sum(all$truePositive==0) != length(all$truePositive) ) {
points(t$value,t$truePositive,col="magenta",pch=20);
legend("topleft", c("dbsnp","truth"),col=c("black","magenta"),pch=c(20,20))
}
dev.off()
#
# Plot histogram of the annotation's value
#
outfile = paste(input, ".Histogram.pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
plot(all$value,all$numVariants,xlab=annotationName,ylab="Num variants in bin",type="h",xaxt="n",ps=14,lwd=4);
axis(1,axTicks(1), format(axTicks(1), scientific=F))
dev.off()
#
# Plot histogram of the annotation's value, log scale on x-axis
#
outfile = paste(input, ".Histogram_log.pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
plot(all$value,all$numVariants,xlab=annotationName,log="x",ylab="Num variants in bin",type="h",xaxt="n",ps=14,lwd=4);
axis(1,axTicks(1), format(axTicks(1), scientific=F))
dev.off()

View File

@ -1,154 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.analyzeannotations;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.commandline.Argument;
import java.util.HashMap;
import java.util.Collection;
/**
* Takes variant calls as .vcf files and creates plots of truth metrics as a function of the various annotations found in the INFO field.
*
* @author rpoplin
* @since Jan 15, 2010
* @help.summary Takes variant calls as .vcf files and creates plots of truth metrics as a function of the various annotations found in the INFO field.
*/
public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
/////////////////////////////
// Command Line Arguments
/////////////////////////////
@Argument(fullName = "output_prefix", shortName = "output", doc = "The output path and name to prepend to all plots and intermediate data files", required = false)
private String OUTPUT_PREFIX = "analyzeAnnotations/";
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
private String PATH_TO_RSCRIPT = "Rscript";
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
private String PATH_TO_RESOURCES = "R/";
@Argument(fullName = "min_variants_per_bin", shortName = "minBinSize", doc = "The minimum number of variants in a bin in order to calculate truth metrics.", required = false)
private int MIN_VARIANTS_PER_BIN = 1000;
@Argument(fullName = "max_variants_per_bin", shortName = "maxBinSize", doc = "The maximum number of variants in a bin.", required = false)
private int MAX_VARIANTS_PER_BIN = 20000;
@Argument(fullName = "sampleName", shortName = "sampleName", doc = "If supplied, only process variants found in this sample.", required = false)
private String SAMPLE_NAME = null;
@Argument(fullName = "name", shortName = "name", doc = "Labels for the annotations to make plots look nicer. Each name is a separate -name argument. For example, -name DP,Depth -name AB,AlleleBalance", required = false)
private String[] ANNOTATION_NAMES = null;
@Argument(fullName = "indicate_mean_num_vars", shortName = "meanNumVars", doc = "If supplied, plots will indicate the distribution of number of variants instead of distribution of value of annotation", required = false)
private boolean INDICATE_MEAN_NUM_VARS = false;
/////////////////////////////
// Private Member Variables
/////////////////////////////
private AnnotationDataManager dataManager;
//---------------------------------------------------------------------------------------------------------------
//
// initialize
//
//---------------------------------------------------------------------------------------------------------------
public void initialize() {
// Create a HashMap associating the names of the annotations to full Strings that can be used as labels on plots
HashMap<String,String> nameMap = null;
if( ANNOTATION_NAMES != null ) {
nameMap = new HashMap<String,String>();
for( String nameLine : ANNOTATION_NAMES ) {
String[] vals = nameLine.split(",");
nameMap.put(vals[0],vals[1]);
}
}
dataManager = new AnnotationDataManager( nameMap, INDICATE_MEAN_NUM_VARS );
if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; }
}
//---------------------------------------------------------------------------------------------------------------
//
// map
//
//---------------------------------------------------------------------------------------------------------------
public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
if( tracker != null ) {
Collection<VariantContext> VCs = tracker.getAllVariantContexts(ref);
// First find out if this variant is in the truth sets
boolean isInTruthSet = false;
boolean isTrueVariant = false;
for ( VariantContext vc : VCs ) {
if( vc != null && vc.isSNP() && !vc.isFiltered() ) {
if( vc.getSource().toUpperCase().startsWith("TRUTH") ) {
isInTruthSet = true;
if( vc.isBiallelic() && vc.isVariant() ) {
isTrueVariant = true;
}
}
}
}
// Add each annotation in this VCF Record to the dataManager
for ( VariantContext vc : VCs ) {
if( vc != null && vc.isSNP() && vc.isBiallelic() && !vc.isFiltered() ) {
if( !vc.getSource().toUpperCase().startsWith("TRUTH") ) {
if( vc.isVariant() ) {
dataManager.addAnnotations( vc, SAMPLE_NAME, isInTruthSet, isTrueVariant );
}
}
}
}
}
return 1; // This value isn't actually used for anything
}
//---------------------------------------------------------------------------------------------------------------
//
// reduce
//
//---------------------------------------------------------------------------------------------------------------
public Integer reduceInit() {
return 0; // Nothing to do here
}
public Integer reduce( Integer value, Integer sum ) {
return 0; // Nothing to do here
}
public void onTraversalDone( Integer sum ) {
// For each annotation, decide how to cut up the data, output intermediate cumulative p(true) tables, and call RScript to plot the tables
dataManager.plotCumulativeTables(PATH_TO_RSCRIPT, PATH_TO_RESOURCES, OUTPUT_PREFIX, MIN_VARIANTS_PER_BIN, MAX_VARIANTS_PER_BIN);
}
}

View File

@ -1,169 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.analyzeannotations;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
import java.util.*;
import java.io.IOException;
import java.io.PrintStream;
import java.io.FileNotFoundException;
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Jan 18, 2010
*/
public class AnnotationDataManager {
private final HashMap<String, TreeSet<AnnotationDatum>> data;
private final HashMap<String,String> nameMap;
private final boolean INDICATE_MEAN_NUM_VARS;
public AnnotationDataManager( HashMap<String,String> _nameMap, boolean _INDICATE_MEAN_NUM_VARS ) {
data = new HashMap<String, TreeSet<AnnotationDatum>>();
nameMap = _nameMap;
INDICATE_MEAN_NUM_VARS = _INDICATE_MEAN_NUM_VARS;
}
public void addAnnotations( final VariantContext vc, final String sampleName, final boolean isInTruthSet, final boolean isTrueVariant ) {
if( sampleName != null ) { // Only process variants that are found in the sample with this sampleName
if( vc.getGenotype(sampleName).isNoCall() ) { // This variant isn't found in this sample so break out
return;
}
} // else, process all samples
// Loop over each annotation in the vcf record
final Map<String,Object> infoField = new HashMap<String, Object>(vc.getAttributes());
infoField.put("QUAL", ((Double)vc.getPhredScaledQual()).toString() ); // add QUAL field to annotations
for( Map.Entry<String, Object> annotation : infoField.entrySet() ) {
float value;
try {
value = Float.parseFloat( annotation.getValue().toString() );
} catch( NumberFormatException e ) {
continue; // Skip over annotations that aren't floats, like "DB"
}
TreeSet<AnnotationDatum> treeSet = data.get( annotation.getKey() );
if( treeSet == null ) { // This annotation hasn't been seen before
treeSet = new TreeSet<AnnotationDatum>( new AnnotationDatum() ); // AnnotationDatum is a Comparator that orders variants by the value of the Annotation
data.put( annotation.getKey(), treeSet );
}
AnnotationDatum datum = new AnnotationDatum( value );
if( treeSet.contains(datum) ) { // contains() uses AnnotationDatum's equals function, so it only checks if the value field is already present
datum = treeSet.tailSet(datum).first();
} else {
treeSet.add(datum);
}
final boolean isNovelVariant = !vc.hasID() || !vc.getID().contains("rs");
// Decide if the variant is a transition or transversion
if( VariantContextUtils.getSNPSubstitutionType(vc).compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0 ) {
datum.incrementTi( isNovelVariant, isInTruthSet, isTrueVariant );
} else {
datum.incrementTv( isNovelVariant, isInTruthSet, isTrueVariant );
}
}
}
public void plotCumulativeTables( final String PATH_TO_RSCRIPT, final String PATH_TO_RESOURCES, final String OUTPUT_PREFIX,
final int MIN_VARIANTS_PER_BIN, final int MAX_VARIANTS_PER_BIN ) {
final AnnotationDatum thisAnnotationBin = new AnnotationDatum();
System.out.println( "\nFinished reading variants into memory. Executing RScript commands:" );
// For each annotation we've seen
for( final String annotationKey : data.keySet() ) {
String filename = OUTPUT_PREFIX + annotationKey + ".dat";
PrintStream output;
try {
output = new PrintStream(filename); // Create the intermediate data file for this annotation
} catch ( FileNotFoundException e ) {
throw new UserException.CouldNotCreateOutputFile(new File(filename), "Can't create intermediate output annotation data file. Does the output directory exist?", e);
}
// Output a header line
output.println("value\ttitv\tdbsnp\ttruePositive\tnumVariants\tcategory");
// Bin SNPs and calculate truth metrics for each bin
thisAnnotationBin.clearBin();
for( final AnnotationDatum datum : data.get( annotationKey ) ) {
thisAnnotationBin.combine( datum );
if( thisAnnotationBin.numVariants( AnnotationDatum.FULL_SET ) >= MAX_VARIANTS_PER_BIN ) { // This annotation bin is full
output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.FULL_SET ) + "\t" + thisAnnotationBin.calcDBsnpRate() + "\t" + thisAnnotationBin.calcTPrate() +
"\t" + thisAnnotationBin.numVariants( AnnotationDatum.FULL_SET ) + "\tall");
output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.NOVEL_SET ) + "\t-1\t-1\t" + thisAnnotationBin.numVariants( AnnotationDatum.NOVEL_SET ) + "\tnovel");
output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.DBSNP_SET ) + "\t-1\t-1\t" + thisAnnotationBin.numVariants( AnnotationDatum.DBSNP_SET ) + "\tdbsnp");
output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.TRUTH_SET ) + "\t-1\t-1\t" + thisAnnotationBin.numVariants( AnnotationDatum.TRUTH_SET ) + "\ttruth");
thisAnnotationBin.clearBin();
}
// else, continue accumulating variants because this bin isn't full yet
}
// One final bin that may not have been dumped out
if( thisAnnotationBin.numVariants( AnnotationDatum.FULL_SET ) != 0 ) {
output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.FULL_SET ) + "\t" + thisAnnotationBin.calcDBsnpRate() + "\t" + thisAnnotationBin.calcTPrate() +
"\t" + thisAnnotationBin.numVariants( AnnotationDatum.FULL_SET ) + "\tall");
output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.NOVEL_SET ) + "\t-1\t-1\t" + thisAnnotationBin.numVariants( AnnotationDatum.NOVEL_SET ) + "\tnovel");
output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.DBSNP_SET ) + "\t-1\t-1\t" + thisAnnotationBin.numVariants( AnnotationDatum.DBSNP_SET ) + "\tdbsnp");
output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.TRUTH_SET ) + "\t-1\t-1\t" + thisAnnotationBin.numVariants( AnnotationDatum.TRUTH_SET ) + "\ttruth");
thisAnnotationBin.clearBin();
}
// Close the PrintStream
output.close();
String annotationName = null;
if( nameMap != null ) {
annotationName = nameMap.get(annotationKey);
}
if( annotationName == null ) { // This annotation is not in the name map so use the key instead
annotationName = annotationKey;
}
// Print out the command line to make it clear to the user what is being executed and how one might modify it
final String rScriptCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTruthMetrics.R" + " " +
OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationName + " " + MIN_VARIANTS_PER_BIN + " " + INDICATE_MEAN_NUM_VARS;
System.out.println( rScriptCommandLine );
// Execute the RScript command to plot the table of truth values
try {
Runtime.getRuntime().exec( rScriptCommandLine );
} catch ( IOException e ) {
throw new UserException.CannotExecuteRScript( rScriptCommandLine, e );
}
}
}
}

View File

@ -1,170 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.analyzeannotations;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Comparator;
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Jan 18, 2010
*/
public class AnnotationDatum implements Comparator<AnnotationDatum> {
public float value;
private final int[] ti;
private final int[] tv;
public static final int FULL_SET = 0;
public static final int NOVEL_SET = 1;
public static final int DBSNP_SET = 2;
public static final int TRUTH_SET = 3;
public static final int TRUE_POSITIVE = 4;
private static final int NUM_SETS = 5;
public AnnotationDatum() {
value = 0.0f;
ti = new int[NUM_SETS];
tv = new int[NUM_SETS];
for( int iii = 0; iii < NUM_SETS; iii++ ) {
ti[iii] = 0;
tv[iii] = 0;
}
}
public AnnotationDatum( final float _value ) {
value = _value;
ti = new int[NUM_SETS];
tv = new int[NUM_SETS];
for( int iii = 0; iii < NUM_SETS; iii++ ) {
ti[iii] = 0;
tv[iii] = 0;
}
}
final public void incrementTi( final boolean isNovelVariant, final boolean isInTruthSet, final boolean isTrueVariant ) {
ti[FULL_SET]++;
if( isNovelVariant ) {
ti[NOVEL_SET]++;
} else { // Is known, in DBsnp
ti[DBSNP_SET]++;
}
if( isInTruthSet ) {
ti[TRUTH_SET]++;
if( isTrueVariant ) {
ti[TRUE_POSITIVE]++;
}
}
}
final public void incrementTv( final boolean isNovelVariant, final boolean isInTruthSet, final boolean isTrueVariant ) {
tv[FULL_SET]++;
if( isNovelVariant ) {
tv[NOVEL_SET]++;
} else { // Is known, in DBsnp
tv[DBSNP_SET]++;
}
if( isInTruthSet ) {
tv[TRUTH_SET]++;
if( isTrueVariant ) {
tv[TRUE_POSITIVE]++;
}
}
}
final public void combine( final AnnotationDatum that ) {
for( int iii = 0; iii < NUM_SETS; iii++ ) {
this.ti[iii] += that.ti[iii];
this.tv[iii] += that.tv[iii];
}
this.value = that.value; // Overwrite this bin's value
}
final public float calcTiTv( final int INDEX ) {
if( ti[INDEX] < 0 || tv[INDEX] < 0 ) {
throw new ReviewedStingException( "Integer overflow detected! There are too many variants piled up in one annotation bin." );
}
if( tv[INDEX] == 0 ) { // Don't divide by zero
return 0.0f;
}
return ((float) ti[INDEX]) / ((float) tv[INDEX]);
}
final public float calcDBsnpRate() {
if( ti[FULL_SET] + tv[FULL_SET] == 0 ) { // Don't divide by zero
return 0.0f;
}
return 100.0f * ((float) ti[DBSNP_SET] + tv[DBSNP_SET]) /
((float) ti[FULL_SET] + tv[FULL_SET]);
}
final public float calcTPrate() {
if( ti[TRUTH_SET] + tv[TRUTH_SET] == 0 ) { // Don't divide by zero
return 0.0f;
}
return 100.0f * ((float) ti[TRUE_POSITIVE] + tv[TRUE_POSITIVE]) /
((float) ti[TRUTH_SET] + tv[TRUTH_SET]);
}
final public int numVariants( final int INDEX ) {
return ti[INDEX] + tv[INDEX];
}
final public void clearBin() {
value = 0.0f;
for( int iii = 0; iii < NUM_SETS; iii++ ) {
ti[iii] = 0;
tv[iii] = 0;
}
}
public int compare( AnnotationDatum a1, AnnotationDatum a2 ) { // Function needed for this to be a Comparator
if( a1.value < a2.value ) { return -1; }
if( a1.value > a2.value ) { return 1; }
return 0;
}
public int equals( AnnotationDatum that ) { // Function needed for this to be sorted correctly in a TreeSet
if( this.value < that.value ) { return -1; }
if( this.value > that.value ) { return 1; }
return 0;
}
}

View File

@ -19,7 +19,6 @@
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java" />
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java" />
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java" />
<file name="public/R/plot_Annotations_BinnedTruthMetrics.R" />
<file name="public/R/plot_Tranches.R" />
<file name="public/R/titvFPEst.R" />
</resources>