From 2b51cf18f03f9f52784ea369b709a13a80053084 Mon Sep 17 00:00:00 2001 From: rpoplin Date: Mon, 25 Jan 2010 19:39:18 +0000 Subject: [PATCH] AnalyzeAnnotations now outputs plots with log x-axis in addition to standard x-axis so things like DP and MQ0 are easier to see. AnalyzeAnnotations now skips over all annotations that aren't floating point values. Recalibrator now warns users if PL tags are missing and so therefore it is reverting to illumina. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2681 348d0f76-0448-11de-a6fe-93d51630548a --- R/plot_Annotations_BinnedTiTv.R | 43 ++++++++----------- .../recalibration/RecalDataManager.java | 10 ++++- .../AnnotationDataManager.java | 15 +++++-- 3 files changed, 37 insertions(+), 31 deletions(-) diff --git a/R/plot_Annotations_BinnedTiTv.R b/R/plot_Annotations_BinnedTiTv.R index ee74e8c55..89c5f1c69 100644 --- a/R/plot_Annotations_BinnedTiTv.R +++ b/R/plot_Annotations_BinnedTiTv.R @@ -15,35 +15,23 @@ c <- read.table(input, header=T) # Plot TiTv ratio as a function of the annotation # -gt = c[c$numVariants>minBinCutoff & c$category==0,] +all = c[c$numVariants>minBinCutoff & c$category==0,] novel = c[c$numVariants>minBinCutoff & c$category==1,] dbsnp = c[c$numVariants>minBinCutoff & c$category==2,] d = c[c$numVariants>minBinCutoff,] -cmin = min(d$titv) -cmax = max(d$titv) +ymin = min(d$titv) +ymax = max(d$titv) +xmin = min(d$value) +xmax = max(d$value) outfile = paste(outputDir, "binnedTiTv.", annotationName, ".pdf", sep="") pdf(outfile, height=7, width=7) par(cex=1.1) -plot(gt$value,gt$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20); -m = weighted.mean(gt$value,gt$numVariants/sum(gt$numVariants)) -ma = gt[gt$value > m,] -mb = gt[gt$value < m,] -m75 = weighted.mean(ma$value,ma$numVariants/sum(ma$numVariants)) -m25 = weighted.mean(mb$value,mb$numVariants/sum(mb$numVariants)) -abline(v=m,lty=2) -abline(v=m75,lty=2) -abline(v=m25,lty=2) -dev.off() - -outfile = paste(outputDir, "binnedTiTv_novel.", annotationName, ".pdf", sep="") -pdf(outfile, height=7, width=7) -par(cex=1.1) -plot(gt$value,gt$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20,yim=c(cmin,cmax)); -m = weighted.mean(gt$value,gt$numVariants/sum(gt$numVariants)) -ma = gt[gt$value > m,] -mb = gt[gt$value < m,] +plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax)); +m = weighted.mean(all$value,all$numVariants/sum(all$numVariants)) +ma = all[all$value > m,] +mb = all[all$value < m,] m75 = weighted.mean(ma$value,ma$numVariants/sum(ma$numVariants)) m25 = weighted.mean(mb$value,mb$numVariants/sum(mb$numVariants)) abline(v=m,lty=2) @@ -51,21 +39,24 @@ abline(v=m75,lty=2) abline(v=m25,lty=2) points(novel$value,novel$titv,col="green",pch=20) points(dbsnp$value,dbsnp$titv,col="blue",pch=20) -legend("topleft", c("overall","novel","dbsnp"),col=c("black","green","blue"),pch=c(20,20,20)) +legend("topleft", c("all","novel","dbsnp"),col=c("black","green","blue"),pch=c(20,20,20)) dev.off() - -outfile = paste(outputDir, "binnedTiTv_quartiles.", annotationName, ".pdf", sep="") +outfile = paste(outputDir, "binnedTiTv_log.", annotationName, ".pdf", sep="") pdf(outfile, height=7, width=7) par(cex=1.1) -plot(gt$value,gt$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20,xlim=c(0,80)) +plot(all$value,all$titv,xlab=annotationName,log="x",ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax)); abline(v=m,lty=2) abline(v=m75,lty=2) abline(v=m25,lty=2) +points(novel$value,novel$titv,col="green",pch=20) +points(dbsnp$value,dbsnp$titv,col="blue",pch=20) +legend("topleft", c("all","novel","dbsnp"),col=c("black","green","blue"),pch=c(20,20,20)) dev.off() + outfile = paste(outputDir, "binnedTiTv_hist.", annotationName, ".pdf", sep="") pdf(outfile, height=7, width=7) par(cex=1.1) -plot(gt$value,gt$numVariants,xlab=annotationName,ylab="num Variants in bin",type="h"); +plot(all$value,all$numVariants,xlab=annotationName,ylab="num Variants in bin",type="h"); dev.off() \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index d8cc5593f..f404e2b30 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -56,8 +56,9 @@ public class RecalDataManager { public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color private static boolean warnUserNullReadGroup = false; private static boolean warnUserNoColorSpace = false; + private static boolean warnUserNullPlatform = false; - public static final String versionString = "v2.2.16"; // Major version, minor version, and build number + public static final String versionString = "v2.2.17"; // Major version, minor version, and build number RecalDataManager() { data = new NestedHashMap(); @@ -242,6 +243,13 @@ public class RecalDataManager { } if ( readGroup.getPlatform() == null ) { + if( !warnUserNullPlatform ) { + Utils.warnUser("The input .bam file contains reads with no read group. " + + "Defaulting to platform = " + RAC.DEFAULT_PLATFORM + ". " + + "First observed at read with name = " + read.getReadName() ); + Utils.warnUser("Users may set the default platform using the --default_platform argument."); + warnUserNullPlatform = true; + } readGroup.setPlatform( RAC.DEFAULT_PLATFORM ); } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java index 2ecff405b..3b4ca52e9 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java @@ -58,10 +58,17 @@ public class AnnotationDataManager { // Loop over each annotation in the vcf record final Map infoField = variant.getInfoValues(); for( String annotationKey : infoField.keySet() ) { - if( annotationKey.equalsIgnoreCase("AC") ) { - continue; // This annotation isn't a float value + + float value = 0.0f; + boolean skipThisAnnotation = false; + try { + value = Float.parseFloat( infoField.get( annotationKey ) ); + } catch( Exception e ) { // BUGBUG: Make this exception more specific. NumberFormatException?? + skipThisAnnotation = true; // skip over annotations that aren't floats, like "AC"?? and "DB" + } + if( skipThisAnnotation ) { + continue; // skip over annotations that aren't floats, like "AC"?? and "DB" } - final float value = Float.parseFloat( infoField.get( annotationKey ) ); if( variant.getID().equals(".") ) { // This is a novel variant TreeSet treeSet = dataNovel.get( annotationKey ); @@ -212,7 +219,7 @@ public class AnnotationDataManager { numTi += datum.numTransitions; numTv += datum.numTransversions; lastDatum = datum; - if( numTi + numTv >= MAX_VARIANTS_PER_BIN/3 ) { // This annotation bin is full + if( numTi + numTv >= MAX_VARIANTS_PER_BIN/2 ) { // This annotation bin is full float titv; if( numTv == 0) { titv = 0.0f; } else { titv = ((float) numTi) / ((float) numTv); }