AnalyzeAnnotations now outputs plots with log x-axis in addition to standard x-axis so things like DP and MQ0 are easier to see. AnalyzeAnnotations now skips over all annotations that aren't floating point values. Recalibrator now warns users if PL tags are missing and so therefore it is reverting to illumina.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2681 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-01-25 19:39:18 +00:00
parent 6cf413e630
commit 2b51cf18f0
3 changed files with 37 additions and 31 deletions

View File

@ -15,35 +15,23 @@ c <- read.table(input, header=T)
# Plot TiTv ratio as a function of the annotation
#
gt = c[c$numVariants>minBinCutoff & c$category==0,]
all = c[c$numVariants>minBinCutoff & c$category==0,]
novel = c[c$numVariants>minBinCutoff & c$category==1,]
dbsnp = c[c$numVariants>minBinCutoff & c$category==2,]
d = c[c$numVariants>minBinCutoff,]
cmin = min(d$titv)
cmax = max(d$titv)
ymin = min(d$titv)
ymax = max(d$titv)
xmin = min(d$value)
xmax = max(d$value)
outfile = paste(outputDir, "binnedTiTv.", annotationName, ".pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
plot(gt$value,gt$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20);
m = weighted.mean(gt$value,gt$numVariants/sum(gt$numVariants))
ma = gt[gt$value > m,]
mb = gt[gt$value < m,]
m75 = weighted.mean(ma$value,ma$numVariants/sum(ma$numVariants))
m25 = weighted.mean(mb$value,mb$numVariants/sum(mb$numVariants))
abline(v=m,lty=2)
abline(v=m75,lty=2)
abline(v=m25,lty=2)
dev.off()
outfile = paste(outputDir, "binnedTiTv_novel.", annotationName, ".pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
plot(gt$value,gt$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20,yim=c(cmin,cmax));
m = weighted.mean(gt$value,gt$numVariants/sum(gt$numVariants))
ma = gt[gt$value > m,]
mb = gt[gt$value < m,]
plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax));
m = weighted.mean(all$value,all$numVariants/sum(all$numVariants))
ma = all[all$value > m,]
mb = all[all$value < m,]
m75 = weighted.mean(ma$value,ma$numVariants/sum(ma$numVariants))
m25 = weighted.mean(mb$value,mb$numVariants/sum(mb$numVariants))
abline(v=m,lty=2)
@ -51,21 +39,24 @@ abline(v=m75,lty=2)
abline(v=m25,lty=2)
points(novel$value,novel$titv,col="green",pch=20)
points(dbsnp$value,dbsnp$titv,col="blue",pch=20)
legend("topleft", c("overall","novel","dbsnp"),col=c("black","green","blue"),pch=c(20,20,20))
legend("topleft", c("all","novel","dbsnp"),col=c("black","green","blue"),pch=c(20,20,20))
dev.off()
outfile = paste(outputDir, "binnedTiTv_quartiles.", annotationName, ".pdf", sep="")
outfile = paste(outputDir, "binnedTiTv_log.", annotationName, ".pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
plot(gt$value,gt$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20,xlim=c(0,80))
plot(all$value,all$titv,xlab=annotationName,log="x",ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax));
abline(v=m,lty=2)
abline(v=m75,lty=2)
abline(v=m25,lty=2)
points(novel$value,novel$titv,col="green",pch=20)
points(dbsnp$value,dbsnp$titv,col="blue",pch=20)
legend("topleft", c("all","novel","dbsnp"),col=c("black","green","blue"),pch=c(20,20,20))
dev.off()
outfile = paste(outputDir, "binnedTiTv_hist.", annotationName, ".pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
plot(gt$value,gt$numVariants,xlab=annotationName,ylab="num Variants in bin",type="h");
plot(all$value,all$numVariants,xlab=annotationName,ylab="num Variants in bin",type="h");
dev.off()

View File

@ -56,8 +56,9 @@ public class RecalDataManager {
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
private static boolean warnUserNullReadGroup = false;
private static boolean warnUserNoColorSpace = false;
private static boolean warnUserNullPlatform = false;
public static final String versionString = "v2.2.16"; // Major version, minor version, and build number
public static final String versionString = "v2.2.17"; // Major version, minor version, and build number
RecalDataManager() {
data = new NestedHashMap();
@ -242,6 +243,13 @@ public class RecalDataManager {
}
if ( readGroup.getPlatform() == null ) {
if( !warnUserNullPlatform ) {
Utils.warnUser("The input .bam file contains reads with no read group. " +
"Defaulting to platform = " + RAC.DEFAULT_PLATFORM + ". " +
"First observed at read with name = " + read.getReadName() );
Utils.warnUser("Users may set the default platform using the --default_platform <String> argument.");
warnUserNullPlatform = true;
}
readGroup.setPlatform( RAC.DEFAULT_PLATFORM );
}
}

View File

@ -58,10 +58,17 @@ public class AnnotationDataManager {
// Loop over each annotation in the vcf record
final Map<String,String> infoField = variant.getInfoValues();
for( String annotationKey : infoField.keySet() ) {
if( annotationKey.equalsIgnoreCase("AC") ) {
continue; // This annotation isn't a float value
float value = 0.0f;
boolean skipThisAnnotation = false;
try {
value = Float.parseFloat( infoField.get( annotationKey ) );
} catch( Exception e ) { // BUGBUG: Make this exception more specific. NumberFormatException??
skipThisAnnotation = true; // skip over annotations that aren't floats, like "AC"?? and "DB"
}
if( skipThisAnnotation ) {
continue; // skip over annotations that aren't floats, like "AC"?? and "DB"
}
final float value = Float.parseFloat( infoField.get( annotationKey ) );
if( variant.getID().equals(".") ) { // This is a novel variant
TreeSet<AnnotationDatum> treeSet = dataNovel.get( annotationKey );
@ -212,7 +219,7 @@ public class AnnotationDataManager {
numTi += datum.numTransitions;
numTv += datum.numTransversions;
lastDatum = datum;
if( numTi + numTv >= MAX_VARIANTS_PER_BIN/3 ) { // This annotation bin is full
if( numTi + numTv >= MAX_VARIANTS_PER_BIN/2 ) { // This annotation bin is full
float titv;
if( numTv == 0) { titv = 0.0f; }
else { titv = ((float) numTi) / ((float) numTv); }