Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
91f8ab115d
|
|
@ -0,0 +1,190 @@
|
|||
#!/bin/env Rscript
|
||||
|
||||
args <- commandArgs(TRUE)
|
||||
verbose = TRUE
|
||||
|
||||
input = args[1]
|
||||
annotationName = args[2]
|
||||
minBinCutoff = as.numeric(args[3])
|
||||
medianNumVariants = args[4]
|
||||
|
||||
c <- read.table(input, header=T)
|
||||
|
||||
all = c[c$numVariants>minBinCutoff & c$category=="all",]
|
||||
novel = c[c$numVariants>minBinCutoff & c$category=="novel",]
|
||||
dbsnp = c[c$numVariants>minBinCutoff & c$category=="dbsnp",]
|
||||
truth = c[c$numVariants>minBinCutoff & c$category=="truth",]
|
||||
|
||||
#
|
||||
# Calculate min, max, medians
|
||||
#
|
||||
|
||||
d = c[c$numVariants>minBinCutoff,]
|
||||
ymin = min(d$titv)
|
||||
ymax = max(d$titv)
|
||||
xmin = min(d$value)
|
||||
xmax = max(d$value)
|
||||
m = weighted.mean(all$value,all$numVariants/sum(all$numVariants))
|
||||
ma = all[all$value > m,]
|
||||
mb = all[all$value < m,]
|
||||
m75 = weighted.mean(ma$value,ma$numVariants/sum(ma$numVariants))
|
||||
m25 = weighted.mean(mb$value,mb$numVariants/sum(mb$numVariants))
|
||||
if(medianNumVariants == "true") {
|
||||
vc = cumsum( all$numVariants/sum(all$numVariants) )
|
||||
m10 = all$value[ max(which(vc<=0.10)) ]
|
||||
m25 = all$value[ max(which(vc<=0.25)) ]
|
||||
m = all$value[ max(which(vc<=0.5)) ]
|
||||
m75 = all$value[ min(which(vc>=0.75)) ]
|
||||
m90 = all$value[ min(which(vc>=0.90)) ]
|
||||
}
|
||||
|
||||
#
|
||||
# Plot TiTv ratio as a function of the annotation
|
||||
#
|
||||
|
||||
outfile = paste(input, ".TiTv.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv Ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14);
|
||||
axis(1,axTicks(1), format(axTicks(1), scientific=F))
|
||||
abline(v=m,lty=2,col="red")
|
||||
abline(v=m75,lty=3)
|
||||
abline(v=m25,lty=3)
|
||||
text(m, ymin, "50", col="red", cex=0.6);
|
||||
text(m75, ymin, "75", col="black", cex=0.6);
|
||||
text(m25, ymin, "25", col="black", cex=0.6);
|
||||
if(medianNumVariants == "true") {
|
||||
abline(v=m90,lty=3)
|
||||
abline(v=m10,lty=3)
|
||||
text(m10, ymin, "10", col="black", cex=0.6);
|
||||
text(m90, ymin, "90", col="black", cex=0.6);
|
||||
}
|
||||
points(novel$value,novel$titv,col="green",pch=20)
|
||||
points(dbsnp$value,dbsnp$titv,col="blue",pch=20)
|
||||
if( sum(all$truePositive==0) != length(all$truePositive) ) {
|
||||
points(truth$value,truth$titv,col="magenta",pch=20)
|
||||
legend("topleft", c("all","novel","dbsnp","truth"),col=c("black","green","blue","magenta"),pch=c(20,20,20,20))
|
||||
} else {
|
||||
legend("topleft", c("all","novel","dbsnp"),col=c("black","green","blue"),pch=c(20,20,20))
|
||||
}
|
||||
dev.off()
|
||||
|
||||
#
|
||||
# Plot TiTv ratio as a function of the annotation, log scale on the x-axis
|
||||
#
|
||||
|
||||
outfile = paste(input, ".TiTv_log.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
plot(all$value,all$titv,xlab=annotationName,log="x",ylab="Ti/Tv Ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14);
|
||||
axis(1,axTicks(1), format(axTicks(1), scientific=F))
|
||||
abline(v=m,lty=2,col="red")
|
||||
abline(v=m75,lty=3)
|
||||
abline(v=m25,lty=3)
|
||||
text(m, ymin, "50", col="red", cex=0.6);
|
||||
text(m75, ymin, "75", col="black", cex=0.6);
|
||||
text(m25, ymin, "25", col="black", cex=0.6);
|
||||
if(medianNumVariants == "true") {
|
||||
abline(v=m90,lty=3)
|
||||
abline(v=m10,lty=3)
|
||||
text(m10, ymin, "10", col="black", cex=0.6);
|
||||
text(m90, ymin, "90", col="black", cex=0.6);
|
||||
}
|
||||
points(novel$value,novel$titv,col="green",pch=20)
|
||||
points(dbsnp$value,dbsnp$titv,col="blue",pch=20)
|
||||
if( sum(all$truePositive==0) != length(all$truePositive) ) {
|
||||
points(truth$value,truth$titv,col="magenta",pch=20)
|
||||
legend("topleft", c("all","novel","dbsnp","truth"),col=c("black","green","blue","magenta"),pch=c(20,20,20,20))
|
||||
} else {
|
||||
legend("topleft", c("all","novel","dbsnp"),col=c("black","green","blue"),pch=c(20,20,20))
|
||||
}
|
||||
dev.off()
|
||||
|
||||
#
|
||||
# Plot dbsnp and true positive rate as a function of the annotation
|
||||
#
|
||||
|
||||
ymin = min(all$dbsnp)
|
||||
ymax = max(all$dbsnp)
|
||||
outfile = paste(input, ".truthRate.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
yLabel = "DBsnp Rate"
|
||||
if( sum(all$truePositive==0) != length(all$truePositive) ) {
|
||||
t = all[all$truePositive>0,]
|
||||
yLabel = "DBsnp/True Positive Rate"
|
||||
ymin = min(min(all$dbsnp),min(t$truePositive))
|
||||
ymax = max(max(all$dbsnp),max(t$truePositive))
|
||||
}
|
||||
plot(all$value,all$dbsnp,xlab=annotationName,ylab=yLabel,pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14);
|
||||
axis(1,axTicks(1), format(axTicks(1), scientific=F))
|
||||
abline(v=m,lty=2,col="red")
|
||||
abline(v=m75,lty=3)
|
||||
abline(v=m25,lty=3)
|
||||
text(m, ymin, "50", col="red", cex=0.6);
|
||||
text(m75, ymin, "75", col="black", cex=0.6);
|
||||
text(m25, ymin, "25", col="black", cex=0.6);
|
||||
if(medianNumVariants == "true") {
|
||||
abline(v=m90,lty=3)
|
||||
abline(v=m10,lty=3)
|
||||
text(m10, ymin, "10", col="black", cex=0.6);
|
||||
text(m90, ymin, "90", col="black", cex=0.6);
|
||||
}
|
||||
if( sum(all$truePositive==0) != length(all$truePositive) ) {
|
||||
points(t$value,t$truePositive,col="magenta",pch=20);
|
||||
legend("topleft", c("dbsnp","truth"),col=c("black","magenta"),pch=c(20,20))
|
||||
}
|
||||
dev.off()
|
||||
|
||||
#
|
||||
# Plot dbsnp and true positive rate as a function of the annotation, log scale on the x-axis
|
||||
#
|
||||
|
||||
outfile = paste(input, ".truthRate_log.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
yLabel = "DBsnp Rate"
|
||||
if( sum(all$truePositive==0) != length(all$truePositive) ) {
|
||||
yLabel = "DBsnp/Truth Rate"
|
||||
}
|
||||
plot(all$value,all$dbsnp,xlab=annotationName,log="x",ylab=yLabel,ylim=c(ymin,ymax),pch=20,xaxt="n",ps=14);
|
||||
axis(1,axTicks(1), format(axTicks(1), scientific=F))
|
||||
abline(v=m,lty=2,col="red")
|
||||
abline(v=m75,lty=3)
|
||||
abline(v=m25,lty=3)
|
||||
text(m, ymin, "50", col="red", cex=0.6);
|
||||
text(m75, ymin, "75", col="black", cex=0.6);
|
||||
text(m25, ymin, "25", col="black", cex=0.6);
|
||||
if(medianNumVariants == "true") {
|
||||
abline(v=m90,lty=3)
|
||||
abline(v=m10,lty=3)
|
||||
text(m10, ymin, "10", col="black", cex=0.6);
|
||||
text(m90, ymin, "90", col="black", cex=0.6);
|
||||
}
|
||||
if( sum(all$truePositive==0) != length(all$truePositive) ) {
|
||||
points(t$value,t$truePositive,col="magenta",pch=20);
|
||||
legend("topleft", c("dbsnp","truth"),col=c("black","magenta"),pch=c(20,20))
|
||||
}
|
||||
dev.off()
|
||||
|
||||
#
|
||||
# Plot histogram of the annotation's value
|
||||
#
|
||||
|
||||
outfile = paste(input, ".Histogram.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
plot(all$value,all$numVariants,xlab=annotationName,ylab="Num variants in bin",type="h",xaxt="n",ps=14,lwd=4);
|
||||
axis(1,axTicks(1), format(axTicks(1), scientific=F))
|
||||
dev.off()
|
||||
|
||||
#
|
||||
# Plot histogram of the annotation's value, log scale on x-axis
|
||||
#
|
||||
|
||||
outfile = paste(input, ".Histogram_log.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
plot(all$value,all$numVariants,xlab=annotationName,log="x",ylab="Num variants in bin",type="h",xaxt="n",ps=14,lwd=4);
|
||||
axis(1,axTicks(1), format(axTicks(1), scientific=F))
|
||||
dev.off()
|
||||
|
|
@ -0,0 +1,87 @@
|
|||
#!/bin/env Rscript
|
||||
|
||||
args <- commandArgs(TRUE)
|
||||
verbose = TRUE
|
||||
|
||||
tranchesFile = args[1]
|
||||
targetTITV = as.numeric(args[2])
|
||||
targetSensitivity = as.numeric(args[3])
|
||||
suppressLegend = ! is.na(args[4])
|
||||
|
||||
# -----------------------------------------------------------------------------------------------
|
||||
# Useful general routines
|
||||
# -----------------------------------------------------------------------------------------------
|
||||
|
||||
MIN_FP_RATE = 0.001 # 1 / 1000 is min error rate
|
||||
|
||||
titvFPEst <- function(titvExpected, titvObserved) {
|
||||
max(min(1 - (titvObserved - 0.5) / (titvExpected - 0.5), 1), MIN_FP_RATE)
|
||||
}
|
||||
|
||||
titvFPEstV <- function(titvExpected, titvs) {
|
||||
sapply(titvs, function(x) titvFPEst(titvExpected, x))
|
||||
}
|
||||
|
||||
nTPFP <- function(nVariants, FDR) {
|
||||
return(list(TP = nVariants * (1 - FDR/100), FP = nVariants * (FDR / 100)))
|
||||
}
|
||||
|
||||
leftShift <- function(x, leftValue = 0) {
|
||||
r = rep(leftValue, length(x))
|
||||
for ( i in 1:(length(x)-1) ) {
|
||||
#print(list(i=i))
|
||||
r[i] = x[i+1]
|
||||
}
|
||||
r
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------------------------
|
||||
# Tranches plot
|
||||
# -----------------------------------------------------------------------------------------------
|
||||
data2 = read.table(tranchesFile,sep=",",head=T)
|
||||
data2 = data2[order(data2$novelTiTv, decreasing=F),]
|
||||
#data2 = data2[order(data2$FDRtranche, decreasing=T),]
|
||||
cols = c("cornflowerblue", "cornflowerblue", "darkorange", "darkorange")
|
||||
density=c(20, -1, -1, 20)
|
||||
outfile = paste(tranchesFile, ".pdf", sep="")
|
||||
pdf(outfile, height=5, width=8)
|
||||
par(mar = c(5, 5, 4, 2) + 0.1)
|
||||
novelTiTv = c(data2$novelTITV,data2$novelTiTv)
|
||||
alpha = 1 - titvFPEstV(targetTITV, novelTiTv)
|
||||
#print(alpha)
|
||||
|
||||
numGood = round(alpha * data2$numNovel);
|
||||
|
||||
#numGood = round(data2$numNovel * (1-data2$targetTruthSensitivity/100))
|
||||
numBad = data2$numNovel - numGood;
|
||||
|
||||
numPrevGood = leftShift(numGood, 0)
|
||||
numNewGood = numGood - numPrevGood
|
||||
numPrevBad = leftShift(numBad, 0)
|
||||
numNewBad = numBad - numPrevBad
|
||||
|
||||
d=matrix(c(numPrevGood,numNewGood, numNewBad, numPrevBad),4,byrow=TRUE)
|
||||
#print(d)
|
||||
barplot(d/1000,horiz=TRUE,col=cols,space=0.2,xlab="Number of Novel Variants (1000s)", density=density, cex.axis=1.25, cex.lab=1.25) # , xlim=c(250000,350000))
|
||||
#abline(v= d[2,dim(d)[2]], lty=2)
|
||||
#abline(v= d[1,3], lty=2)
|
||||
if ( ! suppressLegend )
|
||||
legend(3, length(data2$targetTruthSensitivity)/3 +1, c('Cumulative TPs','Tranch-specific TPs', 'Tranch-specific FPs', 'Cumulative FPs' ), fill=cols, density=density, bg='white', cex=1.25)
|
||||
|
||||
mtext("Ti/Tv",2,line=2.25,at=length(data2$targetTruthSensitivity)*1.2,las=1, cex=1)
|
||||
mtext("truth",2,line=0,at=length(data2$targetTruthSensitivity)*1.2,las=1, cex=1)
|
||||
axis(2,line=-1,at=0.7+(0:(length(data2$targetTruthSensitivity)-1))*1.2,tick=FALSE,labels=data2$targetTruthSensitivity, las=1, cex.axis=1.0)
|
||||
axis(2,line=1,at=0.7+(0:(length(data2$targetTruthSensitivity)-1))*1.2,tick=FALSE,labels=round(novelTiTv,3), las=1, cex.axis=1.0)
|
||||
|
||||
# plot sensitivity vs. specificity
|
||||
sensitivity = data2$truthSensitivity
|
||||
if ( ! is.null(sensitivity) ) {
|
||||
#specificity = titvFPEstV(targetTITV, novelTiTv)
|
||||
specificity = novelTiTv
|
||||
plot(sensitivity, specificity, type="b", col="cornflowerblue", xlab="Tranche truth sensitivity", ylab="Specificity (Novel Ti/Tv ratio)")
|
||||
abline(h=targetTITV, lty=2)
|
||||
abline(v=targetSensitivity, lty=2)
|
||||
#text(max(sensitivity), targetTITV-0.05, labels="Expected novel Ti/Tv", pos=2)
|
||||
}
|
||||
|
||||
dev.off()
|
||||
|
|
@ -0,0 +1,108 @@
|
|||
#!/bin/env Rscript
|
||||
|
||||
args <- commandArgs(TRUE)
|
||||
verbose = TRUE
|
||||
|
||||
input = args[1]
|
||||
covariateName = args[2]
|
||||
|
||||
outfile = paste(input, ".qual_diff_v_", covariateName, ".pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
c <- read.table(input, header=T)
|
||||
c <- c[sort.list(c[,1]),]
|
||||
|
||||
#
|
||||
# Plot residual error as a function of the covariate
|
||||
#
|
||||
|
||||
d.good <- c[c$nBases >= 1000,]
|
||||
d.1000 <- c[c$nBases < 1000,]
|
||||
rmseGood = sqrt( sum(as.numeric((d.good$Qempirical-d.good$Qreported)^2 * d.good$nBases)) / sum(as.numeric(d.good$nBases)) ) # prevent integer overflow with as.numeric, ugh
|
||||
rmseAll = sqrt( sum(as.numeric((c$Qempirical-c$Qreported)^2 * c$nBases)) / sum(as.numeric(c$nBases)) )
|
||||
theTitle = paste("RMSE_good =", round(rmseGood,digits=3), ", RMSE_all =", round(rmseAll,digits=3))
|
||||
if( length(d.good$nBases) == length(c$nBases) ) {
|
||||
theTitle = paste("RMSE =", round(rmseAll,digits=3))
|
||||
}
|
||||
# Don't let residual error go off the edge of the plot
|
||||
d.good$residualError = d.good$Qempirical-d.good$Qreported
|
||||
d.good$residualError[which(d.good$residualError > 10)] = 10
|
||||
d.good$residualError[which(d.good$residualError < -10)] = -10
|
||||
d.1000$residualError = d.1000$Qempirical-d.1000$Qreported
|
||||
d.1000$residualError[which(d.1000$residualError > 10)] = 10
|
||||
d.1000$residualError[which(d.1000$residualError < -10)] = -10
|
||||
c$residualError = c$Qempirical-c$Qreported
|
||||
c$residualError[which(c$residualError > 10)] = 10
|
||||
c$residualError[which(c$residualError < -10)] = -10
|
||||
pointType = "p"
|
||||
if( length(c$Covariate) <= 20 ) {
|
||||
pointType = "o"
|
||||
}
|
||||
if( is.numeric(c$Covariate) ) {
|
||||
plot(d.good$Covariate, d.good$residualError, type=pointType, main=theTitle, ylab="Empirical - Reported Quality", xlab=covariateName, col="blue", pch=20, ylim=c(-10, 10), xlim=c(min(c$Covariate),max(c$Covariate)))
|
||||
points(d.1000$Covariate, d.1000$residualError, type=pointType, col="cornflowerblue", pch=20)
|
||||
} else { # Dinuc (and other non-numeric covariates) are different to make their plots look nice
|
||||
plot(c$Covariate, c$residualError, type="l", main=theTitle, ylab="Empirical - Reported Quality", xlab=covariateName, col="blue", ylim=c(-10, 10))
|
||||
points(d.1000$Covariate, d.1000$residualError, type="l", col="cornflowerblue")
|
||||
}
|
||||
dev.off()
|
||||
|
||||
|
||||
#
|
||||
# Plot mean quality versus the covariate
|
||||
#
|
||||
|
||||
outfile = paste(input, ".reported_qual_v_", covariateName, ".pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
pointType = "p"
|
||||
if( length(c$Covariate) <= 20 ) {
|
||||
pointType = "o"
|
||||
}
|
||||
theTitle = paste("Quality By", covariateName);
|
||||
if( is.numeric(c$Covariate) ) {
|
||||
plot(d.good$Covariate, d.good$Qreported, type=pointType, main=theTitle, ylab="Mean Reported Quality", xlab=covariateName, col="blue", pch=20, ylim=c(0, 40), xlim=c(min(c$Covariate),max(c$Covariate)))
|
||||
points(d.1000$Covariate, d.1000$Qreported, type=pointType, col="cornflowerblue", pch=20)
|
||||
} else { # Dinuc (and other non-numeric covariates) are different to make their plots look nice
|
||||
plot(c$Covariate, c$Qreported, type="l", main=theTitle, ylab="Mean Reported Quality", xlab=covariateName, col="blue", ylim=c(0, 40))
|
||||
points(d.1000$Covariate, d.1000$Qreported, type="l", col="cornflowerblue")
|
||||
}
|
||||
dev.off()
|
||||
|
||||
#
|
||||
# Plot histogram of the covariate
|
||||
#
|
||||
|
||||
e = d.good
|
||||
f = d.1000
|
||||
outfile = paste(input, ".", covariateName,"_hist.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
hst=subset(data.frame(e$Covariate, e$nBases), e.nBases != 0)
|
||||
hst2=subset(data.frame(f$Covariate, f$nBases), f.nBases != 0)
|
||||
|
||||
lwdSize=2
|
||||
if( length(c$Covariate) <= 20 ) {
|
||||
lwdSize=7
|
||||
} else if( length(c$Covariate) <= 70 ) {
|
||||
lwdSize=4
|
||||
}
|
||||
|
||||
if( is.numeric(c$Covariate) ) {
|
||||
if( length(hst$e.Covariate) == 0 ) {
|
||||
plot(hst2$f.Covariate, hst2$f.nBases, type="h", lwd=lwdSize, col="cornflowerblue", main=paste(covariateName,"histogram"), ylim=c(0, max(hst2$f.nBases)), xlab=covariateName, ylab="Count",yaxt="n",xlim=c(min(c$Covariate),max(c$Covariate)))
|
||||
} else {
|
||||
plot(hst$e.Covariate, hst$e.nBases, type="h", lwd=lwdSize, main=paste(covariateName,"histogram"), xlab=covariateName, ylim=c(0, max(hst$e.nBases)),ylab="Number of Bases",yaxt="n",xlim=c(min(c$Covariate),max(c$Covariate)))
|
||||
points(hst2$f.Covariate, hst2$f.nBases, type="h", lwd=lwdSize, col="cornflowerblue")
|
||||
}
|
||||
axis(2,axTicks(2), format(axTicks(2), scientific=F))
|
||||
} else { # Dinuc (and other non-numeric covariates) are different to make their plots look nice
|
||||
hst=subset(data.frame(c$Covariate, c$nBases), c.nBases != 0)
|
||||
plot(1:length(hst$c.Covariate), hst$c.nBases, type="h", lwd=lwdSize, main=paste(covariateName,"histogram"), ylim=c(0, max(hst$c.nBases)),xlab=covariateName, ylab="Number of Bases",yaxt="n",xaxt="n")
|
||||
if( length(hst$c.Covariate) > 9 ) {
|
||||
axis(1, at=seq(1,length(hst$c.Covariate),2), labels = hst$c.Covariate[seq(1,length(hst$c.Covariate),2)])
|
||||
} else {
|
||||
axis(1, at=seq(1,length(hst$c.Covariate),1), labels = hst$c.Covariate)
|
||||
}
|
||||
axis(2,axTicks(2), format(axTicks(2), scientific=F))
|
||||
}
|
||||
dev.off()
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
#!/bin/env Rscript
|
||||
|
||||
args <- commandArgs(TRUE)
|
||||
|
||||
input = args[1]
|
||||
Qcutoff = as.numeric(args[2])
|
||||
maxQ = as.numeric(args[3])
|
||||
maxHist = as.numeric(args[4])
|
||||
|
||||
t=read.table(input, header=T)
|
||||
|
||||
#
|
||||
# Plot of reported quality versus empirical quality
|
||||
#
|
||||
|
||||
outfile = paste(input, ".quality_emp_v_stated.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
d.good <- t[t$nBases >= 10000 & t$Qreported >= Qcutoff,]
|
||||
d.1000 <- t[t$nBases < 1000 & t$Qreported >= Qcutoff,]
|
||||
d.10000 <- t[t$nBases < 10000 & t$nBases >= 1000 & t$Qreported >= Qcutoff,]
|
||||
f <- t[t$Qreported < Qcutoff,]
|
||||
e <- rbind(d.good, d.1000, d.10000)
|
||||
rmseGood = sqrt( sum(as.numeric((d.good$Qempirical-d.good$Qreported)^2 * d.good$nBases)) / sum(as.numeric(d.good$nBases)) ) # prevent integer overflow with as.numeric, ugh
|
||||
rmseAll = sqrt( sum(as.numeric((e$Qempirical-e$Qreported)^2 * e$nBases)) / sum(as.numeric(e$nBases)) )
|
||||
theTitle = paste("RMSE_good =", round(rmseGood,digits=3), ", RMSE_all =", round(rmseAll,digits=3))
|
||||
if( length(t$nBases) - length(f$nBases) == length(d.good$nBases) ) {
|
||||
theTitle = paste("RMSE =", round(rmseAll,digits=3));
|
||||
}
|
||||
plot(d.good$Qreported, d.good$Qempirical, type="p", col="blue", main=theTitle, xlim=c(0,maxQ), ylim=c(0,maxQ), pch=16, xlab="Reported quality score", ylab="Empirical quality score")
|
||||
points(d.1000$Qreported, d.1000$Qempirical, type="p", col="lightblue", pch=16)
|
||||
points(d.10000$Qreported, d.10000$Qempirical, type="p", col="cornflowerblue", pch=16)
|
||||
points(f$Qreported, f$Qempirical, type="p", col="maroon1", pch=16)
|
||||
abline(0,1, lty=2)
|
||||
dev.off()
|
||||
|
||||
#
|
||||
# Plot Q empirical histogram
|
||||
#
|
||||
|
||||
outfile = paste(input, ".quality_emp_hist.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
hst=subset(data.frame(e$Qempirical, e$nBases), e.nBases != 0)
|
||||
hst2=subset(data.frame(f$Qempirical, f$nBases), f.nBases != 0)
|
||||
percentBases=hst$e.nBases / sum(as.numeric(hst$e.nBases))
|
||||
entropy = -sum(log2(percentBases)*percentBases)
|
||||
yMax = max(hst$e.nBases)
|
||||
if(maxHist != 0) {
|
||||
yMax = maxHist
|
||||
}
|
||||
plot(hst$e.Qempirical, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yMax), main=paste("Empirical quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Empirical quality score", ylab="Number of Bases",yaxt="n")
|
||||
points(hst2$f.Qempirical, hst2$f.nBases, type="h", lwd=4, col="maroon1")
|
||||
axis(2,axTicks(2), format(axTicks(2), scientific=F))
|
||||
dev.off()
|
||||
|
||||
#
|
||||
# Plot Q reported histogram
|
||||
#
|
||||
|
||||
outfile = paste(input, ".quality_rep_hist.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
hst=subset(data.frame(e$Qreported, e$nBases), e.nBases != 0)
|
||||
hst2=subset(data.frame(f$Qreported, f$nBases), f.nBases != 0)
|
||||
yMax = max(hst$e.nBases)
|
||||
if(maxHist != 0) {
|
||||
yMax = maxHist
|
||||
}
|
||||
plot(hst$e.Qreported, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yMax), main=paste("Reported quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Reported quality score", ylab="Number of Bases",yaxt="n")
|
||||
points(hst2$f.Qreported, hst2$f.nBases, type="h", lwd=4, col="maroon1")
|
||||
axis(2,axTicks(2), format(axTicks(2), scientific=F))
|
||||
dev.off()
|
||||
|
|
@ -0,0 +1,138 @@
|
|||
titvFPEst <- function(titvExpected, titvObserved) { max(min(1 - (titvObserved - 0.5) / (titvExpected - 0.5), 1), 0.001) }
|
||||
|
||||
titvFPEstV <- function(titvExpected, titvs) {
|
||||
sapply(titvs, function(x) titvFPEst(titvExpected, x))
|
||||
}
|
||||
|
||||
calcHet <- function(nknown, knownTiTv, nnovel, novelTiTv, callable) {
|
||||
TP <- nknown + (1-titvFPEst(knownTiTv, novelTiTv)) * nnovel
|
||||
2 * TP / 3 / callable
|
||||
}
|
||||
|
||||
marginalTiTv <- function( nx, titvx, ny, titvy ) {
|
||||
tvx = nx / (titvx + 1)
|
||||
tix = nx - tvx
|
||||
tvy = ny / (titvy + 1)
|
||||
tiy = ny - tvy
|
||||
tiz = tix - tiy
|
||||
tvz = tvx - tvy
|
||||
return(tiz / tvz)
|
||||
}
|
||||
marginaldbSNPRate <- function( nx, dbx, ny, dby ) {
|
||||
knownx = nx * dbx / 100
|
||||
novelx = nx - knownx
|
||||
knowny = ny * dby / 100
|
||||
novely = ny - knowny
|
||||
knownz = knownx - knowny
|
||||
novelz = novelx - novely
|
||||
return(knownz / ( knownz + novelz ) * 100)
|
||||
}
|
||||
|
||||
numExpectedCalls <- function(L, theta, calledFractionOfRegion, nIndividuals, dbSNPRate) {
|
||||
nCalls <- L * theta * calledFractionOfRegion * sum(1 / seq(1, 2 * nIndividuals))
|
||||
return(list(nCalls = nCalls, nKnown = dbSNPRate * nCalls, nNovel = (1-dbSNPRate) * nCalls))
|
||||
}
|
||||
|
||||
normalize <- function(x) {
|
||||
x / sum(x)
|
||||
}
|
||||
|
||||
normcumsum <- function(x) {
|
||||
cumsum(normalize(x))
|
||||
}
|
||||
|
||||
cumhist <- function(d, ...) {
|
||||
plot(d[order(d)], type="b", col="orange", lwd=2, ...)
|
||||
}
|
||||
|
||||
revcumsum <- function(x) {
|
||||
return(rev(cumsum(rev(x))))
|
||||
}
|
||||
|
||||
phred <- function(x) {
|
||||
log10(max(x,10^(-9.9)))*-10
|
||||
}
|
||||
|
||||
pOfB <- function(b, B, Q) {
|
||||
#print(paste(b, B, Q))
|
||||
p = 1 - 10^(-Q/10)
|
||||
if ( b == B )
|
||||
return(p)
|
||||
else
|
||||
return(1 - p)
|
||||
}
|
||||
|
||||
pOfG <- function(bs, qs, G) {
|
||||
a1 = G[1]
|
||||
a2 = G[2]
|
||||
|
||||
log10p = 0
|
||||
for ( i in 1:length(bs) ) {
|
||||
b = bs[i]
|
||||
q = qs[i]
|
||||
p1 = pOfB(b, a1, q) / 2 + pOfB(b, a2, q) / 2
|
||||
log10p = log10p + log10(p1)
|
||||
}
|
||||
|
||||
return(log10p)
|
||||
}
|
||||
|
||||
pOfGs <- function(nAs, nBs, Q) {
|
||||
bs = c(rep("a", nAs), rep("t", nBs))
|
||||
qs = rep(Q, nAs + nBs)
|
||||
G1 = c("a", "a")
|
||||
G2 = c("a", "t")
|
||||
G3 = c("t", "t")
|
||||
|
||||
log10p1 = pOfG(bs, qs, G1)
|
||||
log10p2 = pOfG(bs, qs, G2)
|
||||
log10p3 = pOfG(bs, qs, G3)
|
||||
Qsample = phred(1 - 10^log10p2 / sum(10^(c(log10p1, log10p2, log10p3))))
|
||||
|
||||
return(list(p1=log10p1, p2=log10p2, p3=log10p3, Qsample=Qsample))
|
||||
}
|
||||
|
||||
QsampleExpected <- function(depth, Q) {
|
||||
weightedAvg = 0
|
||||
for ( d in 1:(depth*3) ) {
|
||||
Qsample = 0
|
||||
pOfD = dpois(d, depth)
|
||||
for ( nBs in 0:d ) {
|
||||
pOfnB = dbinom(nBs, d, 0.5)
|
||||
nAs = d - nBs
|
||||
Qsample = pOfGs(nAs, nBs, Q)$Qsample
|
||||
#Qsample = 1
|
||||
weightedAvg = weightedAvg + Qsample * pOfD * pOfnB
|
||||
print(as.data.frame(list(d=d, nBs = nBs, pOfD=pOfD, pOfnB = pOfnB, Qsample=Qsample, weightedAvg = weightedAvg)))
|
||||
}
|
||||
}
|
||||
|
||||
return(weightedAvg)
|
||||
}
|
||||
|
||||
plotQsamples <- function(depths, Qs, Qmax) {
|
||||
cols = rainbow(length(Qs))
|
||||
plot(depths, rep(Qmax, length(depths)), type="n", ylim=c(0,Qmax), xlab="Average sequencing coverage", ylab="Qsample", main = "Expected Qsample values, including depth and allele sampling")
|
||||
|
||||
for ( i in 1:length(Qs) ) {
|
||||
Q = Qs[i]
|
||||
y = as.numeric(lapply(depths, function(x) QsampleExpected(x, Q)))
|
||||
points(depths, y, col=cols[i], type="b")
|
||||
}
|
||||
|
||||
legend("topleft", paste("Q", Qs), fill=cols)
|
||||
}
|
||||
|
||||
pCallHetGivenDepth <- function(depth, nallelesToCall) {
|
||||
depths = 0:(2*depth)
|
||||
pNoAllelesToCall = apply(as.matrix(depths),1,function(d) sum(dbinom(0:nallelesToCall,d,0.5)))
|
||||
dpois(depths,depth)*(1-pNoAllelesToCall)
|
||||
}
|
||||
|
||||
pCallHets <- function(depth, nallelesToCall) {
|
||||
sum(pCallHetGivenDepth(depth,nallelesToCall))
|
||||
}
|
||||
|
||||
pCallHetMultiSample <- function(depth, nallelesToCall, nsamples) {
|
||||
1-(1-pCallHets(depth,nallelesToCall))^nsamples
|
||||
}
|
||||
|
|
@ -798,11 +798,11 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
|
||||
private void generateAlternateConsensesFromKnownIndels(final Set<Consensus> altConsensesToPopulate, final int leftmostIndex, final byte[] reference) {
|
||||
for ( VariantContext knownIndel : knownIndelsToTry ) {
|
||||
if ( knownIndel == null || !knownIndel.isIndel() )
|
||||
if ( knownIndel == null || !knownIndel.isIndel() || knownIndel.isComplexIndel() )
|
||||
continue;
|
||||
byte[] indelStr = knownIndel.isInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length());
|
||||
int start = knownIndel.getStart() - leftmostIndex + 1;
|
||||
Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel.isDeletion());
|
||||
Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel);
|
||||
if ( c != null )
|
||||
altConsensesToPopulate.add(c);
|
||||
}
|
||||
|
|
@ -988,7 +988,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
// create a Consensus from just the indel string that falls on the reference
|
||||
private Consensus createAlternateConsensus(final int indexOnRef, final byte[] reference, final byte[] indelStr, final boolean isDeletion) {
|
||||
private Consensus createAlternateConsensus(final int indexOnRef, final byte[] reference, final byte[] indelStr, final VariantContext indel) {
|
||||
if ( indexOnRef < 0 || indexOnRef >= reference.length )
|
||||
return null;
|
||||
|
||||
|
|
@ -1002,14 +1002,16 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
if ( indexOnRef > 0 )
|
||||
cigar.add(new CigarElement(indexOnRef, CigarOperator.M));
|
||||
|
||||
if ( isDeletion ) {
|
||||
if ( indel.isDeletion() ) {
|
||||
refIdx += indelStr.length;
|
||||
cigar.add(new CigarElement(indelStr.length, CigarOperator.D));
|
||||
}
|
||||
else {
|
||||
else if ( indel.isInsertion() ) {
|
||||
for ( byte b : indelStr )
|
||||
sb.append((char)b);
|
||||
cigar.add(new CigarElement(indelStr.length, CigarOperator.I));
|
||||
} else {
|
||||
throw new IllegalStateException("Creating an alternate consensus from a complex indel is not allows");
|
||||
}
|
||||
|
||||
if ( reference.length - refIdx > 0 )
|
||||
|
|
|
|||
|
|
@ -0,0 +1,143 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
@By(DataSource.READS)
|
||||
// walker to count realigned reads
|
||||
public class RealignedReadCounter extends ReadWalker<Integer, Integer> {
|
||||
|
||||
public static final String ORIGINAL_CIGAR_TAG = "OC";
|
||||
public static final String ORIGINAL_POSITION_TAG = "OP";
|
||||
|
||||
@Argument(fullName="targetIntervals", shortName="targetIntervals", doc="intervals file output from RealignerTargetCreator", required=true)
|
||||
protected String intervalsFile = null;
|
||||
|
||||
// the intervals input by the user
|
||||
private Iterator<GenomeLoc> intervals = null;
|
||||
|
||||
// the current interval in the list
|
||||
private GenomeLoc currentInterval = null;
|
||||
|
||||
private long updatedIntervals = 0, updatedReads = 0, affectedBases = 0;
|
||||
private boolean intervalWasUpdated = false;
|
||||
|
||||
public void initialize() {
|
||||
// prepare to read intervals one-by-one, as needed (assuming they are sorted).
|
||||
intervals = new IntervalFileMergingIterator( getToolkit().getGenomeLocParser(), new File(intervalsFile), IntervalMergingRule.OVERLAPPING_ONLY );
|
||||
currentInterval = intervals.hasNext() ? intervals.next() : null;
|
||||
}
|
||||
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
if ( currentInterval == null ) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
GenomeLoc readLoc = ref.getGenomeLocParser().createGenomeLoc(read);
|
||||
// hack to get around unmapped reads having screwy locations
|
||||
if ( readLoc.getStop() == 0 )
|
||||
readLoc = ref.getGenomeLocParser().createGenomeLoc(readLoc.getContig(), readLoc.getStart(), readLoc.getStart());
|
||||
|
||||
if ( readLoc.isBefore(currentInterval) || ReadUtils.is454Read(read) )
|
||||
return 0;
|
||||
|
||||
if ( readLoc.overlapsP(currentInterval) ) {
|
||||
if ( doNotTryToClean(read) )
|
||||
return 0;
|
||||
|
||||
if ( read.getAttribute(ORIGINAL_CIGAR_TAG) != null ) {
|
||||
String newCigar = (String)read.getAttribute(ORIGINAL_CIGAR_TAG);
|
||||
// deal with an old bug
|
||||
if ( read.getCigar().toString().equals(newCigar) ) {
|
||||
//System.out.println(currentInterval + ": " + read.getReadName() + " " + read.getCigarString() + " " + newCigar);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ( !intervalWasUpdated ) {
|
||||
intervalWasUpdated = true;
|
||||
updatedIntervals++;
|
||||
affectedBases += 20 + getIndelSize(read);
|
||||
}
|
||||
updatedReads++;
|
||||
|
||||
}
|
||||
} else {
|
||||
do {
|
||||
intervalWasUpdated = false;
|
||||
currentInterval = intervals.hasNext() ? intervals.next() : null;
|
||||
} while ( currentInterval != null && currentInterval.isBefore(readLoc) );
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
private int getIndelSize(SAMRecord read) {
|
||||
for ( CigarElement ce : read.getCigar().getCigarElements() ) {
|
||||
if ( ce.getOperator() == CigarOperator.I )
|
||||
return 0;
|
||||
if ( ce.getOperator() == CigarOperator.D )
|
||||
return ce.getLength();
|
||||
}
|
||||
logger.warn("We didn't see an indel for this read: " + read.getReadName() + " " + read.getAlignmentStart() + " " + read.getCigar());
|
||||
return 0;
|
||||
}
|
||||
|
||||
private boolean doNotTryToClean(SAMRecord read) {
|
||||
return read.getReadUnmappedFlag() ||
|
||||
read.getNotPrimaryAlignmentFlag() ||
|
||||
read.getReadFailsVendorQualityCheckFlag() ||
|
||||
read.getMappingQuality() == 0 ||
|
||||
read.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START ||
|
||||
(BadMateFilter.hasBadMate(read));
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return sum + value;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
System.out.println(updatedIntervals + " intervals were updated");
|
||||
System.out.println(updatedReads + " reads were updated");
|
||||
System.out.println(affectedBases + " bases were affected");
|
||||
}
|
||||
}
|
||||
|
|
@ -161,7 +161,7 @@ public class AnnotateMNPsWalker extends RodWalker<Integer, Integer> {
|
|||
boolean atStartOfVc = curLocus.getStart() == vcLoc.getStart();
|
||||
boolean atEndOfVc = curLocus.getStart() == vcLoc.getStop();
|
||||
|
||||
if (vc.getType() == VariantContext.Type.MNP) {
|
||||
if (vc.isMNP()) {
|
||||
logger.debug("Observed MNP at " + vcLoc);
|
||||
|
||||
if (isChrM(vc)) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,62 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.List;
|
||||
import java.io.PrintStream;
|
||||
|
||||
/**
|
||||
* Counts the number of contiguous regions the walker traverses over. Slower than it needs to be, but
|
||||
* very useful since overlapping intervals get merged, so you can count the number of intervals the GATK merges down to.
|
||||
* This was its very first use.
|
||||
*/
|
||||
public class CountIntervals extends RefWalker<Long, Long> {
|
||||
@Output
|
||||
PrintStream out;
|
||||
|
||||
@Argument(fullName="numOverlaps",shortName="no",doc="Count all occurrences of X or more overlapping intervals; defaults to 2", required=false)
|
||||
int numOverlaps = 2;
|
||||
|
||||
public Long reduceInit() {
|
||||
return 0l;
|
||||
}
|
||||
|
||||
public boolean isReduceByInterval() { return true; }
|
||||
|
||||
public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
List<GATKFeature> checkIntervals = tracker.getGATKFeatureMetaData("check",false);
|
||||
return (long) checkIntervals.size();
|
||||
}
|
||||
|
||||
public Long reduce(Long loc, Long prev) {
|
||||
if ( loc == null ) {
|
||||
return 0l;
|
||||
} else {
|
||||
return Math.max(prev,loc);
|
||||
}
|
||||
}
|
||||
|
||||
public void onTraversalDone(List<Pair<GenomeLoc,Long>> finalReduce) {
|
||||
long count = 0;
|
||||
for ( Pair<GenomeLoc,Long> g : finalReduce ) {
|
||||
if ( g.second >= numOverlaps) {
|
||||
count ++;
|
||||
}
|
||||
}
|
||||
out.printf("Number of contiguous intervals: %d",count);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,157 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Emits specific fields as dictated by the user from one or more VCF files.
|
||||
*/
|
||||
@Requires(value={})
|
||||
public class ProfileRodSystem extends RodWalker<Integer, Integer> {
|
||||
@Output(doc="File to which results should be written",required=true)
|
||||
protected PrintStream out;
|
||||
|
||||
@Argument(fullName="nIterations", shortName="N", doc="Number of raw reading iterations to perform", required=false)
|
||||
int nIterations = 1;
|
||||
|
||||
@Argument(fullName="maxRecords", shortName="M", doc="Max. number of records to process", required=false)
|
||||
int MAX_RECORDS = -1;
|
||||
|
||||
SimpleTimer timer = new SimpleTimer("myTimer");
|
||||
|
||||
public void initialize() {
|
||||
File rodFile = getRodFile();
|
||||
|
||||
out.printf("# walltime is in seconds%n");
|
||||
out.printf("# file is %s%n", rodFile);
|
||||
out.printf("# file size is %d bytes%n", rodFile.length());
|
||||
out.printf("operation\titeration\twalltime%n");
|
||||
for ( int i = 0; i < nIterations; i++ ) {
|
||||
out.printf("read.bytes\t%d\t%.2f%n", i, readFile(rodFile, ReadMode.BY_BYTE));
|
||||
out.printf("read.line\t%d\t%.2f%n", i, readFile(rodFile, ReadMode.BY_LINE));
|
||||
out.printf("line.and.parts\t%d\t%.2f%n", i, readFile(rodFile, ReadMode.BY_PARTS));
|
||||
out.printf("decode.loc\t%d\t%.2f%n", i, readFile(rodFile, ReadMode.DECODE_LOC));
|
||||
out.printf("full.decode\t%d\t%.2f%n", i, readFile(rodFile, ReadMode.DECODE));
|
||||
}
|
||||
|
||||
timer.start(); // start up timer for map itself
|
||||
}
|
||||
|
||||
private enum ReadMode { BY_BYTE, BY_LINE, BY_PARTS, DECODE_LOC, DECODE };
|
||||
|
||||
private final double readFile(File f, ReadMode mode) {
|
||||
timer.start();
|
||||
|
||||
try {
|
||||
byte[] data = new byte[100000];
|
||||
FileInputStream s = new FileInputStream(f);
|
||||
|
||||
if ( mode == ReadMode.BY_BYTE ) {
|
||||
while (true) {
|
||||
if ( s.read(data) == -1 )
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
int counter = 0;
|
||||
VCFCodec codec = new VCFCodec();
|
||||
String[] parts = new String[100000];
|
||||
AsciiLineReader lineReader = new AsciiLineReader(s);
|
||||
|
||||
if ( mode == ReadMode.DECODE_LOC || mode == ReadMode.DECODE )
|
||||
codec.readHeader(lineReader);
|
||||
|
||||
while (counter++ < MAX_RECORDS || MAX_RECORDS == -1) {
|
||||
String line = lineReader.readLine();
|
||||
if ( line == null )
|
||||
break;
|
||||
else if ( mode == ReadMode.BY_PARTS ) {
|
||||
ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR_CHAR);
|
||||
}
|
||||
else if ( mode == ReadMode.DECODE_LOC ) {
|
||||
codec.decodeLoc(line);
|
||||
}
|
||||
else if ( mode == ReadMode.DECODE ) {
|
||||
processOneVC((VariantContext)codec.decode(line));
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch ( Exception e ) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
return timer.getElapsedTime();
|
||||
}
|
||||
|
||||
private File getRodFile() {
|
||||
List<ReferenceOrderedDataSource> rods = this.getToolkit().getRodDataSources();
|
||||
ReferenceOrderedDataSource rod = rods.get(0);
|
||||
return rod.getFile();
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null ) // RodWalkers can make funky map calls
|
||||
return 0;
|
||||
|
||||
VariantContext vc = tracker.getVariantContext(ref, "rod", context.getLocation());
|
||||
processOneVC(vc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
private static final void processOneVC(VariantContext vc) {
|
||||
vc.getNSamples(); // force us to parse the samples
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
public Integer reduce(Integer counter, Integer sum) {
|
||||
return counter + sum;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer sum) {
|
||||
out.printf("gatk.traversal\t%d\t%.2f%n", 0, timer.getElapsedTime());
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,153 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.*;
|
||||
import java.math.BigInteger;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* a walker for validating (in the style of validating pile-up) the ROD system.
|
||||
*/
|
||||
@Reference(window=@Window(start=-40,stop=40))
|
||||
public class RodSystemValidationWalker extends RodWalker<Integer,Integer> {
|
||||
|
||||
// the divider to use in some of the text output
|
||||
private static final String DIVIDER = ",";
|
||||
|
||||
@Output
|
||||
public PrintStream out;
|
||||
|
||||
@Argument(fullName="PerLocusEqual",required=false,doc="Should we check that all records at the same site produce equivilent variant contexts")
|
||||
public boolean allRecordsVariantContextEquivalent = false;
|
||||
|
||||
// used to calculate the MD5 of a file
|
||||
MessageDigest digest = null;
|
||||
|
||||
// we sometimes need to know what rods the engine's seen
|
||||
List<ReferenceOrderedDataSource> rodList;
|
||||
|
||||
/**
|
||||
* emit the md5 sums for each of the input ROD files (will save up a lot of time if and when the ROD files change
|
||||
* underneath us).
|
||||
*/
|
||||
public void initialize() {
|
||||
// setup the MD5-er
|
||||
try {
|
||||
digest = MessageDigest.getInstance("MD5");
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
throw new ReviewedStingException("Unable to find MD5 checksumer");
|
||||
}
|
||||
out.println("Header:");
|
||||
// enumerate the list of ROD's we've loaded
|
||||
rodList = this.getToolkit().getRodDataSources();
|
||||
for (ReferenceOrderedDataSource rod : rodList) {
|
||||
out.println(rod.getName() + DIVIDER + rod.getType());
|
||||
out.println(rod.getName() + DIVIDER + rod.getFile());
|
||||
out.println(rod.getName() + DIVIDER + md5sum(rod.getFile()));
|
||||
}
|
||||
out.println("Data:");
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param tracker the ref meta data tracker to get RODs
|
||||
* @param ref reference context
|
||||
* @param context the reads
|
||||
* @return an 1 for each site with a rod(s), 0 otherwise
|
||||
*/
|
||||
@Override
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
int ret = 0;
|
||||
if (tracker != null && tracker.getAllRods().size() > 0) {
|
||||
out.print(context.getLocation() + DIVIDER);
|
||||
Collection<GATKFeature> features = tracker.getAllRods();
|
||||
for (GATKFeature feat : features)
|
||||
out.print(feat.getName() + DIVIDER);
|
||||
out.println(";");
|
||||
ret++;
|
||||
}
|
||||
|
||||
// if the argument was set, check for equivalence
|
||||
if (allRecordsVariantContextEquivalent && tracker != null) {
|
||||
Collection<VariantContext> col = tracker.getAllVariantContexts(ref);
|
||||
VariantContext con = null;
|
||||
for (VariantContext contextInList : col)
|
||||
if (con == null) con = contextInList;
|
||||
else if (!con.equals(col)) out.println("FAIL: context " + col + " doesn't match " + con);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Provide an initial value for reduce computations.
|
||||
*
|
||||
* @return Initial value of reduce.
|
||||
*/
|
||||
@Override
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduces a single map with the accumulator provided as the ReduceType.
|
||||
*
|
||||
* @param value result of the map.
|
||||
* @param sum accumulator for the reduce.
|
||||
* @return accumulator with result of the map taken into account.
|
||||
*/
|
||||
@Override
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return value + sum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(Integer result) {
|
||||
// Double check traversal result to make count is the same.
|
||||
// TODO: Is this check necessary?
|
||||
out.println("[REDUCE RESULT] Traversal result is: " + result);
|
||||
}
|
||||
|
||||
// shamelessly absconded and adapted from http://www.javalobby.org/java/forums/t84420.html
|
||||
private String md5sum(File f) {
|
||||
InputStream is;
|
||||
try {
|
||||
is = new FileInputStream(f);
|
||||
} catch (FileNotFoundException e) {
|
||||
return "Not a file";
|
||||
}
|
||||
byte[] buffer = new byte[8192];
|
||||
int read = 0;
|
||||
try {
|
||||
while ((read = is.read(buffer)) > 0) {
|
||||
digest.update(buffer, 0, read);
|
||||
}
|
||||
byte[] md5sum = digest.digest();
|
||||
BigInteger bigInt = new BigInteger(1, md5sum);
|
||||
return bigInt.toString(16);
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException("Unable to process file for MD5", e);
|
||||
}
|
||||
finally {
|
||||
try {
|
||||
is.close();
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException("Unable to close input stream for MD5 calculation", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,298 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
/**
|
||||
* Walks over the input data set, calculating the number of reads seen for diagnostic purposes.
|
||||
* Can also count the number of reads matching a given criterion using read filters (see the
|
||||
* --read-filter command line argument). Simplest example of a read-backed analysis.
|
||||
*/
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.DONT_MODIFY, ApplicationTime = BAQ.ApplicationTime.HANDLED_IN_WALKER)
|
||||
@Reference(window=@Window(start=-5,stop=5))
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES})
|
||||
public class ValidateBAQWalker extends ReadWalker<Integer, Integer> {
|
||||
@Output(doc="File to which results should be written",required=true)
|
||||
protected PrintStream out;
|
||||
|
||||
@Argument(doc="maximum read length to apply the BAQ calculation too",required=false)
|
||||
protected int maxReadLen = 1000;
|
||||
|
||||
@Argument(doc="",required=false)
|
||||
protected int bw = 7;
|
||||
|
||||
@Argument(doc="",required=false)
|
||||
protected boolean samtoolsMode = false;
|
||||
|
||||
@Argument(doc="only operates on reads with this name",required=false)
|
||||
protected String readName = null;
|
||||
|
||||
@Argument(doc="If true, all differences are errors", required=false)
|
||||
protected boolean strict = false;
|
||||
|
||||
@Argument(doc="prints info for each read", required=false)
|
||||
protected boolean printEachRead = false;
|
||||
|
||||
@Argument(doc="Also prints out detailed comparison information when for known calculation differences", required=false)
|
||||
protected boolean alsoPrintWarnings = false;
|
||||
|
||||
@Argument(doc="Include reads without BAQ tag", required=false)
|
||||
protected boolean includeReadsWithoutBAQTag = false;
|
||||
|
||||
@Argument(doc="x each read is processed", required=false)
|
||||
protected int magnification = 1;
|
||||
|
||||
@Argument(doc="Profile performance", required=false)
|
||||
protected boolean profile = false;
|
||||
|
||||
int counter = 0;
|
||||
|
||||
BAQ baqHMM = null; // matches current samtools parameters
|
||||
|
||||
public void initialize() {
|
||||
if ( samtoolsMode )
|
||||
baqHMM = new BAQ(1e-3, 0.1, bw, (byte)0, true);
|
||||
else
|
||||
baqHMM = new BAQ();
|
||||
}
|
||||
|
||||
long goodReads = 0, badReads = 0;
|
||||
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
|
||||
|
||||
if ( (readName == null || readName.equals(read.getReadName())) && read.getReadLength() <= maxReadLen && (includeReadsWithoutBAQTag || BAQ.hasBAQTag(read) ) ) {
|
||||
if ( baqHMM.excludeReadFromBAQ(read) )
|
||||
return 0;
|
||||
|
||||
if ( profile ) {
|
||||
profileBAQ(ref, read);
|
||||
} else {
|
||||
validateBAQ(ref, read);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SimpleTimer tagTimer = new SimpleTimer("from.tag");
|
||||
SimpleTimer baqReadTimer = new SimpleTimer("baq.read");
|
||||
SimpleTimer glocalTimer = new SimpleTimer("hmm.glocal");
|
||||
|
||||
private void profileBAQ(ReferenceContext ref, SAMRecord read) {
|
||||
IndexedFastaSequenceFile refReader = this.getToolkit().getReferenceDataSource().getReference();
|
||||
BAQ.BAQCalculationResult baq = null;
|
||||
|
||||
tagTimer.restart();
|
||||
for ( int i = 0; i < magnification; i++ ) { BAQ.calcBAQFromTag(read, false, includeReadsWithoutBAQTag); }
|
||||
tagTimer.stop();
|
||||
|
||||
baqReadTimer.restart();
|
||||
for ( int i = 0; i < magnification; i++ ) { baqHMM.baqRead(read, refReader, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.DONT_MODIFY ); }
|
||||
baqReadTimer.stop();
|
||||
|
||||
glocalTimer.restart();
|
||||
for ( int i = 0; i < magnification; i++ )
|
||||
baqHMM.baqRead(read, refReader, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.DONT_MODIFY);
|
||||
glocalTimer.stop();
|
||||
}
|
||||
|
||||
|
||||
private void validateBAQ(ReferenceContext ref, SAMRecord read) {
|
||||
IndexedFastaSequenceFile refReader = this.getToolkit().getReferenceDataSource().getReference();
|
||||
byte[] baqFromTag = BAQ.calcBAQFromTag(read, false, includeReadsWithoutBAQTag);
|
||||
if (counter++ % 1000 == 0 || printEachRead) out.printf("Checking read %s (%d)%n", read.getReadName(), counter);
|
||||
|
||||
BAQ.BAQCalculationResult baq = baqHMM.calcBAQFromHMM(read, refReader);
|
||||
|
||||
boolean fail = false;
|
||||
boolean print = false;
|
||||
int badi = 0;
|
||||
|
||||
if ( BAQ.hasBAQTag(read) ) {
|
||||
for ( badi = 0; badi < baqFromTag.length; badi++ ) {
|
||||
if ( baqFromTag[badi] != baq.bq[badi] ) {
|
||||
if ( cigarLength(read) != read.getReadLength() ) {
|
||||
print = true;
|
||||
fail = false;
|
||||
out.printf(" different, but cigar length != read length%n");
|
||||
break;
|
||||
}
|
||||
if (MathUtils.arrayMin(read.getBaseQualities()) == 0) {
|
||||
print = true;
|
||||
fail = strict;
|
||||
out.printf(" different, but Q0 base detected%n");
|
||||
break;
|
||||
}
|
||||
else if (readHasSoftClip(read) && ! samtoolsMode) {
|
||||
print = true;
|
||||
fail = strict;
|
||||
out.printf(" different, but soft clip detected%n");
|
||||
break;
|
||||
} else if (readHasDeletion(read) ) { // && ! samtoolsMode) {
|
||||
print = true;
|
||||
fail = strict;
|
||||
out.printf(" different, but deletion detected%n");
|
||||
break;
|
||||
} else if ( baq.bq[badi] < baqHMM.getMinBaseQual() ) {
|
||||
print = fail = true;
|
||||
out.printf(" Base quality %d < min %d", baq.bq[badi], baqHMM.getMinBaseQual());
|
||||
break;
|
||||
} else {
|
||||
print = fail = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( fail || print )
|
||||
badReads++;
|
||||
else
|
||||
goodReads++;
|
||||
}
|
||||
|
||||
if ( fail || printEachRead || ( print && alsoPrintWarnings ) ) {
|
||||
byte[] pos = new byte[baq.bq.length];
|
||||
for ( int i = 0; i < pos.length; i++ ) pos[i] = (byte)i;
|
||||
|
||||
out.printf(" read length : %d%n", read.getReadLength());
|
||||
out.printf(" read start : %d (%d unclipped)%n", read.getAlignmentStart(), read.getUnclippedStart());
|
||||
out.printf(" cigar : %s%n", read.getCigarString());
|
||||
out.printf(" ref bases : %s%n", new String(baq.refBases));
|
||||
out.printf(" read bases : %s%n", new String(read.getReadBases()));
|
||||
out.printf(" ref length : %d%n", baq.refBases.length);
|
||||
out.printf(" BQ tag : %s%n", read.getStringAttribute(BAQ.BAQ_TAG));
|
||||
if ( BAQ.hasBAQTag(read) ) printQuals(" BQ deltas : ", getBAQDeltas(read), true);
|
||||
printQuals(" original quals: ", read.getBaseQualities(), true);
|
||||
printQuals(" baq quals: ", baq.bq, true);
|
||||
printQuals(" positions : ", pos, true);
|
||||
printQuals(" original quals: ", read.getBaseQualities());
|
||||
if ( BAQ.hasBAQTag(read) ) printQuals(" tag quals: ", baqFromTag);
|
||||
printQuals(" hmm quals: ", baq.bq);
|
||||
out.printf(" read bases : %s%n", new String(read.getReadBases()));
|
||||
out.println(Utils.dupString('-', 80));
|
||||
}
|
||||
|
||||
|
||||
if ( fail )
|
||||
throw new StingException(String.format("BAQ from read and from HMM differ in read %s at position %d: tag qual = %d, hmm qual = %d",
|
||||
read.getReadName(), badi, baqFromTag[badi], baq.bq[badi]));
|
||||
}
|
||||
|
||||
private final static boolean readHasSoftClip(SAMRecord read) {
|
||||
for (CigarElement e : read.getCigar().getCigarElements()) {
|
||||
if ( e.getOperator() == CigarOperator.SOFT_CLIP )
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
private final static boolean readHasDeletion(SAMRecord read) {
|
||||
for (CigarElement e : read.getCigar().getCigarElements()) {
|
||||
if ( e.getOperator() == CigarOperator.DELETION )
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public final void printQuals( String prefix, byte[] quals ) {
|
||||
printQuals(prefix, quals, false);
|
||||
}
|
||||
|
||||
public final void printQuals( String prefix, byte[] quals, boolean asInt ) {
|
||||
printQuals(out, prefix, quals, asInt);
|
||||
}
|
||||
|
||||
public final static void printQuals( PrintStream out, String prefix, byte[] quals, boolean asInt ) {
|
||||
out.print(prefix);
|
||||
for ( int i = 0; i < quals.length; i++) {
|
||||
if ( asInt ) {
|
||||
out.printf("%2d", (int)quals[i]);
|
||||
if ( i+1 != quals.length ) out.print(",");
|
||||
} else
|
||||
out.print((char)(quals[i]+33));
|
||||
}
|
||||
out.println();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the BAQ delta bytes from the tag in read. Returns null if no BAQ tag is present.
|
||||
* @param read
|
||||
* @return
|
||||
*/
|
||||
public static byte[] getBAQDeltas(SAMRecord read) {
|
||||
byte[] baq = BAQ.getBAQTag(read);
|
||||
if ( baq != null ) {
|
||||
byte[] deltas = new byte[baq.length];
|
||||
for ( int i = 0; i < deltas.length; i++)
|
||||
deltas[i] = (byte)(-1 * (baq[i] - 64));
|
||||
return deltas;
|
||||
} else
|
||||
return null;
|
||||
}
|
||||
|
||||
private int cigarLength(SAMRecord read) {
|
||||
int readI = 0;
|
||||
for ( CigarElement elt : read.getCigar().getCigarElements() ) {
|
||||
int l = elt.getLength();
|
||||
switch (elt.getOperator()) {
|
||||
case N: // cannot handle these
|
||||
return 0;
|
||||
case H : case P : // ignore pads and hard clips
|
||||
break;
|
||||
case S :
|
||||
case I :
|
||||
readI += l;
|
||||
break;
|
||||
case D : break;
|
||||
case M :
|
||||
readI += l;
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("BUG: Unexpected CIGAR element " + elt + " in read " + read.getReadName());
|
||||
}
|
||||
}
|
||||
return readI;
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return value + sum;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer nreads) {
|
||||
if ( profile ) {
|
||||
out.printf("n.reads baq.per.read calculation time.in.secs%n");
|
||||
printTimer(nreads, tagTimer);
|
||||
printTimer(nreads, glocalTimer);
|
||||
printTimer(nreads, baqReadTimer);
|
||||
} else {
|
||||
out.printf("total reads BAQ'd %d; concordant BAQ reads %d %.4f; discordant BAQ reads %d %.4f%n", nreads,
|
||||
goodReads, (100.0 * goodReads) / nreads,
|
||||
badReads, (100.0 * badReads) / nreads);
|
||||
}
|
||||
}
|
||||
|
||||
private final void printTimer(int nreads, SimpleTimer timer) {
|
||||
out.printf("%d %d %s %.2f%n", nreads, magnification, timer.getName(), timer.getElapsedTime());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -209,7 +209,7 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
|||
String assay_sequence;
|
||||
if ( vc.isSNP() )
|
||||
assay_sequence = leading_bases + "[" + (char)ref.getBase() + "/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases;
|
||||
else if ( vc.getType() == VariantContext.Type.MNP )
|
||||
else if ( vc.isMNP() )
|
||||
assay_sequence = leading_bases + "[" + new String(vc.getReference().getBases()) + "/" + new String(vc.getAlternateAllele(0).getBases())+"]"+trailing_bases.substring(vc.getReference().length()-1);
|
||||
else if ( vc.isInsertion() )
|
||||
assay_sequence = leading_bases + (char)ref.getBase() + "[-/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases;
|
||||
|
|
|
|||
|
|
@ -113,7 +113,7 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
|||
observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING);
|
||||
}
|
||||
// deletions
|
||||
else if ( vc.isDeletion() || vc.isMixed() || vc.getType() == VariantContext.Type.MNP ) {
|
||||
else if ( vc.isDeletion() || vc.isMixed() || vc.isMNP() ) {
|
||||
// we can't validate arbitrarily long deletions
|
||||
if ( reportedRefAllele.length() > 100 ) {
|
||||
logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart()));
|
||||
|
|
@ -123,7 +123,7 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
|||
// deletions are associated with the (position of) the last (preceding) non-deleted base;
|
||||
// hence to get actually deleted bases we need offset = 1
|
||||
int offset = 1 ;
|
||||
if ( vc.getType() == VariantContext.Type.MNP ) offset = 0; // if it's an MNP, the reported position IS the first modified base
|
||||
if ( vc.isMNP() ) offset = 0; // if it's an MNP, the reported position IS the first modified base
|
||||
byte[] refBytes = ref.getBases();
|
||||
byte[] trueRef = new byte[reportedRefAllele.length()];
|
||||
for (int i = 0; i < reportedRefAllele.length(); i++)
|
||||
|
|
|
|||
|
|
@ -414,7 +414,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @return vc subcontext
|
||||
*/
|
||||
public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes, Set<Allele> alleles) {
|
||||
return new VariantContext(getSource(), contig, start, stop, alleles, genotypes, getNegLog10PError(), getFilters(), getAttributes());
|
||||
return new VariantContext(getSource(), contig, start, stop, alleles, genotypes, getNegLog10PError(), filtersWereApplied() ? getFilters() : null, getAttributes());
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -566,10 +566,21 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
return getType() == Type.INDEL && getAlternateAllele(0).isNull();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the alleles indicate neither a simple deletion nor a simple insertion
|
||||
*/
|
||||
public boolean isComplexIndel() {
|
||||
return isIndel() && !isDeletion() && !isInsertion();
|
||||
}
|
||||
|
||||
public boolean isSymbolic() {
|
||||
return getType() == Type.SYMBOLIC;
|
||||
}
|
||||
|
||||
public boolean isMNP() {
|
||||
return getType() == Type.MNP;
|
||||
}
|
||||
|
||||
/**
|
||||
* convenience method for indels
|
||||
*
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ public abstract class BaseTest {
|
|||
*/
|
||||
public static final String MD5_FILE_DB_SUBDIR = "integrationtests";
|
||||
|
||||
public static final String testDir = "testdata/";
|
||||
public static final String testDir = "public/testdata/";
|
||||
|
||||
/** before the class starts up */
|
||||
static {
|
||||
|
|
|
|||
|
|
@ -60,8 +60,8 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
|
|||
GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine();
|
||||
|
||||
Collection<SAMReaderID> samFiles = new ArrayList<SAMReaderID>();
|
||||
samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
|
||||
samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
|
||||
samFiles.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags()));
|
||||
samFiles.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags()));
|
||||
|
||||
testEngine.setSAMFileIDs(samFiles);
|
||||
testEngine.checkForDuplicateSamFiles();
|
||||
|
|
@ -72,10 +72,10 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
|
|||
GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine();
|
||||
|
||||
Collection<SAMReaderID> samFiles = new ArrayList<SAMReaderID>();
|
||||
samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
|
||||
samFiles.add(new SAMReaderID(new File("testdata/exampleNORG.bam"), new Tags()));
|
||||
samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
|
||||
samFiles.add(new SAMReaderID(new File("testdata/exampleNORG.bam"), new Tags()));
|
||||
samFiles.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags()));
|
||||
samFiles.add(new SAMReaderID(new File("public/testdata/exampleNORG.bam"), new Tags()));
|
||||
samFiles.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags()));
|
||||
samFiles.add(new SAMReaderID(new File("public/testdata/exampleNORG.bam"), new Tags()));
|
||||
|
||||
testEngine.setSAMFileIDs(samFiles);
|
||||
testEngine.checkForDuplicateSamFiles();
|
||||
|
|
@ -97,7 +97,7 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
|
|||
GATKArgumentCollection argCollection = new GATKArgumentCollection();
|
||||
testEngine.setArguments(argCollection);
|
||||
|
||||
File fastaFile = new File("testdata/exampleFASTA.fasta");
|
||||
File fastaFile = new File("public/testdata/exampleFASTA.fasta");
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(new IndexedFastaSequenceFile(fastaFile));
|
||||
testEngine.setGenomeLocParser(genomeLocParser);
|
||||
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ public class RealignerTargetCreatorIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
"-T RealignerTargetCreator -D /humgen/gsa-hpprojects/GATK/data/dbsnp_129_b36.rod -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s",
|
||||
1,
|
||||
Arrays.asList("fd2d9dbf718f7a6d82ae1787ac1fe61e"));
|
||||
Arrays.asList("f23ba17ee0f9573dd307708175d90cd2"));
|
||||
executeTest("test dbsnp", spec2);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
|
|||
" --NO_HEADER" +
|
||||
" -o %s",
|
||||
1,
|
||||
Arrays.asList("2cae3d16f9ed00b07d87e9c49272d877")
|
||||
Arrays.asList("debbbf3e661b6857cc8d99ff7635bb1d")
|
||||
);
|
||||
|
||||
executeTest("testSimpleVCFStreaming", spec);
|
||||
|
|
@ -98,7 +98,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
|
|||
" -EV CompOverlap -noEV -noST" +
|
||||
" -o %s",
|
||||
1,
|
||||
Arrays.asList("f60729c900bc8368717653b3fad80d1e")
|
||||
Arrays.asList("f60729c900bc8368717653b3fad80d1e") //"f60729c900bc8368717653b3fad80d1e"
|
||||
);
|
||||
executeTest("testVCFStreamingChain", selectTestSpec);
|
||||
|
||||
|
|
|
|||
|
|
@ -12,19 +12,16 @@ import org.testng.annotations.DataProvider;
|
|||
import org.testng.annotations.BeforeMethod;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.walkers.qc.ValidateBAQWalker;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMFileWriter;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.Arrays;
|
||||
import java.io.PrintStream;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
import net.sf.samtools.*;
|
||||
|
||||
/**
|
||||
|
|
@ -188,8 +185,8 @@ public class BAQUnitTest extends BaseTest {
|
|||
|
||||
System.out.println(Utils.dupString('-', 40));
|
||||
System.out.println("reads : " + new String(test.readBases));
|
||||
ValidateBAQWalker.printQuals(System.out, "in-quals:", test.quals, false);
|
||||
ValidateBAQWalker.printQuals(System.out, "bq-quals:", result.bq, false);
|
||||
printQuals(System.out, "in-quals:", test.quals, false);
|
||||
printQuals(System.out, "bq-quals:", result.bq, false);
|
||||
for (int i = 0; i < test.quals.length; i++) {
|
||||
//result.bq[i] = baqHMM.capBaseByBAQ(result.rawQuals[i], result.bq[i], result.state[i], i);
|
||||
Assert.assertTrue(result.bq[i] >= baqHMM.getMinBaseQual() || test.expected[i] < baqHMM.getMinBaseQual(), "BQ < min base quality");
|
||||
|
|
@ -197,4 +194,16 @@ public class BAQUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
public final static void printQuals( PrintStream out, String prefix, byte[] quals, boolean asInt ) {
|
||||
out.print(prefix);
|
||||
for ( int i = 0; i < quals.length; i++) {
|
||||
if ( asInt ) {
|
||||
out.printf("%2d", (int)quals[i]);
|
||||
if ( i+1 != quals.length ) out.print(",");
|
||||
} else
|
||||
out.print((char)(quals[i]+33));
|
||||
}
|
||||
out.println();
|
||||
}
|
||||
}
|
||||
|
|
@ -21,7 +21,7 @@ public class BedParserUnitTest extends BaseTest {
|
|||
|
||||
private static IndexedFastaSequenceFile seq;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
private File bedFile = new File("testdata/sampleBedFile.bed");
|
||||
private File bedFile = new File("public/testdata/sampleBedFile.bed");
|
||||
|
||||
@BeforeClass
|
||||
public void beforeTests() {
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ import java.io.IOException;
|
|||
*/
|
||||
public class HapMapUnitTest {
|
||||
// our sample hapmap file
|
||||
private final static File hapMapFile = new File("testdata/genotypes_chr1_ASW_phase3.3_first500.hapmap");
|
||||
private final static File hapMapFile = new File("public/testdata/genotypes_chr1_ASW_phase3.3_first500.hapmap");
|
||||
private final static String knownLine = "rs2185539 C/T chr1 556738 + ncbi_b36 bbs urn:lsid:bbs.hapmap.org:Protocol:Phase3.r3:1 urn:lsid:bbs.hapmap.org:Assay:Phase3.r3_r" +
|
||||
"s2185539:1 urn:lsid:dcc.hapmap.org:Panel:US_African-30-trios:4 QC+ CC TC TT CT CC CC CC CC CC CC CC CC CC";
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -17,8 +17,8 @@ import java.util.*;
|
|||
*/
|
||||
public class IndexFactoryUnitTest {
|
||||
|
||||
File inputFile = new File("testdata/HiSeq.10000.vcf");
|
||||
File outputFile = new File("testdata/onTheFlyOutputTest.vcf");
|
||||
File inputFile = new File("public/testdata/HiSeq.10000.vcf");
|
||||
File outputFile = new File("public/testdata/onTheFlyOutputTest.vcf");
|
||||
File outputFileIndex = Tribble.indexFile(outputFile);
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -49,12 +49,12 @@ public class ListFileUtilsUnitTest extends BaseTest {
|
|||
public void testIgnoreBlankLinesInBAMListFiles() throws Exception {
|
||||
File tempListFile = createTempListFile("testIgnoreBlankLines",
|
||||
"",
|
||||
"testdata/exampleBAM.bam",
|
||||
"public/testdata/exampleBAM.bam",
|
||||
" "
|
||||
);
|
||||
|
||||
List<SAMReaderID> expectedBAMFileListAfterUnpacking = new ArrayList<SAMReaderID>();
|
||||
expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
|
||||
expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags()));
|
||||
|
||||
performBAMListFileUnpackingTest(tempListFile, expectedBAMFileListAfterUnpacking);
|
||||
}
|
||||
|
|
@ -63,13 +63,13 @@ public class ListFileUtilsUnitTest extends BaseTest {
|
|||
public void testCommentSupportInBAMListFiles() throws Exception {
|
||||
File tempListFile = createTempListFile("testCommentSupport",
|
||||
"#",
|
||||
"testdata/exampleBAM.bam",
|
||||
"#testdata/foo.bam",
|
||||
" # testdata/bar.bam"
|
||||
"public/testdata/exampleBAM.bam",
|
||||
"#public/testdata/foo.bam",
|
||||
" # public/testdata/bar.bam"
|
||||
);
|
||||
|
||||
List<SAMReaderID> expectedBAMFileListAfterUnpacking = new ArrayList<SAMReaderID>();
|
||||
expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
|
||||
expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags()));
|
||||
|
||||
performBAMListFileUnpackingTest(tempListFile, expectedBAMFileListAfterUnpacking);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public class GenomeLocProcessingTrackerUnitTest extends BaseTest {
|
|||
IndexedFastaSequenceFile fasta = null;
|
||||
GenomeLocParser genomeLocParser = null;
|
||||
String chr1 = null;
|
||||
private final static String FILE_ROOT = "testdata/GLPTFile";
|
||||
private final static String FILE_ROOT = "public/testdata/GLPTFile";
|
||||
|
||||
@BeforeTest
|
||||
public void before() {
|
||||
|
|
|
|||
|
|
@ -1,15 +1,15 @@
|
|||
package core
|
||||
package org.broadinstitute.sting.queue.qscripts
|
||||
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
import org.broadinstitute.sting.queue.function.ListWriterFunction
|
||||
|
||||
import net.sf.samtools.{SAMFileReader,SAMReadGroupRecord}
|
||||
|
||||
import scala.io.Source._
|
||||
import collection.JavaConversions._
|
||||
import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel
|
||||
import org.broadinstitute.sting.queue.extensions.picard._
|
||||
import net.sf.samtools.{SAMFileReader, SAMReadGroupRecord}
|
||||
import net.sf.samtools.SAMFileHeader.SortOrder
|
||||
|
||||
|
||||
class DataProcessingPipeline extends QScript {
|
||||
|
|
@ -69,6 +69,8 @@ class DataProcessingPipeline extends QScript {
|
|||
@Input(doc="Decompose input BAM file and fully realign it using BWA and assume Pair Ended reads", fullName="use_bwa_pair_ended", shortName="bwape", required=false)
|
||||
var useBWApe: Boolean = false
|
||||
|
||||
@Input(doc="Number of threads BWA should use", fullName="bwa_threads", shortName="bt", required=false)
|
||||
var bwaThreads: Int = 1
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
|
|
@ -180,6 +182,7 @@ class DataProcessingPipeline extends QScript {
|
|||
var realignedBams: List[File] = List()
|
||||
var index = 1
|
||||
for (bam <- bams) {
|
||||
val readSortedBam = swapExt(bam, ".bam", "." + index + ".sorted.bam" )
|
||||
val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai")
|
||||
val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai")
|
||||
val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam")
|
||||
|
|
@ -190,11 +193,12 @@ class DataProcessingPipeline extends QScript {
|
|||
bwa_sam_se(bam, saiFile1, realignedSamFile))
|
||||
}
|
||||
else {
|
||||
add(bwa_aln_pe(bam, saiFile1, 1),
|
||||
bwa_aln_pe(bam, saiFile2, 2),
|
||||
bwa_sam_pe(bam, saiFile1, saiFile2, realignedSamFile))
|
||||
add(sortSam(bam, readSortedBam, SortOrder.queryname),
|
||||
bwa_aln_pe(readSortedBam, saiFile1, 1),
|
||||
bwa_aln_pe(readSortedBam, saiFile2, 2),
|
||||
bwa_sam_pe(readSortedBam, saiFile1, saiFile2, realignedSamFile))
|
||||
}
|
||||
add(sortSam(realignedSamFile, realignedBamFile))
|
||||
add(sortSam(realignedSamFile, realignedBamFile, SortOrder.coordinate))
|
||||
addReadGroups(realignedBamFile, rgRealignedBamFile, new SAMFileReader(bam))
|
||||
realignedBams :+= rgRealignedBamFile
|
||||
index = index + 1
|
||||
|
|
@ -385,9 +389,10 @@ class DataProcessingPipeline extends QScript {
|
|||
this.jobName = queueLogDir + outBam + ".joinBams"
|
||||
}
|
||||
|
||||
case class sortSam (inSam: File, outBam: File) extends SortSam {
|
||||
case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam {
|
||||
this.input = List(inSam)
|
||||
this.output = outBam
|
||||
this.sortOrder = sortOrderP
|
||||
this.memoryLimit = 4
|
||||
this.isIntermediate = true
|
||||
this.analysisName = queueLogDir + outBam + ".sortSam"
|
||||
|
|
@ -430,7 +435,7 @@ class DataProcessingPipeline extends QScript {
|
|||
case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with BWACommonArgs {
|
||||
@Input(doc="bam file to be aligned") var bam = inBam
|
||||
@Output(doc="output sai file") var sai = outSai
|
||||
def commandLine = bwaPath + " aln -q 5 " + reference + " -b " + bam + " > " + sai
|
||||
def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai
|
||||
this.analysisName = queueLogDir + outSai + ".bwa_aln_se"
|
||||
this.jobName = queueLogDir + outSai + ".bwa_aln_se"
|
||||
}
|
||||
|
|
@ -438,7 +443,7 @@ class DataProcessingPipeline extends QScript {
|
|||
case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with BWACommonArgs {
|
||||
@Input(doc="bam file to be aligned") var bam = inBam
|
||||
@Output(doc="output sai file for 1st mating pair") var sai = outSai1
|
||||
def commandLine = bwaPath + " aln -q 5 " + reference + " -b" + index + " " + bam + " > " + sai
|
||||
def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai
|
||||
this.analysisName = queueLogDir + outSai1 + ".bwa_aln_pe1"
|
||||
this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1"
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package core
|
||||
package org.broadinstitute.sting.queue.qscripts
|
||||
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
package org.broadinstitute.sting.queue.qscripts
|
||||
|
||||
import org.broadinstitute.sting.commandline.Hidden
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
package org.broadinstitute.sting.queue.qscripts
|
||||
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import net.sf.samtools.SAMFileReader
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: carneiro
|
||||
* Date: 4/20/11
|
||||
* Time: 16:29 PM
|
||||
*/
|
||||
|
||||
|
||||
class RecalibrateBaseQualities extends QScript {
|
||||
|
||||
@Input(doc="path to GenomeAnalysisTK.jar", shortName="gatk", required=true)
|
||||
var GATKjar: File = _
|
||||
|
||||
@Input(doc="input BAM file - or list of BAM files", shortName="i", required=true)
|
||||
var input: File = _
|
||||
|
||||
@Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=false)
|
||||
var R: String = new File("/humgen/gsa-scr1/carneiro/stable/R")
|
||||
|
||||
@Input(doc="Reference fasta file", shortName="R", required=false)
|
||||
var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")
|
||||
|
||||
@Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=false)
|
||||
var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf")
|
||||
|
||||
val queueLogDir: String = ".qlog/"
|
||||
var nContigs: Int = 0
|
||||
|
||||
def getNumberOfContigs(bamFile: File): Int = {
|
||||
val samReader = new SAMFileReader(new File(bamFile))
|
||||
return samReader.getFileHeader.getSequenceDictionary.getSequences.size()
|
||||
}
|
||||
|
||||
def script = {
|
||||
|
||||
nContigs = getNumberOfContigs(input)
|
||||
|
||||
val recalFile1: File = new File("recal1.csv")
|
||||
val recalFile2: File = new File("recal2.csv")
|
||||
val recalBam: File = swapExt(input, ".bam", "recal.bam")
|
||||
val path1: String = "before"
|
||||
val path2: String = "after"
|
||||
|
||||
add(cov(input, recalFile1),
|
||||
recal(input, recalFile1, recalBam),
|
||||
cov(recalBam, recalFile2),
|
||||
analyzeCovariates(recalFile1, path1),
|
||||
analyzeCovariates(recalFile2, path2))
|
||||
}
|
||||
|
||||
trait CommandLineGATKArgs extends CommandLineGATK {
|
||||
this.jarFile = GATKjar
|
||||
this.reference_sequence = reference
|
||||
this.memoryLimit = 4
|
||||
this.isIntermediate = true
|
||||
}
|
||||
|
||||
case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs {
|
||||
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
|
||||
this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
|
||||
this.input_file :+= inBam
|
||||
this.recal_file = outRecalFile
|
||||
this.analysisName = queueLogDir + outRecalFile + ".covariates"
|
||||
this.jobName = queueLogDir + outRecalFile + ".covariates"
|
||||
this.scatterCount = nContigs
|
||||
}
|
||||
|
||||
case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs {
|
||||
this.input_file :+= inBam
|
||||
this.recal_file = inRecalFile
|
||||
this.out = outBam
|
||||
this.isIntermediate = false
|
||||
this.analysisName = queueLogDir + outBam + ".recalibration"
|
||||
this.jobName = queueLogDir + outBam + ".recalibration"
|
||||
this.scatterCount = nContigs
|
||||
}
|
||||
|
||||
case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {
|
||||
this.resources = R
|
||||
this.recal_file = inRecalFile
|
||||
this.output_dir = outPath.toString
|
||||
this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates"
|
||||
this.jobName = queueLogDir + inRecalFile + ".analyze_covariates"
|
||||
}
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package core
|
||||
package org.broadinstitute.sting.queue.qscripts
|
||||
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
import org.broadinstitute.sting.queue.extensions.gatk.RodBind
|
||||
|
|
@ -1,211 +0,0 @@
|
|||
package org.broadinstitute.sting.scala
|
||||
/**
|
||||
import gatk.walkers.genotyper.{UnifiedGenotyper, GenotypeCall}
|
||||
import java.io.File
|
||||
import net.sf.samtools.SAMRecord
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext
|
||||
import org.broadinstitute.sting.utils.Pair
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeMetaData
|
||||
import utils._
|
||||
import cmdLine.Argument
|
||||
|
||||
class TransitionTable() {
|
||||
var table: Array[Array[Double]] = new Array[Array[Double]](4,4)
|
||||
|
||||
def makeKey(b: Char): Int = {
|
||||
return BaseUtils.simpleBaseToBaseIndex(b)
|
||||
}
|
||||
|
||||
def fromKey(i: Int): Char = {
|
||||
return BaseUtils.baseIndexToSimpleBase(i)
|
||||
}
|
||||
|
||||
def add(ref: Char, subst: Char) = {
|
||||
table(makeKey(ref))(makeKey(subst)) += 1
|
||||
}
|
||||
|
||||
def get(i: Int, j: Int): Double = {
|
||||
return table(i)(j)
|
||||
}
|
||||
|
||||
def set(i: Int, j: Int, d: Double) = {
|
||||
//printf("Setting %d / %d => %f%n", i, j, d)
|
||||
table(i)(j) = d
|
||||
}
|
||||
|
||||
def mapEntries[A](f: ((Int, Int, Double) => A)): List[A] = {
|
||||
return for { i <- List.range(0, 4); j <- List.range(0,4) } yield f(i, j, get(i,j))
|
||||
}
|
||||
|
||||
def header(): String = {
|
||||
//return Utils.join("\t", mapEntries((i,j,x) => "P(%c|true=%c)" format (fromKey(j), fromKey(i))).toArray)
|
||||
return Utils.join("\t", mapEntries((i,j,x) => "[%c|%c]" format (fromKey(j), fromKey(i))).toArray)
|
||||
}
|
||||
|
||||
override def toString(): String = {
|
||||
return Utils.join("\t", mapEntries((i,j,x) => "%.2f" format x).toArray)
|
||||
}
|
||||
|
||||
def sum(): Double = {
|
||||
return sumList(table.toList map (x => sumList(x.toList)))
|
||||
}
|
||||
|
||||
def sumList(xs: List[Double]): Double = {
|
||||
return (0.0 :: xs) reduceLeft {(x, y) => x + y}
|
||||
}
|
||||
|
||||
def getRateMatrix(): TransitionTable = {
|
||||
var sums = (table.toList map (x => sumList(x.toList))).toArray
|
||||
var t = new TransitionTable()
|
||||
|
||||
//println(sums.toList)
|
||||
this mapEntries ((i,j,x) => t.set(i, j, x / sums(i)))
|
||||
|
||||
return t
|
||||
}
|
||||
}
|
||||
|
||||
**/class BaseTransitionTableCalculator /**{ // extends LocusWalker[Unit,Int] {
|
||||
private var MIN_MAPPING_QUALITY = 30
|
||||
private var MIN_BASE_QUALITY = 20
|
||||
private var MIN_LOD = 5
|
||||
private var PRINT_FREQ = 1000000
|
||||
private var CARE_ABOUT_STRAND = true
|
||||
|
||||
private val SSG = new UnifiedGenotyper()
|
||||
private val table1 = new TransitionTable()
|
||||
private val tableFWD = new TransitionTable()
|
||||
private val tableREV = new TransitionTable()
|
||||
private val table2 = new TransitionTable()
|
||||
|
||||
@Argument() { val shortName = "v", val doc = "Print out verbose output", val required = false }
|
||||
var VERBOSE: Boolean = false
|
||||
|
||||
override def initialize() {
|
||||
SSG.initialize();
|
||||
}
|
||||
|
||||
override def map(tracker: RefMetaDataTracker, ref: ReferenceContext, context: AlignmentContext): Unit = {
|
||||
val callPair = SSG.map(tracker, ref, context)
|
||||
//val call = callPair.getFirst().get(0)
|
||||
val pileup = new ReadBackedPileup(ref.getBase(), context)
|
||||
|
||||
def hasNoNs(): Boolean = {
|
||||
return ! (pileup.getBases() exists ('N' ==))
|
||||
}
|
||||
|
||||
if ( isDefinitelyHomRef(callPair) && hasNoNs() ) {
|
||||
var (refBases, nonRefBases) = splitBases(ref.getBase, pileup.getBases)
|
||||
|
||||
//printf("nonRefBases %s%n", nonRefBases)
|
||||
if ( nonRefBases.length > 0 ) {
|
||||
var (nonRefReads, offsets) = getNonRefReads(ref.getBase, pileup)
|
||||
var nonRefRead: SAMRecord = nonRefReads.head
|
||||
|
||||
nonRefBases.length match {
|
||||
case 1 =>
|
||||
//if ( goodRead(nonRefRead,offsets.head) ) {
|
||||
//printf("Including site:%n nonRefBases=%s%n refBases=%s%n nonRefRead=%s%n offset=%s%n", nonRefBases, refBases, nonRefRead.format, offsets.head)
|
||||
//}
|
||||
addRead(nonRefRead, offsets.head, nonRefBases.head, ref, table1)
|
||||
addRead(nonRefRead, offsets.head, nonRefBases.head, ref, if ( nonRefRead.getReadNegativeStrandFlag ) tableREV else tableFWD )
|
||||
case 2 =>
|
||||
addRead(nonRefRead, offsets.head, nonRefBases.head, ref, table2)
|
||||
addRead(nonRefReads.tail.head, offsets.tail.head, nonRefBases.tail.head, ref, table2)
|
||||
case x =>
|
||||
}
|
||||
|
||||
if ( VERBOSE && pileup.size > 30 && nonRefReads.length < 4 )
|
||||
printNonRefBases(ref, nonRefReads, offsets, context.getReads.size())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
implicit def listToJavaList[T](l: List[T]) = java.util.Arrays.asList(l.toArray: _*)
|
||||
|
||||
def printNonRefBases(ref: ReferenceContext, nonRefReads: List[SAMRecord], offsets: List[Integer], depth: Integer): Unit = {
|
||||
out.println(new ReadBackedPileup(ref.getLocus(), ref.getBase(), listToJavaList(nonRefReads), offsets) + " " + depth)
|
||||
}
|
||||
|
||||
def addRead(nonRefRead: SAMRecord, offset: Integer, nonRefBase: Char, ref: ReferenceContext, table: TransitionTable): Unit = {
|
||||
if ( goodRead(nonRefRead, offset) ) {
|
||||
//printf("Including site:%n call=%s%n nonRefBases=%s%n refBases=%s%n nonRefRead=%s%n offset=%s%n", nonRefBases, refBases, nonRefRead.format, offsets.head)
|
||||
//println(call.getReadDepth, call.getReferencebase, nonRefBases, refBases, nonRefRead.getMappingQuality)
|
||||
if ( CARE_ABOUT_STRAND && nonRefRead.getReadNegativeStrandFlag() ) {
|
||||
// it's on the reverse strand
|
||||
val refComp: Char = BaseUtils.simpleComplement(ref.getBase)
|
||||
val nonRefComp: Char = BaseUtils.simpleComplement(nonRefBase)
|
||||
|
||||
//printf("Negative: %c/%c is actually %c/%c%n", ref.getBase, nonRefBases.head, refComp, nonRefComp)
|
||||
table.add(refComp, nonRefComp)
|
||||
} else {
|
||||
table.add(ref.getBase, nonRefBase)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def getNonRefReads(ref: Char, pileup: ReadBackedPileup): (List[SAMRecord], List[Integer]) = {
|
||||
def getBase(i: Int): Char = {
|
||||
var offset = pileup.getOffsets().get(i)
|
||||
return pileup.getReads().get(i).getReadString().charAt(offset.asInstanceOf[Int])
|
||||
}
|
||||
|
||||
var subset = for { i <- List.range(0, pileup.size) if getBase(i) != ref } yield i
|
||||
var reads = subset map (i => pileup.getReads().get(i))
|
||||
var offsets = subset map (i => pileup.getOffsets().get(i))
|
||||
|
||||
//println(reads, offsets, subset)
|
||||
return (reads, offsets)
|
||||
}
|
||||
|
||||
|
||||
def hasFewNonRefBases(refBase: Char, pileup: String): List[Char] = {
|
||||
splitBases(refBase, pileup) match {
|
||||
case (refs, nonRefs) =>
|
||||
//printf("nrf=%s, rb=%s%n", nonRefs.mkString, refs.mkString)
|
||||
return nonRefs
|
||||
}
|
||||
}
|
||||
|
||||
def goodRead( read: SAMRecord, offset: Integer ): Boolean = {
|
||||
return read.getMappingQuality() > MIN_MAPPING_QUALITY && read.getBaseQualities()(offset.intValue) > MIN_BASE_QUALITY
|
||||
}
|
||||
|
||||
def splitBases(refBase: Char, pileup: String): (List[Char], List[Char]) = {
|
||||
val refBases = pileup.toList filter (refBase ==)
|
||||
val nonRefBases = pileup.toList filter (refBase !=)
|
||||
return (refBases, nonRefBases)
|
||||
}
|
||||
|
||||
def isDefinitelyHomRef(call: Pair[java.util.List[GenotypeCall],GenotypeMetaData]): Boolean = {
|
||||
if ( call == null )
|
||||
return false;
|
||||
|
||||
return (! call.getFirst().get(0).isVariant()) && call.getFirst().get(0).getNegLog10PError() > MIN_LOD
|
||||
}
|
||||
|
||||
def reduceInit(): Int = {
|
||||
return 0;
|
||||
}
|
||||
|
||||
def reduce(m: Unit, r: Int): Int = {
|
||||
return r;
|
||||
}
|
||||
|
||||
override def onTraversalDone(r: Int) = {
|
||||
out.println("-------------------- FINAL RESULT --------------------")
|
||||
out.println("type\t" + table1.header)
|
||||
def print1(t: String, x: TransitionTable) = {
|
||||
out.println(t + "\t" + x)
|
||||
}
|
||||
|
||||
print1("1-mismatch", table1)
|
||||
print1("2-mismatch", table2)
|
||||
print1("1-mismatch-fwd", tableFWD)
|
||||
print1("1-mismatch-rev", tableREV)
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
|
@ -1,120 +0,0 @@
|
|||
package org.broadinstitute.sting.scala
|
||||
|
||||
import java.io.PrintStream
|
||||
import org.broadinstitute.sting.gatk.contexts.{ReferenceContext, AlignmentContext}
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker
|
||||
import collection.JavaConversions._
|
||||
import collection.immutable.List
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature
|
||||
import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature
|
||||
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature
|
||||
import org.broadinstitute.sting.gatk.walkers.{TreeReducible, RefWalker}
|
||||
import org.broadinstitute.sting.commandline.{Output, Argument}
|
||||
import org.broadinstitute.sting.utils.{BaseUtils, GenomeLoc}
|
||||
import collection.mutable.{ListBuffer, HashSet}
|
||||
import java.lang.Math
|
||||
|
||||
class IntervalAnnotationWalker extends RefWalker[AnnotationMetaData,List[IntervalInfoBuilder]] {
|
||||
@Argument(doc="Min proportion of bases overlapping between an interval of interest and an annotation interval for annotation to occur",shortName="mpb")
|
||||
var minPropOverlap : Double = 0.50 // default to 50%
|
||||
@Output var out : PrintStream = _
|
||||
|
||||
override def reduceInit : List[IntervalInfoBuilder] = {
|
||||
Nil
|
||||
}
|
||||
|
||||
override def map(tracker: RefMetaDataTracker, ref: ReferenceContext, context: AlignmentContext) : AnnotationMetaData = {
|
||||
val ivalsOfInterest : List[GATKFeature] = tracker.getGATKFeatureMetaData("interval_list",false).toList
|
||||
val refSeqData : List[Object] = tracker.getReferenceMetaData("refseq").toList
|
||||
val tableMetaData : List[Object] = tracker.getReferenceMetaData("table",false).toList
|
||||
var amd : AnnotationMetaData = new AnnotationMetaData(ref.getLocus,ref.getBase)
|
||||
if ( refSeqData != null ) {
|
||||
amd.refSeqFeatures = refSeqData.map( (o: Object) => o.asInstanceOf[RefSeqFeature])
|
||||
}
|
||||
if ( tableMetaData != null ) {
|
||||
amd.tableFeatures = tableMetaData.map( (o: Object) => o.asInstanceOf[TableFeature]).map( (u: TableFeature) =>
|
||||
new Tuple2(u.getLocation,u.getAllValues.toList.reduceLeft(_ + "," + _)))
|
||||
}
|
||||
amd.newIvals = ivalsOfInterest.map(_.getLocation)
|
||||
return amd
|
||||
}
|
||||
|
||||
override def reduce(mapMetaData : AnnotationMetaData, rList : List[IntervalInfoBuilder]) : List[IntervalInfoBuilder] = {
|
||||
rList.filter( (u : IntervalInfoBuilder) => u.location.isBefore(mapMetaData.locus)).foreach(u => out.print("%s%n".format(u.done)))
|
||||
val newList : List[IntervalInfoBuilder] = rList.filter( ! _.finalized ) :::
|
||||
mapMetaData.newIvals.filter( (g: GenomeLoc) => g.getStart == mapMetaData.locus.getStart ).map( u => new IntervalInfoBuilder(u,minPropOverlap) )
|
||||
newList.foreach(_.update(mapMetaData))
|
||||
return newList
|
||||
}
|
||||
|
||||
override def onTraversalDone(finalList : List[IntervalInfoBuilder]) = {
|
||||
finalList.foreach(u => out.print("%s%n".format(u.done)))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class AnnotationMetaData(loc: GenomeLoc, ref: Byte) {
|
||||
val locus : GenomeLoc = loc
|
||||
var newIvals : List[GenomeLoc] = Nil
|
||||
var refSeqFeatures : List[RefSeqFeature] = Nil
|
||||
var tableFeatures : List[(GenomeLoc,String)] = Nil
|
||||
val refBase : Byte = ref
|
||||
}
|
||||
|
||||
class IntervalInfoBuilder(loc : GenomeLoc, minProp : Double) {
|
||||
|
||||
val OVERLAP_PROP : Double = minProp
|
||||
|
||||
var metaData : HashSet[String] = new HashSet[String]
|
||||
var seenTranscripts : HashSet[String] = new HashSet[String]
|
||||
var baseContent : ListBuffer[Byte] = new ListBuffer[Byte]
|
||||
var location : GenomeLoc = loc
|
||||
var gcContent : Double = _
|
||||
var entropy : Double = _
|
||||
var finalized : Boolean = false
|
||||
|
||||
|
||||
def update(mmd : AnnotationMetaData) = {
|
||||
baseContent += mmd.refBase
|
||||
mmd.refSeqFeatures.filter( p => ! seenTranscripts.contains(p.getTranscriptUniqueGeneName)).view.foreach(u => addRefSeq(u))
|
||||
mmd.tableFeatures.filter( (t : (GenomeLoc,String)) =>
|
||||
! metaData.contains(t._2) && (t._1.intersect(location).size.asInstanceOf[Double]/location.size() >= OVERLAP_PROP) ).foreach( t => metaData.add(t._2))
|
||||
}
|
||||
|
||||
def addRefSeq( rs: RefSeqFeature ) : Unit = {
|
||||
seenTranscripts.add(rs.getTranscriptUniqueGeneName)
|
||||
val exons : List[(GenomeLoc,Int)] = rs.getExons.toList.zipWithIndex.filter( p => ((p._1.intersect(location).size+0.0)/location.size) >= minProp )
|
||||
if ( exons.size > 0 ) {
|
||||
metaData.add("%s[%s]".format(rs.getTranscriptUniqueGeneName,exons.map( p => "exon%d".format(p._2) ).reduceLeft(_ + "," + _)))
|
||||
} else {
|
||||
if ( location.isBefore(rs.getExons.get(0)) || location.isPast(rs.getExons.get(rs.getNumExons-1)) ) {
|
||||
metaData.add("%s[%s]".format(rs.getTranscriptUniqueGeneName,"UTR"))
|
||||
} else {
|
||||
metaData.add("%s[%s]".format(rs.getTranscriptUniqueGeneName,"Intron"))
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
def done : String = {
|
||||
finalized = true
|
||||
def isGC(b : Byte) : Int = if ( BaseUtils.gIndex == b || BaseUtils.cIndex == b ) { 1 } else { 0 }
|
||||
gcContent = baseContent.foldLeft[Int](0)( (a,b) => a + isGC(b)).asInstanceOf[Double]/location.size()
|
||||
entropy = calcEntropy(baseContent.map(b => ListBuffer(b))) + calcEntropy(baseContent.reverse.map(b => ListBuffer(b)))
|
||||
val meta : String = metaData.reduceLeft(_ + "\t" + _)
|
||||
return "%s\t%d\t%d\t%.2f\t%.2f\t%s".format(location.getContig,location.getStart,location.getStop,gcContent,entropy,meta)
|
||||
}
|
||||
|
||||
def calcEntropy(byteList : ListBuffer[ListBuffer[Byte]]) : Double = {
|
||||
if(byteList.size == 1) return 0
|
||||
Math.log(1+byteList.tail.size-byteList.tail.dropWhile( u => u.equals(byteList(1))).size) +
|
||||
calcEntropy(byteList.tail.foldLeft(ListBuffer(byteList(0)))( (a,b) => {
|
||||
if ( b.equals(byteList(1)) ) {
|
||||
a.dropRight(1) :+ (a.last ++ b)
|
||||
} else {
|
||||
a :+ b
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,24 +0,0 @@
|
|||
package org.broadinstitute.sting.scala
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext
|
||||
|
||||
class ScalaCountLoci extends LocusWalker[Int,Int] {
|
||||
override def map(tracker: RefMetaDataTracker, ref: ReferenceContext, context: AlignmentContext): Int = {
|
||||
return 1
|
||||
}
|
||||
|
||||
def reduceInit(): Int = {
|
||||
return 0;
|
||||
}
|
||||
|
||||
def reduce(m: Int, r: Int): Int = {
|
||||
return m + r;
|
||||
}
|
||||
|
||||
def main(args: Array[String]) {
|
||||
println("Hello, world!")
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue