Removed the 'Walker' suffixes from packages/GATKEngine.xml that were breaking the packaged release.
Archived the AnalyzeCovariates scripts and removed references to them from the build packages and the GATK extensions.
This commit is contained in:
parent
a9ad9b3880
commit
46ca49b63d
20
build.xml
20
build.xml
|
|
@ -679,20 +679,6 @@
|
|||
<fileset dir="${java.classes}" includes="**/alignment/**/*.class" />
|
||||
</jar>
|
||||
|
||||
<jar jarfile="${dist.dir}/AnalyzeCovariates.jar" whenmanifestonly="skip">
|
||||
<fileset dir="${java.classes}">
|
||||
<include name="**/analyzecovariates/**/*.class" />
|
||||
<include name="**/gatk/walkers/recalibration/*.class" />
|
||||
</fileset>
|
||||
<fileset dir="${R.script.staging.dir}">
|
||||
<include name="**/analyzecovariates/**/*.R"/>
|
||||
<include name="**/gatk/walkers/recalibration/**/*.R"/>
|
||||
</fileset>
|
||||
<manifest>
|
||||
<attribute name="Main-Class" value="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" />
|
||||
</manifest>
|
||||
</jar>
|
||||
|
||||
<subant target="dist" genericantfile="build.xml">
|
||||
<property name="build.dir" value="${external.build.dir}" />
|
||||
<property name="dist.dir" value="${external.dist.dir}" />
|
||||
|
|
@ -750,12 +736,6 @@
|
|||
<attribute name="Class-Path" value="${jar.classpath}"/>
|
||||
</manifest>
|
||||
</jar>
|
||||
|
||||
<jar jarfile="${dist.dir}/AnalyzeCovariates.jar" update="true" whenmanifestonly="skip">
|
||||
<manifest>
|
||||
<attribute name="Class-Path" value="${jar.classpath}" />
|
||||
</manifest>
|
||||
</jar>
|
||||
</target>
|
||||
|
||||
<target name="queue.manifests" depends="queue.jar, init.manifests" if="include.scala">
|
||||
|
|
|
|||
|
|
@ -1,121 +0,0 @@
|
|||
#!/bin/env Rscript
|
||||
|
||||
library(tools)
|
||||
|
||||
args <- commandArgs(TRUE)
|
||||
verbose = TRUE
|
||||
|
||||
input = args[1]
|
||||
covariateName = args[2]
|
||||
|
||||
outfile = paste(input, ".qual_diff_v_", covariateName, ".pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
c <- read.table(input, header=T)
|
||||
c <- c[sort.list(c[,1]),]
|
||||
|
||||
#
|
||||
# Plot residual error as a function of the covariate
|
||||
#
|
||||
|
||||
d.good <- c[c$nBases >= 1000,]
|
||||
d.1000 <- c[c$nBases < 1000,]
|
||||
rmseGood = sqrt( sum(as.numeric((d.good$Qempirical-d.good$Qreported)^2 * d.good$nBases)) / sum(as.numeric(d.good$nBases)) ) # prevent integer overflow with as.numeric, ugh
|
||||
rmseAll = sqrt( sum(as.numeric((c$Qempirical-c$Qreported)^2 * c$nBases)) / sum(as.numeric(c$nBases)) )
|
||||
theTitle = paste("RMSE_good =", round(rmseGood,digits=3), ", RMSE_all =", round(rmseAll,digits=3))
|
||||
if( length(d.good$nBases) == length(c$nBases) ) {
|
||||
theTitle = paste("RMSE =", round(rmseAll,digits=3))
|
||||
}
|
||||
# Don't let residual error go off the edge of the plot
|
||||
d.good$residualError = d.good$Qempirical-d.good$Qreported
|
||||
d.good$residualError[which(d.good$residualError > 10)] = 10
|
||||
d.good$residualError[which(d.good$residualError < -10)] = -10
|
||||
d.1000$residualError = d.1000$Qempirical-d.1000$Qreported
|
||||
d.1000$residualError[which(d.1000$residualError > 10)] = 10
|
||||
d.1000$residualError[which(d.1000$residualError < -10)] = -10
|
||||
c$residualError = c$Qempirical-c$Qreported
|
||||
c$residualError[which(c$residualError > 10)] = 10
|
||||
c$residualError[which(c$residualError < -10)] = -10
|
||||
pointType = "p"
|
||||
if( length(c$Covariate) <= 20 ) {
|
||||
pointType = "o"
|
||||
}
|
||||
if( is.numeric(c$Covariate) ) {
|
||||
plot(d.good$Covariate, d.good$residualError, type=pointType, main=theTitle, ylab="Empirical - Reported Quality", xlab=covariateName, col="blue", pch=20, ylim=c(-10, 10), xlim=c(min(c$Covariate),max(c$Covariate)))
|
||||
points(d.1000$Covariate, d.1000$residualError, type=pointType, col="cornflowerblue", pch=20)
|
||||
} else { # Dinuc (and other non-numeric covariates) are different to make their plots look nice
|
||||
plot(c$Covariate, c$residualError, type="l", main=theTitle, ylab="Empirical - Reported Quality", xlab=covariateName, col="blue", ylim=c(-10, 10))
|
||||
points(d.1000$Covariate, d.1000$residualError, type="l", col="cornflowerblue")
|
||||
}
|
||||
dev.off()
|
||||
|
||||
if (exists('compactPDF')) {
|
||||
compactPDF(outfile)
|
||||
}
|
||||
|
||||
#
|
||||
# Plot mean quality versus the covariate
|
||||
#
|
||||
|
||||
outfile = paste(input, ".reported_qual_v_", covariateName, ".pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
pointType = "p"
|
||||
if( length(c$Covariate) <= 20 ) {
|
||||
pointType = "o"
|
||||
}
|
||||
theTitle = paste("Quality By", covariateName);
|
||||
if( is.numeric(c$Covariate) ) {
|
||||
plot(d.good$Covariate, d.good$Qreported, type=pointType, main=theTitle, ylab="Mean Reported Quality", xlab=covariateName, col="blue", pch=20, ylim=c(0, 40), xlim=c(min(c$Covariate),max(c$Covariate)))
|
||||
points(d.1000$Covariate, d.1000$Qreported, type=pointType, col="cornflowerblue", pch=20)
|
||||
} else { # Dinuc (and other non-numeric covariates) are different to make their plots look nice
|
||||
plot(c$Covariate, c$Qreported, type="l", main=theTitle, ylab="Mean Reported Quality", xlab=covariateName, col="blue", ylim=c(0, 40))
|
||||
points(d.1000$Covariate, d.1000$Qreported, type="l", col="cornflowerblue")
|
||||
}
|
||||
dev.off()
|
||||
|
||||
if (exists('compactPDF')) {
|
||||
compactPDF(outfile)
|
||||
}
|
||||
|
||||
#
|
||||
# Plot histogram of the covariate
|
||||
#
|
||||
|
||||
e = d.good
|
||||
f = d.1000
|
||||
outfile = paste(input, ".", covariateName,"_hist.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
hst=subset(data.frame(e$Covariate, e$nBases), e.nBases != 0)
|
||||
hst2=subset(data.frame(f$Covariate, f$nBases), f.nBases != 0)
|
||||
|
||||
lwdSize=2
|
||||
if( length(c$Covariate) <= 20 ) {
|
||||
lwdSize=7
|
||||
} else if( length(c$Covariate) <= 70 ) {
|
||||
lwdSize=4
|
||||
}
|
||||
|
||||
if( is.numeric(c$Covariate) ) {
|
||||
if( length(hst$e.Covariate) == 0 ) {
|
||||
plot(hst2$f.Covariate, hst2$f.nBases, type="h", lwd=lwdSize, col="cornflowerblue", main=paste(covariateName,"histogram"), ylim=c(0, max(hst2$f.nBases)), xlab=covariateName, ylab="Count",yaxt="n",xlim=c(min(c$Covariate),max(c$Covariate)))
|
||||
} else {
|
||||
plot(hst$e.Covariate, hst$e.nBases, type="h", lwd=lwdSize, main=paste(covariateName,"histogram"), xlab=covariateName, ylim=c(0, max(hst$e.nBases)),ylab="Number of Bases",yaxt="n",xlim=c(min(c$Covariate),max(c$Covariate)))
|
||||
points(hst2$f.Covariate, hst2$f.nBases, type="h", lwd=lwdSize, col="cornflowerblue")
|
||||
}
|
||||
axis(2,axTicks(2), format(axTicks(2), scientific=F))
|
||||
} else { # Dinuc (and other non-numeric covariates) are different to make their plots look nice
|
||||
hst=subset(data.frame(c$Covariate, c$nBases), c.nBases != 0)
|
||||
plot(1:length(hst$c.Covariate), hst$c.nBases, type="h", lwd=lwdSize, main=paste(covariateName,"histogram"), ylim=c(0, max(hst$c.nBases)),xlab=covariateName, ylab="Number of Bases",yaxt="n",xaxt="n")
|
||||
if( length(hst$c.Covariate) > 9 ) {
|
||||
axis(1, at=seq(1,length(hst$c.Covariate),2), labels = hst$c.Covariate[seq(1,length(hst$c.Covariate),2)])
|
||||
} else {
|
||||
axis(1, at=seq(1,length(hst$c.Covariate),1), labels = hst$c.Covariate)
|
||||
}
|
||||
axis(2,axTicks(2), format(axTicks(2), scientific=F))
|
||||
}
|
||||
dev.off()
|
||||
|
||||
if (exists('compactPDF')) {
|
||||
compactPDF(outfile)
|
||||
}
|
||||
|
|
@ -1,84 +0,0 @@
|
|||
#!/bin/env Rscript
|
||||
|
||||
library(tools)
|
||||
|
||||
args <- commandArgs(TRUE)
|
||||
|
||||
input = args[1]
|
||||
Qcutoff = as.numeric(args[2])
|
||||
maxQ = as.numeric(args[3])
|
||||
maxHist = as.numeric(args[4])
|
||||
|
||||
t=read.table(input, header=T)
|
||||
|
||||
#
|
||||
# Plot of reported quality versus empirical quality
|
||||
#
|
||||
|
||||
outfile = paste(input, ".quality_emp_v_stated.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
d.good <- t[t$nBases >= 10000 & t$Qreported >= Qcutoff,]
|
||||
d.1000 <- t[t$nBases < 1000 & t$Qreported >= Qcutoff,]
|
||||
d.10000 <- t[t$nBases < 10000 & t$nBases >= 1000 & t$Qreported >= Qcutoff,]
|
||||
f <- t[t$Qreported < Qcutoff,]
|
||||
e <- rbind(d.good, d.1000, d.10000)
|
||||
rmseGood = sqrt( sum(as.numeric((d.good$Qempirical-d.good$Qreported)^2 * d.good$nBases)) / sum(as.numeric(d.good$nBases)) ) # prevent integer overflow with as.numeric, ugh
|
||||
rmseAll = sqrt( sum(as.numeric((e$Qempirical-e$Qreported)^2 * e$nBases)) / sum(as.numeric(e$nBases)) )
|
||||
theTitle = paste("RMSE_good =", round(rmseGood,digits=3), ", RMSE_all =", round(rmseAll,digits=3))
|
||||
if( length(t$nBases) - length(f$nBases) == length(d.good$nBases) ) {
|
||||
theTitle = paste("RMSE =", round(rmseAll,digits=3));
|
||||
}
|
||||
plot(d.good$Qreported, d.good$Qempirical, type="p", col="blue", main=theTitle, xlim=c(0,maxQ), ylim=c(0,maxQ), pch=16, xlab="Reported quality score", ylab="Empirical quality score")
|
||||
points(d.1000$Qreported, d.1000$Qempirical, type="p", col="lightblue", pch=16)
|
||||
points(d.10000$Qreported, d.10000$Qempirical, type="p", col="cornflowerblue", pch=16)
|
||||
points(f$Qreported, f$Qempirical, type="p", col="maroon1", pch=16)
|
||||
abline(0,1, lty=2)
|
||||
dev.off()
|
||||
|
||||
if (exists('compactPDF')) {
|
||||
compactPDF(outfile)
|
||||
}
|
||||
|
||||
#
|
||||
# Plot Q empirical histogram
|
||||
#
|
||||
|
||||
outfile = paste(input, ".quality_emp_hist.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
hst=subset(data.frame(e$Qempirical, e$nBases), e.nBases != 0)
|
||||
hst2=subset(data.frame(f$Qempirical, f$nBases), f.nBases != 0)
|
||||
percentBases=hst$e.nBases / sum(as.numeric(hst$e.nBases))
|
||||
entropy = -sum(log2(percentBases)*percentBases)
|
||||
yMax = max(hst$e.nBases)
|
||||
if(maxHist != 0) {
|
||||
yMax = maxHist
|
||||
}
|
||||
plot(hst$e.Qempirical, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yMax), main=paste("Empirical quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Empirical quality score", ylab="Number of Bases",yaxt="n")
|
||||
points(hst2$f.Qempirical, hst2$f.nBases, type="h", lwd=4, col="maroon1")
|
||||
axis(2,axTicks(2), format(axTicks(2), scientific=F))
|
||||
dev.off()
|
||||
|
||||
if (exists('compactPDF')) {
|
||||
compactPDF(outfile)
|
||||
}
|
||||
|
||||
#
|
||||
# Plot Q reported histogram
|
||||
#
|
||||
|
||||
outfile = paste(input, ".quality_rep_hist.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
hst=subset(data.frame(e$Qreported, e$nBases), e.nBases != 0)
|
||||
hst2=subset(data.frame(f$Qreported, f$nBases), f.nBases != 0)
|
||||
yMax = max(hst$e.nBases)
|
||||
if(maxHist != 0) {
|
||||
yMax = maxHist
|
||||
}
|
||||
plot(hst$e.Qreported, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yMax), main=paste("Reported quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Reported quality score", ylab="Number of Bases",yaxt="n")
|
||||
points(hst2$f.Qreported, hst2$f.nBases, type="h", lwd=4, col="maroon1")
|
||||
axis(2,axTicks(2), format(axTicks(2), scientific=F))
|
||||
dev.off()
|
||||
|
||||
if (exists('compactPDF')) {
|
||||
compactPDF(outfile)
|
||||
}
|
||||
|
|
@ -195,7 +195,6 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
|
|||
private static final List<String> gatkPackages = Arrays.asList(
|
||||
"org.broadinstitute.sting.gatk",
|
||||
"org.broadinstitute.sting.pipeline",
|
||||
"org.broadinstitute.sting.analyzecovariates",
|
||||
"org.broadinstitute.sting.gatk.datasources.reads.utilities");
|
||||
|
||||
/**
|
||||
|
|
@ -252,7 +251,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
|
|||
*/
|
||||
private void writeFilter(String className, List<? extends ArgumentField> argumentFields, Set<Class<?>> dependents) throws IOException {
|
||||
String content = getContent(TRAIT_TEMPLATE, "org.broadinstitute.sting.queue.function.CommandLineFunction",
|
||||
className, "", false, String.format(" + \" --read_filter %s\"", className), argumentFields, dependents);
|
||||
className, "", false, String.format(" + required(\"--read_filter\", \"%s\")", className), argumentFields, dependents);
|
||||
writeFile(GATK_EXTENSIONS_PACKAGE_NAME + "." + className, content);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,14 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<package name="AnalyzeCovariates">
|
||||
<executable name="AnalyzeCovariates">
|
||||
<main-class name="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" />
|
||||
<resource-bundle file="StingText.properties" />
|
||||
<dependencies>
|
||||
<!-- Recalibration analysis script -->
|
||||
<class name="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" />
|
||||
<package name="org.broadinstitute.sting.gatk.walkers.recalibration" />
|
||||
<!-- Supplemental scripts for graph generation, etc. -->
|
||||
<dir name="org/broadinstitute/sting/analyzecovariates" includes="**/*.R" />
|
||||
</dependencies>
|
||||
</executable>
|
||||
</package>
|
||||
|
|
@ -1,51 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!-- In order to update this file, copy GenomeAnalysisTK.xml, change the name of the
|
||||
package and executable to GATK-Picard, and add TileCovariate to the list of dependencies.
|
||||
Remove the AnalyzeCovariates executable. -->
|
||||
<package name="GATK-Picard">
|
||||
<version file="StingText.properties" property="org.broadinstitute.sting.gatk.version" />
|
||||
<executable name="GATK-Picard">
|
||||
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
|
||||
<resource-bundle file="StingText.properties" />
|
||||
<modules>
|
||||
<module file="QualityScoresRecalibration.xml" />
|
||||
<module file="LocalRealignmentAroundIndels.xml" />
|
||||
<module file="UnifiedGenotyper.xml" />
|
||||
<module file="VariantFiltration.xml" />
|
||||
<module file="VariantAnnotator.xml" />
|
||||
</modules>
|
||||
<dependencies>
|
||||
<!-- Filters -->
|
||||
<package name="org.broadinstitute.sting.gatk.filters" />
|
||||
<class name="org.broadinstitute.sting.gatk.walkers.coverage.DepthOfCoverage" />
|
||||
<class name="org.broadinstitute.sting.gatk.walkers.Pileup" />
|
||||
<class name="org.broadinstitute.sting.gatk.walkers.PrintReads" />
|
||||
<class name="org.broadinstitute.sting.gatk.walkers.qc.CountLoci" />
|
||||
<class name="org.broadinstitute.sting.gatk.walkers.qc.CountReads" />
|
||||
<class name="org.broadinstitute.sting.gatk.walkers.qc.ValidatingPileup" />
|
||||
<!-- Recalibration Covariates -->
|
||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.PositionCovariate" />
|
||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.MinimumNQSCovariate" />
|
||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.MappingQualityCovariate" />
|
||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.PrimerRoundCovariate" />
|
||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.HomopolymerCovariate" />
|
||||
</dependencies>
|
||||
</executable>
|
||||
<resources>
|
||||
<!-- GATK sample code and build scripts -->
|
||||
<file name="java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java" />
|
||||
<file name="java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java" />
|
||||
<file name="java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java" />
|
||||
<file name="java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java" />
|
||||
<file name="java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java" />
|
||||
<file name="java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java" />
|
||||
<file name="java/src/org/broadinstitute/sting/gatk/examples/HelloWalker.java" />
|
||||
<file name="java/src/org/broadinstitute/sting/gatk/examples/build.xml" />
|
||||
<!-- Sample reads and reference files -->
|
||||
<file name="testdata/exampleBAM.bam" />
|
||||
<file name="testdata/exampleBAM.bam.bai" />
|
||||
<file name="testdata/exampleFASTA.fasta" />
|
||||
<file name="testdata/exampleFASTA.fasta.fai" />
|
||||
<file name="testdata/exampleFASTA.dict" />
|
||||
</resources>
|
||||
</package>
|
||||
|
|
@ -48,11 +48,11 @@
|
|||
<file name="public/testdata/exampleFASTA.fasta.fai" />
|
||||
<file name="public/testdata/exampleFASTA.dict" />
|
||||
<!-- GATK sample code and build scripts -->
|
||||
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java" />
|
||||
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java" />
|
||||
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java" />
|
||||
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java" />
|
||||
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java" />
|
||||
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java" />
|
||||
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java" />
|
||||
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java" />
|
||||
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReads.java" />
|
||||
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLoci.java" />
|
||||
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java" />
|
||||
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileup.java" />
|
||||
</resources>
|
||||
</package>
|
||||
|
|
|
|||
|
|
@ -33,10 +33,8 @@
|
|||
<module file="GATKEngine.xml"/>
|
||||
</modules>
|
||||
</executable>
|
||||
<modules>
|
||||
<module file="AnalyzeCovariates.xml"/>
|
||||
</modules>
|
||||
<release>
|
||||
<executable directory="/humgen/gsa-hpprojects/GATK/bin" symlink="current" />
|
||||
<archive directory="/humgen/gsa-hpprojects/GATK/bin" symlink="GenomeAnalysisTK-latest.tar.bz2" /> </release>
|
||||
<archive directory="/humgen/gsa-hpprojects/GATK/bin" symlink="GenomeAnalysisTK-latest.tar.bz2" />
|
||||
</release>
|
||||
</package>
|
||||
|
|
|
|||
|
|
@ -33,9 +33,6 @@
|
|||
<module file="GATKEngine.xml"/>
|
||||
</modules>
|
||||
</executable>
|
||||
<modules>
|
||||
<module file="AnalyzeCovariates.xml"/>
|
||||
</modules>
|
||||
<release>
|
||||
<archive directory="/web/ftp/pub/gsa/GenomeAnalysisTK" symlink="GenomeAnalysisTKLite-latest.tar.bz2" />
|
||||
</release>
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@
|
|||
<modules>
|
||||
<module file="QueueEngine.xml"/>
|
||||
<module file="GATKEngine.xml"/>
|
||||
<module file="AnalyzeCovariates.xml"/>
|
||||
</modules>
|
||||
</executable>
|
||||
<release>
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@
|
|||
<modules>
|
||||
<module file="QueueEngine.xml"/>
|
||||
<module file="GATKEngine.xml"/>
|
||||
<module file="AnalyzeCovariates.xml"/>
|
||||
</modules>
|
||||
</executable>
|
||||
<release>
|
||||
|
|
|
|||
Loading…
Reference in New Issue