Removed the 'Walker' suffix from the source file names listed in packages/GATKEngine.xml; the stale names were breaking the packaged release.

Archived AnalyzeCovariates scripts and removed references in build packages / GATK extensions.
This commit is contained in:
Khalid Shakir 2012-07-23 16:32:31 -04:00
parent a9ad9b3880
commit 46ca49b63d
11 changed files with 9 additions and 307 deletions

View File

@ -679,20 +679,6 @@
<fileset dir="${java.classes}" includes="**/alignment/**/*.class" />
</jar>
<jar jarfile="${dist.dir}/AnalyzeCovariates.jar" whenmanifestonly="skip">
<fileset dir="${java.classes}">
<include name="**/analyzecovariates/**/*.class" />
<include name="**/gatk/walkers/recalibration/*.class" />
</fileset>
<fileset dir="${R.script.staging.dir}">
<include name="**/analyzecovariates/**/*.R"/>
<include name="**/gatk/walkers/recalibration/**/*.R"/>
</fileset>
<manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" />
</manifest>
</jar>
<subant target="dist" genericantfile="build.xml">
<property name="build.dir" value="${external.build.dir}" />
<property name="dist.dir" value="${external.dist.dir}" />
@ -750,12 +736,6 @@
<attribute name="Class-Path" value="${jar.classpath}"/>
</manifest>
</jar>
<jar jarfile="${dist.dir}/AnalyzeCovariates.jar" update="true" whenmanifestonly="skip">
<manifest>
<attribute name="Class-Path" value="${jar.classpath}" />
</manifest>
</jar>
</target>
<target name="queue.manifests" depends="queue.jar, init.manifests" if="include.scala">

View File

@ -1,121 +0,0 @@
#!/usr/bin/env Rscript
#
# Plots recalibration diagnostics for a single covariate.
#
# Usage: Rscript <this script> <input table> <covariate name>
#
#   input table     file read with read.table(header=TRUE); the code below uses
#                   the columns Covariate, Qreported, Qempirical and nBases
#   covariate name  used for axis labels, titles and output file names
#
# Three PDFs are written next to the input file:
#   <input>.qual_diff_v_<covariate>.pdf      residual error vs. covariate
#   <input>.reported_qual_v_<covariate>.pdf  reported quality vs. covariate
#   <input>.<covariate>_hist.pdf             number of bases per covariate value
#
# Rows with nBases >= 1000 are drawn in the default/"blue" colour, rows with
# fewer bases in "cornflowerblue".

library(tools)  # provides compactPDF() on R versions that ship it

args <- commandArgs(TRUE)
# Fail early with a usage message instead of failing later on NA arguments.
if( length(args) < 2 ) {
    stop("Usage: Rscript <script> <input table> <covariate name>")
}
input = args[1]
covariateName = args[2]

# Read the table before opening any PDF device so that an unreadable input
# does not leave an empty PDF behind.
# NOTE: the data frame is named `c`; calls such as c(-10, 10) still resolve to
# the base function because R skips non-function bindings when calling.
c <- read.table(input, header=TRUE)
c <- c[sort.list(c[,1]),]  # order rows by the first column (presumably the covariate - confirm)

#
# Plot residual error as a function of the covariate
#

outfile = paste(input, ".qual_diff_v_", covariateName, ".pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)

d.good <- c[c$nBases >= 1000,]
d.1000 <- c[c$nBases < 1000,]

# Base-count weighted RMSE of (empirical - reported) quality.
rmseGood = sqrt( sum(as.numeric((d.good$Qempirical-d.good$Qreported)^2 * d.good$nBases)) / sum(as.numeric(d.good$nBases)) ) # prevent integer overflow with as.numeric, ugh
rmseAll = sqrt( sum(as.numeric((c$Qempirical-c$Qreported)^2 * c$nBases)) / sum(as.numeric(c$nBases)) )
theTitle = paste("RMSE_good =", round(rmseGood,digits=3), ", RMSE_all =", round(rmseAll,digits=3))
if( length(d.good$nBases) == length(c$nBases) ) {
    # Every row is "good", so the two RMSE values coincide; show just one.
    theTitle = paste("RMSE =", round(rmseAll,digits=3))
}

# Don't let residual error go off the edge of the plot
d.good$residualError = d.good$Qempirical-d.good$Qreported
d.good$residualError[which(d.good$residualError > 10)] = 10
d.good$residualError[which(d.good$residualError < -10)] = -10
d.1000$residualError = d.1000$Qempirical-d.1000$Qreported
d.1000$residualError[which(d.1000$residualError > 10)] = 10
d.1000$residualError[which(d.1000$residualError < -10)] = -10
c$residualError = c$Qempirical-c$Qreported
c$residualError[which(c$residualError > 10)] = 10
c$residualError[which(c$residualError < -10)] = -10

# With few covariate values, connect the points with lines ("o").
pointType = "p"
if( length(c$Covariate) <= 20 ) {
    pointType = "o"
}
if( is.numeric(c$Covariate) ) {
    plot(d.good$Covariate, d.good$residualError, type=pointType, main=theTitle, ylab="Empirical - Reported Quality", xlab=covariateName, col="blue", pch=20, ylim=c(-10, 10), xlim=c(min(c$Covariate),max(c$Covariate)))
    points(d.1000$Covariate, d.1000$residualError, type=pointType, col="cornflowerblue", pch=20)
} else { # Dinuc (and other non-numeric covariates) are different to make their plots look nice
    plot(c$Covariate, c$residualError, type="l", main=theTitle, ylab="Empirical - Reported Quality", xlab=covariateName, col="blue", ylim=c(-10, 10))
    points(d.1000$Covariate, d.1000$residualError, type="l", col="cornflowerblue")
}
dev.off()

# compactPDF() is not available in every R version, hence the guard.
if (exists('compactPDF')) {
    compactPDF(outfile)
}

#
# Plot mean quality versus the covariate
#

outfile = paste(input, ".reported_qual_v_", covariateName, ".pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
pointType = "p"
if( length(c$Covariate) <= 20 ) {
    pointType = "o"
}
theTitle = paste("Quality By", covariateName);
if( is.numeric(c$Covariate) ) {
    plot(d.good$Covariate, d.good$Qreported, type=pointType, main=theTitle, ylab="Mean Reported Quality", xlab=covariateName, col="blue", pch=20, ylim=c(0, 40), xlim=c(min(c$Covariate),max(c$Covariate)))
    points(d.1000$Covariate, d.1000$Qreported, type=pointType, col="cornflowerblue", pch=20)
} else { # Dinuc (and other non-numeric covariates) are different to make their plots look nice
    plot(c$Covariate, c$Qreported, type="l", main=theTitle, ylab="Mean Reported Quality", xlab=covariateName, col="blue", ylim=c(0, 40))
    points(d.1000$Covariate, d.1000$Qreported, type="l", col="cornflowerblue")
}
dev.off()

if (exists('compactPDF')) {
    compactPDF(outfile)
}

#
# Plot histogram of the covariate
#

# The short aliases matter: data.frame(e$Covariate, e$nBases) names its columns
# after the argument expressions (e.Covariate, e.nBases), and those names are
# used in the subset() conditions and plot calls below.
e = d.good
f = d.1000
outfile = paste(input, ".", covariateName,"_hist.pdf", sep="")
pdf(outfile, height=7, width=7)
hst=subset(data.frame(e$Covariate, e$nBases), e.nBases != 0)
hst2=subset(data.frame(f$Covariate, f$nBases), f.nBases != 0)

# Thicker bars when there are only a few covariate values.
lwdSize=2
if( length(c$Covariate) <= 20 ) {
    lwdSize=7
} else if( length(c$Covariate) <= 70 ) {
    lwdSize=4
}

if( is.numeric(c$Covariate) ) {
    if( length(hst$e.Covariate) == 0 ) {
        # No rows with >= 1000 bases: draw only the low-count rows.
        plot(hst2$f.Covariate, hst2$f.nBases, type="h", lwd=lwdSize, col="cornflowerblue", main=paste(covariateName,"histogram"), ylim=c(0, max(hst2$f.nBases)), xlab=covariateName, ylab="Count",yaxt="n",xlim=c(min(c$Covariate),max(c$Covariate)))
    } else {
        plot(hst$e.Covariate, hst$e.nBases, type="h", lwd=lwdSize, main=paste(covariateName,"histogram"), xlab=covariateName, ylim=c(0, max(hst$e.nBases)),ylab="Number of Bases",yaxt="n",xlim=c(min(c$Covariate),max(c$Covariate)))
        points(hst2$f.Covariate, hst2$f.nBases, type="h", lwd=lwdSize, col="cornflowerblue")
    }
    # Redraw the y axis with plain (non-scientific) tick labels.
    axis(2,axTicks(2), format(axTicks(2), scientific=FALSE))
} else { # Dinuc (and other non-numeric covariates) are different to make their plots look nice
    hst=subset(data.frame(c$Covariate, c$nBases), c.nBases != 0)
    plot(1:length(hst$c.Covariate), hst$c.nBases, type="h", lwd=lwdSize, main=paste(covariateName,"histogram"), ylim=c(0, max(hst$c.nBases)),xlab=covariateName, ylab="Number of Bases",yaxt="n",xaxt="n")
    if( length(hst$c.Covariate) > 9 ) {
        # Label every second bar to keep the x axis readable.
        axis(1, at=seq(1,length(hst$c.Covariate),2), labels = hst$c.Covariate[seq(1,length(hst$c.Covariate),2)])
    } else {
        axis(1, at=seq(1,length(hst$c.Covariate),1), labels = hst$c.Covariate)
    }
    axis(2,axTicks(2), format(axTicks(2), scientific=FALSE))
}
dev.off()

if (exists('compactPDF')) {
    compactPDF(outfile)
}

View File

@ -1,84 +0,0 @@
#!/usr/bin/env Rscript
#
# Plots recalibration diagnostics for the quality-score covariate.
#
# Usage: Rscript <this script> <input table> <Qcutoff> <maxQ> <maxHist>
#
#   input table  file read with read.table(header=TRUE); the code below uses
#                the columns Qreported, Qempirical and nBases
#   Qcutoff      rows with Qreported below this value are drawn in "maroon1"
#                and left out of the RMSE and entropy figures
#   maxQ         upper limit of the quality-score axes
#   maxHist      upper limit of the histogram y axis; 0 means "use the data maximum"
#
# Three PDFs are written next to the input file:
#   <input>.quality_emp_v_stated.pdf  empirical vs. reported quality
#   <input>.quality_emp_hist.pdf      number of bases per empirical quality
#   <input>.quality_rep_hist.pdf      number of bases per reported quality

library(tools)  # provides compactPDF() on R versions that ship it

args <- commandArgs(TRUE)
# Fail early with a clear message; otherwise a missing or non-numeric argument
# only surfaces later (e.g. at `if(maxHist != 0)`) after a PDF has been written.
if( length(args) < 4 ) {
    stop("Usage: Rscript <script> <input table> <Qcutoff> <maxQ> <maxHist>")
}
input = args[1]
Qcutoff = as.numeric(args[2])
maxQ = as.numeric(args[3])
maxHist = as.numeric(args[4])
if( is.na(Qcutoff) || is.na(maxQ) || is.na(maxHist) ) {
    stop("Qcutoff, maxQ and maxHist must be numeric")
}

t=read.table(input, header=TRUE)

#
# Plot of reported quality versus empirical quality
#

outfile = paste(input, ".quality_emp_v_stated.pdf", sep="")
pdf(outfile, height=7, width=7)

# Partition the rows at or above the cutoff by how many bases support them.
d.good <- t[t$nBases >= 10000 & t$Qreported >= Qcutoff,]
d.1000 <- t[t$nBases < 1000 & t$Qreported >= Qcutoff,]
d.10000 <- t[t$nBases < 10000 & t$nBases >= 1000 & t$Qreported >= Qcutoff,]
f <- t[t$Qreported < Qcutoff,]           # rows below the cutoff
e <- rbind(d.good, d.1000, d.10000)      # all rows at or above the cutoff

# Base-count weighted RMSE of (empirical - reported) quality.
rmseGood = sqrt( sum(as.numeric((d.good$Qempirical-d.good$Qreported)^2 * d.good$nBases)) / sum(as.numeric(d.good$nBases)) ) # prevent integer overflow with as.numeric, ugh
rmseAll = sqrt( sum(as.numeric((e$Qempirical-e$Qreported)^2 * e$nBases)) / sum(as.numeric(e$nBases)) )
theTitle = paste("RMSE_good =", round(rmseGood,digits=3), ", RMSE_all =", round(rmseAll,digits=3))
if( length(t$nBases) - length(f$nBases) == length(d.good$nBases) ) {
    # Every row above the cutoff is "good", so the two RMSE values coincide.
    theTitle = paste("RMSE =", round(rmseAll,digits=3));
}

plot(d.good$Qreported, d.good$Qempirical, type="p", col="blue", main=theTitle, xlim=c(0,maxQ), ylim=c(0,maxQ), pch=16, xlab="Reported quality score", ylab="Empirical quality score")
points(d.1000$Qreported, d.1000$Qempirical, type="p", col="lightblue", pch=16)
points(d.10000$Qreported, d.10000$Qempirical, type="p", col="cornflowerblue", pch=16)
points(f$Qreported, f$Qempirical, type="p", col="maroon1", pch=16)
abline(0,1, lty=2)  # dashed y = x reference line
dev.off()

# compactPDF() is not available in every R version, hence the guard.
if (exists('compactPDF')) {
    compactPDF(outfile)
}

#
# Plot Q empirical histogram
#

outfile = paste(input, ".quality_emp_hist.pdf", sep="")
pdf(outfile, height=7, width=7)

# data.frame(e$Qempirical, e$nBases) names its columns after the argument
# expressions (e.Qempirical, e.nBases); those names are used below.
hst=subset(data.frame(e$Qempirical, e$nBases), e.nBases != 0)
hst2=subset(data.frame(f$Qempirical, f$nBases), f.nBases != 0)

# Entropy (in bits) of the distribution of bases over the rows of e.
percentBases=hst$e.nBases / sum(as.numeric(hst$e.nBases))
entropy = -sum(log2(percentBases)*percentBases)

yMax = max(hst$e.nBases)
if(maxHist != 0) {
    yMax = maxHist
}
plot(hst$e.Qempirical, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yMax), main=paste("Empirical quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Empirical quality score", ylab="Number of Bases",yaxt="n")
points(hst2$f.Qempirical, hst2$f.nBases, type="h", lwd=4, col="maroon1")
# Redraw the y axis with plain (non-scientific) tick labels.
axis(2,axTicks(2), format(axTicks(2), scientific=FALSE))
dev.off()

if (exists('compactPDF')) {
    compactPDF(outfile)
}

#
# Plot Q reported histogram
#

outfile = paste(input, ".quality_rep_hist.pdf", sep="")
pdf(outfile, height=7, width=7)
hst=subset(data.frame(e$Qreported, e$nBases), e.nBases != 0)
hst2=subset(data.frame(f$Qreported, f$nBases), f.nBases != 0)
yMax = max(hst$e.nBases)
if(maxHist != 0) {
    yMax = maxHist
}
# The title reuses `entropy` from the previous section; it was computed from
# the same e$nBases weights that this histogram displays.
plot(hst$e.Qreported, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yMax), main=paste("Reported quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Reported quality score", ylab="Number of Bases",yaxt="n")
points(hst2$f.Qreported, hst2$f.nBases, type="h", lwd=4, col="maroon1")
axis(2,axTicks(2), format(axTicks(2), scientific=FALSE))
dev.off()

if (exists('compactPDF')) {
    compactPDF(outfile)
}

View File

@ -195,7 +195,6 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
private static final List<String> gatkPackages = Arrays.asList(
"org.broadinstitute.sting.gatk",
"org.broadinstitute.sting.pipeline",
"org.broadinstitute.sting.analyzecovariates",
"org.broadinstitute.sting.gatk.datasources.reads.utilities");
/**
@ -252,7 +251,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
*/
private void writeFilter(String className, List<? extends ArgumentField> argumentFields, Set<Class<?>> dependents) throws IOException {
String content = getContent(TRAIT_TEMPLATE, "org.broadinstitute.sting.queue.function.CommandLineFunction",
className, "", false, String.format(" + \" --read_filter %s\"", className), argumentFields, dependents);
className, "", false, String.format(" + required(\"--read_filter\", \"%s\")", className), argumentFields, dependents);
writeFile(GATK_EXTENSIONS_PACKAGE_NAME + "." + className, content);
}

View File

@ -1,14 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Package definition for the standalone AnalyzeCovariates executable:
     names its main class, its resource bundle, and the classes, packages
     and R scripts that have to be bundled with it. -->
<package name="AnalyzeCovariates">
<executable name="AnalyzeCovariates">
<main-class name="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" />
<resource-bundle file="StingText.properties" />
<dependencies>
<!-- Recalibration analysis entry point plus the recalibration walker package it depends on -->
<class name="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" />
<package name="org.broadinstitute.sting.gatk.walkers.recalibration" />
<!-- Supplemental scripts for graph generation, etc. (every .R file under the analyzecovariates directory) -->
<dir name="org/broadinstitute/sting/analyzecovariates" includes="**/*.R" />
</dependencies>
</executable>
</package>

View File

@ -1,51 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Package definition for the GATK-Picard bundle.
To update this file: copy GenomeAnalysisTK.xml, change the name of the
package and executable to GATK-Picard, add TileCovariate to the list of
dependencies, and remove the AnalyzeCovariates executable.
NOTE(review): no TileCovariate entry appears in the dependency list below;
confirm whether this instruction or the list is out of date. -->
<package name="GATK-Picard">
<version file="StingText.properties" property="org.broadinstitute.sting.gatk.version" />
<executable name="GATK-Picard">
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
<resource-bundle file="StingText.properties" />
<!-- Module definitions pulled in from sibling package files -->
<modules>
<module file="QualityScoresRecalibration.xml" />
<module file="LocalRealignmentAroundIndels.xml" />
<module file="UnifiedGenotyper.xml" />
<module file="VariantFiltration.xml" />
<module file="VariantAnnotator.xml" />
</modules>
<dependencies>
<!-- Filters -->
<package name="org.broadinstitute.sting.gatk.filters" />
<!-- Individual walker classes bundled in addition to the modules above -->
<class name="org.broadinstitute.sting.gatk.walkers.coverage.DepthOfCoverage" />
<class name="org.broadinstitute.sting.gatk.walkers.Pileup" />
<class name="org.broadinstitute.sting.gatk.walkers.PrintReads" />
<class name="org.broadinstitute.sting.gatk.walkers.qc.CountLoci" />
<class name="org.broadinstitute.sting.gatk.walkers.qc.CountReads" />
<class name="org.broadinstitute.sting.gatk.walkers.qc.ValidatingPileup" />
<!-- Recalibration Covariates -->
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.PositionCovariate" />
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.MinimumNQSCovariate" />
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.MappingQualityCovariate" />
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.PrimerRoundCovariate" />
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.HomopolymerCovariate" />
</dependencies>
</executable>
<resources>
<!-- GATK sample code and build scripts -->
<!-- NOTE(review): these source paths carry a Walker suffix, while the matching
     class dependencies above do not (e.g. DepthOfCoverage vs.
     DepthOfCoverageWalker.java); confirm the files exist under these names. -->
<file name="java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java" />
<file name="java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java" />
<file name="java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java" />
<file name="java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java" />
<file name="java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java" />
<file name="java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java" />
<file name="java/src/org/broadinstitute/sting/gatk/examples/HelloWalker.java" />
<file name="java/src/org/broadinstitute/sting/gatk/examples/build.xml" />
<!-- Sample reads and reference files -->
<file name="testdata/exampleBAM.bam" />
<file name="testdata/exampleBAM.bam.bai" />
<file name="testdata/exampleFASTA.fasta" />
<file name="testdata/exampleFASTA.fasta.fai" />
<file name="testdata/exampleFASTA.dict" />
</resources>
</package>

View File

@ -48,11 +48,11 @@
<file name="public/testdata/exampleFASTA.fasta.fai" />
<file name="public/testdata/exampleFASTA.dict" />
<!-- GATK sample code and build scripts -->
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java" />
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java" />
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java" />
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java" />
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java" />
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java" />
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java" />
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java" />
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReads.java" />
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLoci.java" />
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java" />
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileup.java" />
</resources>
</package>

View File

@ -33,10 +33,8 @@
<module file="GATKEngine.xml"/>
</modules>
</executable>
<modules>
<module file="AnalyzeCovariates.xml"/>
</modules>
<release>
<executable directory="/humgen/gsa-hpprojects/GATK/bin" symlink="current" />
<archive directory="/humgen/gsa-hpprojects/GATK/bin" symlink="GenomeAnalysisTK-latest.tar.bz2" /> </release>
<archive directory="/humgen/gsa-hpprojects/GATK/bin" symlink="GenomeAnalysisTK-latest.tar.bz2" />
</release>
</package>

View File

@ -33,9 +33,6 @@
<module file="GATKEngine.xml"/>
</modules>
</executable>
<modules>
<module file="AnalyzeCovariates.xml"/>
</modules>
<release>
<archive directory="/web/ftp/pub/gsa/GenomeAnalysisTK" symlink="GenomeAnalysisTKLite-latest.tar.bz2" />
</release>

View File

@ -32,7 +32,6 @@
<modules>
<module file="QueueEngine.xml"/>
<module file="GATKEngine.xml"/>
<module file="AnalyzeCovariates.xml"/>
</modules>
</executable>
<release>

View File

@ -32,7 +32,6 @@
<modules>
<module file="QueueEngine.xml"/>
<module file="GATKEngine.xml"/>
<module file="AnalyzeCovariates.xml"/>
</modules>
</executable>
<release>