diff --git a/R/plot_residualError_OtherCovariate.R b/R/plot_residualError_OtherCovariate.R
new file mode 100644
index 000000000..7d7cd2df9
--- /dev/null
+++ b/R/plot_residualError_OtherCovariate.R
@@ -0,0 +1,55 @@
+#!/broad/tools/apps/R-2.6.0/bin/Rscript
+
+args <- commandArgs(TRUE)
+verbose = TRUE
+
+input = args[1]
+covariateName = args[2]
+
+#X11(width=7, height=14)
+#outfile = paste(input, ".qual_diff_v_cycle.png", sep="")
+#png(outfile, height=7, width=7, units="in", res=72) #height=1000, width=680)
+outfile = paste(input, ".qual_diff_v_", covariateName, ".pdf", sep="")
+pdf(outfile, height=7, width=7)
+par(cex=1.1)
+c <- read.table(input, header=T)
+c <- c[sort.list(c[,1]),]
+
+d.good <- c[c$nBases >= 1000,]
+d.1000 <- c[c$nBases < 1000,]
+rmseBlue = sqrt(sum((d.good$Qempirical-d.good$Qreported)^2 * d.good$nBases) / sum(d.good$nBases) )
+rmseAll = sqrt(sum((c$Qempirical-c$Qreported)^2 * c$nBases) / sum(c$nBases) )
+theTitle = paste("RMSE_good = ", round(rmseBlue,digits=3), ", RMSE_all = ", round(rmseAll,digits=3))
+if( length(d.good$nBases) == length(c$nBases) ) {
+ theTitle = paste("RMSE = ", round(rmseAll,digits=3))
+ }
+if( is.numeric(c$Covariate) ) {
+ plot(d.good$Covariate, d.good$Qempirical-d.good$Qreported, type="p", main=theTitle, ylab="Empirical - Reported Quality", xlab=covariateName, col="blue", pch=16, ylim=c(-10, 10), xlim=c(min(c$Covariate),max(c$Covariate)))
+ points(d.1000$Covariate, d.1000$Qempirical-d.1000$Qreported, type="p", col="cornflowerblue", pch=16)
+} else {
+ plot(c$Covariate, c$Qempirical-c$Qreported, type="l", main=theTitle, ylab="Empirical - Reported Quality", xlab=covariateName, col="blue", ylim=c(-10, 10))
+ points(d.1000$Covariate, d.1000$Qempirical-d.1000$Qreported, type="l", col="cornflowerblue")
+}
+dev.off()
+#points(d.1000$Cycle, d.1000$Qempirical_Qreported, type="p", col="cornflowerblue", pch=16)
+
+#
+# Plot histogram of the covariate
+#
+e = d.good
+f = d.1000
+outfile = paste(input, ".", covariateName,"_hist.pdf", sep="")
+pdf(outfile, height=7, width=7)
+hst=subset(data.frame(e$Covariate, e$nBases), e.nBases != 0)
+hst2=subset(data.frame(f$Covariate, f$nBases), f.nBases != 0)
+if( is.numeric(c$Covariate) ) {
+ plot(hst$e.Covariate, hst$e.nBases, type="h", lwd=4, main=paste(covariateName,"histogram"), xlab=covariateName, ylab="Count",yaxt="n")
+ points(hst2$f.Covariate, hst2$f.nBases, type="h", lwd=4, col="cornflowerblue")
+ axis(2,axTicks(2), format(axTicks(2), scientific=F))
+} else {
+ hst=subset(data.frame(c$Covariate, c$nBases), c.nBases != 0)
+ plot(1:length(hst$c.Covariate), hst$c.nBases, type="h", lwd=4, main=paste(covariateName,"histogram"), xlab=covariateName, ylab="Count",yaxt="n",xaxt="n")
+ axis(1, at=seq(1,length(hst$c.Covariate),2), labels = hst$c.Covariate[seq(1,length(hst$c.Covariate),2)])
+ axis(2,axTicks(2), format(axTicks(2), scientific=F))
+}
+dev.off()
\ No newline at end of file
diff --git a/R/plot_residualError_QualityScoreCovariate.R b/R/plot_residualError_QualityScoreCovariate.R
new file mode 100644
index 000000000..278b57efd
--- /dev/null
+++ b/R/plot_residualError_QualityScoreCovariate.R
@@ -0,0 +1,55 @@
+#!/broad/tools/apps/R-2.6.0/bin/Rscript
+
+args <- commandArgs(TRUE)
+
+input = args[1]
+Qcutoff = as.numeric(args[2])
+
+t=read.table(input, header=T)
+#t=read.csv(input)
+#par(mfrow=c(2,1), cex=1.2)
+
+#outfile = paste(input, ".quality_emp_v_stated.png", sep="")
+#png(outfile, height=7, width=7, units="in", res=72) # height=1000, width=446)
+outfile = paste(input, ".quality_emp_v_stated.pdf", sep="")
+pdf(outfile, height=7, width=7)
+d.good <- t[t$nBases >= 10000 & t$Qreported >= Qcutoff,]
+d.1000 <- t[t$nBases < 1000 & t$Qreported >= Qcutoff,]
+d.10000 <- t[t$nBases < 10000 & t$nBases >= 1000 & t$Qreported >= Qcutoff,]
+f <- t[t$Qreported < Qcutoff,]
+e <- rbind(d.good, d.1000, d.10000)
+rmseBlue = sqrt(sum((d.good$Qempirical-d.good$Qreported)^2 * d.good$nBases) / sum(d.good$nBases) )
+rmseAll = sqrt(sum((e$Qempirical-e$Qreported)^2 * e$nBases) / sum(e$nBases) )
+theTitle = paste("RMSE_good = ", round(rmseBlue,digits=3), ", RMSE_all = ", round(rmseAll,digits=3))
+if (length(t$nBases) == length(d.good$nBases) ) {
+ theTitle = paste("RMSE = ", round(rmseAll,digits=3));
+ }
+plot(d.good$Qreported, d.good$Qempirical, type="p", col="blue", main=theTitle, xlim=c(0,40), ylim=c(0,40), pch=16, xlab="Reported quality score", ylab="Empirical quality score")
+points(d.1000$Qreported, d.1000$Qempirical, type="p", col="lightblue", pch=16)
+points(d.10000$Qreported, d.10000$Qempirical, type="p", col="cornflowerblue", pch=16)
+points(f$Qreported, f$Qempirical, type="p", col="maroon1", pch=16)
+abline(0,1, lty=2)
+dev.off()
+
+#outfile = paste(input, ".quality_emp_hist.png", sep="")
+#png(outfile, height=7, width=7, units="in", res=72) # height=1000, width=446)
+outfile = paste(input, ".quality_emp_hist.pdf", sep="")
+pdf(outfile, height=7, width=7)
+hst=subset(data.frame(e$Qempirical, e$nBases), e.nBases != 0)
+hst2=subset(data.frame(f$Qempirical, f$nBases), f.nBases != 0)
+plot(hst$e.Qempirical, hst$e.nBases, type="h", lwd=4, xlim=c(0,40), main="Empirical quality score histogram", xlab="Empirical quality score", ylab="Count",yaxt="n")
+points(hst2$f.Qempirical, hst2$f.nBases, type="h", lwd=4, col="maroon1")
+axis(2,axTicks(2), format(axTicks(2), scientific=F))
+dev.off()
+
+#
+# Plot Q reported histogram
+#
+outfile = paste(input, ".quality_rep_hist.pdf", sep="")
+pdf(outfile, height=7, width=7)
+hst=subset(data.frame(e$Qreported, e$nBases), e.nBases != 0)
+hst2=subset(data.frame(f$Qreported, f$nBases), f.nBases != 0)
+plot(hst$e.Qreported, hst$e.nBases, type="h", lwd=4, xlim=c(0,40), main="Reported quality score histogram", xlab="Reported quality score", ylab="Count",yaxt="n")
+points(hst2$f.Qreported, hst2$f.nBases, type="h", lwd=4, col="maroon1")
+axis(2,axTicks(2), format(axTicks(2), scientific=F))
+dev.off()
diff --git a/R/plot_q_emp_stated_hst.R b/archive/R/plot_q_emp_stated_hst.R
similarity index 100%
rename from R/plot_q_emp_stated_hst.R
rename to archive/R/plot_q_emp_stated_hst.R
diff --git a/R/plot_qual_diff_v_cycle.R b/archive/R/plot_qual_diff_v_cycle.R
similarity index 100%
rename from R/plot_qual_diff_v_cycle.R
rename to archive/R/plot_qual_diff_v_cycle.R
diff --git a/R/plot_qual_diff_v_dinuc.R b/archive/R/plot_qual_diff_v_dinuc.R
similarity index 100%
rename from R/plot_qual_diff_v_dinuc.R
rename to archive/R/plot_qual_diff_v_dinuc.R
diff --git a/python/analyzeRecalQuals.py b/archive/python/analyzeRecalQuals.py
similarity index 100%
rename from python/analyzeRecalQuals.py
rename to archive/python/analyzeRecalQuals.py
diff --git a/python/analyzeRecalQuals_1KG.py b/archive/python/analyzeRecalQuals_1KG.py
similarity index 100%
rename from python/analyzeRecalQuals_1KG.py
rename to archive/python/analyzeRecalQuals_1KG.py
diff --git a/build.xml b/build.xml
index ef2121a3f..9c9a9c10d 100644
--- a/build.xml
+++ b/build.xml
@@ -114,12 +114,13 @@
-
+
-
+
+
-
+
@@ -150,7 +151,7 @@
-
+
diff --git a/java/src/org/broadinstitute/sting/analyzecovariates/AnalysisDataManager.java b/java/src/org/broadinstitute/sting/analyzecovariates/AnalysisDataManager.java
new file mode 100755
index 000000000..3c5e12ebd
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/analyzecovariates/AnalysisDataManager.java
@@ -0,0 +1,83 @@
+package org.broadinstitute.sting.analyzecovariates;
+
+import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
+import org.broadinstitute.sting.gatk.walkers.recalibration.NHashMap;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: Dec 1, 2009
+ */
+
+public class AnalysisDataManager {
+
+ private NHashMap dataCollapsedReadGroup; // Table where everything except read group has been collapsed
+ private ArrayList> dataCollapsedByCovariate; // Tables where everything except read group and given covariate has been collapsed
+
+ AnalysisDataManager() {
+ }
+
+ AnalysisDataManager( final int numCovariates ) {
+ dataCollapsedReadGroup = new NHashMap();
+ dataCollapsedByCovariate = new ArrayList>();
+ for( int iii = 0; iii < numCovariates - 1; iii++ ) { // readGroup isn't counted here, its table is separate
+ dataCollapsedByCovariate.add( new NHashMap() );
+ }
+ }
+
+ /**
+ * Add the given mapping to all of the collapsed hash tables
+ * @param key The list of comparables that is the key for this mapping
+ * @param fullDatum The RecalDatum which is the data for this mapping
+ */
+ public final void addToAllTables( final List extends Comparable> key, final RecalDatum fullDatum, final int IGNORE_QSCORES_LESS_THAN ) {
+
+ int qscore = Integer.parseInt( key.get(1).toString() );
+ ArrayList newKey;
+ RecalDatum collapsedDatum;
+
+ if( !(qscore < IGNORE_QSCORES_LESS_THAN) ) {
+ // Create dataCollapsedReadGroup, the table where everything except read group has been collapsed
+ newKey = new ArrayList();
+ newKey.add( key.get(0) ); // Make a new key with just the read group
+ collapsedDatum = dataCollapsedReadGroup.get( newKey );
+ if( collapsedDatum == null ) {
+ dataCollapsedReadGroup.put( newKey, new RecalDatum(fullDatum) );
+ } else {
+ collapsedDatum.combine( fullDatum ); // using combine instead of increment in order to calculate overall aggregateQReported
+ }
+ }
+
+ // Create dataCollapsedByCovariate's, the tables where everything except read group, quality score, and given covariate has been collapsed
+ for( int iii = 0; iii < dataCollapsedByCovariate.size(); iii++ ) {
+ if( iii == 0 || !(qscore < IGNORE_QSCORES_LESS_THAN) ) { // use all data for the plot versus reported quality, but not for the other plots versus cycle etc.
+ newKey = new ArrayList();
+ newKey.add( key.get(0) ); // Make a new key with the read group ...
+ newKey.add( key.get(iii + 1) ); // and the given covariate
+ collapsedDatum = dataCollapsedByCovariate.get(iii).get( newKey );
+ if( collapsedDatum == null ) {
+ dataCollapsedByCovariate.get(iii).put( newKey, new RecalDatum(fullDatum) );
+ } else {
+ collapsedDatum.combine( fullDatum );
+ }
+ }
+ }
+ }
+
+ /**
+ * Get the appropriate collapsed table out of the set of all the tables held by this Object
+ * @param covariate Which covariate indexes the desired collapsed HashMap
+ * @return The desired collapsed HashMap
+ */
+ public final NHashMap getCollapsedTable( final int covariate ) {
+ if( covariate == 0) {
+ return dataCollapsedReadGroup; // Table where everything except read group has been collapsed
+ } else {
+ return dataCollapsedByCovariate.get( covariate - 1 ); // Table where everything except read group, quality score, and given covariate has been collapsed
+ }
+ }
+
+}
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
new file mode 100755
index 000000000..b6eeebc0c
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
@@ -0,0 +1,307 @@
+package org.broadinstitute.sting.analyzecovariates;
+
+import org.broadinstitute.sting.gatk.walkers.recalibration.*;
+import org.broadinstitute.sting.utils.PackageUtils;
+import org.broadinstitute.sting.utils.xReadLines;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.io.*;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: Dec 1, 2009
+ */
+
+public class AnalyzeCovariates {
+
+ /////////////////////////////
+ // Command Line Arguments
+ /////////////////////////////
+ private static String RECAL_FILE = "output.recal_data.csv";
+ private static String OUTPUT_DIR = "analyzeCovariates/";
+ private static String PATH_TO_RSCRIPT = "/broad/tools/apps/R-2.6.0/bin/Rscript";
+ private static String PATH_TO_RESOURCES = "R/";
+ private static int IGNORE_QSCORES_LESS_THAN = 5;
+ private static int NUM_READ_GROUPS_TO_PROCESS = -1; // -1 means process all read groups
+
+ /////////////////////////////
+ // Private Member Variables
+ /////////////////////////////
+ private static AnalysisDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
+ private static ArrayList requestedCovariates; // List of covariates to be used in this calculation
+ private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
+ private static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
+ private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
+
+ public static void main(String[] args) {
+
+ // parse command line arguments
+ parseArguments( args );
+ // create the output directory where all the data tables and plots will go
+ try {
+ Process p = Runtime.getRuntime().exec("mkdir " + OUTPUT_DIR);
+ } catch (IOException e) {
+ throw new RuntimeException("Couldn't create directory: " + OUTPUT_DIR);
+ }
+ if( !OUTPUT_DIR.endsWith("/") ) { OUTPUT_DIR = OUTPUT_DIR + "/"; }
+ if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; }
+
+ // initialize all the data from the csv file and allocate the list of covariates
+ System.out.println("Reading in input csv file...");
+ initializeData();
+ System.out.println("...Done!");
+
+ // output data tables for Rscript to read in
+ System.out.println("Writing out intermediate tables for R...");
+ writeDataTables();
+ System.out.println("...Done!");
+
+ // perform the analysis using Rscript and output the plots
+ System.out.println("Calling analysis R scripts and writing out figures...");
+ callRScripts();
+ System.out.println("...Done!");
+
+ }
+
+ private static void parseArguments( String[] args ) {
+ int iii = 0;
+ String arg;
+
+ try {
+ while( iii < args.length && args[iii].startsWith("-") ) {
+ arg = args[iii++];
+
+ if( arg.equals( "-recalFile" ) ) {
+ RECAL_FILE = args[iii++];
+ } else if( arg.equals( "-Rscript" ) ) {
+ PATH_TO_RSCRIPT = args[iii++];
+ } else if( arg.equals( "-resources" ) ) {
+ PATH_TO_RESOURCES = args[iii++];
+ } else if( arg.equals( "-ignoreQ" ) ) {
+ IGNORE_QSCORES_LESS_THAN = Integer.parseInt( args[iii++] );
+ } else if (arg.equals( "-numRG" ) ) {
+ NUM_READ_GROUPS_TO_PROCESS = Integer.parseInt( args[iii++] );
+ } else if( arg.equals( "-outputDir" ) ) {
+ OUTPUT_DIR = args[iii++];
+ } else {
+ iii = -1;
+ break;
+ }
+ }
+
+ if( iii != args.length ) {
+ throw new RuntimeException( "Exception" );
+ }
+ } catch(Exception e) {
+ System.out.println( "Usage: [-option param] \n" );
+ System.out.println(" Available options:");
+ System.out.println("\t-recalFile \tPath to input recal csv file. Default value: output.recal_data.csv");
+ System.out.println("\t-Rscript \t\tPath to your implementation of Rscript. Default value: /broad/tools/apps/R-2.6.0/bin/Rscript");
+ System.out.println("\t-resources \tPath to resources folder holding the Sting R scripts. Default value: R/");
+ System.out.println("\t-outputDir \tWhere to put the output plots. Default value: analyzeCovariates/");
+ System.out.println("\t-ignoreQ \t\tIgnore bases with reported quality less than this number. Default value: 5");
+ System.out.println("\t-numRG \t\tOnly process N read groups. Default value: -1 (process all read groups)");
+ System.exit(-1);
+ }
+ }
+
+ private static void initializeData() {
+
+ // Get a list of all available covariates
+ List> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);
+
+ int lineNumber = 0;
+ boolean foundAllCovariates = false;
+ int estimatedCapacity = 1; // Capacity is multiplicitive so this starts at one
+
+ // Read in the covariates that were used from the input file
+ requestedCovariates = new ArrayList();
+
+ try {
+ for ( String line : new xReadLines(new File( RECAL_FILE )) ) {
+ lineNumber++;
+ if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches()) {
+ ; // Skip over the comment lines, (which start with '#')
+ }
+ else if( COVARIATE_PATTERN.matcher(line).matches() ) { // The line string is either specifying a covariate or is giving csv data
+ if( foundAllCovariates ) {
+ throw new RuntimeException( "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE );
+ } else { // Found the covariate list in input file, loop through all of them and instantiate them
+ String[] vals = line.split(",");
+ for( int iii = 0; iii < vals.length - 3; iii++ ) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical
+ boolean foundClass = false;
+ for( Class> covClass : classes ) {
+ if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) {
+ foundClass = true;
+ try {
+ Covariate covariate = (Covariate)covClass.newInstance();
+ requestedCovariates.add( covariate );
+ estimatedCapacity *= covariate.estimatedNumberOfBins();
+
+ } catch ( InstantiationException e ) {
+ throw new RuntimeException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) );
+ } catch ( IllegalAccessException e ) {
+ throw new RuntimeException( String.format("Can not instantiate covariate class '%s': must have no-arg constructor.", covClass.getSimpleName()) );
+ }
+ }
+ }
+
+ if( !foundClass ) {
+ throw new RuntimeException( "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option." );
+ }
+ }
+
+ }
+
+ } else { // Found a line of data
+ if( !foundAllCovariates ) {
+
+ foundAllCovariates = true;
+
+ // At this point all the covariates should have been found and initialized
+ if( requestedCovariates.size() < 2 ) {
+ throw new RuntimeException( "Malformed input recalibration file. Covariate names can't be found in file: " + RECAL_FILE );
+ }
+
+ // Don't want to crash with out of heap space exception
+ if( estimatedCapacity > 300 * 40 * 200 || estimatedCapacity < 0 ) { // Could be negative if overflowed
+ estimatedCapacity = 300 * 40 * 200;
+ }
+
+ // Initialize any covariate member variables using the shared argument collection
+ for( Covariate cov : requestedCovariates ) {
+ cov.initialize( new RecalibrationArgumentCollection() );
+ }
+
+ // Initialize the data hashMaps
+ dataManager = new AnalysisDataManager( requestedCovariates.size() );
+
+ }
+ addCSVData(line); // Parse the line and add the data to the HashMap
+ }
+ }
+
+ } catch ( FileNotFoundException e ) {
+ throw new RuntimeException("Can not find input file: " + RECAL_FILE);
+ } catch ( NumberFormatException e ) {
+ throw new RuntimeException("Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker.");
+ }
+ }
+
+ private static void addCSVData(String line) {
+ String[] vals = line.split(",");
+
+ // Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
+ if( vals.length != requestedCovariates.size() + 3 ) { // +3 because of nObservations, nMismatch, and Qempirical
+ throw new RuntimeException("Malformed input recalibration file. Found data line with too many fields: " + line +
+ " --Perhaps the read group string contains a comma and isn't being parsed correctly.");
+ }
+
+ ArrayList key = new ArrayList();
+ Covariate cov;
+ int iii;
+ for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
+ cov = requestedCovariates.get( iii );
+ key.add( cov.getValue( vals[iii] ) );
+ }
+ // Create a new datum using the number of observations, number of mismatches, and reported quality score
+ RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ) );
+ // Add that datum to all the collapsed tables which will be used in the sequential calculation
+ dataManager.addToAllTables( key, datum, IGNORE_QSCORES_LESS_THAN );
+
+ }
+
+ private static void writeDataTables() {
+
+ int numReadGroups = 0;
+
+ // for each read group
+ NHashMap readGroupTable = dataManager.getCollapsedTable(0);
+ for( List extends Comparable> readGroupKey : readGroupTable.keySet() ) {
+
+ if(NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS) {
+ String readGroup = readGroupKey.get(0).toString();
+ RecalDatum readGroupDatum = readGroupTable.get(readGroupKey);
+ System.out.print("Writing out data tables for read group: " + readGroup + "\twith " + readGroupDatum.getNumObservations() + " observations" );
+ System.out.println("\tand aggregate residual error = " + String.format("%.3f", readGroupDatum.empiricalQualDouble(0) - readGroupDatum.getEstimatedQReported()));
+
+ // for each covariate
+ for( int iii = 1; iii < requestedCovariates.size(); iii++ ) {
+ Covariate cov = requestedCovariates.get(iii);
+
+ // Create a PrintStream
+ PrintStream output = null;
+ try {
+ output = new PrintStream(new FileOutputStream(OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat"));
+
+ } catch (FileNotFoundException e) {
+ System.err.println("Can't create file: " + OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat");
+ System.exit(-1);
+ }
+
+ // Output the header
+ output.println("Covariate\tQreported\tQempirical\tnMismatches\tnBases");
+
+ // Loop through the covariate table looking for keys with matching read groups
+ // BUGBUG: hopefully rewrite this to be more efficient
+ for( List extends Comparable> covariateKey : dataManager.getCollapsedTable(iii).keySet() ) {
+ if( covariateKey.get(0).toString().equals(readGroup) ) {
+ output.print( covariateKey.get(1).toString() + "\t" ); // Covariate
+ RecalDatum thisDatum = dataManager.getCollapsedTable(iii).get(covariateKey);
+ output.print( String.format("%.3f", thisDatum.getEstimatedQReported()) + "\t" ); // Qreported
+ output.print( String.format("%.3f", thisDatum.empiricalQualDouble(0)) + "\t" ); // Qempirical
+ output.print( thisDatum.getNumMismatches() + "\t" ); // nMismatches
+ output.println( thisDatum.getNumObservations() ); // nBases
+ }
+ }
+
+ // Close the PrintStream
+ output.close();
+ }
+ } else {
+ break;
+ }
+
+ }
+ }
+
+ private static void callRScripts() {
+
+ int numReadGroups = 0;
+
+ // for each read group
+ for( List extends Comparable> readGroupList : dataManager.getCollapsedTable(0).keySet() ) {
+
+ if(NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS) {
+
+ String readGroup = readGroupList.get(0).toString();
+ System.out.println("Analyzing read group: " + readGroup);
+
+ // for each covariate
+ for( int iii = 1; iii < requestedCovariates.size(); iii++ ) {
+ Covariate cov = requestedCovariates.get(iii);
+ try {
+ if( iii == 1 ) {
+ // Analyze reported quality
+ Process p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_QualityScoreCovariate.R" + " " +
+ OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " +
+ IGNORE_QSCORES_LESS_THAN); // The third argument is the Q scores that should be turned pink in the plot because they were ignored
+ } else { // Analyze all other covariates
+ Process p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_OtherCovariate.R" + " " +
+ OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " +
+ cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument which is the name of the covariate to make the plots look nice
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ System.exit(-1);
+ }
+ }
+ } else {
+ break;
+ }
+ }
+ }
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
index 2b804e7c4..2e74067e4 100644
--- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
@@ -44,6 +44,7 @@ import net.sf.samtools.SAMReadGroupRecord;
*/
public class RecalDataManager {
+
public NHashMap data; // The full dataset
private NHashMap dataCollapsedReadGroup; // Table where everything except read group has been collapsed
private NHashMap dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
index 8702cb85f..d972d1ddf 100644
--- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
@@ -170,7 +170,7 @@ public class TableRecalibrationWalker extends ReadWalker