diff --git a/R/plot_residualError_OtherCovariate.R b/R/plot_residualError_OtherCovariate.R new file mode 100644 index 000000000..7d7cd2df9 --- /dev/null +++ b/R/plot_residualError_OtherCovariate.R @@ -0,0 +1,55 @@ +#!/broad/tools/apps/R-2.6.0/bin/Rscript + +args <- commandArgs(TRUE) +verbose = TRUE + +input = args[1] +covariateName = args[2] + +#X11(width=7, height=14) +#outfile = paste(input, ".qual_diff_v_cycle.png", sep="") +#png(outfile, height=7, width=7, units="in", res=72) #height=1000, width=680) +outfile = paste(input, ".qual_diff_v_", covariateName, ".pdf", sep="") +pdf(outfile, height=7, width=7) +par(cex=1.1) +c <- read.table(input, header=T) +c <- c[sort.list(c[,1]),] + +d.good <- c[c$nBases >= 1000,] +d.1000 <- c[c$nBases < 1000,] +rmseBlue = sqrt(sum((d.good$Qempirical-d.good$Qreported)^2 * d.good$nBases) / sum(d.good$nBases) ) +rmseAll = sqrt(sum((c$Qempirical-c$Qreported)^2 * c$nBases) / sum(c$nBases) ) +theTitle = paste("RMSE_good = ", round(rmseBlue,digits=3), ", RMSE_all = ", round(rmseAll,digits=3)) +if( length(d.good$nBases) == length(c$nBases) ) { + theTitle = paste("RMSE = ", round(rmseAll,digits=3)) + } +if( is.numeric(c$Covariate) ) { + plot(d.good$Covariate, d.good$Qempirical-d.good$Qreported, type="p", main=theTitle, ylab="Empirical - Reported Quality", xlab=covariateName, col="blue", pch=16, ylim=c(-10, 10), xlim=c(min(c$Covariate),max(c$Covariate))) + points(d.1000$Covariate, d.1000$Qempirical-d.1000$Qreported, type="p", col="cornflowerblue", pch=16) +} else { + plot(c$Covariate, c$Qempirical-c$Qreported, type="l", main=theTitle, ylab="Empirical - Reported Quality", xlab=covariateName, col="blue", ylim=c(-10, 10)) + points(d.1000$Covariate, d.1000$Qempirical-d.1000$Qreported, type="l", col="cornflowerblue") +} +dev.off() +#points(d.1000$Cycle, d.1000$Qempirical_Qreported, type="p", col="cornflowerblue", pch=16) + +# +# Plot histogram of the covariate +# +e = d.good +f = d.1000 +outfile = paste(input, ".", covariateName,"_hist.pdf", sep="") +pdf(outfile, height=7, width=7) +hst=subset(data.frame(e$Covariate, e$nBases), e.nBases != 0) +hst2=subset(data.frame(f$Covariate, f$nBases), f.nBases != 0) +if( is.numeric(c$Covariate) ) { + plot(hst$e.Covariate, hst$e.nBases, type="h", lwd=4, main=paste(covariateName,"histogram"), xlab=covariateName, ylab="Count",yaxt="n") + points(hst2$f.Covariate, hst2$f.nBases, type="h", lwd=4, col="cornflowerblue") + axis(2,axTicks(2), format(axTicks(2), scientific=F)) +} else { + hst=subset(data.frame(c$Covariate, c$nBases), c.nBases != 0) + plot(1:length(hst$c.Covariate), hst$c.nBases, type="h", lwd=4, main=paste(covariateName,"histogram"), xlab=covariateName, ylab="Count",yaxt="n",xaxt="n") + axis(1, at=seq(1,length(hst$c.Covariate),2), labels = hst$c.Covariate[seq(1,length(hst$c.Covariate),2)]) + axis(2,axTicks(2), format(axTicks(2), scientific=F)) +} +dev.off() \ No newline at end of file diff --git a/R/plot_residualError_QualityScoreCovariate.R b/R/plot_residualError_QualityScoreCovariate.R new file mode 100644 index 000000000..278b57efd --- /dev/null +++ b/R/plot_residualError_QualityScoreCovariate.R @@ -0,0 +1,55 @@ +#!/broad/tools/apps/R-2.6.0/bin/Rscript + +args <- commandArgs(TRUE) + +input = args[1] +Qcutoff = as.numeric(args[2]) + +t=read.table(input, header=T) +#t=read.csv(input) +#par(mfrow=c(2,1), cex=1.2) + +#outfile = paste(input, ".quality_emp_v_stated.png", sep="") +#png(outfile, height=7, width=7, units="in", res=72) # height=1000, width=446) +outfile = paste(input, ".quality_emp_v_stated.pdf", sep="") +pdf(outfile, height=7, width=7) +d.good <- t[t$nBases >= 10000 & t$Qreported >= Qcutoff,] +d.1000 <- t[t$nBases < 1000 & t$Qreported >= Qcutoff,] +d.10000 <- t[t$nBases < 10000 & t$nBases >= 1000 & t$Qreported >= Qcutoff,] +f <- t[t$Qreported < Qcutoff,] +e <- rbind(d.good, d.1000, d.10000) +rmseBlue = sqrt(sum((d.good$Qempirical-d.good$Qreported)^2 * d.good$nBases) / sum(d.good$nBases) ) +rmseAll = sqrt(sum((e$Qempirical-e$Qreported)^2 * e$nBases) / sum(e$nBases) ) +theTitle = paste("RMSE_good = ", round(rmseBlue,digits=3), ", RMSE_all = ", round(rmseAll,digits=3)) +if (length(t$nBases) == length(d.good$nBases) ) { + theTitle = paste("RMSE = ", round(rmseAll,digits=3)); + } +plot(d.good$Qreported, d.good$Qempirical, type="p", col="blue", main=theTitle, xlim=c(0,40), ylim=c(0,40), pch=16, xlab="Reported quality score", ylab="Empirical quality score") +points(d.1000$Qreported, d.1000$Qempirical, type="p", col="lightblue", pch=16) +points(d.10000$Qreported, d.10000$Qempirical, type="p", col="cornflowerblue", pch=16) +points(f$Qreported, f$Qempirical, type="p", col="maroon1", pch=16) +abline(0,1, lty=2) +dev.off() + +#outfile = paste(input, ".quality_emp_hist.png", sep="") +#png(outfile, height=7, width=7, units="in", res=72) # height=1000, width=446) +outfile = paste(input, ".quality_emp_hist.pdf", sep="") +pdf(outfile, height=7, width=7) +hst=subset(data.frame(e$Qempirical, e$nBases), e.nBases != 0) +hst2=subset(data.frame(f$Qempirical, f$nBases), f.nBases != 0) +plot(hst$e.Qempirical, hst$e.nBases, type="h", lwd=4, xlim=c(0,40), main="Empirical quality score histogram", xlab="Empirical quality score", ylab="Count",yaxt="n") +points(hst2$f.Qempirical, hst2$f.nBases, type="h", lwd=4, col="maroon1") +axis(2,axTicks(2), format(axTicks(2), scientific=F)) +dev.off() + +# +# Plot Q reported histogram +# +outfile = paste(input, ".quality_rep_hist.pdf", sep="") +pdf(outfile, height=7, width=7) +hst=subset(data.frame(e$Qreported, e$nBases), e.nBases != 0) +hst2=subset(data.frame(f$Qreported, f$nBases), f.nBases != 0) +plot(hst$e.Qreported, hst$e.nBases, type="h", lwd=4, xlim=c(0,40), main="Reported quality score histogram", xlab="Reported quality score", ylab="Count",yaxt="n") +points(hst2$f.Qreported, hst2$f.nBases, type="h", lwd=4, col="maroon1") +axis(2,axTicks(2), format(axTicks(2), scientific=F)) +dev.off() diff --git a/R/plot_q_emp_stated_hst.R b/archive/R/plot_q_emp_stated_hst.R similarity index 100% rename from R/plot_q_emp_stated_hst.R rename to archive/R/plot_q_emp_stated_hst.R diff --git a/R/plot_qual_diff_v_cycle.R b/archive/R/plot_qual_diff_v_cycle.R similarity index 100% rename from R/plot_qual_diff_v_cycle.R rename to archive/R/plot_qual_diff_v_cycle.R diff --git a/R/plot_qual_diff_v_dinuc.R b/archive/R/plot_qual_diff_v_dinuc.R similarity index 100% rename from R/plot_qual_diff_v_dinuc.R rename to archive/R/plot_qual_diff_v_dinuc.R diff --git a/python/analyzeRecalQuals.py b/archive/python/analyzeRecalQuals.py similarity index 100% rename from python/analyzeRecalQuals.py rename to archive/python/analyzeRecalQuals.py diff --git a/python/analyzeRecalQuals_1KG.py b/archive/python/analyzeRecalQuals_1KG.py similarity index 100% rename from python/analyzeRecalQuals_1KG.py rename to archive/python/analyzeRecalQuals_1KG.py diff --git a/build.xml b/build.xml index ef2121a3f..9c9a9c10d 100644 --- a/build.xml +++ b/build.xml @@ -114,12 +114,13 @@ - + - + + - + @@ -150,7 +151,7 @@ - + diff --git a/java/src/org/broadinstitute/sting/analyzecovariates/AnalysisDataManager.java b/java/src/org/broadinstitute/sting/analyzecovariates/AnalysisDataManager.java new file mode 100755 index 000000000..3c5e12ebd --- /dev/null +++ b/java/src/org/broadinstitute/sting/analyzecovariates/AnalysisDataManager.java @@ -0,0 +1,83 @@ +package org.broadinstitute.sting.analyzecovariates; + +import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum; +import org.broadinstitute.sting.gatk.walkers.recalibration.NHashMap; + +import java.util.ArrayList; +import java.util.List; + +/** + * Created by IntelliJ IDEA. + * User: rpoplin + * Date: Dec 1, 2009 + */ + +public class AnalysisDataManager { + + private NHashMap dataCollapsedReadGroup; // Table where everything except read group has been collapsed + private ArrayList> dataCollapsedByCovariate; // Tables where everything except read group and given covariate has been collapsed + + AnalysisDataManager() { + } + + AnalysisDataManager( final int numCovariates ) { + dataCollapsedReadGroup = new NHashMap(); + dataCollapsedByCovariate = new ArrayList>(); + for( int iii = 0; iii < numCovariates - 1; iii++ ) { // readGroup isn't counted here, its table is separate + dataCollapsedByCovariate.add( new NHashMap() ); + } + } + + /** + * Add the given mapping to all of the collapsed hash tables + * @param key The list of comparables that is the key for this mapping + * @param fullDatum The RecalDatum which is the data for this mapping + */ + public final void addToAllTables( final List key, final RecalDatum fullDatum, final int IGNORE_QSCORES_LESS_THAN ) { + + int qscore = Integer.parseInt( key.get(1).toString() ); + ArrayList newKey; + RecalDatum collapsedDatum; + + if( !(qscore < IGNORE_QSCORES_LESS_THAN) ) { + // Create dataCollapsedReadGroup, the table where everything except read group has been collapsed + newKey = new ArrayList(); + newKey.add( key.get(0) ); // Make a new key with just the read group + collapsedDatum = dataCollapsedReadGroup.get( newKey ); + if( collapsedDatum == null ) { + dataCollapsedReadGroup.put( newKey, new RecalDatum(fullDatum) ); + } else { + collapsedDatum.combine( fullDatum ); // using combine instead of increment in order to calculate overall aggregateQReported + } + } + + // Create dataCollapsedByCovariate's, the tables where everything except read group, quality score, and given covariate has been collapsed + for( int iii = 0; iii < dataCollapsedByCovariate.size(); iii++ ) { + if( iii == 0 || !(qscore < IGNORE_QSCORES_LESS_THAN) ) { // use all data for the plot versus reported quality, but not for the other plots versus cycle etc. + newKey = new ArrayList(); + newKey.add( key.get(0) ); // Make a new key with the read group ... + newKey.add( key.get(iii + 1) ); // and the given covariate + collapsedDatum = dataCollapsedByCovariate.get(iii).get( newKey ); + if( collapsedDatum == null ) { + dataCollapsedByCovariate.get(iii).put( newKey, new RecalDatum(fullDatum) ); + } else { + collapsedDatum.combine( fullDatum ); + } + } + } + } + + /** + * Get the appropriate collapsed table out of the set of all the tables held by this Object + * @param covariate Which covariate indexes the desired collapsed HashMap + * @return The desired collapsed HashMap + */ + public final NHashMap getCollapsedTable( final int covariate ) { + if( covariate == 0) { + return dataCollapsedReadGroup; // Table where everything except read group has been collapsed + } else { + return dataCollapsedByCovariate.get( covariate - 1 ); // Table where everything except read group, quality score, and given covariate has been collapsed + } + } + +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java new file mode 100755 index 000000000..b6eeebc0c --- /dev/null +++ b/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java @@ -0,0 +1,307 @@ +package org.broadinstitute.sting.analyzecovariates; + +import org.broadinstitute.sting.gatk.walkers.recalibration.*; +import org.broadinstitute.sting.utils.PackageUtils; +import org.broadinstitute.sting.utils.xReadLines; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Pattern; +import java.io.*; + +/** + * Created by IntelliJ IDEA. + * User: rpoplin + * Date: Dec 1, 2009 + */ + +public class AnalyzeCovariates { + + ///////////////////////////// + // Command Line Arguments + ///////////////////////////// + private static String RECAL_FILE = "output.recal_data.csv"; + private static String OUTPUT_DIR = "analyzeCovariates/"; + private static String PATH_TO_RSCRIPT = "/broad/tools/apps/R-2.6.0/bin/Rscript"; + private static String PATH_TO_RESOURCES = "R/"; + private static int IGNORE_QSCORES_LESS_THAN = 5; + private static int NUM_READ_GROUPS_TO_PROCESS = -1; // -1 means process all read groups + + ///////////////////////////// + // Private Member Variables + ///////////////////////////// + private static AnalysisDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps + private static ArrayList requestedCovariates; // List of covariates to be used in this calculation + private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*"); + private static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*"); + private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*"); + + public static void main(String[] args) { + + // parse command line arguments + parseArguments( args ); + // create the output directory where all the data tables and plots will go + try { + Process p = Runtime.getRuntime().exec("mkdir " + OUTPUT_DIR); + } catch (IOException e) { + throw new RuntimeException("Couldn't create directory: " + OUTPUT_DIR); + } + if( !OUTPUT_DIR.endsWith("/") ) { OUTPUT_DIR = OUTPUT_DIR + "/"; } + if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; } + + // initialize all the data from the csv file and allocate the list of covariates + System.out.println("Reading in input csv file..."); + initializeData(); + System.out.println("...Done!"); + + // output data tables for Rscript to read in + System.out.println("Writing out intermediate tables for R..."); + writeDataTables(); + System.out.println("...Done!"); + + // perform the analysis using Rscript and output the plots + System.out.println("Calling analysis R scripts and writing out figures..."); + callRScripts(); + System.out.println("...Done!"); + + } + + private static void parseArguments( String[] args ) { + int iii = 0; + String arg; + + try { + while( iii < args.length && args[iii].startsWith("-") ) { + arg = args[iii++]; + + if( arg.equals( "-recalFile" ) ) { + RECAL_FILE = args[iii++]; + } else if( arg.equals( "-Rscript" ) ) { + PATH_TO_RSCRIPT = args[iii++]; + } else if( arg.equals( "-resources" ) ) { + PATH_TO_RESOURCES = args[iii++]; + } else if( arg.equals( "-ignoreQ" ) ) { + IGNORE_QSCORES_LESS_THAN = Integer.parseInt( args[iii++] ); + } else if (arg.equals( "-numRG" ) ) { + NUM_READ_GROUPS_TO_PROCESS = Integer.parseInt( args[iii++] ); + } else if( arg.equals( "-outputDir" ) ) { + OUTPUT_DIR = args[iii++]; + } else { + iii = -1; + break; + } + } + + if( iii != args.length ) { + throw new RuntimeException( "Exception" ); + } + } catch(Exception e) { + System.out.println( "Usage: [-option param] \n" ); + System.out.println(" Available options:"); + System.out.println("\t-recalFile \tPath to input recal csv file. Default value: output.recal_data.csv"); + System.out.println("\t-Rscript \t\tPath to your implementation of Rscript. Default value: /broad/tools/apps/R-2.6.0/bin/Rscript"); + System.out.println("\t-resources \tPath to resources folder holding the Sting R scripts. Default value: R/"); + System.out.println("\t-outputDir \tWhere to put the output plots. Default value: analyzeCovariates/"); + System.out.println("\t-ignoreQ \t\tIgnore bases with reported quality less than this number. Default value: 5"); + System.out.println("\t-numRG \t\tOnly process N read groups. Default value: -1 (process all read groups)"); + System.exit(-1); + } + } + + private static void initializeData() { + + // Get a list of all available covariates + List> classes = PackageUtils.getClassesImplementingInterface(Covariate.class); + + int lineNumber = 0; + boolean foundAllCovariates = false; + int estimatedCapacity = 1; // Capacity is multiplicitive so this starts at one + + // Read in the covariates that were used from the input file + requestedCovariates = new ArrayList(); + + try { + for ( String line : new xReadLines(new File( RECAL_FILE )) ) { + lineNumber++; + if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches()) { + ; // Skip over the comment lines, (which start with '#') + } + else if( COVARIATE_PATTERN.matcher(line).matches() ) { // The line string is either specifying a covariate or is giving csv data + if( foundAllCovariates ) { + throw new RuntimeException( "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE ); + } else { // Found the covariate list in input file, loop through all of them and instantiate them + String[] vals = line.split(","); + for( int iii = 0; iii < vals.length - 3; iii++ ) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical + boolean foundClass = false; + for( Class covClass : classes ) { + if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) { + foundClass = true; + try { + Covariate covariate = (Covariate)covClass.newInstance(); + requestedCovariates.add( covariate ); + estimatedCapacity *= covariate.estimatedNumberOfBins(); + + } catch ( InstantiationException e ) { + throw new RuntimeException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) ); + } catch ( IllegalAccessException e ) { + throw new RuntimeException( String.format("Can not instantiate covariate class '%s': must have no-arg constructor.", covClass.getSimpleName()) ); + } + } + } + + if( !foundClass ) { + throw new RuntimeException( "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option." ); + } + } + + } + + } else { // Found a line of data + if( !foundAllCovariates ) { + + foundAllCovariates = true; + + // At this point all the covariates should have been found and initialized + if( requestedCovariates.size() < 2 ) { + throw new RuntimeException( "Malformed input recalibration file. Covariate names can't be found in file: " + RECAL_FILE ); + } + + // Don't want to crash with out of heap space exception + if( estimatedCapacity > 300 * 40 * 200 || estimatedCapacity < 0 ) { // Could be negative if overflowed + estimatedCapacity = 300 * 40 * 200; + } + + // Initialize any covariate member variables using the shared argument collection + for( Covariate cov : requestedCovariates ) { + cov.initialize( new RecalibrationArgumentCollection() ); + } + + // Initialize the data hashMaps + dataManager = new AnalysisDataManager( requestedCovariates.size() ); + + } + addCSVData(line); // Parse the line and add the data to the HashMap + } + } + + } catch ( FileNotFoundException e ) { + throw new RuntimeException("Can not find input file: " + RECAL_FILE); + } catch ( NumberFormatException e ) { + throw new RuntimeException("Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker."); + } + } + + private static void addCSVData(String line) { + String[] vals = line.split(","); + + // Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly + if( vals.length != requestedCovariates.size() + 3 ) { // +3 because of nObservations, nMismatch, and Qempirical + throw new RuntimeException("Malformed input recalibration file. Found data line with too many fields: " + line + + " --Perhaps the read group string contains a comma and isn't being parsed correctly."); + } + + ArrayList key = new ArrayList(); + Covariate cov; + int iii; + for( iii = 0; iii < requestedCovariates.size(); iii++ ) { + cov = requestedCovariates.get( iii ); + key.add( cov.getValue( vals[iii] ) ); + } + // Create a new datum using the number of observations, number of mismatches, and reported quality score + RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ) ); + // Add that datum to all the collapsed tables which will be used in the sequential calculation + dataManager.addToAllTables( key, datum, IGNORE_QSCORES_LESS_THAN ); + + } + + private static void writeDataTables() { + + int numReadGroups = 0; + + // for each read group + NHashMap readGroupTable = dataManager.getCollapsedTable(0); + for( List readGroupKey : readGroupTable.keySet() ) { + + if(NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS) { + String readGroup = readGroupKey.get(0).toString(); + RecalDatum readGroupDatum = readGroupTable.get(readGroupKey); + System.out.print("Writing out data tables for read group: " + readGroup + "\twith " + readGroupDatum.getNumObservations() + " observations" ); + System.out.println("\tand aggregate residual error = " + String.format("%.3f", readGroupDatum.empiricalQualDouble(0) - readGroupDatum.getEstimatedQReported())); + + // for each covariate + for( int iii = 1; iii < requestedCovariates.size(); iii++ ) { + Covariate cov = requestedCovariates.get(iii); + + // Create a PrintStream + PrintStream output = null; + try { + output = new PrintStream(new FileOutputStream(OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat")); + + } catch (FileNotFoundException e) { + System.err.println("Can't create file: " + OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat"); + System.exit(-1); + } + + // Output the header + output.println("Covariate\tQreported\tQempirical\tnMismatches\tnBases"); + + // Loop through the covariate table looking for keys with matching read groups + // BUGBUG: hopefully rewrite this to be more efficient + for( List covariateKey : dataManager.getCollapsedTable(iii).keySet() ) { + if( covariateKey.get(0).toString().equals(readGroup) ) { + output.print( covariateKey.get(1).toString() + "\t" ); // Covariate + RecalDatum thisDatum = dataManager.getCollapsedTable(iii).get(covariateKey); + output.print( String.format("%.3f", thisDatum.getEstimatedQReported()) + "\t" ); // Qreported + output.print( String.format("%.3f", thisDatum.empiricalQualDouble(0)) + "\t" ); // Qempirical + output.print( thisDatum.getNumMismatches() + "\t" ); // nMismatches + output.println( thisDatum.getNumObservations() ); // nBases + } + } + + // Close the PrintStream + output.close(); + } + } else { + break; + } + + } + } + + private static void callRScripts() { + + int numReadGroups = 0; + + // for each read group + for( List readGroupList : dataManager.getCollapsedTable(0).keySet() ) { + + if(NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS) { + + String readGroup = readGroupList.get(0).toString(); + System.out.println("Analyzing read group: " + readGroup); + + // for each covariate + for( int iii = 1; iii < requestedCovariates.size(); iii++ ) { + Covariate cov = requestedCovariates.get(iii); + try { + if( iii == 1 ) { + // Analyze reported quality + Process p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_QualityScoreCovariate.R" + " " + + OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " + + IGNORE_QSCORES_LESS_THAN); // The third argument is the Q scores that should be turned pink in the plot because they were ignored + } else { // Analyze all other covariates + Process p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_OtherCovariate.R" + " " + + OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " + + cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument which is the name of the covariate to make the plots look nice + } + } catch (IOException e) { + e.printStackTrace(); + System.exit(-1); + } + } + } else { + break; + } + } + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index 2b804e7c4..2e74067e4 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -44,6 +44,7 @@ import net.sf.samtools.SAMReadGroupRecord; */ public class RecalDataManager { + public NHashMap data; // The full dataset private NHashMap dataCollapsedReadGroup; // Table where everything except read group has been collapsed private NHashMap dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index 8702cb85f..d972d1ddf 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -170,7 +170,7 @@ public class TableRecalibrationWalker extends ReadWalker