Updated version of the recalibration tool

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1060 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-06-19 17:45:47 +00:00
parent aef519b427
commit 8ac40e8e2d
11 changed files with 699 additions and 176 deletions

View File

@ -19,8 +19,8 @@ import java.io.FileNotFoundException;
@WalkerName("CountCovariates")
public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
@Argument(fullName="maxReadLen", shortName="mrl", doc="max read length", required=false)
public int maxReadLen = 101;
@Argument(fullName="buggyMaxReadLen", doc="If we see a read longer than this, we assume there's a bug and abort", required=false)
public int buggyMaxReadLen = 100000;
//@Argument(fullName="MAX_QUAL_SCORE", shortName="mqs", doc="max quality score", required=false)
//public int MAX_QUAL_SCORE = 63;
@ -28,11 +28,11 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
@Argument(fullName="OUTPUT_FILEROOT", shortName="outroot", required=false, doc="Filename root for the outputted logistic regression training files")
public String OUTPUT_FILEROOT = "output";
@Argument(fullName="CREATE_TRAINING_DATA", shortName="trainingdata", required=false, doc="Create training data files for logistic regression")
public boolean CREATE_TRAINING_DATA = true;
//@Argument(fullName="CREATE_TRAINING_DATA", shortName="trainingdata", required=false, doc="Create training data files for logistic regression")
//public boolean CREATE_TRAINING_DATA = true;
@Argument(fullName="DOWNSAMPLE_FRACTION", shortName="sample", required=false, doc="Fraction of bases to randomly sample")
public float DOWNSAMPLE_FRACTION=1.0f;
//@Argument(fullName="DOWNSAMPLE_FRACTION", shortName="sample", required=false, doc="Fraction of bases to randomly sample")
//public float DOWNSAMPLE_FRACTION=1.0f;
@Argument(fullName="MIN_MAPPING_QUALITY", shortName="minmap", required=false, doc="Only use reads with at least this quality score")
public int MIN_MAPPING_QUALITY = 0;
@ -47,8 +47,11 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
public List<String> platforms = Collections.singletonList("*");
//public List<String> platforms = Collections.singletonList("ILLUMINA");
@Argument(fullName="rawData", shortName="raw", required=false, doc="If true, raw mismatch observations will be output to a file")
public boolean outputRawData = true;
//@Argument(fullName="rawData", shortName="raw", required=false, doc="If true, raw mismatch observations will be output to a file")
//public boolean outputRawData = true;
@Argument(fullName="writeDetailedAnalysisFiles", required=false, doc="If true, we'll write detailed analysis files out at the end of covariate calcuilations")
public boolean writeDetailedAnalysisFiles = false;
@Argument(fullName="collapsePos", shortName="collapsePos", required=false, doc="")
public boolean collapsePos = false;
@ -76,9 +79,11 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
if( !isSupportedReadGroup(readGroup) )
continue;
String rg = readGroup.getReadGroupId();
RecalDataManager manager = new RecalDataManager(rg, maxReadLen, QualityUtils.MAX_QUAL_SCORE+1, RecalData.NDINUCS, ! collapsePos, ! collapseDinuc );
//RecalDataManager manager = new RecalDataManager(rg, maxReadLen, QualityUtils.MAX_QUAL_SCORE+1, RecalData.NDINUCS, ! collapsePos, ! collapseDinuc );
RecalDataManager manager = new RecalDataManager(rg, ! collapsePos, ! collapseDinuc );
data.put(rg, manager);
}
out.printf("Created recalibration data collectors for %d read group(s)%n", data.size());
}
private RecalData getRecalData(String readGroup, int pos, int qual, char prevBase, char base) {
@ -100,7 +105,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
for (int i =0; i < reads.size(); i++ ) {
SAMRecord read = reads.get(i);
if ( read.getReadLength() > maxReadLen ) {
if ( read.getReadLength() > buggyMaxReadLen ) {
throw new RuntimeException("Expectedly long read, please increase maxium read len with maxReadLen parameter: " + read.format());
}
@ -140,7 +145,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
}
int qual = quals[offset];
if ( qual > 0 && qual <= QualityUtils.MAX_QUAL_SCORE ) {
if ( qual > 0 ) { // && qual <= QualityUtils.MAX_QUAL_SCORE ) {
// previous base is the next base in terms of machine chemistry if this is a negative strand
// if ( qual == 2 )
//if ( read.getReadName().equals("30PTAAAXX090126:5:14:132:764#0") )
@ -156,6 +161,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
}
public void onTraversalDone(Integer result) {
/*
PrintStream covars_out;
try {
covars_out = new PrintStream(OUTPUT_FILEROOT+".covars.out");
@ -168,14 +174,11 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
} catch (FileNotFoundException e) {
System.err.println("FileNotFoundException: " + e.getMessage());
}
qualityEmpiricalObserved();
qualityDiffVsCycle();
qualityDiffVsDinucleotide();
*/
printInfo(out);
if (CREATE_TRAINING_DATA) writeTrainingData();
writeTrainingData();
writeAnalysisData();
}
private void printInfo(PrintStream out) {
@ -190,8 +193,17 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
private void writeTrainingData() {
out.printf("Writing raw recalibration data%n");
writeRawTable();
out.printf("...done%n");
out.printf("Writing logistic recalibration data%n");
writeLogisticRecalibrationTable();
out.printf("...done%n");
}
private void writeLogisticRecalibrationTable() {
PrintStream dinuc_out = null;
PrintStream table_out = null;
try {
dinuc_out = new PrintStream( OUTPUT_FILEROOT+".covariate_counts.csv");
dinuc_out.println("rg,dn,logitQ,pos,indicator,count");
@ -207,28 +219,62 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
}
}
}
if ( outputRawData ) {
table_out = new PrintStream( OUTPUT_FILEROOT+".raw_data.csv");
printInfo(table_out);
table_out.println("rg,dn,Qrep,pos,NBases,MMismatches,Qemp");
for (SAMReadGroupRecord readGroup : this.getToolkit().getEngine().getSAMHeader().getReadGroups()) {
for ( RecalData datum: getRecalData(readGroup.getReadGroupId()) ) {
if ( datum.N > 0 )
table_out.format("%s%n", datum.toCSVString(collapsePos));
}
}
}
}
catch (FileNotFoundException e) {
System.err.println("FileNotFoundException: " + e.getMessage());
return;
}
finally {
if (dinuc_out != null) dinuc_out.close();
if (table_out != null) table_out.close();
}
}
private void writeRawTable() {
PrintStream table_out = null;
try {
table_out = new PrintStream( OUTPUT_FILEROOT+".raw_data.csv");
printInfo(table_out);
table_out.println("rg,dn,Qrep,pos,NBases,MMismatches,Qemp");
for (SAMReadGroupRecord readGroup : this.getToolkit().getEngine().getSAMHeader().getReadGroups()) {
for ( RecalData datum: getRecalData(readGroup.getReadGroupId()) ) {
if ( datum.N > 0 )
table_out.format("%s%n", datum.toCSVString(collapsePos));
}
}
} catch (FileNotFoundException e ) {
System.err.println("FileNotFoundException: " + e.getMessage());
}
finally {
if (table_out != null) table_out.close();
}
}
// --------------------------------------------------------------------------------------------------------------
//
// Analysis data
//
// --------------------------------------------------------------------------------------------------------------
private void writeAnalysisData() {
if ( writeDetailedAnalysisFiles ) {
for (SAMReadGroupRecord readGroup : this.getToolkit().getEngine().getSAMHeader().getReadGroups()) {
PrintStream DiffVsCycleFile = null, DiffVsDinucFile = null, EmpObsFile = null;
try {
DiffVsCycleFile = new PrintStream(OUTPUT_FILEROOT+".RG_"+readGroup.getReadGroupId()+".quality_difference_v_cycle.csv");
DiffVsDinucFile = new PrintStream(OUTPUT_FILEROOT+".RG_"+readGroup.getReadGroupId()+".quality_difference_v_dinucleotide.csv");
EmpObsFile = new PrintStream(OUTPUT_FILEROOT+".RG_"+readGroup.getReadGroupId()+".empirical_v_reported_quality.csv");
qualityEmpiricalObserved(EmpObsFile, readGroup.getReadGroupId());
qualityDiffVsCycle(DiffVsCycleFile, readGroup.getReadGroupId());
qualityDiffVsDinucleotide(DiffVsDinucFile, readGroup.getReadGroupId());
} catch (FileNotFoundException e){
throw new RuntimeException("Could not open output files based on OUTPUT_FILEROOT option: " + OUTPUT_FILEROOT, e);
} finally {
if ( DiffVsCycleFile != null) DiffVsCycleFile.close();
if ( DiffVsDinucFile != null) DiffVsDinucFile.close();
if ( EmpObsFile != null) EmpObsFile.close();
}
}
}
}
class MeanReportedQuality {
@ -249,114 +295,83 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
}
}
public void qualityDiffVsCycle() {
for (SAMReadGroupRecord readGroup : this.getToolkit().getEngine().getSAMHeader().getReadGroups()) {
PrintStream ByCycleFile = null;
try {
ByCycleFile = new PrintStream(OUTPUT_FILEROOT+".RG_"+readGroup.getReadGroupId()+".quality_difference_v_cycle.csv");
} catch (FileNotFoundException e){
System.out.println("Could not open output files based on OUTPUT_FILEROOT option: " + OUTPUT_FILEROOT);
System.exit(1);
}
ArrayList<RecalData> ByCycle = new ArrayList<RecalData>();
ArrayList<MeanReportedQuality> ByCycleReportedQ = new ArrayList<MeanReportedQuality>();
ByCycleFile.printf("cycle,Qemp-obs,Qemp,Qobs,B,N%n");
RecalData All = new RecalData(0,0,readGroup.getReadGroupId(),"");
MeanReportedQuality AllReported = new MeanReportedQuality();
for (int c=0; c < maxReadLen+1; c++) {
ByCycle.add(new RecalData(c, -1,readGroup.getReadGroupId(),"-"));
ByCycleReportedQ.add(new MeanReportedQuality());
}
for ( RecalData datum: getRecalData(readGroup.getReadGroupId()) ) {
ByCycle.get(datum.pos).inc(datum.N, datum.B);
ByCycleReportedQ.get(datum.pos).inc(datum.qual, datum.N);
All.inc(datum.N, datum.B);
AllReported.inc(datum.qual, datum.N);
}
for (int c=0; c < maxReadLen+1; c++) {
double empiricalQual = -10 * Math.log10((double)ByCycle.get(c).B / ByCycle.get(c).N);
double reportedQual = ByCycleReportedQ.get(c).result();
ByCycleFile.printf("%d, %f, %f, %f, %d, %d%n", c, empiricalQual-reportedQual, empiricalQual, reportedQual, ByCycle.get(c).B, ByCycle.get(c).N);
}
public void qualityDiffVsCycle(PrintStream file, final String readGroup) {
ArrayList<RecalData> ByCycle = new ArrayList<RecalData>();
ArrayList<MeanReportedQuality> ByCycleReportedQ = new ArrayList<MeanReportedQuality>();
file.printf("cycle,Qemp-obs,Qemp,Qobs,B,N%n");
RecalData All = new RecalData(0,0,readGroup,"");
MeanReportedQuality AllReported = new MeanReportedQuality();
RecalDataManager manager = data.get(readGroup);
int maxReadLen = manager.getMaxReadLen();
for (int c=0; c < maxReadLen+1; c++) {
ByCycle.add(new RecalData(c, -1,readGroup,"-"));
ByCycleReportedQ.add(new MeanReportedQuality());
}
for ( RecalData datum: getRecalData(readGroup) ) {
ByCycle.get(datum.pos).inc(datum.N, datum.B);
ByCycleReportedQ.get(datum.pos).inc(datum.qual, datum.N);
All.inc(datum.N, datum.B);
AllReported.inc(datum.qual, datum.N);
}
for (int c=0; c < maxReadLen+1; c++) {
double empiricalQual = -10 * Math.log10((double)ByCycle.get(c).B / ByCycle.get(c).N);
double reportedQual = ByCycleReportedQ.get(c).result();
file.printf("%d, %f, %f, %f, %d, %d%n", c, empiricalQual-reportedQual, empiricalQual, reportedQual, ByCycle.get(c).B, ByCycle.get(c).N);
}
//System.out.printf("Cycle: N=%d, B=%d, Qemp=%.1f, ", All.N, All.B, -10 * Math.log10((double)All.B/All.N));
//System.out.printf("Qrep=%.1f%n", AllReported.result());
}
public void qualityDiffVsDinucleotide() {
for (SAMReadGroupRecord readGroup : this.getToolkit().getEngine().getSAMHeader().getReadGroups()) {
PrintStream ByDinucFile = null;
try {
ByDinucFile = new PrintStream(OUTPUT_FILEROOT+".RG_"+readGroup.getReadGroupId()+".quality_difference_v_dinucleotide.csv");
} catch (FileNotFoundException e){
System.out.println("Could not open output files based on OUTPUT_FILEROOT option: " + OUTPUT_FILEROOT);
System.exit(1);
}
ArrayList<RecalData> ByCycle = new ArrayList<RecalData>();
ArrayList<MeanReportedQuality> ByCycleReportedQ = new ArrayList<MeanReportedQuality>();
ByDinucFile.printf("dinuc,Qemp-obs,Qemp,Qobs,B,N%n");
RecalData All = new RecalData(0,0,readGroup.getReadGroupId(),"");
MeanReportedQuality AllReported = new MeanReportedQuality();
for (int c=0; c < RecalData.NDINUCS; c++) {
ByCycle.add(new RecalData(-1, -1,readGroup.getReadGroupId(),RecalData.dinucIndex2bases(c)));
ByCycleReportedQ.add(new MeanReportedQuality());
}
for ( RecalData datum: getRecalData(readGroup.getReadGroupId()) ) {
int dinucIndex = RecalData.dinucIndex(datum.dinuc); //bases2dinucIndex(datum.dinuc.charAt(0), datum.dinuc.charAt(1), false);
ByCycle.get(dinucIndex).inc(datum.N, datum.B);
ByCycleReportedQ.get(dinucIndex).inc(datum.qual, datum.N);
All.inc(datum.N, datum.B);
AllReported.inc(datum.qual, datum.N);
}
for (int c=0; c < RecalData.NDINUCS; c++) {
double empiricalQual = -10 * Math.log10((double)ByCycle.get(c).B / ByCycle.get(c).N);
double reportedQual = ByCycleReportedQ.get(c).result();
ByDinucFile.printf("%s, %f, %f, %f, %d, %d%n", ByCycle.get(c).dinuc, empiricalQual-reportedQual, empiricalQual, reportedQual, ByCycle.get(c).B, ByCycle.get(c).N);
}
public void qualityDiffVsDinucleotide(PrintStream file, final String readGroup) {
ArrayList<RecalData> ByCycle = new ArrayList<RecalData>();
ArrayList<MeanReportedQuality> ByCycleReportedQ = new ArrayList<MeanReportedQuality>();
file.printf("dinuc,Qemp-obs,Qemp,Qobs,B,N%n");
RecalData All = new RecalData(0,0,readGroup,"");
MeanReportedQuality AllReported = new MeanReportedQuality();
for (int c=0; c < RecalData.NDINUCS; c++) {
ByCycle.add(new RecalData(-1, -1,readGroup,RecalData.dinucIndex2bases(c)));
ByCycleReportedQ.add(new MeanReportedQuality());
}
for ( RecalData datum: getRecalData(readGroup) ) {
int dinucIndex = RecalData.dinucIndex(datum.dinuc); //bases2dinucIndex(datum.dinuc.charAt(0), datum.dinuc.charAt(1), false);
ByCycle.get(dinucIndex).inc(datum.N, datum.B);
ByCycleReportedQ.get(dinucIndex).inc(datum.qual, datum.N);
All.inc(datum.N, datum.B);
AllReported.inc(datum.qual, datum.N);
}
for (int c=0; c < RecalData.NDINUCS; c++) {
double empiricalQual = -10 * Math.log10((double)ByCycle.get(c).B / ByCycle.get(c).N);
double reportedQual = ByCycleReportedQ.get(c).result();
file.printf("%s, %f, %f, %f, %d, %d%n", ByCycle.get(c).dinuc, empiricalQual-reportedQual, empiricalQual, reportedQual, ByCycle.get(c).B, ByCycle.get(c).N);
}
//System.out.printf("Dinuc: N=%d, B=%d, Qemp=%.1f, ", All.N, All.B, -10 * Math.log10((double)All.B/All.N));
//System.out.printf("Qrep=%.1f%n", AllReported.result());
}
public void qualityEmpiricalObserved() {
for (SAMReadGroupRecord readGroup : this.getToolkit().getEngine().getSAMHeader().getReadGroups()) {
PrintStream ByQualFile = null;
try {
ByQualFile = new PrintStream(OUTPUT_FILEROOT+".RG_"+readGroup.getReadGroupId()+".empirical_v_reported_quality.csv");
} catch (FileNotFoundException e){
System.out.println("Could not open output files based on OUTPUT_FILEROOT option: " + OUTPUT_FILEROOT);
System.exit(1);
}
ArrayList<RecalData> ByQ = new ArrayList<RecalData>();
ArrayList<MeanReportedQuality> ByQReportedQ = new ArrayList<MeanReportedQuality>();
ByQualFile.printf("Qrep,Qemp,Qrep_avg,B,N%n");
RecalData All = new RecalData(0,0,readGroup.getReadGroupId(),"");
MeanReportedQuality AllReported = new MeanReportedQuality();
for (int q=0; q<QualityUtils.MAX_QUAL_SCORE+1; q++) {
ByQ.add(new RecalData(-1,q,readGroup.getReadGroupId(),"-"));
ByQReportedQ.add(new MeanReportedQuality());
}
for ( RecalData datum: getRecalData(readGroup.getReadGroupId()) ){
ByQ.get(datum.qual).inc(datum.N, datum.B);
ByQReportedQ.get(datum.qual).inc(datum.qual, datum.N);
All.inc(datum.N, datum.B);
AllReported.inc(datum.qual, datum.N);
//out.printf("%2d%6d%3d %2d %s%n", datum.qual, datum.N, datum.pos, datum.qual, datum.dinuc);
}
for (int q=0; q<QualityUtils.MAX_QUAL_SCORE; q++) {
double empiricalQual = -10 * Math.log10((double)ByQ.get(q).B / ByQ.get(q).N);
ByQualFile.printf("%3d, %2.2f, %2.2f, %12d, %12d%n", q, empiricalQual, ByQReportedQ.get(q).result(), ByQ.get(q).B, ByQ.get(q).N);
//out.printf("%3d,%s,%3d,%5.1f,%5.1f,%6d,%6d", pos, dinuc, qual, empiricalQual, qual-empiricalQual, N, B); n
}
public void qualityEmpiricalObserved(PrintStream file, final String readGroup) {
ArrayList<RecalData> ByQ = new ArrayList<RecalData>();
ArrayList<MeanReportedQuality> ByQReportedQ = new ArrayList<MeanReportedQuality>();
file.printf("Qrep,Qemp,Qrep_avg,B,N%n");
RecalData All = new RecalData(0,0,readGroup,"");
MeanReportedQuality AllReported = new MeanReportedQuality();
for (int q=0; q<QualityUtils.MAX_QUAL_SCORE+1; q++) {
ByQ.add(new RecalData(-1,q,readGroup,"-"));
ByQReportedQ.add(new MeanReportedQuality());
}
for ( RecalData datum: getRecalData(readGroup) ){
ByQ.get(datum.qual).inc(datum.N, datum.B);
ByQReportedQ.get(datum.qual).inc(datum.qual, datum.N);
All.inc(datum.N, datum.B);
AllReported.inc(datum.qual, datum.N);
//out.printf("%2d%6d%3d %2d %s%n", datum.qual, datum.N, datum.pos, datum.qual, datum.dinuc);
}
for (int q=0; q<QualityUtils.MAX_QUAL_SCORE; q++) {
double empiricalQual = -10 * Math.log10((double)ByQ.get(q).B / ByQ.get(q).N);
file.printf("%3d, %2.2f, %2.2f, %12d, %12d%n", q, empiricalQual, ByQReportedQ.get(q).result(), ByQ.get(q).B, ByQ.get(q).N);
//out.printf("%3d,%s,%3d,%5.1f,%5.1f,%6d,%6d", pos, dinuc, qual, empiricalQual, qual-empiricalQual, N, B); n
}
//System.out.printf("Emp-Obs: N=%d, B=%d, Qemp=%.1f, ", All.N, All.B, -10 * Math.log10((double)All.B/All.N));
//System.out.printf("Qrep=%.1f%n", AllReported.result());
}
public Integer reduceInit() {
@ -367,12 +382,6 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
return 0;
}
Random random_genrator;
// Print out data for regression
public CovariateCounterWalker() throws FileNotFoundException {
random_genrator = new Random(123454321); // keep same random seed while debugging
}
/**
* Check to see whether this read group should be processed.
* @param readGroup

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.recalibration;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.ExpandingArrayList;
import java.util.*;
@ -13,20 +14,28 @@ import java.util.*;
*/
public class RecalDataManager {
ArrayList<RecalData> flattenData = new ArrayList<RecalData>();
RecalData[][][] data = null;
boolean trackPos, trackDinuc;
String readGroup;
int nDinucs, maxReadLen;
//RecalData[][][] data = null;
ExpandingArrayList<ExpandingArrayList<ExpandingArrayList<RecalData>>> data = null;
public RecalDataManager(String readGroup,
int maxReadLen, int maxQual, int nDinucs,
//int maxReadLen, int maxQual, int nDinucs,
boolean trackPos, boolean trackDinuc) {
data = new RecalData[maxReadLen+1][maxQual+1][nDinucs];
data = new ExpandingArrayList<ExpandingArrayList<ExpandingArrayList<RecalData>>>();
//data = new RecalData[maxReadLen+1][maxQual+1][nDinucs];
this.readGroup = readGroup;
this.trackPos = trackPos;
this.trackDinuc = trackDinuc;
this.maxReadLen = maxReadLen;
this.nDinucs = nDinucs;
//this.maxReadLen = maxReadLen;
//this.nDinucs = nDinucs;
}
public int getMaxReadLen() {
return data.size();
}
public int getPosIndex(int pos) {
@ -58,13 +67,11 @@ public class RecalDataManager {
if ( getRecalData(datum.pos, datum.qual, datum.dinuc) != null )
throw new RuntimeException(String.format("Duplicate entry discovered: %s vs. %s", getRecalData(datum.pos, datum.qual, datum.dinuc), datum));
int posIndex = getPosIndex(datum.pos);
int internalDinucIndex = getDinucIndex(datum.dinuc);
if ( internalDinucIndex == -1 ) return;
data[posIndex][datum.qual][internalDinucIndex] = datum;
set(posIndex, datum.qual, internalDinucIndex, datum);
flattenData.add(datum);
}
@ -78,18 +85,61 @@ public class RecalDataManager {
if ( internalDinucIndex == -1 ) return null;
RecalData datum = data[posIndex][qual][internalDinucIndex];
RecalData datum = get(posIndex, qual, internalDinucIndex);
if ( datum == null && expandP ) {
//System.out.printf("Allocating %s %d %d %d%n", readGroup, pos, qual, dinuc_index);
datum = new RecalData(posIndex, qual, readGroup, trackDinuc ? dinuc : "**");
data[posIndex][qual][internalDinucIndex] = datum;
set(posIndex, qual, internalDinucIndex, datum);
flattenData.add(datum);
}
return datum;
}
public List<RecalData> select(int pos, int qual, int dinuc_index ) {
/**
* Returns the RecalData associated with pos, qual, dinuc, or null if not is bound.
* Does not expand the system to corporate a new data point
* @param posIndex
* @param qual
* @param dinucIndex
* @return
*/
private RecalData get(int posIndex, int qual, int dinucIndex) {
ExpandingArrayList<ExpandingArrayList<RecalData>> d2 = data.get(posIndex);
if (d2 == null) return null;
ExpandingArrayList<RecalData> d3 = d2.get(qual);
return d3 == null ? null : d3.get(dinucIndex);
}
//private RecalData get(int posIndex, int qual, int dinucIndex) {
// return data[posIndex][qual][dinucIndex];
//}
private void set(int posIndex, int qual, int dinucIndex, RecalData datum) {
// grow data if necessary
ExpandingArrayList<ExpandingArrayList<RecalData>> d2 = data.get(posIndex);
if (d2 == null) {
d2 = new ExpandingArrayList<ExpandingArrayList<RecalData>>();
data.set(posIndex, d2);
}
// Expand d2 if necessary
ExpandingArrayList<RecalData> d3 = d2.get(qual);
if ( d3 == null ) {
d3 = new ExpandingArrayList<RecalData>();
d2.set(qual, d3);
}
// set d3 to datum
d3.set(dinucIndex, datum);
}
//private void set(int posIndex, int qual, int dinucIndex, RecalData datum) {
// data[posIndex][qual][dinucIndex] = datum;
//}
/* public List<RecalData> select(int pos, int qual, int dinuc_index ) {
List<RecalData> l = new LinkedList<RecalData>();
for ( int i = 0; i < data.length; i++ ) {
if ( i == pos || pos == -1 || ! trackPos ) {
@ -147,7 +197,7 @@ public class RecalDataManager {
System.out.printf("getDataByDinuc => %d%n", l.size());
return l;
}
}*/
public List<RecalData> getAll() {
return flattenData;

View File

@ -32,7 +32,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
HashMap<String, RecalMapping> cache = new HashMap<String, RecalMapping>();
//@Argument(shortName="serial", doc="", required=false)
boolean serialRecalibration = false;
//boolean serialRecalibration = false;
private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
private static Pattern COLLAPSED_POS_PATTERN = Pattern.compile("^#\\s+collapsed_pos\\s+(\\w+)");
@ -98,7 +98,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
// initialize the data structure
HashMap<String, RecalDataManager> managers = new HashMap<String, RecalDataManager>();
for ( String readGroup : readGroups ) {
RecalDataManager manager = new RecalDataManager(readGroup, maxPos, maxQReported, dinucs.size(), ! collapsedPos, ! collapsedDinuc);
RecalDataManager manager = new RecalDataManager(readGroup, ! collapsedPos, ! collapsedDinuc);
//RecalDataManager manager = new RecalDataManager(readGroup, maxPos, maxQReported, dinucs.size(), ! collapsedPos, ! collapsedDinuc);
managers.put(readGroup, manager);
}
@ -111,10 +112,10 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
for ( String readGroup : readGroups ) {
RecalDataManager manager = managers.get(readGroup);
RecalMapping mapper = null;
if ( serialRecalibration )
mapper = new SerialRecalMapping(manager, dinucs, maxPos, maxQReported);
else
mapper = new CombinatorialRecalMapping(manager, dinucs, maxPos, maxQReported);
//if ( serialRecalibration )
// mapper = new SerialRecalMapping(manager, dinucs, maxPos, maxQReported);
//else
mapper = new CombinatorialRecalMapping(manager, dinucs, maxPos, maxQReported);
cache.put(readGroup, mapper);
}
}
@ -286,7 +287,7 @@ class CombinatorialRecalMapping implements RecalMapping {
return result;
}
}*/
/*
class SerialRecalMapping implements RecalMapping {
// mapping from dinuc x Q => new Q
HashMap<String, byte[]> mappingByDinuc;
@ -338,4 +339,4 @@ class SerialRecalMapping implements RecalMapping {
return newQual;
}
}
}*/

View File

@ -0,0 +1,34 @@
package org.broadinstitute.sting.utils;
import java.util.ArrayList;
import java.util.Collection;
public class ExpandingArrayList<E> extends ArrayList<E> {
public ExpandingArrayList() { super(); }
public ExpandingArrayList(Collection<? extends E> c) { super(c); }
public ExpandingArrayList(int initialCapacity) { super(initialCapacity); }
/**
* Returns the element at the specified position in this list. If index > size,
* returns null. Otherwise tries to access the array
* @param index
* @return
* @throws IndexOutOfBoundsException in index < 0
*/
public E get(int index) throws IndexOutOfBoundsException {
if ( index < size() )
return super.get(index);
else
return null;
}
public E set(int index, E element) {
if ( index >= size() ) {
// We need to add null items until we can safely set index to element
for ( int i = size(); i <= index; i++ )
add(null);
}
return super.set(index, element);
}
}

View File

@ -0,0 +1,156 @@
// our package
package org.broadinstitute.sting.gatk.walkers.recalibration;
// the imports for unit testing.
import org.junit.Assert;
import org.junit.Test;
import org.junit.Before;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.HashSet;
/**
* Basic unit test for RecalData
*/
public class RecalDataTest extends BaseTest {
RecalData datum, starDatum;
/**
* Tests that we got a string parameter in correctly
*/
@Before
public void before() {
datum = new RecalData(2, 3, "0", "AA");
datum.B = datum.N = 1;
starDatum = new RecalData(2, 3, "0", "**");
starDatum.B = starDatum.N = 1;
}
@Test
public void testBasic() {
logger.warn("Executing testIsBetween");
Assert.assertTrue(datum.N == 1);
Assert.assertTrue(datum.B == 1);
Assert.assertTrue(datum.pos == 2);
Assert.assertTrue(datum.qual == 3);
Assert.assertTrue(datum.readGroup.equals("0"));
Assert.assertTrue(datum.dinuc.equals("AA"));
Assert.assertTrue(starDatum.N == 1);
Assert.assertTrue(starDatum.B == 1);
Assert.assertTrue(starDatum.pos == 2);
Assert.assertTrue(starDatum.qual == 3);
Assert.assertTrue(starDatum.readGroup.equals("0"));
Assert.assertTrue(starDatum.dinuc.equals("**"));
}
@Test
public void testInc() {
logger.warn("Executing testInc");
datum.inc(1L, 0L);
Assert.assertTrue(datum.N == 2);
Assert.assertTrue(datum.B == 1);
datum.inc('A', 'A');
Assert.assertTrue(datum.N == 3);
Assert.assertTrue(datum.B == 1);
datum.inc('A', 'C');
Assert.assertTrue(datum.N == 4);
Assert.assertTrue(datum.B == 2);
}
@Test
public void testEmpQual() {
logger.warn("Executing testEmpQual");
datum.B = 1;
datum.N = 1;
Assert.assertEquals(datum.empiricalQualDouble(), 0.0, 1e-5);
Assert.assertEquals(datum.empiricalQualByte(), 0);
datum.B = 1;
datum.N = 2;
Assert.assertEquals(datum.empiricalQualDouble(), 3.0103, 1e-5);
Assert.assertEquals(datum.empiricalQualByte(), 3);
datum.B = 1;
datum.N = 3;
Assert.assertEquals(datum.empiricalQualDouble(), 4.771213, 1e-5);
Assert.assertEquals(datum.empiricalQualByte(), 5);
datum.B = 1;
datum.B = 2;
Assert.assertEquals(datum.empiricalQualDouble(), 1.760913, 1e-5);
Assert.assertEquals(datum.empiricalQualByte(), 2);
datum.B = 1;
datum.N = 10;
Assert.assertEquals(datum.empiricalQualDouble(), 10.0, 1e-5);
Assert.assertEquals(datum.empiricalQualByte(), 10);
datum.B = 1;
datum.N = 100;
Assert.assertEquals(datum.empiricalQualDouble(), 20.0, 1e-5);
Assert.assertEquals(datum.empiricalQualByte(), 20);
datum.B = 1;
datum.N = 1000;
Assert.assertEquals(datum.empiricalQualDouble(), 30.0, 1e-5);
Assert.assertEquals(datum.empiricalQualByte(), 30);
datum.B = 0;
datum.N = 1000;
Assert.assertEquals(datum.empiricalQualDouble(), QualityUtils.MAX_REASONABLE_Q_SCORE, 1e-5);
Assert.assertEquals(datum.empiricalQualByte(), QualityUtils.MAX_REASONABLE_Q_SCORE);
}
public void testtoCSVString() {
logger.warn("Executing testtoCSVString");
Assert.assertEquals(datum.toCSVString(false), "0,AA,3,2,1,1,0");
Assert.assertEquals(datum.toCSVString(true), "0,AA,3,*,1,1,0");
}
public void testFromCSVString() {
logger.warn("Executing testFromCSVString");
Assert.assertEquals(RecalData.fromCSVString("0,AA,3,2,1,1,0").toCSVString(false), datum.toCSVString(false));
Assert.assertEquals(RecalData.fromCSVString("0,AA,3,*,1,1,0").toCSVString(false), datum.toCSVString(true));
Assert.assertEquals(RecalData.fromCSVString("0,**,3,2,1,1,0").toCSVString(false), starDatum.toCSVString(false));
Assert.assertEquals(RecalData.fromCSVString("0,**,3,*,1,1,0").toCSVString(false), starDatum.toCSVString(true));
}
public void testDinucIndex() {
logger.warn("Executing testDinucIndex");
HashSet<Integer> indices = new HashSet<Integer>();
HashSet<Byte> unknownBytes = new HashSet<Byte>();
byte bases[] = {'A', 'C', 'G', 'T', '*', 'N'};
unknownBytes.add((byte)'*');
unknownBytes.add((byte)'N');
for ( int i = 0; i < bases.length; i++ ) {
for ( int j = 0; j < bases.length; j++ ) {
byte[] bp = {bases[i], bases[j]};
String s = new String(bp);
int index = RecalData.dinucIndex(s);
indices.add(index);
Assert.assertEquals(index, RecalData.dinucIndex(bases[i], bases[j]));
if ( index != -1 ) {
Assert.assertEquals(RecalData.dinucIndex2bases(index), s);
} else {
Assert.assertTrue(unknownBytes.contains(bp[0]) || unknownBytes.contains(bp[1]) );
}
}
}
Assert.assertEquals(indices.size(), 17);
}
}

View File

@ -0,0 +1,153 @@
// our package
package org.broadinstitute.sting.utils;
// the imports for unit testing.
import org.junit.Assert;
import org.junit.Test;
import org.junit.Before;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalData;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.HashSet;
import java.util.Arrays;
/**
* Basic unit test for RecalData
*/
public class ExpandingArrayListTest extends BaseTest {
ExpandingArrayList<Integer> empty, initCap10, hasOne, hasTen;
@Before
public void before() {
empty = new ExpandingArrayList<Integer>();
initCap10 = new ExpandingArrayList<Integer>(10);
hasOne = new ExpandingArrayList<Integer>();
hasOne.add(1);
hasTen = new ExpandingArrayList<Integer>();
hasTen.addAll(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
}
@Test
public void testBasicSizes() {
logger.warn("Executing testBasicSizes");
Assert.assertEquals(empty.size(), 0);
Assert.assertEquals(initCap10.size(), 0);
Assert.assertEquals(hasOne.size(), 1);
Assert.assertEquals(hasTen.size(), 10);
}
@Test
public void testTenElements() {
logger.warn("Executing testTenElements");
for ( int i = 0; i < 10; i++ ) {
Assert.assertEquals((int)hasTen.get(i), i+1);
}
}
@Test
public void testSettingTenElements() {
logger.warn("Executing testSettingTenElements");
for ( int i = 0; i < 10; i++ ) {
Assert.assertEquals((int)hasTen.set(i, 2*i), i+1);
}
Assert.assertEquals(hasTen.size(), 10);
for ( int i = 0; i < 10; i++ ) {
Assert.assertEquals((int)hasTen.get(i), 2*i);
}
}
@Test
public void testAdd() {
logger.warn("Executing testAdd");
Assert.assertEquals(empty.size(), 0);
empty.add(1);
Assert.assertEquals(empty.size(), 1);
Assert.assertEquals((int)empty.get(0), 1);
empty.add(2);
Assert.assertEquals(empty.size(), 2);
Assert.assertEquals((int)empty.get(1), 2);
}
@Test
public void testSet1() {
logger.warn("Executing testSet1");
Assert.assertEquals(empty.size(), 0);
empty.set(0, 1);
Assert.assertEquals(empty.size(), 1);
Assert.assertEquals((int)empty.get(0), 1);
empty.set(1, 2);
Assert.assertEquals(empty.size(), 2);
Assert.assertEquals((int)empty.get(1), 2);
// doesn't expand
empty.set(0, 3);
Assert.assertEquals(empty.size(), 2);
Assert.assertEquals((int)empty.get(0), 3);
}
@Test
public void testSetExpanding() {
logger.warn("Executing testSetExpanding");
Assert.assertEquals(empty.size(), 0);
empty.set(3, 1);
Assert.assertEquals(empty.size(), 4);
Assert.assertEquals(empty.get(0), null);
Assert.assertEquals(empty.get(1), null);
Assert.assertEquals(empty.get(2), null);
Assert.assertEquals((int)empty.get(3), 1);
}
@Test
public void testSetExpandingReset() {
logger.warn("Executing testSetExpandingReset");
Assert.assertEquals(empty.size(), 0);
empty.set(3, 3);
empty.set(2, 2);
empty.set(1, 1);
empty.set(0, 0);
Assert.assertEquals(empty.size(), 4);
for ( int i = 0; i < 4; i++ )
Assert.assertEquals((int)empty.get(i), i);
}
@Test
public void testSetExpandingBig() {
logger.warn("Executing testSetExpandingBig");
Assert.assertEquals(empty.size(), 0);
empty.set(1000, 1000);
Assert.assertEquals(empty.size(), 1001);
for ( int i = 0; i < 1000; i++ )
Assert.assertEquals(empty.get(i), null);
Assert.assertEquals((int)empty.get(1000), 1000);
}
@Test (expected=IndexOutOfBoundsException.class )
public void testSetBadGetNegative() {
logger.warn("Executing testSetBadGetNegative");
empty.get(-1);
}
@Test
public void testSetBadGetPost() {
logger.warn("Executing testSetBadGetPost");
empty.set(1, 1);
Assert.assertEquals(empty.get(0), null);
Assert.assertEquals((int)empty.get(1), 1);
Assert.assertEquals(empty.get(2), null);
}
}

View File

@ -16,12 +16,17 @@ def geli2dbsnpFile(geli):
return os.path.join(root, flowcellDotlane) + '.dbsnp_matches'
def SingleSampleGenotyperCmd(bam, geli, use2bp):
naid = bam.split(".")[0]
naid = os.path.split(bam)[1].split(".")[0]
metrics = geli + '.metrics'
gatkPath = '/humgen/gsa-scr1/kiran/repositories/Sting/trunk/dist/GenomeAnalysisTK.jar'
hapmapChip = '/home/radon01/andrewk/hapmap_1kg/gffs/' + naid + '.gff'
hapmapChipStr = ' '.join(['--hapmap_chip', hapmapChip])
if not os.path.exists(hapmapChip):
print '*** warning, no hapmap chip resuls for', naid
hapmapChipStr = ''
targetList = '/home/radon01/depristo/work/1kg_pilot_evaluation/data/thousand_genomes_alpha_redesign.targets.interval_list'
cmd = "java -ea -jar " + gatkPath + ' ' + ' '.join(['-T SingleSampleGenotyper', '-I', bam, '-L', targetList, '-R', ref, '-D', '/humgen/gsa-scr1/GATK_Data/dbsnp.rod.out', '--hapmap_chip', hapmapChip, '-calls', geli, '-met', metrics, '-geli -fb -l INFO'])
cmd = "java -ea -jar " + gatkPath + ' ' + ' '.join(['-T SingleSampleGenotyper', '-I', bam, '-L', targetList, '-R', ref, '-D', '/humgen/gsa-scr1/GATK_Data/dbsnp_129_hg18.rod', '-metout', metrics, '-varout', geli, '-geli -l INFO ']) + hapmapChipStr
return cmd
def bams2geli(bams):
@ -34,7 +39,7 @@ def bams2geli(bams):
else:
if not os.path.exists(geli):
cmd = picard_utils.callGenotypesCmd( bam, geli, options = picard_utils.hybridSelectionExtraArgsForCalling())
jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, just_print_commands = OPTIONS.dry )
jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, just_print_commands = OPTIONS.dry )
return geli, jobid
calls = map(call1, bams)
return map(lambda x: x[0], calls), map(lambda x: x[1], calls)

View File

@ -17,7 +17,7 @@ output_root = './'
# Location of the resource files distributed with the recalibration tool.
# If editing, please end this variable with a trailing slash.
resources='resources/'
resources='recalibratorResources/'
#resources='/broad/1KG/DCC_recal/ReadQualityRecalibrator/resources/'
import getopt,glob,os,sys
@ -30,17 +30,17 @@ if not os.path.isabs(resources):
resources = os.path.abspath(application_path) + '/' + resources
# Where does the reference live?
reference_base = resources + 'human_b36_both'
reference_base = resources + 'Homo_sapiens_assembly18'
reference = reference_base + '.fasta'
reference_dict = reference_base + '.dict'
reference_fai = reference_base + '.fasta.fai'
# Where does DBSNP live?
dbsnp = resources + 'dbsnp.1kg.rod.out'
dbsnp = resources + 'dbsnp_129_hg18.rod'
# Where are the application files required to run the recalibration?
gatk = resources + 'gatk/GenomeAnalysisTK.jar'
#gatk = '/home/radon01/depristo/dev/GenomeAnalysisTK/trunk/dist/GenomeAnalysisTK.jar'
#gatk = resources + 'gatk/GenomeAnalysisTK.jar'
gatk = '/home/radon01/depristo/dev/GenomeAnalysisTK_stable/trunk/dist/GenomeAnalysisTK.jar'
logistic_regression_script = resources + 'logistic_regression.R'
empirical_vs_reported_grapher = resources + 'plot_q_emp_stated_hst.R'
@ -146,7 +146,7 @@ def usage():
# Try to parse the given command-line arguments.
try:
opts, args = getopt.gnu_getopt(sys.argv[1:],'',['recalibrate','evaluate', 'reprocess', 'dry'])
opts, args = getopt.gnu_getopt(sys.argv[1:],'',['recalibrate','evaluate', 'reprocess', 'dry', 'outputRoot='])
except getopt.GetoptError, err:
usage()
@ -168,6 +168,8 @@ for opt,arg in opts:
reprocessFiles = True
if opt == '--dry':
dryRun = True
if opt == '--outputRoot':
output_root = arg
# Default to 'recalibrate' unless the user specified only evaluate.
do_recalibration = not (evaluate_requested and not recalibrate_requested)

View File

@ -27,20 +27,17 @@ def cmd(cmd_str_from_user, farm_queue=False, output_head=None, just_print_comman
cmd_str += " \""+cmd_str_from_user + "\""
print ">>> Farming via "+cmd_str
#result = 'Job <2542666> is submitted to queue <broad>.'
result = subprocess.Popen([cmd_str, ""], shell=True, stdout=subprocess.PIPE).communicate()[0]
p = re.compile('Job <(\d+)> is submitted to queue')
jobid = p.match(result).group(1)
return jobid
else:
cmd_str = cmd_str_from_user
print ">>> Executing "+cmd_str
if just_print_commands or (globals().has_key("justPrintCommands") and globals().justPrintCommands):
return -1
elif farm_queue:
result = subprocess.Popen([cmd_str, ""], shell=True, stdout=subprocess.PIPE).communicate()[0]
p = re.compile('Job <(\d+)> is submitted to queue')
jobid = p.match(result).group(1)
return jobid
else:
# Actually execute the command if we're not just in debugging output mode
status = os.system(cmd_str)

View File

@ -0,0 +1,93 @@
#
# GATK configuration parser
#
import ConfigParser
import os.path
import sys
defaultRequiredOptions = {}
def addRequiredOption(name, type):
defaultRequiredOptions[name] = type
addRequiredOption('jar', 'input_file')
addRequiredOption('reference', 'input_file')
addRequiredOption('referenceIndex', 'input_file')
addRequiredOption('referenceDict', 'input_file')
addRequiredOption('java', 'file')
addRequiredOption('jvm_args', str)
addRequiredOption('args', str)
addRequiredOption('tmp', 'output_file')
class gatkConfigParser(ConfigParser.SafeConfigParser):
GATK = 'DEFAULT'
def __init__(self, configFiles):
ConfigParser.SafeConfigParser.__init__(self)
files = filter(None, configFiles)
print 'Reading configuration file(s):', files
print self.read(files)
self.validateRequiredOptions()
def validateRequiredOptions(self):
for key, value in defaultRequiredOptions.iteritems():
self.validateOption(self.GATK, key, value)
def validateOption(self, section, name, type = str):
v = self.getOption(section, name, type)
print ' => Validated option', name, v
def getGATKOption(self, name, type = str):
return self.getOption(self.GATK, name, type)
def getGATKModeOption(self, name, mode, type = str):
return self.getOption(mode, name, type)
def getOption(self, section, name, typeF = None):
if not self.has_option(section, name):
raise "Option %s not found in section %s" % (name, section)
else:
val = self.get(section, name)
if typeF == 'input_file' or typeF == 'output_file':
path = os.path.abspath(os.path.expanduser(val))
if typeF == 'input_file':
if not os.path.exists(path):
raise "Input file does not exist", path
if not os.access(path, os.R_OK):
raise "Input file cannot be read", path
if typeF == 'output_file':
if not os.access(path, os.W_OK):
raise "Output file cannot be written", path
return path
elif type(typeF) == str:
return str(val)
elif typeF == None:
return val
else:
return typeF(val)
def java(self): return self.getOption(self.GATK, 'java')
def jvm_args(self): return self.getOption(self.GATK, 'jvm_args')
def jar(self): return self.getOption(self.GATK, 'jar')
def gatk_args(self): return self.getOption(self.GATK, 'args')
def reference(self): return self.getOption(self.GATK, 'reference')
def gatkCmd(self, mode):
cmd = ' '.join([self.java(), self.jvm_args(), '-jar', self.jar(), self.gatk_args(), '-R', self.reference()])
cmd += ' ' + ' '.join(['-T', mode, self.getGATKModeOption('args', mode)])
return cmd
import unittest
class TestMergeBAMsUtils(unittest.TestCase):
def setUp(self):
configFile = os.path.join(os.path.split(sys.argv[0])[0] + "/../testdata/defaultGATKConfig.cfg")
self.config = gatkConfigParser(configFile)
def testValidate(self):
self.config.validateRequiredOptions()
def testCmd(self):
s = "java -ea -Xmx2048m -jar ~/dev/GenomeAnalysisTK/trunk/dist/GenomeAnalysisTK.jar -l INFO -L 1:1-10,000,000 -R /home/radon01/depristo/work/humanref/Homo_sapiens_assembly18.fasta -T CountCovariates --CREATE_TRAINING_DATA --MIN_MAPPING_QUALITY 1"
self.assertEquals(self.config.gatkCmd('CountCovariates'), s)
if __name__ == '__main__':
unittest.main()

23
testdata/defaultGATKConfig.cfg vendored 100755
View File

@ -0,0 +1,23 @@
[DEFAULT]
jar: ~/dev/GenomeAnalysisTK/trunk/dist/GenomeAnalysisTK.jar
referenceRoot: /home/radon01/depristo/work/humanref/Homo_sapiens_assembly18
reference: %(referenceRoot)s.fasta
referenceIndex: %(reference)s.fai
referenceDict: %(referenceRoot)s.dict
java: java
jvm_args: -ea -Xmx2048m
gatkData: /humgen/gsa-scr1/GATK_Data
dbsnp: %(gatkData)s/dbsnp_129_hg18.rod
tmp: /tmp/
args: -l INFO
#args: -l INFO -L chr1:1-1,000,000
[CountCovariates]
args: --CREATE_TRAINING_DATA --MIN_MAPPING_QUALITY 1 -D %(dbsnp)s
[TableRecalibration]
args: -compress 1
[R]
Rscript: /broad/tools/apps/R-2.6.0/bin/Rscript
PlotQEmpStated: ~andrewk/recalQual_resources/plot_q_emp_stated_hst.R