Implementation of the Gatherer class for CountCovariates, which now makes it scatter/gatherable. Kudos to the @Gather annotation Khalid just introduced!
QuickCCTest is my test script for the gatherer. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5547 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
20344a27b4
commit
0a772688fe
|
|
@ -30,27 +30,24 @@ public class CountCovariatesGatherer extends Gatherer {
|
||||||
private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
|
private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
|
||||||
private static final String EOF_MARKER = "EOF";
|
private static final String EOF_MARKER = "EOF";
|
||||||
|
|
||||||
private HashMap<String, int[]> dataMap;
|
private HashMap<String, RecalDatumOptimized> dataMap;
|
||||||
|
|
||||||
|
|
||||||
private void addCSVData (String line) {
|
private void addCSVData (String line) {
|
||||||
String[] covariates = line.split(",");
|
String[] covariates = line.split(",");
|
||||||
String key = "";
|
String key = "";
|
||||||
int [] values = new int[3];
|
RecalDatumOptimized values;
|
||||||
|
|
||||||
for (int i = 0; i < covariates.length-3; i++) {
|
for (int i = 0; i < covariates.length-3; i++) {
|
||||||
key += covariates[i] + ",";
|
key += covariates[i] + ",";
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = covariates.length-3; i < covariates.length; i++) {
|
values = new RecalDatumOptimized(Integer.parseInt(covariates[covariates.length-3]),
|
||||||
values[i] = Integer.parseInt(covariates[i].trim());
|
Integer.parseInt(covariates[covariates.length-2]));
|
||||||
}
|
|
||||||
|
|
||||||
if (dataMap.get(key) != null) {
|
if (dataMap.get(key) != null) {
|
||||||
int [] currentValues = dataMap.get(key);
|
RecalDatumOptimized currentValues = dataMap.get(key);
|
||||||
for (int i = 0; i < 2; i++) {
|
values.increment(currentValues);
|
||||||
values[i] += currentValues[i];// todo -- update the third value using the CountCovariatesWalker function
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
dataMap.put(key, values);
|
dataMap.put(key, values);
|
||||||
|
|
@ -58,7 +55,7 @@ public class CountCovariatesGatherer extends Gatherer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void gather(List<File> inputs, File output) {
|
public void gather(List<File> inputs, File output) {
|
||||||
dataMap = new HashMap<String, int[]>();
|
dataMap = new HashMap<String, RecalDatumOptimized>();
|
||||||
PrintStream o;
|
PrintStream o;
|
||||||
try {
|
try {
|
||||||
o = new PrintStream(output);
|
o = new PrintStream(output);
|
||||||
|
|
@ -67,7 +64,7 @@ public class CountCovariatesGatherer extends Gatherer {
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean sawEOF = false;
|
boolean sawEOF = false;
|
||||||
boolean headerPrinted = false;
|
boolean printedHeader = false;
|
||||||
|
|
||||||
// Read input files
|
// Read input files
|
||||||
for ( File RECAL_FILE : inputs) {
|
for ( File RECAL_FILE : inputs) {
|
||||||
|
|
@ -76,11 +73,12 @@ public class CountCovariatesGatherer extends Gatherer {
|
||||||
if ( EOF_MARKER.equals(line) ) {
|
if ( EOF_MARKER.equals(line) ) {
|
||||||
sawEOF = true; // sanity check
|
sawEOF = true; // sanity check
|
||||||
}
|
}
|
||||||
else if( COMMENT_PATTERN.matcher(line).matches() || COVARIATE_PATTERN.matcher(line).matches() ) {
|
else if(COMMENT_PATTERN.matcher(line).matches()) {
|
||||||
if (!headerPrinted) {
|
; // It doesn't make any sense to print intermediate comments, unless we merge them somehow (would require strict definition for the header)
|
||||||
headerPrinted = true;
|
}
|
||||||
|
else if (COVARIATE_PATTERN.matcher(line).matches()) {
|
||||||
|
if (!printedHeader)
|
||||||
o.println(line);
|
o.println(line);
|
||||||
} // Skip over the header (could check if headers are the same, but probably not necessary)
|
|
||||||
}
|
}
|
||||||
else { // Found a line of data
|
else { // Found a line of data
|
||||||
addCSVData(line); // Parse the line and add the data to the HashMap
|
addCSVData(line); // Parse the line and add the data to the HashMap
|
||||||
|
|
@ -95,14 +93,15 @@ public class CountCovariatesGatherer extends Gatherer {
|
||||||
final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted!";
|
final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted!";
|
||||||
throw new UserException.MalformedFile(RECAL_FILE, errorMessage);
|
throw new UserException.MalformedFile(RECAL_FILE, errorMessage);
|
||||||
}
|
}
|
||||||
|
printedHeader = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write output file from dataMap
|
// Write output file from dataMap
|
||||||
for(String key : dataMap.keySet()) {
|
for(String key : dataMap.keySet()) {
|
||||||
int [] values = dataMap.get(key);
|
RecalDatumOptimized values = dataMap.get(key);
|
||||||
String v = "," + values[0] + "," + values[1] + "," + values[2];
|
String v = values.getNumObservations() + "," + values.getNumMismatches() + "," + values.empiricalQualByte();
|
||||||
o.println(key + v);
|
o.println(key + v);
|
||||||
}
|
}
|
||||||
|
o.println("EOF");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -92,13 +92,13 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
||||||
@ArgumentCollection private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
@ArgumentCollection private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||||
|
|
||||||
@Output
|
@Output
|
||||||
@Gather(CountCovariatesGatherer.class)
|
|
||||||
PrintStream out;
|
PrintStream out;
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Command Line Arguments
|
// Command Line Arguments
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
@Output(fullName="recal_file", shortName="recalFile", required=true, doc="Filename for the outputted covariates table recalibration file")
|
@Output(fullName="recal_file", shortName="recalFile", required=true, doc="Filename for the outputted covariates table recalibration file")
|
||||||
|
@Gather(CountCovariatesGatherer.class)
|
||||||
public PrintStream RECAL_FILE;
|
public PrintStream RECAL_FILE;
|
||||||
|
|
||||||
@Argument(fullName="list", shortName="ls", doc="List the available covariates and exit", required=false)
|
@Argument(fullName="list", shortName="ls", doc="List the available covariates and exit", required=false)
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,8 @@ class QuickCCTest extends QScript {
|
||||||
|
|
||||||
def script = {
|
def script = {
|
||||||
|
|
||||||
|
val recal = new File("recal.csv")
|
||||||
|
|
||||||
val cc = new CountCovariates()
|
val cc = new CountCovariates()
|
||||||
cc.reference_sequence = reference
|
cc.reference_sequence = reference
|
||||||
cc.input_file :+= input
|
cc.input_file :+= input
|
||||||
|
|
@ -40,7 +42,8 @@ class QuickCCTest extends QScript {
|
||||||
cc.intervalsString = intervals
|
cc.intervalsString = intervals
|
||||||
cc.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
|
cc.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
|
||||||
cc.scatterCount = 4
|
cc.scatterCount = 4
|
||||||
cc.recal_file = new File("recal.csv")
|
cc.recal_file = recal
|
||||||
|
cc.memoryLimit = 4
|
||||||
|
|
||||||
add(cc);
|
add(cc);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue