Placeholder commit. Implements a loader for a new multi-part GATK reporting format. See what it looks like at /home/radon01/kiran/scr1/projects/NewVariantEvalOutput/results/v1/tableexample.txt . Still need to address the issue where numeric columns are being interpreted as a vector of strings, not numbers.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4115 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ac58eb3cbb
commit
fba71e3c15
|
|
@ -0,0 +1,47 @@
|
|||
# Load a multi-part GATK report file into a named list of data frames.
#
# The file is scanned line by line:
#   * a line starting with "#:table <name>" (case-insensitive) opens a new
#     table called <name> and closes the previous one, if any;
#   * blank lines and any other line whose first non-blank character is "#"
#     are skipped;
#   * the first remaining line after a table marker is that table's header
#     (whitespace-separated column names); each following line is one
#     whitespace-separated data row.
# Lines that appear before the first table marker are ignored.
#
# Args:
#   filename: path of the report file to read.
#
# Returns:
#   A named list with one data frame per table, in the order the tables
#   appear in the file. If a table name occurs more than once, the last
#   occurrence wins. Columns are converted to logical/integer/numeric when
#   every value parses as such; all other columns stay character (never
#   factor).
read.gatkreport <- function(filename) {
  con <- file(filename, "r", blocking = TRUE)
  # Release the connection even when readLines() fails part-way through.
  on.exit(close(con), add = TRUE)
  lines <- readLines(con)

  # Turn a header vector plus a list of row vectors into a typed data frame.
  build.table <- function(header, rows) {
    if (length(rows) == 0) {
      # Header-only (or completely empty) table: keep the declared columns
      # instead of failing on a column-count mismatch.
      cells <- matrix(character(0), nrow = 0, ncol = length(header))
    } else {
      # Bind all rows at once; growing a matrix with rbind() inside the
      # parsing loop copies it on every iteration.
      cells <- do.call(rbind, rows)
    }

    d <- data.frame(cells, row.names = NULL, stringsAsFactors = FALSE)
    if (length(header) > 0) {
      colnames(d) <- header
    }

    # Every cell was read as text; recover numeric/logical columns.
    # as.is = TRUE keeps non-numeric columns as character, not factor.
    if (nrow(d) > 0 && ncol(d) > 0) {
      d[] <- lapply(d, utils::type.convert, as.is = TRUE)
    }
    d
  }

  tables <- list()
  tableName <- NA
  tableHeader <- character(0)
  tableRows <- list()

  for (line in lines) {
    if (grepl("^#:table[[:space:]]+", line, ignore.case = TRUE)) {
      # A new table starts here: store the one collected so far.
      if (!is.na(tableName)) {
        tables[[tableName]] <- build.table(tableHeader, tableRows)
      }

      headerFields <- unlist(strsplit(line, "[[:space:]]+"))
      # NA when the marker carries no name; rows are then ignored until the
      # next valid marker.
      tableName <- headerFields[2]
      tableHeader <- character(0)
      tableRows <- list()
    } else if (grepl("^[[:space:]]*$", line) ||
               grepl("^[[:space:]]*#", line)) {
      # Blank line or comment: nothing to do.
      next
    } else if (!is.na(tableName)) {
      fields <- unlist(strsplit(line, "[[:space:]]+"))

      if (length(tableHeader) == 0) {
        tableHeader <- fields
      } else {
        tableRows[[length(tableRows) + 1L]] <- fields
      }
    }
  }

  # Store the table that was still open when the file ended.
  if (!is.na(tableName)) {
    tables[[tableName]] <- build.table(tableHeader, tableRows)
  }

  tables
}
|
||||
Loading…
Reference in New Issue