Placeholder commit. Implements a loader for a new multi-part GATK reporting format; an example of the format is at /home/radon01/kiran/scr1/projects/NewVariantEvalOutput/results/v1/tableexample.txt. Known issue still to be addressed: numeric columns are being interpreted as vectors of strings rather than numbers.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4115 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2010-08-25 18:48:44 +00:00
parent ac58eb3cbb
commit fba71e3c15
1 changed files with 47 additions and 0 deletions

47
R/GATKReport.R 100644
View File

@ -0,0 +1,47 @@
# Read a multi-table GATK report file into a named list of data frames.
#
# The file is scanned line by line:
#   * a line starting with "#:table <name>" (case-insensitive) opens a new
#     table called <name>;
#   * blank lines and any other line whose first non-space character is "#"
#     are skipped;
#   * the first remaining line after a table marker is the whitespace-separated
#     column header, and every following line is one data row.
# Lines that appear before the first table marker are ignored.
#
# Args:
#   filename: path of the report file to read.
#
# Returns:
#   A named list with one data.frame per table, in file order. Columns whose
#   values all parse as numbers are returned as integer/double; every other
#   column is returned as character. If a table name occurs more than once,
#   the last occurrence wins.
read.gatkreport <- function(filename) {
  con <- file(filename, "r", blocking = TRUE)
  # Release the connection even if readLines() fails part-way through.
  on.exit(close(con), add = TRUE)
  lines <- readLines(con)

  # Turn one table's header and accumulated row vectors into a data frame.
  build_table <- function(header, rows) {
    if (length(rows) == 0) {
      # Header-only table: keep the declared columns, with zero rows.
      cells <- matrix(character(0), nrow = 0, ncol = length(header))
    } else {
      # Bind all rows in one call instead of growing a matrix row by row.
      cells <- do.call(rbind, rows)
    }
    d <- as.data.frame(cells, stringsAsFactors = FALSE)
    names(d) <- header
    for (j in seq_along(d)) {
      converted <- utils::type.convert(d[[j]], as.is = TRUE)
      # Only accept numeric conversions: type.convert() would also turn a
      # column of "T"/"F" strings (e.g. alleles) into logicals.
      if (is.numeric(converted)) {
        d[[j]] <- converted
      }
    }
    d
  }

  tables <- list()
  table_name <- NA
  header <- character(0)
  rows <- list()

  for (line in lines) {
    if (grepl("^#:table[[:space:]]+", line, ignore.case = TRUE)) {
      # A new table starts: flush the one collected so far, if any.
      if (!is.na(table_name)) {
        tables[[table_name]] <- build_table(header, rows)
      }
      # Second whitespace-separated token is the table name (NA if absent,
      # in which case the following rows are ignored).
      table_name <- strsplit(line, "[[:space:]]+")[[1]][2]
      header <- character(0)
      rows <- list()
    } else if (grepl("^[[:space:]]*$", line) ||
               grepl("^[[:space:]]*#", line)) {
      # Blank line or comment: nothing to do.
      next
    } else if (!is.na(table_name)) {
      # Drop leading whitespace first so strsplit() does not emit an empty
      # first field for indented lines.
      fields <- strsplit(sub("^[[:space:]]+", "", line), "[[:space:]]+")[[1]]
      if (length(header) == 0) {
        header <- fields
      } else {
        rows[[length(rows) + 1L]] <- fields
      }
    }
  }

  # Flush the final table, which has no following marker to trigger it.
  if (!is.na(table_name)) {
    tables[[table_name]] <- build_table(header, rows)
  }

  tables
}