gatk-3.8/R/GATKReport.R

63 lines
2.0 KiB
R

# Load a table into the specified environment. Make sure that each new table gets a unique name (this allows one to cat a bunch of tables with the same name together and load them into R without each table overwriting the last.
.assignGATKTableToEnvironment <- function(tableName, tableHeader, tableRows, tableEnv) {
d = data.frame(tableRows, row.names=NULL, stringsAsFactors=FALSE);
colnames(d) = tableHeader;
for (i in 1:ncol(d)) {
v = suppressWarnings(as.numeric(d[,i]));
d[,i] = v;
}
usedNames = ls(envir=tableEnv, pattern=tableName);
if (length(usedNames) > 0) {
tableName = paste(tableName, ".", length(usedNames), sep="");
}
assign(tableName, d, envir=tableEnv);
}
# Load all GATKReport tables from a file
read.gatkreport <- function(filename) {
con = file(filename, "r", blocking = TRUE);
lines = readLines(con);
close(con);
tableEnv = new.env();
tableName = NA;
tableHeader = c();
tableRows = c();
for (line in lines) {
if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) {
headerFields = unlist(strsplit(line, "[[:space:]]+"));
if (!is.na(tableName)) {
.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv);
}
tableName = headerFields[2];
tableHeader = c();
tableRows = c();
} else if (length(grep("^[[:space:]]*$", line)) > 0 | length(grep("^[[:space:]]*#", line)) > 0) {
# do nothing
} else if (!is.na(tableName)) {
row = unlist(strsplit(line, "[[:space:]]+"));
if (length(tableHeader) == 0) {
tableHeader = row;
} else {
tableRows = rbind(tableRows, row);
}
}
}
if (!is.na(tableName)) {
.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv);
}
gatkreport = as.list(tableEnv);
}