Merge pull request #808 from broadinstitute/pd_gsalib_concordance
Added gsa.reshape.concordance.table function to gsalib
This commit is contained in:
commit
c374d126d7
|
|
@ -82,7 +82,7 @@ import java.util.*;
|
|||
* <p>
|
||||
* It may be informative to reshape rows of the GenotypeConcordance counts and proportions tables into separate row-major tables
|
||||
* where the columns indicate the COMP genotype and the rows indicate the EVAL genotype for easy comparison between the
|
||||
* two callsets. This can be done with a command similar to d <- matrix(sampleRow,nrow=6,byrow=T) in R where sampleRow is the 36-value row corresponding to the sample of interest, excluding "Mismatching_Alleles".
|
||||
* two callsets. This can be done with the gsa.reshape.concordance.table function in the gsalib R library.
|
||||
* In Excel this can be accomplished using the OFFSET function.
|
||||
* </p>
|
||||
* <ul>
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
Package: gsalib
|
||||
Type: Package
|
||||
Title: Utility Functions For GATK
|
||||
Version: 2.1
|
||||
Date: 2014-12-09
|
||||
Version: 2.2
|
||||
Date: 2015-03-17
|
||||
Author: Kiran Garimella
|
||||
Maintainer: Geraldine Van der Auwera <vdauwera@broadinstitute.org>
|
||||
Description: This package contains utility functions used by the Genome Analysis Toolkit (GATK) to load tables and plot data. The GATK is a toolkit for variant discovery in high-throughput sequencing data.
|
||||
|
|
|
|||
|
|
@ -1 +1,2 @@
|
|||
export(gsa.read.gatkreport)
|
||||
export(gsa.read.gatkreport)
|
||||
export(gsa.reshape.concordance.table)
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
# Reshape one sample's row of a GenotypeConcordance table into a matrix with
# Eval genotypes in rows and Comp genotypes in columns.
#
# Arguments:
#   data        A GATKReport (list of tables). If table.name is NULL, data is
#               taken to be the concordance table itself (a data frame with a
#               Sample column).
#   table.name  Name of the table to pull out of data, or NULL to use data
#               directly.
#   sample.name Value of the Sample column identifying the row to reshape.
#
# Returns:
#   NULL if the requested table is absent; otherwise an atomic matrix whose
#   dimnames are named EvalGenotypes (rows) and CompGenotypes (columns).
#
# Errors:
#   Stops if sample.name does not match exactly one row of the table.
gsa.reshape.concordance.table <- function(data, table.name="GenotypeConcordance_Counts", sample.name="ALL") {
    if (!is.null(table.name)) {
        data <- data[[table.name]]
    }
    if (is.null(data)) {
        return(NULL)
    }

    # Keep only the genotype-combination columns: the first column (Sample)
    # and the trailing column (Mismatching_Alleles in GenotypeConcordance
    # reports) are not part of the Eval x Comp grid.
    value.columns <- seq_len(ncol(data))[-c(1, ncol(data))]

    # which() ignores NA sample names instead of producing NA rows.
    sample.rows <- which(data$Sample == sample.name)
    if (length(sample.rows) != 1) {
        stop("Expected exactly one row for sample '", sample.name,
             "' in concordance table, found ", length(sample.rows),
             call. = FALSE)
    }
    d <- data[sample.rows, value.columns, drop = FALSE]

    possible.genotypes <- c("NO_CALL", "HOM_REF", "HET", "HOM_VAR", "UNAVAILABLE", "MIXED")
    combinations <- outer(possible.genotypes, possible.genotypes,
                          function(a, b) paste(a, b, sep = "_"))

    # Row i / column j is TRUE when column "<eval i>_<comp j>" is present, so
    # the margins tell us which Eval / Comp genotypes the table reports.
    existing.combi <- matrix(combinations %in% colnames(d),
                             nrow = length(possible.genotypes))
    eval.genotypes <- apply(existing.combi, 1, any)
    comp.genotypes <- apply(existing.combi, 2, any)

    # unlist() flattens the one-row data frame into an atomic vector; passing
    # the data frame itself to matrix() would yield a list-mode matrix on
    # which arithmetic fails.
    m <- matrix(unlist(d, use.names = FALSE),
                nrow = sum(eval.genotypes), byrow = TRUE)
    dimnames(m) <- list(EvalGenotypes = possible.genotypes[eval.genotypes],
                        CompGenotypes = possible.genotypes[comp.genotypes])
    m
}
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
\name{gsa.reshape.concordance.table}
|
||||
\alias{gsa.reshape.concordance.table}
|
||||
\title{
|
||||
Reshape a Concordance Table
|
||||
}
|
||||
\description{
|
||||
Given a GATKReport generated by GenotypeConcordance (as output by \code{gsa.read.gatkreport}), this function reshapes the concordance for a specified sample into a matrix with the EvalGenotypes in rows and the CompGenotypes in columns (see the documentation for GenotypeConcordance for the definition of Eval and Comp).
|
||||
}
|
||||
\usage{
|
||||
gsa.reshape.concordance.table(data, table.name="GenotypeConcordance_Counts", sample.name="ALL")
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{
|
||||
A GATKReport as output by \code{gsa.read.gatkreport}. If \code{table.name} is \code{NULL}, \code{data} is assumed to be the concordance table itself (a data frame with a \code{Sample} column) to reshape.
|
||||
}
|
||||
\item{table.name}{
|
||||
The table name in the GATKReport to reshape. Defaults to "GenotypeConcordance_Counts", but could also be one of the proportion tables ("GenotypeConcordance_EvalProportions", "GenotypeConcordance_CompProportions"). This value can also be \code{NULL}, in which case \code{data} is reshaped directly.
|
||||
}
|
||||
\item{sample.name}{
|
||||
The sample name within \code{table.name} to use.
|
||||
}
|
||||
}
|
||||
\value{
|
||||
Returns a two-dimensional matrix with Eval genotypes in the rows and Comp genotypes in the columns. The genotypes themselves (\code{HOM_REF}, \code{NO_CALL}, etc) are specified in the row/col names of the matrix.
|
||||
}
|
||||
\author{
|
||||
Phillip Dexheimer
|
||||
}
|
||||
|
||||
\seealso{
|
||||
\code{\link{gsa.read.gatkreport}}
|
||||
}
|
||||
\examples{
|
||||
test_file = system.file("extdata", "test_gatkreport.table", package = "gsalib")
|
||||
report = gsa.read.gatkreport(test_file)
|
||||
gsa.reshape.concordance.table(report)
|
||||
|
||||
## Output looks like:
|
||||
## CompGenotypes
|
||||
##EvalGenotypes NO_CALL HOM_REF HET HOM_VAR UNAVAILABLE MIXED
|
||||
## NO_CALL 0 0 0 0 0 0
|
||||
## HOM_REF 0 2 0 0 0 0
|
||||
## HET 0 3 0 0 0 0
|
||||
## HOM_VAR 0 2 0 0 0 0
|
||||
## UNAVAILABLE 0 0 0 0 0 0
|
||||
## MIXED 0 0 0 0 0 0
|
||||
}
|
||||
\keyword{ manip }
|
||||
|
|
@ -12,8 +12,8 @@ Utility functions for analysis of genome sequence data with the GATK
|
|||
\tabular{ll}{
|
||||
Package: \tab gsalib\cr
|
||||
Type: \tab Package\cr
|
||||
Version: \tab 2.1\cr
|
||||
Date: \tab 2014-12-09\cr
|
||||
Version: \tab 2.2\cr
|
||||
Date: \tab 2015-03-17\cr
|
||||
License: \tab MIT\cr
|
||||
LazyLoad: \tab yes\cr
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue