Merge pull request #808 from broadinstitute/pd_gsalib_concordance
Added gsa.reshape.concordance.table function to gsalib
This commit is contained in:
commit
c374d126d7
|
|
@ -82,7 +82,7 @@ import java.util.*;
|
||||||
* <p>
|
* <p>
|
||||||
* It may be informative to reshape rows of the GenotypeConcordance counts and proportions tables into separate row-major tables
|
* It may be informative to reshape rows of the GenotypeConcordance counts and proportions tables into separate row-major tables
|
||||||
* where the columns indicate the COMP genotype and the rows indicate the EVAL genotype for easy comparison between the
|
* where the columns indicate the COMP genotype and the rows indicate the EVAL genotype for easy comparison between the
|
||||||
* two callsets. This can be done with a command similar to d <- matrix(sampleRow,nrow=6,byrow=T) in R where sampleRow is the 36-value row corresponding to the sample of interest, excluding "Mismatching_Alleles".
|
* two callsets. This can be done with the gsa.reshape.concordance.table function in the gsalib R library.
|
||||||
* In Excel this can be accomplished using the OFFSET function.
|
* In Excel this can be accomplished using the OFFSET function.
|
||||||
* </p>
|
* </p>
|
||||||
* <ul>
|
* <ul>
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
Package: gsalib
|
Package: gsalib
|
||||||
Type: Package
|
Type: Package
|
||||||
Title: Utility Functions For GATK
|
Title: Utility Functions For GATK
|
||||||
Version: 2.1
|
Version: 2.2
|
||||||
Date: 2014-12-09
|
Date: 2015-03-17
|
||||||
Author: Kiran Garimella
|
Author: Kiran Garimella
|
||||||
Maintainer: Geraldine Van der Auwera <vdauwera@broadinstitute.org>
|
Maintainer: Geraldine Van der Auwera <vdauwera@broadinstitute.org>
|
||||||
Description: This package contains utility functions used by the Genome Analysis Toolkit (GATK) to load tables and plot data. The GATK is a toolkit for variant discovery in high-throughput sequencing data.
|
Description: This package contains utility functions used by the Genome Analysis Toolkit (GATK) to load tables and plot data. The GATK is a toolkit for variant discovery in high-throughput sequencing data.
|
||||||
|
|
|
||||||
|
|
@ -1 +1,2 @@
|
||||||
export(gsa.read.gatkreport)
|
export(gsa.read.gatkreport)
|
||||||
|
export(gsa.reshape.concordance.table)
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
# Reshape one sample's row of a GenotypeConcordance table into a matrix with
# Eval genotypes in the rows and Comp genotypes in the columns.
#
# Arguments:
#   data         A list of tables from which `table.name` is extracted with
#                `[[`, or, when `table.name` is NULL, the concordance table
#                itself (a data frame with a `Sample` column).
#   table.name   Name of the table to extract from `data`; NULL to use `data`
#                directly.
#   sample.name  Value of the `Sample` column identifying the row to reshape.
#
# Returns:
#   A matrix of the selected row's values with dimnames `EvalGenotypes`
#   (rows) and `CompGenotypes` (columns), or NULL when the requested table
#   does not exist or no row matches `sample.name`.
gsa.reshape.concordance.table <- function(data, table.name="GenotypeConcordance_Counts", sample.name="ALL") {
    if (!is.null(table.name)) {
        data <- data[[table.name]]
    }
    if (is.null(data)) {
        return(NULL)
    }

    # %in% never yields NA, so rows with a missing Sample are simply skipped.
    sample.rows <- data$Sample %in% sample.name
    if (!any(sample.rows)) {
        return(NULL)
    }

    # Drop the first and last columns (presumably Sample and
    # Mismatching_Alleles -- confirm against the GATKReport layout); what
    # remains are the <EVAL>_<COMP> genotype combination columns.
    d <- data[sample.rows, -c(1, ncol(data)), drop = FALSE]

    possible.genotypes <- c('NO_CALL', 'HOM_REF', 'HET', 'HOM_VAR', 'UNAVAILABLE', 'MIXED')
    combinations <- outer(possible.genotypes, possible.genotypes, function(a,b) {paste(a,b,sep='_')})

    # Only keep genotypes that actually appear in the table's column names:
    # rows of `existing.combi` correspond to the first (Eval) name component,
    # columns to the second (Comp) component.
    existing.combi <- matrix(combinations %in% colnames(d), nrow=length(possible.genotypes))
    eval.genotypes <- apply(existing.combi, 1, any)
    comp.genotypes <- apply(existing.combi, 2, any)

    # Flatten the one-row data frame to a plain vector first; passing the data
    # frame straight to matrix() would produce a list-matrix. Filling by row
    # relies on the columns being ordered Eval-major.
    values <- unlist(d, use.names = FALSE)
    m <- matrix(values, nrow=sum(eval.genotypes), byrow=TRUE)
    dimnames(m) <- list(EvalGenotypes=possible.genotypes[eval.genotypes],
                        CompGenotypes=possible.genotypes[comp.genotypes])
    m
}
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
\name{gsa.reshape.concordance.table}
|
||||||
|
\alias{gsa.reshape.concordance.table}
|
||||||
|
\title{
|
||||||
|
Reshape a Concordance Table
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Given a GATKReport generated by GenotypeConcordance (as output by \code{gsa.read.gatkreport}), this function reshapes the concordance for a specified sample into a matrix with the EvalGenotypes in rows and the CompGenotypes in columns (see the documentation for GenotypeConcordance for the definition of Eval and Comp)
|
||||||
|
}
|
||||||
|
\usage{
|
||||||
|
gsa.reshape.concordance.table(data, table.name="GenotypeConcordance_Counts", sample.name="ALL")
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{data}{
|
||||||
|
A GATKReport as output by \code{gsa.read.gatkreport}. If \code{table.name} is \code{NULL}, \code{data} is assumed to be the concordance table itself (a data frame with a \code{Sample} column), which is reshaped directly.
|
||||||
|
}
|
||||||
|
\item{table.name}{
|
||||||
|
The table name in the GATKReport to reshape. Defaults to "GenotypeConcordance_Counts", but could also be one of the proportion tables ("GenotypeConcordance_EvalProportions", "GenotypeConcordance_CompProportions"). This value can also be \code{NULL}, in which case \code{data} is reshaped directly.
|
||||||
|
}
|
||||||
|
\item{sample.name}{
|
||||||
|
The sample name within the table named by \code{table.name} to use.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
Returns a two-dimensional matrix with Eval genotypes in the rows and Comp genotypes in the columns. The genotypes themselves (\code{HOM_REF}, \code{NO_CALL}, etc) are specified in the row/col names of the matrix.
|
||||||
|
}
|
||||||
|
\author{
|
||||||
|
Phillip Dexheimer
|
||||||
|
}
|
||||||
|
|
||||||
|
\seealso{
|
||||||
|
\code{\link{gsa.read.gatkreport}}
|
||||||
|
}
|
||||||
|
\examples{
|
||||||
|
test_file = system.file("extdata", "test_gatkreport.table", package = "gsalib")
|
||||||
|
report = gsa.read.gatkreport(test_file)
|
||||||
|
gsa.reshape.concordance.table(report)
|
||||||
|
|
||||||
|
## Output looks like:
|
||||||
|
## CompGenotypes
|
||||||
|
##EvalGenotypes NO_CALL HOM_REF HET HOM_VAR UNAVAILABLE MIXED
|
||||||
|
## NO_CALL 0 0 0 0 0 0
|
||||||
|
## HOM_REF 0 2 0 0 0 0
|
||||||
|
## HET 0 3 0 0 0 0
|
||||||
|
## HOM_VAR 0 2 0 0 0 0
|
||||||
|
## UNAVAILABLE 0 0 0 0 0 0
|
||||||
|
## MIXED 0 0 0 0 0 0
|
||||||
|
}
|
||||||
|
\keyword{ manip }
|
||||||
|
|
@ -12,8 +12,8 @@ Utility functions for analysis of genome sequence data with the GATK
|
||||||
\tabular{ll}{
|
\tabular{ll}{
|
||||||
Package: \tab gsalib\cr
|
Package: \tab gsalib\cr
|
||||||
Type: \tab Package\cr
|
Type: \tab Package\cr
|
||||||
Version: \tab 2.1\cr
|
Version: \tab 2.2\cr
|
||||||
Date: \tab 2014-12-09\cr
|
Date: \tab 2015-03-17\cr
|
||||||
License: \tab MIT\cr
|
License: \tab MIT\cr
|
||||||
LazyLoad: \tab yes\cr
|
LazyLoad: \tab yes\cr
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue