Refactoring/fixing up UG HMM code: a) Make code use PairHMM class instead of having duplicated code. That way UG and HaplotypeCaller now use same core code. Changes to be able to do this: 1. Compute context-dependent GOP as a function of read, not of haplotype, b) Extracted code to initialize HMM arrays into separate method, c) Move PairHMM class and unit test to public, d) Reenable banded code in PairHMM, inverted sense of flag (true=enable feature) but leave off in HaplotypeCaller.

This commit is contained in:
Guillermo del Angel 2012-04-17 14:22:48 -04:00
parent f9f8589692
commit c78b0eee3a
76 changed files with 2737 additions and 615 deletions

View File

@ -0,0 +1,22 @@
Copyright (c) 2012 The Broad Institute
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
THE USE OR OTHER DEALINGS IN THE SOFTWARE.

Binary file not shown.

22
licensing/LICENSE 100644
View File

@ -0,0 +1,22 @@
Copyright (c) 2012 The Broad Institute
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@ -0,0 +1,236 @@
library(gplots)
library(ggplot2)
# -------------------------------------------------------
# Utilities for displaying multiple plots per page
# -------------------------------------------------------
distributeGraphRows <- function(graphs, heights = c()) {
  # Lay out the given graphs top-to-bottom in a single grid column with the
  # supplied relative heights (defaulting to equal heights). NA entries in
  # graphs are dropped, together with their corresponding heights.
  #
  # graphs  -- list of ggplot objects (may contain NA placeholders)
  # heights -- relative row heights; empty means "all equal"
  #
  # Uses grid.layout/viewport from the grid package (loaded via ggplot2).
  if (length(heights) == 0) {
    heights <- rep.int(1, length(graphs))
  }
  # compute the NA mask once so heights and graphs stay aligned
  keep <- !is.na(graphs)
  heights <- heights[keep]
  graphs <- graphs[keep]
  numGraphs <- length(graphs)
  Layout <- grid.layout(nrow = numGraphs, ncol = 1, heights = heights)
  grid.newpage()
  pushViewport(viewport(layout = Layout))
  subplot <- function(x) viewport(layout.pos.row = x, layout.pos.col = 1)
  # seq_len (not 1:numGraphs) so an empty graph list doesn't iterate c(1, 0)
  for (i in seq_len(numGraphs)) {
    print(graphs[[i]], vp = subplot(i))
  }
}
distributeLogGraph <- function(graph, xName) {
  # Show the same graph twice on one page: linear x axis on top, log10 below.
  linearVersion <- graph + scale_x_continuous(xName)
  logVersion <- graph + scale_x_log10(xName) + opts(title="")
  distributeGraphRows(list(linearVersion, logVersion))
}
distributePerSampleGraph <- function(perSampleGraph, distGraph, ratio=c(2,1)) {
  # Stack the per-sample graph above its distribution graph, sized by ratio.
  stacked <- list(perSampleGraph, distGraph)
  distributeGraphRows(stacked, ratio)
}
removeExtraStrats <- function(variantEvalDataFrame, moreToRemove=c()) {
  # Collapse the standard extra stratification columns (FunctionalClass,
  # Novelty) plus any named in moreToRemove: for each such column present in
  # the data frame, keep only the rows whose value is "all".
  #
  # Returns the filtered data frame.
  stratsToCollapse <- c("FunctionalClass", "Novelty", moreToRemove)
  for (stratName in stratsToCollapse) {
    if (stratName %in% colnames(variantEvalDataFrame)) {
      keepRows <- variantEvalDataFrame[[stratName]] == "all"
      variantEvalDataFrame <- variantEvalDataFrame[keepRows, ]
    }
  }
  variantEvalDataFrame
}
openPDF <- function(outputPDF) {
  # Open outputPDF as a standard 11 x 8.5 inch (landscape letter) PDF
  # device. A NA argument means "no PDF output" and is a silent no-op.
  if (is.na(outputPDF)) {
    return(invisible(NULL))
  }
  pdf(outputPDF, height=8.5, width=11)
}
closePDF <- function(outputPDF) {
  # Close the active PDF device when outputPDF is a real path (not NA), then
  # compact it if a compactPDF function is available in the session.
  if (is.na(outputPDF)) {
    return(invisible(NULL))
  }
  dev.off()
  if (exists("compactPDF")) {
    compactPDF(outputPDF)
  }
}
makeRatioDataFrame <- function(ACs, num, denom, widths = NULL) {
  # Build a per-AC ratio table with columns AC, width, num, denom, and
  # ratio = num / denom.
  #
  # ACs    -- allele-count bin starts
  # num    -- numerator counts, one per AC
  # denom  -- denominator counts, one per AC
  # widths -- bin widths; NULL (the default) means width 1 for every AC
  #
  # Returns the assembled data.frame. The original version ended with a
  # trailing assignment (and a dead `value = NULL`), so the result was only
  # returned invisibly; returning the data frame directly makes the
  # function's value explicit without changing what callers receive.
  if (is.null(widths)) {
    widths <- rep(1, length(ACs))
  }
  data.frame(AC = ACs, width = widths, num = num, denom = denom,
             ratio = num / denom)
}
.reduceACs <- function(binWidthForAC, ACs) {
  # Collapse the full 0..max(ACs) range into wider bins.
  #
  # binWidthForAC(ac) gives the number of consecutive AC values to merge
  # into the bin that starts at ac. ACs is a vector of all AC values from 0
  # to 2N that should be merged together.
  #
  # Returns a list containing the reduced bin starts (ACs), their widths,
  # and a map from original AC values to the start of their new bin
  # (1 -> 1, 2 -> 2, 3 -> 2, etc).
  highestAC <- max(ACs)
  binStarts <- c()
  binWidths <- c()
  acToBinStart <- c()
  currentAC <- 0
  while (currentAC < highestAC) {
    thisWidth <- binWidthForAC(currentAC)
    binStarts <- c(binStarts, currentAC)
    binWidths <- c(binWidths, thisWidth)
    acToBinStart <- c(acToBinStart, rep(currentAC, thisWidth))
    currentAC <- currentAC + thisWidth
  }
  list(ACs = binStarts, widths = binWidths, newACMap = acToBinStart)
}
# geometricACs <- function(k, ACs) {
# nBins <- round(k * log10(max(ACs)))
#
# binWidthForAC <- function(ac) {
# max(ceiling(ac / nBins), 1)
# }
#
# return(reduceACs(binWidthForAC, ACs))
# }
reduce.AC.on.LogLinear.intervals <- function(scaleFactor, ACs) {
  # Map the full range of AC values onto a log-linear scale.
  #
  # Reduce the full AC range onto one where the width of each new AC bin
  # increases at a rate of 10^scaleFactor with growing AC values. This is
  # primarily useful for accurately computing ratios or other quantities by
  # AC that aren't well determined when the AC values are very large.
  #
  # Returns a list containing the reduced AC starts, their corresponding
  # widths, and a map from original ACs to their new ones
  # (1 -> 1, 2 -> 2, 3 -> 2, etc).
  maxAC <- max(ACs)
  # work in allele-frequency space (AC / maxAC) so breaks are AC-scale free
  afs <- ACs / maxAC
  # log-spaced breakpoints between AF 1e-4 and 1e-1, stepped by scaleFactor
  # in log10 space
  breaks <- 10^(seq(-4, -1, scaleFactor))
  widths <- c()
  lastBreak <- 1
  # walk the breakpoints from largest to smallest, counting how many AFs
  # fall into each half-open interval [b, lastBreak)
  for ( i in length(breaks):1 ) {
    b <- breaks[i]
    width <- sum(afs < lastBreak & afs >= b)
    widths <- c(widths, width)
    lastBreak <- b
  }
  # widths were accumulated largest-interval first; rev restores break order
  widths <- rev(widths)
  binWidthForAC <- function(ac) {
    # bin width for ac: the width recorded for the largest break that ac's
    # allele frequency reaches; 1 when it falls below every break
    af <- ac / maxAC
    value = 1
    for ( i in length(breaks):1 )
      if ( af >= breaks[i] ) {
        value = widths[i]
        break
      }
    return(value)
  }
  return(.reduceACs(binWidthForAC, ACs))
}
.remapACs <- function(remapper, k, df) {
  # Aggregate a per-AC ratio data frame onto the reduced AC bins produced by
  # remapper(k, df$AC): num and denom are summed within each new bin and a
  # fresh ratio data frame is built over the reduced bins.
  #
  # remapper -- e.g. reduce.AC.on.LogLinear.intervals; must return a list
  #             with fields ACs, widths, and newACMap
  # k        -- tuning parameter forwarded to remapper (e.g. scaleFactor)
  # df       -- data frame from makeRatioDataFrame (columns AC, num, denom)
  newACs <- remapper(k, df$AC)
  n = length(newACs$ACs)
  num = rep(0, n)
  denom = rep(0, n)
  # NOTE(review): this loop iterates i over row POSITIONS (1..nrow) but then
  # matches df$AC == i, i.e. treats i as an AC VALUE. If ACs start at 0, the
  # AC == 0 row is never accumulated and the final i matches no row.
  # Similarly, newACs$newACMap[row$AC] uses a (possibly 0-based) AC value as
  # a 1-based index. Suspected off-by-one -- confirm the intended AC range
  # before changing.
  for ( i in 1:dim(df)[1] ) {
    rowI = df$AC == i
    row = df[rowI,]
    newAC = newACs$newACMap[row$AC]
    newRowI = newACs$ACs == newAC
    num[newRowI] = num[newRowI] + df$num[rowI]
    denom[newRowI] = denom[newRowI] + df$denom[rowI]
  }
  newdf <- makeRatioDataFrame(newACs$ACs, num, denom, newACs$widths )
  newdf
}
compute.ratio.on.LogLinear.AC.intervals <- function(ACs, num, denom, scaleFactor = 0.1) {
  # Build the full per-AC ratio table, then collapse it onto log-linear AC
  # bins so ratios at large ACs are computed over enough observations.
  fullTable <- makeRatioDataFrame(ACs, num, denom, 1)
  .remapACs(reduce.AC.on.LogLinear.intervals, scaleFactor, fullTable)
}
plotVariantQC <- function(metrics, measures, requestedStrat = "Sample",
                          fixHistogramX=F, anotherStrat = NULL, nObsField = "n_indels",
                          onSamePage=F, facetVariableOnXPerSample = F, facetVariableOnXForDist = T, moreTitle="") {
  # Plot QC metrics: one graph of the measures per stratum (e.g. per sample)
  # and, when the measures take more than two distinct values, a second
  # graph of their distributions.
  #
  # metrics        -- data frame with one row per stratum, holding the
  #                   measure columns, requestedStrat, and nObsField
  # measures       -- names of the metric columns to plot
  # requestedStrat -- column to stratify on; "Sample" gets special handling
  # fixHistogramX  -- if TRUE, distribution facets share a fixed x scale
  # anotherStrat   -- optional second stratification column used as a facet
  # nObsField      -- column holding the number of observations per row
  # onSamePage     -- if TRUE, stack both graphs on one page
  # facetVariableOnXPerSample / facetVariableOnXForDist -- facet orientation
  # moreTitle      -- extra text appended to the plot title
  #
  # NOTE(review): melt() is presumably from the reshape/reshape2 package
  # (loaded elsewhere), and opts()/theme_text()/formatter= are pre-0.9
  # ggplot2 APIs -- confirm the package versions this script targets.
  metrics$strat = metrics[[requestedStrat]]
  otherFacet = "."  # "." means "no second facet" in a facet_grid formula
  id.vars = c("strat", "nobs")
  metrics$nobs <- metrics[[nObsField]]
  # keep track of the other strat and its implied facet value
  if (! is.null(anotherStrat)) {
    id.vars = c(id.vars, anotherStrat)
    otherFacet = anotherStrat
  }
  # reshape to long form: one row per (stratum, measure) pair
  molten <- melt(metrics, id.vars=id.vars, measure.vars=c(measures))
  perSampleGraph <- ggplot(data=molten, aes(x=strat, y=value, group=variable, color=variable, fill=variable))
  title <- opts(title=paste(paste(paste(measures, collapse=", "), "by", requestedStrat), moreTitle))
  # build the facet_grid formula, putting the measure variable on the x or
  # y side of the grid depending on onX
  determineFacet <- function(onX) {
    if ( onX ) {
      paste(otherFacet, "~ variable")
    } else {
      paste("variable ~", otherFacet)
    }
  }
  sampleFacet = determineFacet(facetVariableOnXPerSample)
  distFacet = determineFacet(facetVariableOnXForDist)
  if ( requestedStrat == "Sample" ) {
    # per-sample mode: label points with the sample name, hide the x scale
    perSampleGraph <- perSampleGraph + geom_text(aes(label=strat), size=1.5) + geom_blank() # don't display a scale
    perSampleGraph <- perSampleGraph + scale_x_discrete("Sample (ordered by nSNPs)", formatter=function(x) "")
  } else {
    # other strats (e.g. AlleleCount): points sized by log10 of observation
    # count on a log10 x axis
    perSampleGraph <- perSampleGraph + geom_point(aes(size=log10(nobs))) #+ geom_smooth(aes(weight=log10(nobs)))
    perSampleGraph <- perSampleGraph + scale_x_log10("AlleleCount")
  }
  perSampleGraph <- perSampleGraph + ylab("Variable value") + title
  perSampleGraph <- perSampleGraph + facet_grid(sampleFacet, scales="free")
  nValues = length(unique(molten$value))
  # only draw a distribution graph when there is something to distribute
  if (nValues > 2) {
    if ( requestedStrat == "Sample" ) {
      distGraph <- ggplot(data=molten, aes(x=value, group=variable, fill=variable))
    } else {
      # weight each value by its number of observations
      distGraph <- ggplot(data=molten, aes(x=value, group=variable, fill=variable, weight=nobs))
    }
    distGraph <- distGraph + geom_histogram(aes(y=..ndensity..))
    distGraph <- distGraph + geom_density(alpha=0.5, aes(y=..scaled..))
    distGraph <- distGraph + geom_rug(aes(y=NULL, color=variable, position="jitter"))
    scale = "free"
    if ( fixHistogramX ) scale = "fixed"
    distGraph <- distGraph + facet_grid(distFacet, scales=scale)
    distGraph <- distGraph + ylab("Relative frequency")
    distGraph <- distGraph + xlab("Variable value (see facet for variable by color)")
    distGraph <- distGraph + opts(axis.text.x=theme_text(angle=-45)) # , legend.position="none")
  } else {
    distGraph <- NA
  }
  if ( onSamePage ) {
    suppressMessages(distributePerSampleGraph(perSampleGraph, distGraph))
  } else {
    suppressMessages(print(perSampleGraph))
    # NOTE(review): when distGraph is NA (nValues <= 2), NA + title here
    # likely errors -- confirm callers never hit this combination
    suppressMessages(print(distGraph + title))
  }
}

View File

@ -18,10 +18,7 @@ import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Queue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.*;
/**
* A microscheduler that schedules shards according to a tree-like structure.
@ -44,11 +41,6 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
private final Queue<TreeReduceTask> reduceTasks = new LinkedList<TreeReduceTask>();
/**
* An exception that's occurred in this traversal. If null, no exception has occurred.
*/
private RuntimeException error = null;
/**
* Queue of incoming shards.
*/
@ -99,11 +91,13 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
ReduceTree reduceTree = new ReduceTree(this);
initializeWalker(walker);
//
// exception handling here is a bit complex. We used to catch and rethrow exceptions all over
// the place, but that just didn't work well. Now we have a specific execution exception (inner class)
// to use for multi-threading specific exceptions. All RuntimeExceptions that occur within the threads are rethrown
// up the stack as their underlying causes
//
while (isShardTraversePending() || isTreeReducePending()) {
// Check for errors during execution.
if(hasTraversalErrorOccurred())
throw getTraversalError();
// Too many files sitting around taking up space? Merge them.
if (isMergeLimitExceeded())
mergeExistingOutput(false);
@ -130,12 +124,8 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
result = reduceTree.getResult().get();
notifyTraversalDone(walker,result);
}
catch (ReviewedStingException ex) {
throw ex;
}
catch (Exception ex) {
throw new ReviewedStingException("Unable to retrieve result", ex);
}
catch( InterruptedException ex ) { handleException(ex); }
catch( ExecutionException ex ) { handleException(ex); }
// do final cleanup operations
outputTracker.close();
@ -338,32 +328,41 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
}
/**
* Detects whether an execution error has occurred.
* @return True if an error has occurred. False otherwise.
* Handle an exception that occurred in a worker thread as needed by this scheduler.
*
* The way to use this function in a worker is:
*
* try { doSomeWork();
* catch ( InterruptedException ex ) { hms.handleException(ex); }
* catch ( ExecutionException ex ) { hms.handleException(ex); }
*
* @param ex the exception that occurred in the worker thread
*/
private synchronized boolean hasTraversalErrorOccurred() {
return error != null;
}
private synchronized RuntimeException getTraversalError() {
if(!hasTraversalErrorOccurred())
throw new ReviewedStingException("User has attempted to retrieve a traversal error when none exists");
return error;
protected final void handleException(InterruptedException ex) {
throw new HierarchicalMicroScheduler.ExecutionFailure("Hierarchical reduce interrupted", ex);
}
/**
* Allows other threads to notify of an error during traversal.
* Handle an exception that occurred in a worker thread as needed by this scheduler.
*
* The way to use this function in a worker is:
*
* try { doSomeWork();
* catch ( InterruptedException ex ) { hms.handleException(ex); }
* catch ( ExecutionException ex ) { hms.handleException(ex); }
*
* @param ex the exception that occurred in the worker thread
*/
protected synchronized void notifyOfTraversalError(Throwable error) {
// If the error is already a Runtime, pass it along as is. Otherwise, wrap it.
if (error instanceof RuntimeException)
this.error = (RuntimeException)error;
protected final void handleException(ExecutionException ex) {
if ( ex.getCause() instanceof RuntimeException )
// if the cause was a runtime exception that's what we want to send up the stack
throw (RuntimeException )ex.getCause();
else
this.error = new ReviewedStingException("An error occurred during the traversal.", error);
throw new HierarchicalMicroScheduler.ExecutionFailure("Hierarchical reduce failed", ex);
}
/** A small wrapper class that provides the TreeReducer interface along with the FutureTask semantics. */
private class TreeReduceTask extends FutureTask {
private TreeReducer treeReducer = null;
@ -382,6 +381,17 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
}
}
/**
* A specific exception class for HMS-specific failures such as
* Interrupted or ExecutionFailures that aren't clearly the fault
* of the underlying walker code
*/
public static class ExecutionFailure extends ReviewedStingException {
public ExecutionFailure(final String s, final Throwable throwable) {
super(s, throwable);
}
}
/**
* Used by the ShardTraverser to report time consumed traversing a given shard.
*

View File

@ -27,16 +27,15 @@ import java.util.concurrent.Callable;
* Carries the walker over a given shard, in a callable interface.
*/
public class ShardTraverser implements Callable {
private HierarchicalMicroScheduler microScheduler;
private Walker walker;
private Shard shard;
private TraversalEngine traversalEngine;
private ThreadLocalOutputTracker outputTracker;
final private HierarchicalMicroScheduler microScheduler;
final private Walker walker;
final private Shard shard;
final private TraversalEngine traversalEngine;
final private ThreadLocalOutputTracker outputTracker;
private OutputMergeTask outputMergeTask;
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(ShardTraverser.class);
final protected static Logger logger = Logger.getLogger(ShardTraverser.class);
/**
* Is this traversal complete?
@ -58,11 +57,10 @@ public class ShardTraverser implements Callable {
public Object call() {
try {
traversalEngine.startTimersIfNecessary();
long startTime = System.currentTimeMillis();
final long startTime = System.currentTimeMillis();
Object accumulator = walker.reduceInit();
LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),
final WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),
microScheduler.getReadIterator(shard),
shard.getGenomeLocs(),
microScheduler.engine.getSampleDB().getSampleNames()); // todo: microScheduler.engine is protected - is it okay to use it here?
@ -76,18 +74,12 @@ public class ShardTraverser implements Callable {
windowMaker.close();
outputMergeTask = outputTracker.closeStorage();
long endTime = System.currentTimeMillis();
final long endTime = System.currentTimeMillis();
microScheduler.reportShardTraverseTime(endTime-startTime);
return accumulator;
}
catch(Throwable t) {
// Notify that an exception has occurred and rethrow it.
microScheduler.notifyOfTraversalError(t);
throw new ReviewedStingException("An error has occurred during traversal",t);
}
finally {
} finally {
synchronized(this) {
complete = true;
notifyAll();

View File

@ -25,20 +25,11 @@ import java.util.concurrent.Future;
* interface to force the reduce.
*/
public class TreeReducer implements Callable {
private HierarchicalMicroScheduler microScheduler;
final private HierarchicalMicroScheduler microScheduler;
private TreeReducible walker;
private Future lhs;
private Future rhs;
/**
* Create a one-sided reduce. Result will be a simple pass-through of the result.
* @param microScheduler The parent hierarchical microscheduler for this reducer.
* @param lhs The one side of the reduce.
*/
public TreeReducer( HierarchicalMicroScheduler microScheduler, Future lhs ) {
this( microScheduler, lhs, null );
}
/**
* Create a full tree reduce. Combine this two results using an unspecified walker at some point in the future.
* @param microScheduler The parent hierarchical microscheduler for this reducer.
@ -67,10 +58,7 @@ public class TreeReducer implements Callable {
if( lhs == null )
throw new IllegalStateException(String.format("Insufficient data on which to reduce; lhs = %s, rhs = %s", lhs, rhs) );
if( rhs == null )
return lhs.isDone();
return lhs.isDone() && rhs.isDone();
return lhs.isDone() && (rhs == null || rhs.isDone());
}
/**
@ -80,24 +68,21 @@ public class TreeReducer implements Callable {
public Object call() {
Object result = null;
long startTime = System.currentTimeMillis();
final long startTime = System.currentTimeMillis();
try {
if( lhs == null )
result = lhs.get();
// todo -- what the hell is this above line? Shouldn't it be the two below?
// if( lhs == null )
// throw new IllegalStateException(String.format("Insufficient data on which to reduce; lhs = %s, rhs = %s", lhs, rhs) );
else
result = walker.treeReduce( lhs.get(), rhs.get() );
}
catch( InterruptedException ex ) {
microScheduler.notifyOfTraversalError(ex);
throw new ReviewedStingException("Hierarchical reduce interrupted", ex);
}
catch( ExecutionException ex ) {
microScheduler.notifyOfTraversalError(ex);
throw new ReviewedStingException("Hierarchical reduce failed", ex);
}
catch( InterruptedException ex ) { microScheduler.handleException(ex); }
catch( ExecutionException ex ) { microScheduler.handleException(ex); }
long endTime = System.currentTimeMillis();
final long endTime = System.currentTimeMillis();
// Constituent bits of this tree reduces are no longer required. Throw them away.
this.lhs = null;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2009 The Broad Institute
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@ -12,7 +12,6 @@
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@ -99,8 +98,13 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
/**
* Create a new stub given the requested file.
*
* @param engine engine.
* @param genotypeFile file to (ultimately) create.
* @param isCompressed should we compress the output stream?
* @param argumentSources sources.
* @param skipWritingHeader skip writing header.
* @param doNotWriteGenotypes do not write genotypes.
*/
public VCFWriterStub(GenomeAnalysisEngine engine, File genotypeFile, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingHeader, boolean doNotWriteGenotypes) {
this.engine = engine;
@ -114,8 +118,13 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
/**
* Create a new stub given the requested file.
*
* @param engine engine.
* @param genotypeStream stream to (ultimately) write.
* @param isCompressed should we compress the output stream?
* @param argumentSources sources.
* @param skipWritingHeader skip writing header.
* @param doNotWriteGenotypes do not write genotypes.
*/
public VCFWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingHeader, boolean doNotWriteGenotypes) {
this.engine = engine;
@ -154,7 +163,7 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
/**
* Gets the master sequence dictionary from the engine associated with this stub
* @link GenomeAnalysisEngine.getMasterSequenceDictionary
* @return
* @return the master sequence dictionary from the engine associated with this stub
*/
public SAMSequenceDictionary getMasterSequenceDictionary() {
return engine.getMasterSequenceDictionary();
@ -188,22 +197,25 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
vcfHeader = header;
// Check for the command-line argument header line. If not present, add it in.
if ( !skipWritingHeader ) {
VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine();
boolean foundCommandLineHeaderLine = false;
for (VCFHeaderLine line: vcfHeader.getMetaData()) {
if ( line.getKey().equals(commandLineArgHeaderLine.getKey()) )
foundCommandLineHeaderLine = true;
if (!skipWritingHeader && header.isWriteEngineHeaders()) {
if (header.isWriteCommandLine()) {
VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine();
boolean foundCommandLineHeaderLine = false;
for (VCFHeaderLine line: vcfHeader.getMetaData()) {
if ( line.getKey().equals(commandLineArgHeaderLine.getKey()) )
foundCommandLineHeaderLine = true;
}
if ( !foundCommandLineHeaderLine )
vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
}
if ( !foundCommandLineHeaderLine )
vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
// also put in the reference contig header lines
String assembly = getReferenceAssembly(engine.getArguments().referenceFile.getName());
for ( SAMSequenceRecord contig : engine.getReferenceDataSource().getReference().getSequenceDictionary().getSequences() )
vcfHeader.addMetaDataLine(getContigHeaderLine(contig, assembly));
vcfHeader.addMetaDataLine(new VCFHeaderLine("reference", "file://" + engine.getArguments().referenceFile.getAbsolutePath()));
vcfHeader.addMetaDataLine(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, "file://" + engine.getArguments().referenceFile.getAbsolutePath()));
}
outputTracker.getStorage(this).writeHeader(vcfHeader);
@ -225,7 +237,7 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
/**
* Gets a string representation of this object.
* @return
* @return a string representation of this object.
*/
@Override
public String toString() {
@ -247,20 +259,20 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
val = String.format("<ID=%s,length=%d,assembly=%s>", contig.getSequenceName(), contig.getSequenceLength(), assembly);
else
val = String.format("<ID=%s,length=%d>", contig.getSequenceName(), contig.getSequenceLength());
return new VCFHeaderLine("contig", val);
return new VCFHeaderLine(VCFHeader.CONTIG_KEY, val);
}
private String getReferenceAssembly(String refPath) {
// This doesn't need to be perfect as it's not a required VCF header line, but we might as well give it a shot
String assembly = null;
if ( refPath.indexOf("b37") != -1 || refPath.indexOf("v37") != -1 )
if (refPath.contains("b37") || refPath.contains("v37"))
assembly = "b37";
else if ( refPath.indexOf("b36") != -1 )
else if (refPath.contains("b36"))
assembly = "b36";
else if ( refPath.indexOf("hg18") != -1 )
else if (refPath.contains("hg18"))
assembly = "hg18";
else if ( refPath.indexOf("hg19") != -1 )
else if (refPath.contains("hg19"))
assembly = "hg19";
return assembly;
}
}
}

View File

@ -47,6 +47,14 @@ public class RefMetaDataTracker {
//
// ------------------------------------------------------------------------------------------
/**
* Only for testing -- not accessible in any other context
*/
public RefMetaDataTracker() {
ref = null;
map = Collections.emptyMap();
}
public RefMetaDataTracker(final Collection<RODRecordList> allBindings, final ReferenceContext ref) {
this.ref = ref;

View File

@ -250,53 +250,40 @@ public class GATKReportTable {
}
/**
* Returns the first primary key matching the dotted column values.
* Ex: dbsnp.eval.called.all.novel.all
*
* @param dottedColumnValues Period concatenated values.
* Returns the first primary key matching the column values.
* Ex: "CountVariants", "dbsnp", "eval", "called", "all", "novel", "all"
* @param columnValues column values.
* @return The first primary key matching the column values or throws an exception.
*/
public Object getPrimaryKeyByData(String dottedColumnValues) {
Object key = findPrimaryKey(dottedColumnValues);
public Object getPrimaryKeyByData(Object... columnValues) {
Object key = findPrimaryKeyByData(columnValues);
if (key == null)
throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + dottedColumnValues);
throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + Arrays.asList(columnValues));
return key;
}
/**
* Returns true if there is at least one row with the dotted column values.
* Ex: dbsnp.eval.called.all.novel.all
*
* @param dottedColumnValues Period concatenated values.
* @return true if there is at least one row matching the columns.
*/
public boolean containsPrimaryKey(String dottedColumnValues) {
return findPrimaryKey(dottedColumnValues) != null;
}
/**
* Returns the first primary key matching the dotted column values.
* Ex: dbsnp.eval.called.all.novel.all
*
* @param dottedColumnValues Period concatenated values.
* @return The first primary key matching the column values or null.
*/
private Object findPrimaryKey(String dottedColumnValues) {
return findPrimaryKey(dottedColumnValues.split("\\."));
}
/**
* Returns the first primary key matching the column values.
* Ex: new String[] { "dbsnp", "eval", "called", "all", "novel", "all" }
* Ex: "CountVariants", "dbsnp", "eval", "called", "all", "novel", "all"
*
* @param columnValues column values.
* @return The first primary key matching the column values.
* @return The first primary key matching the column values or null if the key does not exist.
*/
private Object findPrimaryKey(Object[] columnValues) {
public Object findPrimaryKeyByData(Object... columnValues) {
if (columnValues == null)
throw new NullPointerException("Column values is null");
if (columnValues.length == 0)
throw new IllegalArgumentException("Column values is empty");
int columnCount = columns.size();
for (Object primaryKey : primaryKeyColumn) {
boolean matching = true;
for (int i = 0; matching && i < columnValues.length; i++) {
matching = ObjectUtils.equals(columnValues[i], get(primaryKey, i + 1));
// i --> index into columnValues parameter
// j --> index into columns collection
for (int i = 0, j = 0; matching && i < columnValues.length && j < columnCount; j++) {
if (!columns.getByIndex(j).isDisplayable())
continue;
matching = ObjectUtils.equals(columnValues[i], get(primaryKey, i));
i++;
}
if (matching)
return primaryKey;
@ -360,8 +347,8 @@ public class GATKReportTable {
* output file), and the format string used to display the data.
*
* @param columnName the name of the column
* @param defaultValue the default value of a blank cell
* @param display if true - the column will be displayed; if false - the column will be hidden
* @param defaultValue the default value of a blank cell
* @param display if true - the column will be displayed; if false - the column will be hidden
* @param format the format string used to display data
*/
public void addColumn(String columnName, Object defaultValue, boolean display, String format) {

View File

@ -47,6 +47,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
final int maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
int minStart = Integer.MAX_VALUE;
@ -108,7 +109,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
// add these blocks of work to the work queue
// band-pass filter the list of isActive probabilities and turn into active regions
final ActivityProfile bandPassFiltered = profile.bandPassFilter();
final List<ActiveRegion> activeRegions = bandPassFiltered.createActiveRegions( activeRegionExtension );
final List<ActiveRegion> activeRegions = bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize );
// add active regions to queue of regions to process
workQueue.addAll( activeRegions );

View File

@ -16,4 +16,5 @@ import java.lang.annotation.RetentionPolicy;
public @interface ActiveRegionExtension {
public int extension() default 0;
public int maxRegion() default 1500;
}

View File

@ -7,10 +7,7 @@ import org.broadinstitute.sting.commandline.IntervalBinding;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.gatk.filters.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -33,8 +30,8 @@ import java.util.List;
@By(DataSource.READS)
@Requires({DataSource.READS, DataSource.REFERENCE_BASES})
@PartitionBy(PartitionType.READ)
@ActiveRegionExtension(extension=50)
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class})
@ActiveRegionExtension(extension=50,maxRegion=1500)
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
@Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)

View File

@ -127,7 +127,7 @@ public class FlagStatWalker extends ReadWalker<Integer, Integer> {
if (read.getDuplicateReadFlag()) {
myStat.duplicates++;
}
if (read.getReferenceIndex() >= 0) {
if (!read.getReadUnmappedFlag()) {
myStat.mapped++;
}
if (read.getReadPairedFlag()) {
@ -139,21 +139,21 @@ public class FlagStatWalker extends ReadWalker<Integer, Integer> {
myStat.read1++;
}
if (read.getProperPairFlag()) {
myStat.properly_paired++;
}
if (!read.getMateUnmappedFlag() && read.getReferenceIndex() >= 0) {
if (!read.getReadUnmappedFlag() && !read.getMateUnmappedFlag()) {
myStat.with_itself_and_mate_mapped++;
}
if (read.getMateUnmappedFlag()) {
myStat.singletons++;
}
}
if (read.getReferenceIndex() >= 0 && read.getMateReferenceIndex() >= 0 && ! read.getReferenceIndex().equals(read.getMateReferenceIndex())) {
myStat.with_mate_mapped_to_a_different_chr++;
if (read.getMappingQuality() >= 5) {
myStat.with_mate_mapped_to_a_different_chr_maq_greaterequal_than_5++;
if (!read.getReferenceIndex().equals(read.getMateReferenceIndex())) {
myStat.with_mate_mapped_to_a_different_chr++;
if (read.getMappingQuality() >= 5) {
myStat.with_mate_mapped_to_a_different_chr_maq_greaterequal_than_5++;
}
}
}
if (!read.getReadUnmappedFlag() && read.getMateUnmappedFlag()) {
myStat.singletons++;
}
}
return 1;

View File

@ -5,12 +5,10 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.*;
/**
@ -31,8 +29,31 @@ public class BaseQualityRankSumTest extends RankSumTest {
altQuals.add((double)p.getQual());
}
}
}
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, List<Double> altQuals) {
// TODO -- implement me; how do we pull out the correct offset from the read?
return;
/*
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : stratifiedContext.entrySet() ) {
final boolean matchesRef = ref.equals(alleleBin.getKey());
final boolean matchesAlt = alts.contains(alleleBin.getKey());
if ( !matchesRef && !matchesAlt )
continue;
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
if ( isUsableBase(p) ) {
if ( matchesRef )
refQuals.add((double)p.getQual());
else
altQuals.add((double)p.getQual());
}
}
}
*/
}
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ?
HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();

View File

@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
@ -35,6 +36,8 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
@ -49,7 +52,7 @@ import java.util.Map;
* allele Frequency, for each ALT allele, in the same order as listed; total number
* of alleles in called genotypes.
*/
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation {
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
@ -63,6 +66,13 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn
return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap<String, Object>(), true);
}
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
if ( ! vc.hasGenotypes() )
return null;
return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap<String, Object>(), true);
}
public List<String> getKeyNames() {
return Arrays.asList(keyNames);
}

View File

@ -3,12 +3,15 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
@ -33,7 +36,7 @@ import java.util.Map;
* Note that the DP is affected by downsampling (-dcov) though, so the max value one can obtain for N samples with
* -dcov D is N * D
*/
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation {
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
@ -47,6 +50,22 @@ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnno
return map;
}
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;
int depth = 0;
for ( final Map<Allele, List<GATKSAMRecord>> alleleBins : stratifiedContexts.values() ) {
for ( final List<GATKSAMRecord> alleleBin : alleleBins.values() ) {
depth += alleleBin.size();
}
}
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%d", depth));
return map;
}
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.DEPTH_KEY); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Integer, "Approximate read depth; some reads may have been filtered")); }

View File

@ -28,6 +28,7 @@ import cern.jet.math.Arithmetic;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
@ -37,6 +38,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -49,7 +51,7 @@ import java.util.*;
* indicative of false positive calls. Note that the fisher strand test may not be
* calculated for certain complex indel cases or for multi-allelic sites.
*/
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation {
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
private static final String FS = "FS";
private static final double MIN_PVALUE = 1E-320;
@ -78,6 +80,22 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
return map;
}
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
if ( !vc.isVariant() )
return null;
int[][] table = getContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount());
Double pvalue = Math.max(pValueForContingencyTable(table), MIN_PVALUE);
if ( pvalue == null )
return null;
Map<String, Object> map = new HashMap<String, Object>();
map.put(FS, String.format("%.3f", QualityUtils.phredScaleErrorRate(pvalue)));
return map;
}
public List<String> getKeyNames() {
return Arrays.asList(FS);
}
@ -193,6 +211,38 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
return sum;
}
/**
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
* fw rc
* allele1 # #
* allele2 # #
* @return a 2x2 contingency table
*/
private static int[][] getContingencyTable(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, Allele ref, Allele alt) {
int[][] table = new int[2][2];
for ( final Map<Allele, List<GATKSAMRecord>> alleleBins : stratifiedContexts.values() ) {
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : alleleBins.entrySet() ) {
final boolean matchesRef = ref.equals(alleleBin.getKey());
final boolean matchesAlt = alt.equals(alleleBin.getKey());
if ( !matchesRef && !matchesAlt )
continue;
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
boolean isFW = read.getReadNegativeStrandFlag();
int row = matchesRef ? 0 : 1;
int column = isFW ? 0 : 1;
table[row][column]++;
}
}
}
return table;
}
/**
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
* fw rc
@ -214,8 +264,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
Allele base = Allele.create(p.getBase(), false);
boolean isFW = !p.getRead().getReadNegativeStrandFlag();
boolean matchesRef = ref.equals(base, true);
boolean matchesAlt = alt.equals(base, true);
final boolean matchesRef = ref.equals(base, true);
final boolean matchesAlt = alt.equals(base, true);
if ( matchesRef || matchesAlt ) {
int row = matchesRef ? 0 : 1;
int column = isFW ? 0 : 1;
@ -227,6 +277,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
return table;
}
/**
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
* fw rc

View File

@ -3,12 +3,15 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -27,12 +30,19 @@ import java.util.Map;
* more information. Note that the Inbreeding Coefficient will not be calculated for files
* with fewer than a minimum (generally 10) number of samples.
*/
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation {
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
private static final int MIN_SAMPLES = 10;
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
return calculateIC(vc);
}
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
return calculateIC(vc);
}
private Map<String, Object> calculateIC(final VariantContext vc) {
final GenotypesContext genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
return null;

View File

@ -6,12 +6,10 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.*;
/**
@ -35,6 +33,23 @@ public class MappingQualityRankSumTest extends RankSumTest {
}
}
}
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, List<Double> altQuals) {
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : stratifiedContext.entrySet() ) {
final boolean matchesRef = ref.equals(alleleBin.getKey());
final boolean matchesAlt = alts.contains(alleleBin.getKey());
if ( !matchesRef && !matchesAlt )
continue;
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
if ( matchesRef )
refQuals.add((double)read.getMappingQuality());
else
altQuals.add((double)read.getMappingQuality());
}
}
}
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ?
HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();

View File

@ -3,11 +3,14 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -23,7 +26,7 @@ import java.util.Map;
* Low scores are indicative of false positive calls and artifacts. Note that QualByDepth requires sequencing
* reads associated with the samples with polymorphic genotypes.
*/
public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotation {
public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
@ -62,4 +65,40 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth")); }
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;
final GenotypesContext genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() == 0 )
return null;
int depth = 0;
for ( final Genotype genotype : genotypes ) {
// we care only about variant calls with likelihoods
if ( !genotype.isHet() && !genotype.isHomVar() )
continue;
final Map<Allele, List<GATKSAMRecord>> alleleBins = stratifiedContexts.get(genotype.getSampleName());
if ( alleleBins == null )
continue;
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : alleleBins.entrySet() ) {
if ( !alleleBin.getKey().equals(Allele.NO_CALL) )
depth += alleleBin.getValue().size();
}
}
if ( depth == 0 )
return null;
double QD = -10.0 * vc.getLog10PError() / (double)depth;
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%.2f", QD));
return map;
}
}

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
@ -13,6 +14,8 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
@ -24,7 +27,7 @@ import java.util.Map;
/**
* Root Mean Square of the mapping quality of the reads across all samples.
*/
public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation {
public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
@ -34,7 +37,7 @@ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAn
for ( AlignmentContext context : stratifiedContexts.values() )
totalSize += context.size();
int[] qualities = new int[totalSize];
final int[] qualities = new int[totalSize];
int index = 0;
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
@ -54,6 +57,35 @@ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAn
return map;
}
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;
int depth = 0;
for ( final Map<Allele, List<GATKSAMRecord>> alleleBins : stratifiedContexts.values() ) {
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : alleleBins.entrySet() ) {
depth += alleleBin.getValue().size();
}
}
final int[] qualities = new int[depth];
int index = 0;
for ( final Map<Allele, List<GATKSAMRecord>> alleleBins : stratifiedContexts.values() ) {
for ( final List<GATKSAMRecord> reads : alleleBins.values() ) {
for ( final GATKSAMRecord read : reads ) {
if ( read.getMappingQuality() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE )
qualities[index++] = read.getMappingQuality();
}
}
}
double rms = MathUtils.rms(qualities);
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%.2f", rms));
return map;
}
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.RMS_MAPPING_QUALITY_KEY); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "RMS Mapping Quality")); }

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
@ -12,6 +13,7 @@ import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
@ -26,7 +28,7 @@ import java.util.Map;
/**
* Abstract root for all RankSum based annotations
*/
public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation {
public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
static final double INDEL_LIKELIHOOD_THRESH = 0.1;
static final boolean DEBUG = false;
@ -38,7 +40,6 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
if (genotypes == null || genotypes.size() == 0)
return null;
final ArrayList<Double> refQuals = new ArrayList<Double>();
final ArrayList<Double> altQuals = new ArrayList<Double>();
@ -104,12 +105,52 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
if (!Double.isNaN(testResults.first))
map.put(getKeyNames().get(0), String.format("%.3f", testResults.first));
return map;
}
protected abstract void fillQualsFromPileup(byte ref, List<Byte> alts, ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals);
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
if (stratifiedContexts.size() == 0)
return null;
protected abstract void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals);
final GenotypesContext genotypes = vc.getGenotypes();
if (genotypes == null || genotypes.size() == 0)
return null;
final ArrayList<Double> refQuals = new ArrayList<Double>();
final ArrayList<Double> altQuals = new ArrayList<Double>();
for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) {
final Map<Allele, List<GATKSAMRecord>> context = stratifiedContexts.get(genotype.getSampleName());
if ( context == null )
continue;
fillQualsFromPileup(vc.getReference(), vc.getAlternateAlleles(), context, refQuals, altQuals);
}
if ( refQuals.size() == 0 || altQuals.size() == 0 )
return null;
final MannWhitneyU mannWhitneyU = new MannWhitneyU();
for (final Double qual : altQuals) {
mannWhitneyU.add(qual, MannWhitneyU.USet.SET1);
}
for (final Double qual : refQuals) {
mannWhitneyU.add(qual, MannWhitneyU.USet.SET2);
}
// we are testing that set1 (the alt bases) have lower quality scores than set2 (the ref bases)
final Pair<Double, Double> testResults = mannWhitneyU.runOneSidedTest(MannWhitneyU.USet.SET1);
final Map<String, Object> map = new HashMap<String, Object>();
if (!Double.isNaN(testResults.first))
map.put(getKeyNames().get(0), String.format("%.3f", testResults.first));
return map;
}
protected abstract void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, List<Double> altQuals);
protected abstract void fillQualsFromPileup(final byte ref, final List<Byte> alts, final ReadBackedPileup pileup, final List<Double> refQuals, final List<Double> altQuals);
protected abstract void fillIndelQualsFromPileup(final ReadBackedPileup pileup, final List<Double> refQuals, final List<Double> altQuals);
protected static boolean isUsableBase(final PileupElement p) {
return !(p.isInsertionAtBeginningOfRead() ||

View File

@ -11,12 +11,10 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.*;
/**
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele; if the alternate allele is only seen near the ends of reads this is indicative of error).
@ -49,6 +47,27 @@ public class ReadPosRankSumTest extends RankSumTest {
}
}
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, List<Double> altQuals) {
// TODO -- implement me; how do we pull out the correct offset from the read?
return;
/*
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : stratifiedContext.entrySet() ) {
final boolean matchesRef = ref.equals(alleleBin.getKey());
final boolean matchesAlt = alts.contains(alleleBin.getKey());
if ( !matchesRef && !matchesAlt )
continue;
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
if ( matchesRef )
refQuals.add((double)read.getMappingQuality());
else
altQuals.add((double)read.getMappingQuality());
}
}
*/
}
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele
// to classify a pileup element as ref or alt, we look at the likelihood associated with the allele associated to this element.

View File

@ -33,10 +33,8 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
@ -94,6 +92,13 @@ public class VariantAnnotatorEngine {
initializeDBs();
}
// experimental constructor for active region traversal
public VariantAnnotatorEngine(GenomeAnalysisEngine toolkit) {
this.walker = null;
this.toolkit = toolkit;
requestedInfoAnnotations = AnnotationInterfaceManager.createInfoFieldAnnotations(Arrays.asList("ActiveRegionBasedAnnotation"), Collections.<String>emptyList());
}
// select specific expressions to use
public void initializeExpressions(List<String> expressionsToUse) {
// set up the expressions
@ -169,7 +174,7 @@ public class VariantAnnotatorEngine {
this.requireStrictAlleleMatch = requireStrictAlleleMatch;
}
public VariantContext annotateContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public VariantContext annotateContext(final RefMetaDataTracker tracker, final ReferenceContext ref, final Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
Map<String, Object> infoAnnotations = new LinkedHashMap<String, Object>(vc.getAttributes());
// annotate db occurrences
@ -192,6 +197,20 @@ public class VariantAnnotatorEngine {
return builder.genotypes(annotateGenotypes(tracker, ref, stratifiedContexts, vc)).make();
}
public VariantContext annotateContext(final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
Map<String, Object> infoAnnotations = new LinkedHashMap<String, Object>(vc.getAttributes());
// go through all the requested info annotationTypes
for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) {
Map<String, Object> annotationsFromCurrentType = ((ActiveRegionBasedAnnotation)annotationType).annotate(stratifiedContexts, vc);
if ( annotationsFromCurrentType != null )
infoAnnotations.putAll(annotationsFromCurrentType);
}
// generate a new annotated VC
return new VariantContextBuilder(vc).attributes(infoAnnotations).make();
}
private VariantContext annotateDBs(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc, Map<String, Object> infoAnnotations) {
for ( Map.Entry<RodBinding<VariantContext>, String> dbSet : dbAnnotations.entrySet() ) {
if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) {

View File

@ -0,0 +1,18 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.List;
import java.util.Map;
/**
 * An annotation computed from active-region data — reads stratified by sample and then
 * by allele — rather than from a locus pileup.
 */
// TODO -- make this an abstract class when we move away from InfoFieldAnnotation
public interface ActiveRegionBasedAnnotation extends AnnotationType {
    /**
     * Computes annotations for the given stratified contexts.
     *
     * @param stratifiedContexts reads split by sample and then by allele
     * @param vc                 the variant context being annotated
     * @return a map of INFO key to value; may be null when no annotation applies
     */
    Map<String, Object> annotate(final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, final VariantContext vc);

    /** @return the descriptions used for the VCF INFO meta field */
    List<VCFInfoHeaderLine> getDescriptions();
}

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import net.sf.picard.util.PeekableIterator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@ -32,8 +33,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocComparator;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -79,10 +78,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
private IntervalBinding<Feature> intervalTrack = null;
@Output(doc = "File to which variants should be written", required = true)
protected VCFWriter vcfWriter = null;
@Argument(fullName = "expand_interval", shortName = "exp", doc = "", required = false)
private int expandInterval = 50;
private VCFWriter vcfWriter = null;
@Argument(fullName = "minimum_base_quality", shortName = "mbq", doc = "", required = false)
private int minimumBaseQuality = 20;
@ -96,13 +92,11 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
@Argument(fullName = "maximum_coverage", shortName = "maxcov", doc = "", required = false)
private int maximumCoverage = 700;
private TreeSet<GenomeLoc> intervalList = null; // The list of intervals of interest (plus expanded intervals if user wants them)
private HashMap<GenomeLoc, IntervalStatistics> intervalMap = null; // interval => statistics
private Iterator<GenomeLoc> intervalListIterator; // An iterator to go over all the intervals provided as we traverse the genome
private GenomeLoc currentInterval = null; // The "current" interval loaded
private IntervalStatistics currentIntervalStatistics = null; // The "current" interval being filled with statistics
private Set<String> samples = null; // All the samples being processed
private GenomeLocParser parser; // just an object to allow us to create genome locs (for the expanded intervals)
private PeekableIterator<GenomeLoc> intervalListIterator; // an iterator to go over all the intervals provided as we traverse the genome
private Set<String> samples = null; // all the samples being processed
private final Allele SYMBOLIC_ALLELE = Allele.create("<DT>", false); // avoid creating the symbolic allele multiple times
@Override
public void initialize() {
@ -111,72 +105,22 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
if (intervalTrack == null)
throw new UserException("This tool currently only works if you provide an interval track");
parser = new GenomeLocParser(getToolkit().getMasterSequenceDictionary()); // Important to initialize the parser before creating the intervals below
List<GenomeLoc> originalList = intervalTrack.getIntervals(getToolkit()); // The original list of targets provided by the user that will be expanded or not depending on the options provided
intervalList = new TreeSet<GenomeLoc>(new GenomeLocComparator());
intervalMap = new HashMap<GenomeLoc, IntervalStatistics>();
for (GenomeLoc interval : originalList)
intervalList.add(interval);
//addAndExpandIntervalToMap(interval);
intervalListIterator = new PeekableIterator<GenomeLoc>(intervalTrack.getIntervals(getToolkit()).listIterator());
intervalListIterator = intervalList.iterator();
// get all of the unique sample names
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
// initialize the header
Set<VCFHeaderLine> headerInfo = getHeaderInfo();
vcfWriter.writeHeader(new VCFHeader(headerInfo, samples));
}
/**
* Gets the header lines for the VCF writer
*
* @return A set of VCF header lines
*/
private Set<VCFHeaderLine> getHeaderInfo() {
Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
// INFO fields for overall data
headerLines.add(new VCFInfoHeaderLine("END", 1, VCFHeaderLineType.Integer, "Stop position of the interval"));
headerLines.add(new VCFInfoHeaderLine("DP", 1, VCFHeaderLineType.Integer, "Total depth in the site. Sum of the depth of all pools"));
headerLines.add(new VCFInfoHeaderLine("AD", 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a lci divided by interval size."));
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
// FORMAT fields for each genotype
headerLines.add(new VCFFormatHeaderLine("DP", 1, VCFHeaderLineType.Integer, "Total depth in the site. Sum of the depth of all pools"));
headerLines.add(new VCFFormatHeaderLine("AD", 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a lci divided by interval size."));
// FILTER fields
for (CallableStatus stat : CallableStatus.values()) {
headerLines.add(new VCFHeaderLine(stat.name(), stat.description));
}
return headerLines;
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); // get all of the unique sample names for the VCF Header
vcfWriter.writeHeader(new VCFHeader(getHeaderInfo(), samples)); // initialize the VCF header
}
@Override
public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
GenomeLoc refLocus = ref.getLocus();
while (currentInterval == null || currentInterval.isBefore(refLocus)) { // do this for first time and while currentInterval is behind current locus
if (!intervalListIterator.hasNext())
return 0L;
if (currentInterval != null)
processIntervalStats(currentInterval, Allele.create(ref.getBase(), true));
removePastIntervals(refLocus, ref.getBase()); // process and remove any intervals in the map that are don't overlap the current locus anymore
addNewOverlappingIntervals(refLocus); // add all new intervals that may overlap this reference locus
currentInterval = intervalListIterator.next();
addAndExpandIntervalToMap(currentInterval);
currentIntervalStatistics = intervalMap.get(currentInterval);
}
if (currentInterval.isPast(refLocus)) // skip if we are behind the current interval
return 0L;
currentIntervalStatistics.addLocus(context); // Add current locus to stats
for (IntervalStatistics intervalStatistics : intervalMap.values())
intervalStatistics.addLocus(context); // Add current locus to stats
return 1L;
}
@ -198,10 +142,15 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
return sum + value;
}
/**
* Process all remaining intervals
*
* @param result number of loci processed by the walker
*/
@Override
public void onTraversalDone(Long result) {
for (GenomeLoc interval : intervalMap.keySet())
processIntervalStats(interval, Allele.create("<DT>", true));
for (GenomeLoc interval : intervalMap.keySet())
processIntervalStats(intervalMap.get(interval), Allele.create("A"));
}
@Override
@ -219,82 +168,111 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
@Override
public boolean alwaysAppendDbsnpId() {return false;}
/**
 * Builds the expansion interval that immediately precedes the given interval:
 * it covers up to expandInterval bases before the interval's start, ending one
 * base before that start, with both endpoints clamped at position 0.
 *
 * @param interval the interval to expand before
 * @return a new GenomeLoc on the same contig, just upstream of the given interval
 */
private GenomeLoc createIntervalBefore(GenomeLoc interval) {
    final int expandedStart = Math.max(0, interval.getStart() - expandInterval);
    final int expandedStop = Math.max(0, interval.getStart() - 1);
    return parser.createGenomeLoc(interval.getContig(), interval.getContigIndex(), expandedStart, expandedStop);
}
/**
* Removes all intervals that are behind the current reference locus from the intervalMap
*
* @param refLocus the current reference locus
* @param refBase the reference allele
*/
private void removePastIntervals(GenomeLoc refLocus, byte refBase) {
List<GenomeLoc> toRemove = new LinkedList<GenomeLoc>();
for (GenomeLoc interval : intervalMap.keySet())
if (interval.isBefore(refLocus)) {
processIntervalStats(intervalMap.get(interval), Allele.create(refBase, true));
toRemove.add(interval);
}
private GenomeLoc createIntervalAfter(GenomeLoc interval) {
int contigLimit = getToolkit().getSAMFileHeader().getSequenceDictionary().getSequence(interval.getContigIndex()).getSequenceLength();
int start = Math.min(interval.getStop() + 1, contigLimit);
int stop = Math.min(interval.getStop() + expandInterval, contigLimit);
return parser.createGenomeLoc(interval.getContig(), interval.getContigIndex(), start, stop);
for (GenomeLoc interval : toRemove)
intervalMap.remove(interval);
GenomeLoc interval = intervalListIterator.peek(); // clean up all intervals that we might have skipped because there was no data
while(interval != null && interval.isBefore(refLocus)) {
interval = intervalListIterator.next();
processIntervalStats(createIntervalStatistic(interval), Allele.create(refBase, true));
interval = intervalListIterator.peek();
}
}
/**
* Takes an interval and commits it to memory.
* It will expand it if so told by the -exp command line argument
* Adds all intervals that overlap the current reference locus to the intervalMap
*
* @param interval The new interval to process
* @param refLocus the current reference locus
*/
private void addAndExpandIntervalToMap(GenomeLoc interval) {
if (expandInterval > 0) {
GenomeLoc before = createIntervalBefore(interval);
GenomeLoc after = createIntervalAfter(interval);
intervalList.add(before);
intervalList.add(after);
intervalMap.put(before, new IntervalStatistics(samples, before, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality));
intervalMap.put(after, new IntervalStatistics(samples, after, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality));
private void addNewOverlappingIntervals(GenomeLoc refLocus) {
GenomeLoc interval = intervalListIterator.peek();
while (interval != null && !interval.isPast(refLocus)) {
System.out.println("LOCUS : " + refLocus + " -- " + interval);
intervalMap.put(interval, createIntervalStatistic(interval));
intervalListIterator.next(); // discard the interval (we've already added it to the map)
interval = intervalListIterator.peek();
}
if (!intervalList.contains(interval))
intervalList.add(interval);
intervalMap.put(interval, new IntervalStatistics(samples, interval, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality));
}
/**
 * Takes the interval statistics, writes them out to the VCF, and removes the interval from the stash
 *
 * @param stats     the statistics of the interval to write out and clear
 * @param refAllele the reference allele
 */
private void processIntervalStats(GenomeLoc interval, Allele allele) {
IntervalStatistics stats = intervalMap.get(interval);
private void processIntervalStats(IntervalStatistics stats, Allele refAllele) {
GenomeLoc interval = stats.getInterval();
List<Allele> alleles = new ArrayList<Allele>();
Map<String, Object> attributes = new HashMap<String, Object>();
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
alleles.add(allele);
VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStop(), alleles);
alleles.add(refAllele);
alleles.add(SYMBOLIC_ALLELE);
VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStart(), alleles);
vcb = vcb.log10PError(VariantContext.NO_LOG10_PERROR); // QUAL field makes no sense in our VCF
vcb.filters(statusesToStrings(stats.callableStatuses()));
attributes.put(VCFConstants.END_KEY, interval.getStop());
attributes.put(VCFConstants.DEPTH_KEY, stats.totalCoverage());
attributes.put("AV", stats.averageCoverage());
attributes.put(VCFConstants.DEPTH_KEY, stats.averageCoverage());
vcb = vcb.attributes(attributes);
for (String sample : samples) {
Map<String, Object> infos = new HashMap<String, Object>();
infos.put("DP", stats.getSample(sample).totalCoverage());
infos.put("AV", stats.getSample(sample).averageCoverage());
infos.put(VCFConstants.DEPTH_KEY, stats.getSample(sample).averageCoverage());
Set<String> filters = new HashSet<String>();
filters.addAll(statusesToStrings(stats.getSample(sample).getCallableStatuses()));
genotypes.add(new Genotype(sample, alleles, VariantContext.NO_LOG10_PERROR, filters, infos, false));
genotypes.add(new Genotype(sample, null, VariantContext.NO_LOG10_PERROR, filters, infos, false));
}
vcb = vcb.genotypes(genotypes);
vcfWriter.add(vcb.make());
intervalMap.remove(interval);
}
/**
* Gets the header lines for the VCF writer
*
* @return A set of VCF header lines
*/
/**
 * Builds the set of header lines for the VCF writer: the INFO and FORMAT fields this
 * walker emits, plus one FILTER line per possible CallableStatus.
 *
 * @return A set of VCF header lines
 */
private static Set<VCFHeaderLine> getHeaderInfo() {
    Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();

    // INFO fields for overall data
    headerLines.add(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval"));
    headerLines.add(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a locus divided by interval size."));
    // NOTE(review): the VCF spec forbids whitespace in INFO keys -- "Diagnose Targets" is not a
    // legal key and may be rejected by strict parsers; consider renaming it to "DiagnoseTargets".
    headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));

    // FORMAT fields for each genotype
    headerLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a locus divided by interval size."));

    // FILTER fields: one line per possible callable status
    for (CallableStatus stat : CallableStatus.values())
        headerLines.add(new VCFHeaderLine(stat.name(), stat.description));

    return headerLines;
}
private static Set<String> statusesToStrings(Set<CallableStatus> statuses) {
Set<String> output = new HashSet<String>(statuses.size());
@ -303,4 +281,8 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> implements Annotato
return output;
}
/**
 * Creates a fresh IntervalStatistics for the given interval, wired with the shared
 * sample set and the coverage/quality thresholds supplied on the command line.
 *
 * @param interval the interval the statistics will cover
 * @return a new, empty IntervalStatistics for the interval
 */
private IntervalStatistics createIntervalStatistic(GenomeLoc interval) {
return new IntervalStatistics(samples, interval, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality);
}
}

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.util.HashMap;
@ -52,18 +53,28 @@ public class IntervalStatistics {
return samples.get(sample);
}
/** @return the genomic interval these statistics cover */
public GenomeLoc getInterval() {
return interval;
}
public void addLocus(AlignmentContext context) {
ReadBackedPileup pileup = context.getBasePileup();
for (String sample : samples.keySet())
getSample(sample).addLocus(context.getLocation(), pileup.getPileupForSample(sample));
Map<String, ReadBackedPileup> samplePileups = pileup.getPileupsForSamples(samples.keySet());
for (Map.Entry<String, ReadBackedPileup> entry : samplePileups.entrySet()) {
String sample = entry.getKey();
ReadBackedPileup samplePileup = entry.getValue();
SampleStatistics sampleStatistics = samples.get(sample);
if (sampleStatistics == null)
throw new ReviewedStingException(String.format("Trying to add locus statistics to a sample (%s) that doesn't exist in the Interval.", sample));
sampleStatistics.addLocus(context.getLocation(), samplePileup);
}
}
/**
 * Total coverage for the interval, computed lazily on first call and cached in
 * preComputedTotalCoverage (a negative value marks the cache as unfilled).
 * NOTE(review): not synchronized -- assumes single-threaded access; confirm with callers.
 *
 * @return the cached total coverage value produced by calculateTotalCoverage()
 */
public long totalCoverage() {
if (preComputedTotalCoverage < 0)
calculateTotalCoverage();
return preComputedTotalCoverage;
}
public double averageCoverage() {
if (preComputedTotalCoverage < 0)

View File

@ -36,10 +36,7 @@ import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
@ -273,15 +270,14 @@ public class ConsensusAlleleCounter {
builder.alleles(Arrays.asList(refAllele, altAllele));
builder.referenceBaseForIndel(ref.getBase());
builder.noGenotypes();
if (doMultiAllelicCalls)
if (doMultiAllelicCalls) {
vcs.add(builder.make());
if (vcs.size() >= GenotypeLikelihoods.MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED)
break;
} else if (curCnt > maxAlleleCnt) {
maxAlleleCnt = curCnt;
vcs.clear();
vcs.add(builder.make());
else {
if (curCnt > maxAlleleCnt) {
maxAlleleCnt = curCnt;
vcs.clear();
vcs.add(builder.make());
}
}
}

View File

@ -82,15 +82,22 @@ public class UnifiedArgumentCollection {
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
/**
* This argument is not enabled by default because it increases the runtime by an appreciable amount.
* Note that calculating the SLOD increases the runtime by an appreciable amount.
*/
@Argument(fullName = "noSLOD", shortName = "nosl", doc = "If provided, we will not calculate the SLOD", required = false)
public boolean NO_SLOD = false;
/**
* Depending on the value of the --max_alternate_alleles argument, we may genotype only a fraction of the alleles being sent on for genotyping.
* Using this argument instructs the genotyper to annotate (in the INFO field) the number of alternate alleles that were originally discovered at the site.
*/
@Argument(fullName = "annotateNDA", shortName = "nda", doc = "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site", required = false)
public boolean ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = false;
/**
* When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provide in this rod binding
*/
@Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when in GENOTYPE_MODE = GENOTYPE_GIVEN_ALLELES", required=false)
@Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required=false)
public RodBinding<VariantContext> alleles;
/**
@ -105,8 +112,11 @@ public class UnifiedArgumentCollection {
/**
* If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
* then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive.
* then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
* that you not play around with this parameter.
*/
@Advanced
@Argument(fullName = "max_alternate_alleles", shortName = "maxAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
public int MAX_ALTERNATE_ALLELES = 3;
@ -171,6 +181,7 @@ public class UnifiedArgumentCollection {
uac.GenotypingMode = GenotypingMode;
uac.OutputMode = OutputMode;
uac.NO_SLOD = NO_SLOD;
uac.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED;
uac.STANDARD_CONFIDENCE_FOR_CALLING = STANDARD_CONFIDENCE_FOR_CALLING;
uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING;
uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE;

View File

@ -39,6 +39,8 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
@ -127,8 +129,19 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
@ArgumentCollection
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
/**
* If a call overlaps with a record from the provided comp track, the INFO field will be annotated
* as such in the output with the track name (e.g. -comp:FOO will have 'FOO' in the INFO field).
* Records that are filtered in the comp track will be ignored.
* Note that 'dbSNP' has been special-cased (see the --dbsnp argument).
*/
@Input(fullName="comp", shortName = "comp", doc="comparison VCF file", required=false)
public List<RodBinding<VariantContext>> comps = Collections.emptyList();
public List<RodBinding<VariantContext>> getCompRodBindings() { return comps; }
// The following are not used by the Unified Genotyper
public RodBinding<VariantContext> getSnpEffRodBinding() { return null; }
public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); }
public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
public boolean alwaysAppendDbsnpId() { return false; }
@ -203,6 +216,10 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
*
**/
public void initialize() {
// check for a bad max alleles value
if ( UAC.MAX_ALTERNATE_ALLELES > GenotypeLikelihoods.MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED)
throw new UserException.BadArgumentValue("max_alternate_alleles", "the maximum possible value is " + GenotypeLikelihoods.MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED);
// warn the user for misusing EMIT_ALL_SITES
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES &&
UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY &&
@ -238,6 +255,8 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
// annotation (INFO) fields from UnifiedGenotyper
if ( !UAC.NO_SLOD )
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.STRAND_BIAS_KEY, 1, VCFHeaderLineType.Float, "Strand Bias"));
if ( UAC.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED )
headerInfo.add(new VCFInfoHeaderLine(UnifiedGenotyperEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY, 1, VCFHeaderLineType.Integer, "Number of alternate alleles discovered (but not necessarily genotyped) at this site"));
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.DOWNSAMPLED_KEY, 0, VCFHeaderLineType.Flag, "Were any of the samples downsampled?"));
// also, check to see whether comp rods were included

View File

@ -51,6 +51,8 @@ import java.util.*;
public class UnifiedGenotyperEngine {
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA";
public static final double HUMAN_SNP_HETEROZYGOSITY = 1e-3;
public static final double HUMAN_INDEL_HETEROZYGOSITY = 1e-4;
@ -365,6 +367,9 @@ public class UnifiedGenotyperEngine {
if ( !limitedContext && rawContext.hasPileupBeenDownsampled() )
attributes.put(VCFConstants.DOWNSAMPLED_KEY, true);
if ( UAC.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED )
attributes.put(NUMBER_OF_DISCOVERED_ALLELES_KEY, vc.getAlternateAlleles().size());
if ( !UAC.NO_SLOD && !limitedContext && !bestGuessIsRef ) {
//final boolean DEBUG_SLOD = false;

View File

@ -157,7 +157,7 @@ public class PairHMMIndelErrorModel {
}
private void updateCell(final int indI, final int indJ, final int X_METRIC_LENGTH, final int Y_METRIC_LENGTH, byte[] readBases, byte[] readQuals, byte[] haplotypeBases,
private static void updateCell(final int indI, final int indJ, final int X_METRIC_LENGTH, final int Y_METRIC_LENGTH, byte[] readBases, byte[] readQuals, byte[] haplotypeBases,
byte[] currentGOP, byte[] currentGCP, double[][] matchMetricArray, double[][] XMetricArray, double[][] YMetricArray) {
if (indI > 0 && indJ > 0) {
final int im1 = indI -1;
@ -183,9 +183,27 @@ public class PairHMMIndelErrorModel {
}
}
private double computeReadLikelihoodGivenHaplotypeAffineGaps(byte[] haplotypeBases, byte[] readBases, byte[] readQuals,
/**
 * Computes the likelihood of the read given the haplotype under an affine-gap pair-HMM.
 * Allocates and initializes fresh match/insertion/deletion (M/X/Y) dynamic-programming
 * arrays, then delegates to computeReadLikelihoodGivenHaplotypeAffineGaps starting at index 0.
 *
 * NOTE(review): "Likehood" in the method name looks like a typo for "Likelihood"; renaming
 * would break external callers, so it is documented here rather than fixed.
 *
 * @param haplotypeBases    bases of the haplotype
 * @param readBases         bases of the read
 * @param readQuals         per-base qualities of the read
 * @param currentGOP        gap open penalties (computed per read -- see commit notes)
 * @param currentGCP        gap continuation penalties
 * @param bandedLikelihoods if true, enable the banded likelihood computation
 * @return the read-given-haplotype likelihood (presumably log-scaled -- confirm against PairHMM)
 */
public static double computeReadLikehoodGivenHaplotype(byte[] haplotypeBases, byte[] readBases, byte[] readQuals,
byte[] currentGOP, byte[] currentGCP, boolean bandedLikelihoods) {
// M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions
final int X_METRIC_LENGTH = readBases.length + 1;
final int Y_METRIC_LENGTH = haplotypeBases.length + 1;
// initial arrays to hold the probabilities of being in the match, insertion and deletion cases
final double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
final double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
final double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
return computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, currentGOP,
currentGCP, 0, matchMetricArray, XMetricArray, YMetricArray, bandedLikelihoods);
}
private static double computeReadLikelihoodGivenHaplotypeAffineGaps(byte[] haplotypeBases, byte[] readBases, byte[] readQuals,
byte[] currentGOP, byte[] currentGCP, int indToStart,
double[][] matchMetricArray, double[][] XMetricArray, double[][] YMetricArray) {
double[][] matchMetricArray, double[][] XMetricArray, double[][] YMetricArray,
boolean bandedLikelihoods) {
final int X_METRIC_LENGTH = readBases.length+1;
final int Y_METRIC_LENGTH = haplotypeBases.length+1;
@ -391,6 +409,9 @@ public class PairHMMIndelErrorModel {
}
}
else {
if (DEBUG) {
System.out.format("Read Name:%s, aln start:%d aln stop:%d orig cigar:%s\n",p.getRead().getReadName(), p.getRead().getAlignmentStart(), p.getRead().getAlignmentEnd(), p.getRead().getCigarString());
}
// System.out.format("%d %s\n",p.getRead().getAlignmentStart(), p.getRead().getClass().getName());
GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
@ -588,7 +609,7 @@ public class PairHMMIndelErrorModel {
}
pairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
/*
if (previousHaplotypeSeen == null)
@ -602,17 +623,14 @@ public class PairHMMIndelErrorModel {
contextLogGapOpenProbabilities, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities,
startIndexInHaplotype, matchMetricArray, XMetricArray, YMetricArray);
/* double r2 = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, contextLogGapOpenProbabilities,
contextLogGapContinuationProbabilities, 0, matchMetricArray, XMetricArray, YMetricArray);
double l2 = computeReadLikehoodGivenHaplotype(haplotypeBases, readBases, readQuals, contextLogGapOpenProbabilities,
contextLogGapContinuationProbabilities, bandedLikelihoods);
if (readLikelihood > 0) {
int k=0;
}
*/ if (DEBUG) {
if (DEBUG) {
System.out.println("H:"+new String(haplotypeBases));
System.out.println("R:"+new String(readBases));
System.out.format("L:%4.2f\n",readLikelihood);
// System.out.format("Lorig:%4.2f\n",r2);
// System.out.format("Lorig:%4.2f\n",r2);
System.out.format("StPos:%d\n", startIndexInHaplotype);
}
}

View File

@ -68,7 +68,7 @@ public class VariantEvalReportWriter {
*/
public final void writeReport(final PrintStream out) {
for ( int key = 0; key < stratManager.size(); key++ ) {
final String stratStateString = stratManager.getStratsAndStatesForKeyString(key);
final String stratStateString = stratManager.getStratsAndStatesStringForKey(key);
final List<Pair<VariantStratifier, Object>> stratsAndStates = stratManager.getStratsAndStatesForKey(key);
final EvaluationContext nec = stratManager.get(key);

View File

@ -17,6 +17,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.DynamicStratification;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.IntervalStratification;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manager.StratificationManager;
@ -221,6 +222,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
// The set of all possible evaluation contexts
StratificationManager<VariantStratifier, EvaluationContext> stratManager;
//Set<DynamicStratification> dynamicStratifications = Collections.emptySet();
/**
* Initialize the stratifications, evaluations, evaluation contexts, and reporting object
@ -360,6 +362,14 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
if (tracker != null) {
String aastr = (ancestralAlignments == null) ? null : new String(ancestralAlignments.getSubsequenceAt(ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStop()).getBases());
// // update the dynamic stratifications
// for (final VariantContext vc : tracker.getValues(evals, ref.getLocus())) {
// // don't worry -- DynamicStratification only work with one eval object
// for ( final DynamicStratification ds : dynamicStratifications ) {
// ds.update(vc);
// }
// }
// --------- track --------- sample - VariantContexts -
HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>> evalVCs = variantEvalUtils.bindVariantContexts(tracker, ref, evals, byFilterIsEnabled, true, perSampleIsEnabled, mergeEvals);
HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>> compVCs = variantEvalUtils.bindVariantContexts(tracker, ref, comps, byFilterIsEnabled, false, false, false);
@ -456,13 +466,13 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
* @param sampleName
* @return
*/
private Collection<EvaluationContext> getEvaluationContexts(final RefMetaDataTracker tracker,
final ReferenceContext ref,
final VariantContext eval,
final String evalName,
final VariantContext comp,
final String compName,
final String sampleName ) {
protected Collection<EvaluationContext> getEvaluationContexts(final RefMetaDataTracker tracker,
final ReferenceContext ref,
final VariantContext eval,
final String evalName,
final VariantContext comp,
final String compName,
final String sampleName ) {
final List<List<Object>> states = new LinkedList<List<Object>>();
for ( final VariantStratifier vs : stratManager.getStratifiers() ) {
states.add(vs.getRelevantStates(ref, tracker, comp, compName, eval, evalName, sampleName));

View File

@ -32,7 +32,6 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -41,51 +40,81 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
public class IndelSummary extends VariantEvaluator implements StandardEval {
final protected static Logger logger = Logger.getLogger(IndelSummary.class);
//
// counts of snps and indels
//
@DataPoint(description = "Number of SNPs", format = "%d")
public int n_SNPs = 0;
@DataPoint(description = "Number of singleton SNPs", format = "%d")
public int n_singleton_SNPs = 0;
@DataPoint(description = "Number of Indels", format = "%d")
@DataPoint(description = "Number of indels", format = "%d")
public int n_indels = 0;
// Number of Indels Sites (counts one for any number of alleles at site)
public int nIndelSites = 0;
@DataPoint(description = "Number of singleton Indels", format = "%d")
@DataPoint(description = "Number of singleton indels", format = "%d")
public int n_singleton_indels = 0;
//
// gold standard
//
@DataPoint(description = "Number of Indels overlapping gold standard sites", format = "%d")
public int n_indels_matching_gold_standard = 0;
@DataPoint(description = "Percent of indels overlapping gold standard sites")
public String gold_standard_matching_rate;
// counts 1 for each site where the number of alleles > 2
public int nMultiIndelSites = 0;
//
// multi-allelics
//
// Number of Indels Sites (counts one for any number of alleles at site)
public int nIndelSites = 0;
@DataPoint(description = "Number of sites with where the number of alleles is greater than 2")
public int n_multiallelic_indel_sites = 0;
@DataPoint(description = "Percent of indel sites that are multi-allelic")
public String percent_of_sites_with_more_than_2_alleles;
//
// snp : indel ratios
//
@DataPoint(description = "SNP to indel ratio")
public String SNP_to_indel_ratio;
@DataPoint(description = "Singleton SNP to indel ratio")
public String SNP_to_indel_ratio_for_singletons;
//
// novelty
//
@DataPoint(description = "Number of novel indels", format = "%d")
public int n_novel_indels = 0;
@DataPoint(description = "Indel novelty rate")
public String indel_novelty_rate;
@DataPoint(description = "Frameshift percent")
public String frameshift_rate_for_coding_indels;
//
// insertions to deletions
//
@DataPoint(description = "Number of insertion indels")
public int n_insertions = 0;
@DataPoint(description = "Number of deletion indels")
public int n_deletions = 0;
@DataPoint(description = "Insertion to deletion ratio")
public String insertion_to_deletion_ratio;
@DataPoint(description = "Number of large (>10 bp) deletions")
public int n_large_deletions = 0;
@DataPoint(description = "Number of large (>10 bp) insertions")
public int n_large_insertions = 0;
@DataPoint(description = "Ratio of large (>10 bp) insertions to deletions")
public String insertion_to_deletion_ratio_for_large_indels;
//
// Frameshifts
//
@ -95,6 +124,9 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
@DataPoint(description = "Number of indels in protein-coding regions not labeled as frameshift")
public int n_coding_indels_in_frame = 0;
@DataPoint(description = "Frameshift percent")
public String frameshift_rate_for_coding_indels;
//
// Het : hom ratios
//
@ -106,8 +138,6 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
int nSNPHets = 0, nSNPHoms = 0, nIndelHets = 0, nIndelHoms = 0;
int nKnownIndels = 0, nInsertions = 0;
int[] insertionCountByLength = new int[]{0, 0, 0, 0}; // note that the first element isn't used
int[] deletionCountByLength = new int[]{0, 0, 0, 0}; // note that the first element isn't used
@ -129,15 +159,6 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
public final static int LARGE_INDEL_SIZE_THRESHOLD = 10;
@DataPoint(description = "Number of large (>10 bp) deletions")
public int n_large_deletions = 0;
@DataPoint(description = "Number of large (>10 bp) insertions")
public int n_large_insertions = 0;
@DataPoint(description = "Ratio of large (>10 bp) insertions to deletions")
public String insertion_to_deletion_ratio_for_large_indels;
@Override public int getComparisonOrder() { return 2; }
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
@ -158,10 +179,9 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
break;
case INDEL:
final VariantContext gold = getWalker().goldStandard == null ? null : tracker.getFirstValue(getWalker().goldStandard);
if ( eval.isComplexIndel() ) break; // don't count complex substitutions
nIndelSites++;
if ( ! eval.isBiallelic() ) nMultiIndelSites++;
if ( ! eval.isBiallelic() ) n_multiallelic_indel_sites++;
// collect information about het / hom ratio
for ( final Genotype g : eval.getGenotypes() ) {
@ -172,13 +192,14 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
for ( Allele alt : eval.getAlternateAlleles() ) {
n_indels++; // +1 for each alt allele
if ( variantWasSingleton(eval) ) n_singleton_indels++;
if ( comp != null ) nKnownIndels++; // TODO -- make this test allele specific?
if ( comp == null ) n_novel_indels++; // TODO -- make this test allele specific?
if ( gold != null ) n_indels_matching_gold_standard++;
// ins : del ratios
final int alleleSize = alt.length() - eval.getReference().length();
if ( alleleSize == 0 ) throw new ReviewedStingException("Allele size not expected to be zero for indel: alt = " + alt + " ref = " + eval.getReference());
if ( alleleSize > 0 ) nInsertions++;
if ( alleleSize > 0 ) n_insertions++;
if ( alleleSize < 0 ) n_deletions++;
// requires snpEFF annotations
if ( eval.getAttributeAsString("SNPEFF_GENE_BIOTYPE", "missing").equals("protein_coding") ) {
@ -216,12 +237,12 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
}
public void finalizeEvaluation() {
percent_of_sites_with_more_than_2_alleles = Utils.formattedRatio(nMultiIndelSites, nIndelSites);
percent_of_sites_with_more_than_2_alleles = Utils.formattedPercent(n_multiallelic_indel_sites, nIndelSites);
SNP_to_indel_ratio = Utils.formattedRatio(n_SNPs, n_indels);
SNP_to_indel_ratio_for_singletons = Utils.formattedRatio(n_singleton_SNPs, n_singleton_indels);
gold_standard_matching_rate = Utils.formattedNoveltyRate(n_indels_matching_gold_standard, n_indels);
indel_novelty_rate = Utils.formattedNoveltyRate(nKnownIndels, n_indels);
gold_standard_matching_rate = Utils.formattedPercent(n_indels_matching_gold_standard, n_indels);
indel_novelty_rate = Utils.formattedNoveltyRate(n_indels - n_novel_indels, n_indels);
frameshift_rate_for_coding_indels = Utils.formattedPercent(n_coding_indels_frameshifting, n_coding_indels_in_frame + n_coding_indels_frameshifting);
ratio_of_1_and_2_to_3_bp_deletions = Utils.formattedRatio(deletionCountByLength[1] + deletionCountByLength[2], deletionCountByLength[3]);
@ -230,7 +251,7 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
SNP_het_to_hom_ratio = Utils.formattedRatio(nSNPHets, nSNPHoms);
indel_het_to_hom_ratio = Utils.formattedRatio(nIndelHets, nIndelHoms);
insertion_to_deletion_ratio = Utils.formattedRatio(nInsertions, n_indels - nInsertions);
insertion_to_deletion_ratio = Utils.formattedRatio(n_insertions, n_deletions);
insertion_to_deletion_ratio_for_large_indels = Utils.formattedRatio(n_large_insertions, n_large_deletions);
}

View File

@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
public abstract class VariantEvaluator implements Comparable<VariantEvaluator> {
@ -67,4 +68,41 @@ public abstract class VariantEvaluator implements Comparable<VariantEvaluator> {
public int compareTo(final VariantEvaluator variantEvaluator) {
return getSimpleName().compareTo(variantEvaluator.getSimpleName());
}
/**
* Evaluation modules that override this function to indicate that they support
* combining the results of two independent collections of eval data into
* a single meaningful result. The purpose of this interface is to
* allow us to cut up the input data into many independent stratifications, and then
* at the end of the eval run decide which stratifications to combine. This is
* important in the case of AC, where you may have thousands of distinct AC
* values that chop up the number of variants to too small a number of variants,
* and you'd like to combine the AC values into ranges containing some percent
* of the data.
*
* For example, suppose you have an eval that
* counts variants in a variable nVariants. If you want to be able to combine
* multiple evaluations of this type, overload the combine function
* with a function that sets this.nVariants += other.nVariants.
*
* Add in the appropriate fields of the VariantEvaluator T
* (of the same type as this object) to the values of this object.
*
* The values in this and other are implicitly independent, so that
* the values can be added together.
*
* @param other a VariantEvaluator of the same type of this object
*/
public void combine(final VariantEvaluator other) {
throw new ReviewedStingException(getSimpleName() + " doesn't support combining results, sorry");
}
/**
* Must be overloaded to return true for evaluation modules that support the combine operation
*
* @return
*/
public boolean supportsCombine() {
return false;
}
}

View File

@ -0,0 +1,69 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* Tag this stratification as dynamically determining the final strat based on the input data
*
* The paradigm here is simple. We upfront create a strat with N states that reflect the finest grained
* possible division of the data. The data is processed, and statistics collected for each of the N states.
* An update call is made to the stratification for evaluation VariantContext during each map call,
* allowing the strat to collect data about the usage of each state. A final call requests that
* the stratification map down the N states into M states (typically less than N, not necessarily
* a subset of N). This is provided by returning a map from each of M state -> N states and
* the VariantEval walker will combine all of the evaluations for N into a single value for
* each M.
*
* For example, suppose I have a dynamic strat called AC, adopting 7 possible values 0,1,2,3,4,5,6. This
* strats tracks the number of eval vcs for each state, with final counts 0=1, 1=100, 2=10, 3=5, 4=3, 5=2, 6=1.
* The stratification attempts to combine the strats down to so that each state has approximately the same
* fraction of the data in each bin. Overall there is 1+100+10+5+3+2+1=124 observations and 7 bins so we really
* want ~ 18 observations in each bin. So we merge 3-6 with 5+3+2+1 = 11 and keep 2, 1, and 0 as distinct bins. We
* return a map from 0 -> 0, 1 -> 1, 2 -> 2, 3-6 -> {3,4,5,6}.
*
* TODO - some open implementation questions
* -- We should only create one stratifier overall. How do we track this? When we create the stratifiers
* perhaps we can look at them and create a tracker?
* -- How do we create a new stratifier based on the finalStratifications() given the framework? Conceptually
* this new thing is itself a stratifier, just like before, but it's states are determined at the end. We'd
* then like to call not getRelevantStates but a different function that accepts an old state and returns
* the new state. Perhaps the process should look like:
* finalizeStratification -> new Stratifier whose states are the final ones
* getNewState(old state) -> new state (one of those in getFinalStratification)
*
* @author Mark DePristo
* @since 4/9/12
*/
public interface DynamicStratification {
public void update(final VariantContext eval);
public VariantStratifier finalizeStratification();
public Object getFinalState(final Object oldState);
}

View File

@ -50,7 +50,7 @@ public class OneBPIndel extends VariantStratifier {
public List<Object> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
if (eval != null && eval.isIndel()) {
for ( int l : eval.getIndelLengths() )
if ( l > 1 )
if ( Math.abs(l) > 1 )
return TWO_PLUS_BP; // someone is too long
return ONE_BP; // all lengths are one
} else

View File

@ -26,6 +26,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manage
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.EvaluationContext;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -54,16 +56,27 @@ public class StratificationManager<K extends Stratifier, V> implements Map<List<
//
// -------------------------------------------------------------------------------------
/**
* Create a new StratificationManager with nodes to store data for all combinations
* of the ordered list of strats
*
* @param strats ordered list of stratifications to representation
*/
@Requires("!strats.isEmpty()")
public StratificationManager(final List<K> strats) {
stratifiers = new ArrayList<K>(strats);
this.stratifiers = new ArrayList<K>(strats);
// construct and store the full tree of strats
this.root = buildStratificationTree(new LinkedList<K>(strats));
// assign the linear key ordering to the leafs
assignKeys(root);
// cache the size, and check for a bad state
this.size = root.size();
if ( this.size == 0 )
throw new ReviewedStingException("Size == 0 in StratificationManager");
// prepare the assocated data vectors mapping from key -> data
this.valuesByKey = new ArrayList<V>(size());
this.stratifierValuesByKey = new ArrayList<List<Object>>(size());
this.keyStrings = new ArrayList<String>(size());
@ -72,9 +85,20 @@ public class StratificationManager<K extends Stratifier, V> implements Map<List<
this.stratifierValuesByKey.add(null);
this.keyStrings.add(null);
}
assignStratifierValuesByKey(root);
}
/**
* Recursive construction helper for main constructor that fills into the
* complete tree of StratNodes. This function returns the complete tree
* suitable for associating data with each combinatino of keys. Note
* that the tree is not fully complete as the keys are not yet set for
* each note (see assignStratifierValuesByKey)
*
* @param strats
* @return
*/
private StratNode<K> buildStratificationTree(final Queue<K> strats) {
final K first = strats.poll();
if ( first == null ) {
@ -97,6 +121,10 @@ public class StratificationManager<K extends Stratifier, V> implements Map<List<
}
}
/**
* Set the key for each leaf from root, in order from 0 to N - 1 for N leaves in the tree
* @param root
*/
@Requires("root == this.root")
private void assignKeys(final StratNode<K> root) {
int key = 0;
@ -106,15 +134,23 @@ public class StratificationManager<K extends Stratifier, V> implements Map<List<
}
}
public void assignStratifierValuesByKey(final StratNode<K> root) {
/**
* Entry point to recursive tool that fills in the list of state values corresponding
* to each key. After this function is called you can map from key -> List of StateValues
* instead of walking the tree to find the key and reading the list of state values
*
* @param root
*/
private void assignStratifierValuesByKey(final StratNode<K> root) {
assignStratifierValuesByKey(root, new LinkedList<Object>());
// do a last sanity check that no key has null value after assigning
for ( List<Object> stateValues : stratifierValuesByKey )
if ( stateValues == null )
throw new ReviewedStingException("Found a null state value set that's null");
}
public void assignStratifierValuesByKey(final StratNode<K> node, final LinkedList<Object> states) {
private void assignStratifierValuesByKey(final StratNode<K> node, final LinkedList<Object> states) {
if ( node.isLeaf() ) { // we're here!
if ( states.isEmpty() )
throw new ReviewedStingException("Found a leaf node with an empty state values vector");
@ -134,13 +170,17 @@ public class StratificationManager<K extends Stratifier, V> implements Map<List<
//
// -------------------------------------------------------------------------------------
/**
* How many states are held in this stratification manager?
* @return
*/
@Ensures("result >= 0")
public int size() {
return size;
}
@Ensures("result != null")
public StratNode<K> getRoot() {
protected StratNode<K> getRoot() {
return root;
}
@ -188,7 +228,7 @@ public class StratificationManager<K extends Stratifier, V> implements Map<List<
return states;
}
public String getStratsAndStatesForKeyString(final int key) {
public String getStratsAndStatesStringForKey(final int key) {
if ( keyStrings.get(key) == null ) {
StringBuilder b = new StringBuilder();
for ( int i = 0; i < stratifiers.size(); i++ ) {
@ -299,7 +339,7 @@ public class StratificationManager<K extends Stratifier, V> implements Map<List<
// -------------------------------------------------------------------------------------
public static List<List<Object>> combineStates(final List<Object> first, final List<Object> second) {
List<List<Object>> combined = new ArrayList<List<Object>>(first.size());
final List<List<Object>> combined = new ArrayList<List<Object>>(first.size());
for ( int i = 0; i < first.size(); i++ ) {
final Object firstI = first.get(i);
final Object secondI = second.get(i);
@ -310,4 +350,77 @@ public class StratificationManager<K extends Stratifier, V> implements Map<List<
}
return combined;
}
}
public interface Combiner<V> {
/** take two values of type V and return a combined value of type V */
public V combine(final V lhs, final V rhs);
}
/**
* Remaps the stratifications from one stratification set to another, combining
* the values in V according to the combiner function.
*
* stratifierToReplace defines a set of states S1, while newStratifier defines
* a new set S2. remappedStates is a map from all of S1 into at least some of
* S2. This function creates a new, fully initialized manager where all of the
* data in this new manager is derived from the original data in this object
* combined according to the mapping remappedStates. When multiple
* elements of S1 can map to the same value in S2, these are sequentially
* combined by the function combiner. Suppose for example at states s1, s2, and
* s3 all map to N1. Eventually the value associated with state N1 would be
*
* value(N1) = combine(value(s1), combine(value(s2), value(s3))
*
* in some order for s1, s2, and s3, which is not defined. Note that this function
* only supports combining one stratification at a time, but in principle a loop over
* stratifications and this function could do the multi-dimensional collapse.
*
* @param stratifierToReplace
* @param newStratifier
* @param combiner
* @param remappedStates
* @return
*/
public StratificationManager<K, V> combineStrats(final K stratifierToReplace,
final K newStratifier,
final Combiner<V> combiner,
final Map<Object, Object> remappedStates) {
// make sure the mapping is reasonable
if ( ! newStratifier.getAllStates().containsAll(remappedStates.values()) )
throw new ReviewedStingException("combineStrats: remapped states contains states not found in newStratifer state set");
if ( ! remappedStates.keySet().containsAll(stratifierToReplace.getAllStates()) )
throw new ReviewedStingException("combineStrats: remapped states missing mapping for some states");
// the new strats are the old ones with the single replacement
final List<K> newStrats = new ArrayList<K>(getStratifiers());
final int stratOffset = newStrats.indexOf(stratifierToReplace);
if ( stratOffset == -1 )
throw new ReviewedStingException("Could not find strat to replace " + stratifierToReplace + " in existing strats " + newStrats);
newStrats.set(stratOffset, newStratifier);
// create an empty but fully initialized new manager
final StratificationManager<K, V> combined = new StratificationManager<K, V>(newStrats);
// for each key, get its state, update it according to the map, and update the combined manager
for ( int key = 0; key < size(); key++ ) {
// the new state is just the old one with the replacement
final List<Object> newStates = new ArrayList<Object>(getStatesForKey(key));
final Object oldState = newStates.get(stratOffset);
final Object newState = remappedStates.get(oldState);
newStates.set(stratOffset, newState);
// look up the new key given the new state
final int combinedKey = combined.getKey(newStates);
if ( combinedKey == -1 ) throw new ReviewedStingException("Couldn't find key for states: " + Utils.join(",", newStates));
// combine the old value with whatever new value is in combined already
final V combinedValue = combiner.combine(combined.get(combinedKey), get(key));
// update the value associated with combined key
combined.set(combinedKey, combinedValue);
}
return combined;
}
}

View File

@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manager.StratificationManager;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -14,15 +15,23 @@ import java.util.*;
public final class EvaluationContext {
// NOTE: must be hashset to avoid O(log n) cost of iteration in the very frequently called apply function
private final HashSet<VariantEvaluator> evaluationInstances;
final VariantEvalWalker walker;
private final ArrayList<VariantEvaluator> evaluationInstances;
private final Set<Class<? extends VariantEvaluator>> evaluationClasses;
public EvaluationContext(final VariantEvalWalker walker, final Set<Class<? extends VariantEvaluator>> evaluationClasses) {
evaluationInstances = new HashSet<VariantEvaluator>(evaluationClasses.size());
this(walker, evaluationClasses, true);
}
private EvaluationContext(final VariantEvalWalker walker, final Set<Class<? extends VariantEvaluator>> evaluationClasses, final boolean doInitialize) {
this.walker = walker;
this.evaluationClasses = evaluationClasses;
this.evaluationInstances = new ArrayList<VariantEvaluator>(evaluationClasses.size());
for ( final Class<? extends VariantEvaluator> c : evaluationClasses ) {
try {
final VariantEvaluator eval = c.newInstance();
eval.initialize(walker);
if ( doInitialize ) eval.initialize(walker);
evaluationInstances.add(eval);
} catch (InstantiationException e) {
throw new ReviewedStingException("Unable to instantiate eval module '" + c.getSimpleName() + "'", e);
@ -62,4 +71,20 @@ public final class EvaluationContext {
}
}
}
public void combine(final EvaluationContext rhs) {
for ( int i = 0; i < evaluationInstances.size(); i++ )
evaluationInstances.get(i).combine(rhs.evaluationInstances.get(i));
}
public final static EvaluationContextCombiner COMBINER = new EvaluationContext.EvaluationContextCombiner();
private static class EvaluationContextCombiner implements StratificationManager.Combiner<EvaluationContext> {
@Override
public EvaluationContext combine(EvaluationContext lhs, final EvaluationContext rhs) {
if ( lhs == null )
lhs = new EvaluationContext(rhs.walker, rhs.evaluationClasses, false);
lhs.combine(rhs);
return lhs;
}
}
}

View File

@ -165,9 +165,9 @@ public class VariantDataManager {
bottomPercentage = ((float) numToAdd) / ((float) data.size());
}
int index = 0, numAdded = 0;
while( numAdded < numToAdd ) {
while( numAdded < numToAdd && index < data.size() ) {
final VariantDatum datum = data.get(index++);
if( !datum.atAntiTrainingSite && !datum.failingSTDThreshold && !Double.isInfinite(datum.lod) ) {
if( datum != null && !datum.atAntiTrainingSite && !datum.failingSTDThreshold && !Double.isInfinite(datum.lod) ) {
datum.atAntiTrainingSite = true;
trainingData.add( datum );
numAdded++;

View File

@ -157,6 +157,12 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
@Argument(fullName="minimumN", shortName="minN", doc="Combine variants and output site only if the variant is present in at least N input files.", required=false)
public int minimumN = 1;
/**
* This option allows the suppression of the command line in the VCF header. This is most often usefully when combining variants for dozens or hundreds of smaller VCFs.
*/
@Argument(fullName="suppressCommandLineHeader", shortName="suppressCommandLineHeader", doc="If true, do not output the header containing the command line used", required=false)
public boolean SUPPRESS_COMMAND_LINE_HEADER = false;
@Hidden
@Argument(fullName="mergeInfoWithMaxAC", shortName="mergeInfoWithMaxAC", doc="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records.", required=false)
public boolean MERGE_INFO_WITH_MAX_AC = false;
@ -183,7 +189,9 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
if ( SET_KEY != null )
headerLines.add(new VCFInfoHeaderLine(SET_KEY, 1, VCFHeaderLineType.String, "Source VCF for the merged record in CombineVariants"));
vcfWriter.writeHeader(new VCFHeader(headerLines, sitesOnlyVCF ? Collections.<String>emptySet() : samples));
VCFHeader vcfHeader = new VCFHeader(headerLines, sitesOnlyVCF ? Collections.<String>emptySet() : samples);
vcfHeader.setWriteCommandLine(!SUPPRESS_COMMAND_LINE_HEADER);
vcfWriter.writeHeader(vcfHeader);
if ( vcfWriter instanceof VCFWriterStub) {
sitesOnlyVCF = ((VCFWriterStub)vcfWriter).doNotWriteGenotypes();

View File

@ -0,0 +1,250 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.apache.commons.io.FilenameUtils;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
import org.broadinstitute.sting.utils.text.ListFileUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import java.io.File;
import java.util.*;
/**
* Selects headers from a VCF source.
* <p/>
* <p>
* Often, a VCF containing many headers will need to be subset in order to facilitate certain formatting guidelines.
* SelectHeaders can be used for this purpose. Given a single VCF file, one or more headers can be extracted from the
* file (based on a complete header name or a pattern match).
* <p/>
* <h2>Input</h2>
* <p>
* A set of VCFs.
* </p>
* <p/>
* <h2>Output</h2>
* <p>
* A header selected VCF.
* </p>
* <p/>
* <h2>Examples</h2>
* <pre>
* Select only the FILTER, FORMAT, and INFO headers:
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T SelectHeaders \
* --variant input.vcf \
* -o output.vcf \
* -hn FILTER \
* -hn FORMAT \
* -hn INFO
*
* Select only the FILTER, FORMAT, and INFO headers and add in the reference file names:
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T SelectHeaders \
* --variant input.vcf \
* -o output.vcf \
* -hn FILTER \
* -hn FORMAT \
* -hn INFO \
* -irn \
* -iln
*
* Select only the FILTER, FORMAT, and INFO headers, plus any headers with SnpEff:
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T SelectHeaders \
* --variant input.vcf \
* -o output.vcf \
* -hn FILTER \
* -hn FORMAT \
* -hn INFO \
* -he '.*SnpEff.*'
* </pre>
*/
// Walker that passes variant records through unchanged while pruning the output
// VCF header down to a requested subset of header lines.
@SuppressWarnings("unused")
public class SelectHeaders extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
    @ArgumentCollection
    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();

    @Output(doc = "File to which variants should be written", required = true)
    protected VCFWriter vcfWriter;

    @Argument(fullName = "header_name", shortName = "hn", doc = "Include header. Can be specified multiple times", required = false)
    public Set<String> headerNames;

    @Argument(fullName = "header_expression", shortName = "he", doc = "Regular expression to select many headers from the tracks provided. Can be specified multiple times", required = false)
    public Set<String> headerExpressions;

    /**
     * Note that header exclusion takes precedence over inclusion, so that if a header is in both lists it will be excluded.
     */
    @Argument(fullName = "exclude_header_name", shortName = "xl_hn", doc = "Exclude header. Can be specified multiple times", required = false)
    public Set<String> XLheaderNames;

    /**
     * Note that reference inclusion takes precedence over other header matching. If set other reference lines may be excluded but the file name will still be added.
     */
    @Argument(fullName = "include_reference_name", shortName = "irn", doc = "If set the reference file name minus the file extension will be added to the headers", required = false)
    public boolean includeReference;

    /**
     * Note that interval name inclusion takes precedence over other header matching. If set other interval lines may be excluded but the intervals will still be added.
     */
    @Argument(fullName = "include_interval_names", shortName = "iln", doc = "If set the interval file name minus the file extension, or the command line intervals, will be added to the headers", required = false)
    public boolean includeIntervals;

    /**
     * Note that engine header inclusion takes precedence over other header matching. If set other engine lines may be excluded but the intervals will still be added.
     */
    @Hidden // TODO: Determine if others find this valuable and either remove @Hidden or remove -ieh.
    @Argument(fullName = "include_engine_headers", shortName = "ieh", doc = "If set the headers normally output by the engine will be added to the headers", required = false)
    public boolean includeEngineHeaders;

    // Extracts the key of a header line; used to match lines against names and regexps.
    private static final ListFileUtils.StringConverter<VCFHeaderLine> headerKey = new ListFileUtils.StringConverter<VCFHeaderLine>() {
        @Override
        public String convert(VCFHeaderLine line) {
            return line.getKey();
        }
    };

    /**
     * Set up the VCF writer, the header expressions and regexps
     */
    @Override
    public void initialize() {
        // Merge the header lines coming from every input variant track.
        final List<String> trackNames = Arrays.asList(variantCollection.variants.getName());
        final Map<String, VCFHeader> headersByTrack = VCFUtils.getVCFHeadersFromRods(getToolkit(), trackNames);
        final Set<VCFHeaderLine> mergedLines = VCFUtils.smartMergeHeaders(headersByTrack.values(), logger);
        mergedLines.add(new VCFHeaderLine(VCFHeader.SOURCE_KEY, "SelectHeaders"));

        // Keep only the lines selected by name or expression.
        final Set<VCFHeaderLine> outputLines = new LinkedHashSet<VCFHeaderLine>(getSelectedHeaders(mergedLines));

        // Optionally record the reference file (base name, no extension).
        if (includeReference && getToolkit().getArguments().referenceFile != null)
            outputLines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, FilenameUtils.getBaseName(getToolkit().getArguments().referenceFile.getName())));

        // Optionally record each interval source: base name when it is a file on disk,
        // otherwise the raw command-line text.
        if (includeIntervals && getToolkit().getArguments().intervals != null) {
            for (final IntervalBinding<Feature> intervalBinding : getToolkit().getArguments().intervals) {
                final String source = intervalBinding.getSource();
                if (source == null)
                    continue;
                final File sourceFile = new File(source);
                final String intervalValue = sourceFile.exists() ? FilenameUtils.getBaseName(sourceFile.getName()) : source;
                outputLines.add(new VCFHeaderLine(VCFHeader.INTERVALS_KEY, intervalValue));
            }
        }

        final TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(headersByTrack, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
        final VCFHeader vcfHeader = new VCFHeader(outputLines, vcfSamples);
        vcfHeader.setWriteEngineHeaders(includeEngineHeaders);
        vcfWriter.writeHeader(vcfHeader);
    }

    /**
     * Applies the include (name/expression) and exclude (name) filters to the header lines.
     * When nothing is explicitly included, every line is kept; exclusion runs last and
     * therefore takes precedence over inclusion.
     */
    private Set<VCFHeaderLine> getSelectedHeaders(Set<VCFHeaderLine> headerLines) {
        final Set<VCFHeaderLine> selectedHeaders = new TreeSet<VCFHeaderLine>();
        if (headerNames == null && headerExpressions == null) {
            // Nothing explicitly included: keep everything.
            selectedHeaders.addAll(headerLines);
        } else {
            if (headerNames != null)
                selectedHeaders.addAll(ListFileUtils.includeMatching(headerLines, headerKey, headerNames, true));
            if (headerExpressions != null)
                selectedHeaders.addAll(ListFileUtils.includeMatching(headerLines, headerKey, headerExpressions, false));
        }

        // Excluded names are removed last so they win over any inclusion.
        return XLheaderNames == null
                ? selectedHeaders
                : ListFileUtils.excludeMatching(selectedHeaders, headerKey, XLheaderNames, true);
    }

    /**
     * Pass through the VC record
     *
     * @param tracker the ROD tracker
     * @param ref     reference information
     * @param context alignment info
     * @return number of records processed
     */
    @Override
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if (tracker == null)
            return 0;
        final Collection<VariantContext> vcs = tracker.getValues(variantCollection.variants, context.getLocation());
        if (vcs == null)
            return 0;
        int count = 0;
        for (final VariantContext vc : vcs) {
            vcfWriter.add(vc);
            count++;
        }
        return count;
    }

    /** @return the initial record count, zero. */
    @Override
    public Integer reduceInit() {
        return 0;
    }

    /** Accumulates the per-site counts. */
    @Override
    public Integer reduce(Integer value, Integer sum) {
        return sum + value;
    }

    /** Combines counts from two parallel traversal branches. */
    @Override
    public Integer treeReduce(Integer lhs, Integer rhs) {
        return lhs + rhs;
    }

    /** Reports the total number of records written. */
    @Override
    public void onTraversalDone(Integer result) {
        logger.info(result + " records processed.");
    }
}

View File

@ -194,6 +194,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
*/
private static final List<String> gatkPackages = Arrays.asList(
"org.broadinstitute.sting.gatk",
"org.broadinstitute.sting.pipeline",
"org.broadinstitute.sting.analyzecovariates",
"org.broadinstitute.sting.gatk.datasources.reads.utilities");
@ -251,7 +252,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
*/
private void writeFilter(String className, List<? extends ArgumentField> argumentFields, Set<Class<?>> dependents) throws IOException {
String content = getContent(TRAIT_TEMPLATE, "org.broadinstitute.sting.queue.function.CommandLineFunction",
className, "", false, String.format(" + \" -read_filter %s\"", className), argumentFields, dependents);
className, "", false, String.format(" + \" --read_filter %s\"", className), argumentFields, dependents);
writeFile(GATK_EXTENSIONS_PACKAGE_NAME + "." + className, content);
}

View File

@ -52,7 +52,7 @@ public class PairHMM {
}
public void initializeArrays(final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray,
public static void initializeArrays(final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray,
final int X_METRIC_LENGTH) {
for( int iii=0; iii < X_METRIC_LENGTH; iii++ ) {

View File

@ -0,0 +1,90 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.R;
import org.apache.commons.lang.StringUtils;
import java.text.SimpleDateFormat;
import java.util.Collection;
import java.util.Date;
/**
 * Helpers for rendering Java collections as R language literals (c(...) vectors).
 */
public class RUtils {
    /**
     * Converts a collection of values to an R compatible list. A null list will return NA,
     * otherwise the values will be escaped with single quotes and combined with c().
     * @param list Collection of values
     * @return The R representation of the list
     */
    public static String toStringList(Collection<? extends CharSequence> list) {
        if (list == null)
            return "NA";
        // An empty collection renders as an empty R vector.
        return list.isEmpty() ? "c()" : "c('" + StringUtils.join(list, "','") + "')";
    }

    /**
     * Converts a collection of values to an R compatible list. A null list will return NA,
     * otherwise the values will be combined with c().
     * @param list Collection of values
     * @return The R representation of the list
     */
    public static String toNumberList(Collection<? extends Number> list) {
        if (list == null)
            return "NA";
        return "c(" + StringUtils.join(list, ",") + ")";
    }

    /**
     * Converts a collection of values to an R compatible list. A null list will return NA,
     * otherwise the date will be escaped with single quotes and combined with c().
     * @param list Collection of values
     * @return The R representation of the list
     */
    public static String toDateList(Collection<? extends Date> list) {
        // The doubled quotes in the pattern emit literal single quotes around each date.
        return toDateList(list, "''yyyy-MM-dd''");
    }

    /**
     * Converts a collection of values to an R compatible list formatted by pattern.
     * @param list Collection of values
     * @param pattern format pattern string for each date
     * @return The R representation of the list
     */
    public static String toDateList(Collection<? extends Date> list, String pattern) {
        if (list == null)
            return "NA";
        final SimpleDateFormat formatter = new SimpleDateFormat(pattern);
        final StringBuilder builder = new StringBuilder("c(");
        String separator = "";
        for (final Date date : list) {
            builder.append(separator).append(formatter.format(date));
            separator = ",";
        }
        return builder.append(")").toString();
    }
}

View File

@ -31,14 +31,13 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.text.ListFileUtils;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
@ -74,10 +73,10 @@ public class SampleUtils {
* Same as @link getSAMFileSamples but gets all of the samples
* in the SAM files loaded by the engine
*
* @param engine
* @return
* @param engine engine
* @return samples
*/
public final static Set<String> getSAMFileSamples(GenomeAnalysisEngine engine) {
public static Set<String> getSAMFileSamples(GenomeAnalysisEngine engine) {
return SampleUtils.getSAMFileSamples(engine.getSAMFileHeader());
}
@ -209,89 +208,24 @@ public class SampleUtils {
* we try to read a file named E from disk, and if possible all lines from that file are expanded
* into unique sample names.
*
* @param sampleArgs
* @return
* @param sampleArgs args
* @return samples
*/
public static Set<String> getSamplesFromCommandLineInput(Collection<String> sampleArgs) {
if (sampleArgs != null) {
// Let's first go through the list and see if we were given any files. We'll add every entry in the file to our
// sample list set, and treat the entries as if they had been specified on the command line.
Set<String> samplesFromFiles = new HashSet<String>();
for (String SAMPLE_EXPRESSION : sampleArgs) {
File sampleFile = new File(SAMPLE_EXPRESSION);
try {
XReadLines reader = new XReadLines(sampleFile);
List<String> lines = reader.readLines();
for (String line : lines) {
samplesFromFiles.add(line.trim());
}
} catch (FileNotFoundException e) {
samplesFromFiles.add(SAMPLE_EXPRESSION); // not a file, so must be a sample
}
}
return samplesFromFiles;
return ListFileUtils.unpackSet(sampleArgs);
}
return new HashSet<String>();
}
public static Set<String> getSamplesFromCommandLineInput(Collection<String> vcfSamples, Collection<String> sampleExpressions) {
Set<String> samples = new HashSet<String>();
if (sampleExpressions != null) {
// Let's first go through the list and see if we were given any files. We'll add every entry in the file to our
// sample list set, and treat the entries as if they had been specified on the command line.
Set<String> samplesFromFiles = new HashSet<String>();
for (String sampleExpression : sampleExpressions) {
File sampleFile = new File(sampleExpression);
try {
XReadLines reader = new XReadLines(sampleFile);
List<String> lines = reader.readLines();
for (String line : lines) {
samplesFromFiles.add(line);
}
} catch (FileNotFoundException e) {
// ignore exception
}
}
sampleExpressions.addAll(samplesFromFiles);
// Let's now assume that the values in sampleExpressions are literal sample names and not regular
// expressions. Extract those samples specifically so we don't make the mistake of selecting more
// than what the user really wants.
Set<String> possibleSampleRegexs = new HashSet<String>();
for (String sampleExpression : sampleExpressions) {
if (!(new File(sampleExpression).exists())) {
if (vcfSamples.contains(sampleExpression)) {
samples.add(sampleExpression);
} else {
possibleSampleRegexs.add(sampleExpression);
}
}
}
// Now, check the expressions that weren't used in the previous step, and use them as if they're regular expressions
for (String sampleRegex : possibleSampleRegexs) {
Pattern p = Pattern.compile(sampleRegex);
for (String vcfSample : vcfSamples) {
Matcher m = p.matcher(vcfSample);
if (m.find()) {
samples.add(vcfSample);
}
}
}
Set<String> samples = ListFileUtils.unpackSet(vcfSamples);
if (sampleExpressions == null) {
return samples;
} else {
samples.addAll(vcfSamples);
return ListFileUtils.includeMatching(samples, sampleExpressions, false);
}
return samples;
}
/**
@ -304,16 +238,7 @@ public class SampleUtils {
// Now, check the expressions that weren't used in the previous step, and use them as if they're regular expressions
Set<String> samples = new HashSet<String>();
if (sampleExpressions != null) {
for (String expression : sampleExpressions) {
Pattern p = Pattern.compile(expression);
for (String originalSample : originalSamples) {
Matcher m = p.matcher(originalSample);
if (m.find()) {
samples.add(originalSample);
}
}
}
samples.addAll(ListFileUtils.includeMatching(originalSamples, sampleExpressions, false));
}
return samples;
}

View File

@ -750,4 +750,18 @@ public class Utils {
/**
 * Formats the ratio num/denom with two decimal places, or "NA" when the
 * denominator is zero (avoiding a divide-by-zero / Infinity result).
 *
 * @param num   numerator
 * @param denom denominator
 * @return the formatted ratio, or "NA" if denom == 0
 */
public static String formattedRatio(final long num, final long denom) {
    if (denom == 0)
        return "NA";
    return String.format("%.2f", num / (double) denom);
}
/**
 * Create a constant map that maps each value in values to itself.
 *
 * @param values the values to use as both keys and mapped values; duplicates collapse to one entry
 * @param <T>    the element type
 * @return an unmodifiable map in which every supplied value maps to itself
 */
public static <T> Map<T, T> makeIdentityFunctionMap(Collection<T> values) {
    final Map<T, T> identity = new HashMap<T, T>(values.size());
    for (final T value : values) {
        identity.put(value, value);
    }
    // Callers get a read-only view; the backing map is not exposed.
    return Collections.unmodifiableMap(identity);
}
}

View File

@ -15,7 +15,7 @@ import java.util.ArrayList;
* Date: 1/4/12
*/
public class ActiveRegion implements HasGenomeLocation {
public class ActiveRegion implements HasGenomeLocation, Comparable<ActiveRegion> {
private final ArrayList<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
private final GenomeLoc activeRegionLoc;
@ -73,6 +73,11 @@ public class ActiveRegion implements HasGenomeLocation {
Math.min(referenceReader.getSequenceDictionary().getSequence(fullExtentReferenceLoc.getContig()).getSequenceLength(), fullExtentReferenceLoc.getStop() + padding) ).getBases();
}
/**
 * Orders active regions by their genomic location, delegating to the
 * GenomeLoc comparison of each region's span.
 *
 * @param other the region to compare against
 * @return a negative, zero, or positive value per the Comparable contract
 */
@Override
public int compareTo( final ActiveRegion other ) {
    return this.getLocation().compareTo(other.getLocation());
}
@Override
public GenomeLoc getLocation() { return activeRegionLoc; }
public GenomeLoc getExtendedLoc() { return extendedLoc; }

View File

@ -24,8 +24,10 @@
package org.broadinstitute.sting.utils.activeregion;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.ArrayList;
@ -45,8 +47,16 @@ public class ActivityProfile {
final boolean presetRegions;
GenomeLoc regionStartLoc = null;
final List<Double> isActiveList;
private GenomeLoc lastLoc = null;
private static final int FILTER_SIZE = 65;
private static final Double[] GaussianKernel;
static {
GaussianKernel = new Double[2*FILTER_SIZE + 1];
for( int iii = 0; iii < 2*FILTER_SIZE + 1; iii++ ) {
GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 40.0, iii);
}
}
// todo -- add upfront the start and stop of the intervals
// todo -- check that no regions are unexpectedly missing
@ -85,15 +95,13 @@ public class ActivityProfile {
public ActivityProfile bandPassFilter() {
final Double[] activeProbArray = isActiveList.toArray(new Double[isActiveList.size()]);
final Double[] filteredProbArray = new Double[activeProbArray.length];
final int FILTER_SIZE = ( presetRegions ? 0 : 50 ); // TODO: needs to be set-able by the walker author
for( int iii = 0; iii < activeProbArray.length; iii++ ) {
double maxVal = 0;
for( int jjj = Math.max(0, iii-FILTER_SIZE); jjj < Math.min(isActiveList.size(), iii+FILTER_SIZE+1); jjj++ ) {
if( activeProbArray[jjj] > maxVal ) { maxVal = activeProbArray[jjj]; }
if( !presetRegions ) {
for( int iii = 0; iii < activeProbArray.length; iii++ ) {
final Double[] kernel = (Double[]) ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii));
final Double[] activeProbSubArray = (Double[]) ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1));
filteredProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, kernel);
}
filteredProbArray[iii] = maxVal;
}
return new ActivityProfile(parser, presetRegions, Arrays.asList(filteredProbArray), regionStartLoc);
}
@ -102,9 +110,9 @@ public class ActivityProfile {
* @param activeRegionExtension
* @return
*/
public List<ActiveRegion> createActiveRegions( final int activeRegionExtension ) {
final int MAX_ACTIVE_REGION = ( presetRegions ? 16001 : 425 ); // TODO: needs to be set-able by the walker author
final double ACTIVE_PROB_THRESHOLD = 0.2; // TODO: needs to be set-able by the walker author
public List<ActiveRegion> createActiveRegions( final int activeRegionExtension, final int maxRegionSize ) {
final double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author
final ArrayList<ActiveRegion> returnList = new ArrayList<ActiveRegion>();
if( isActiveList.size() == 0 ) {
// no elements in the active list, just return an empty one
@ -112,25 +120,22 @@ public class ActivityProfile {
} else if( isActiveList.size() == 1 ) {
// there's a single element, it's either active or inactive
boolean isActive = isActiveList.get(0) > ACTIVE_PROB_THRESHOLD;
final ActiveRegion region = createActiveRegion(isActive, 0, 0, activeRegionExtension );
return Collections.singletonList(region);
returnList.addAll(createActiveRegion(isActive, 0, 0, activeRegionExtension, maxRegionSize));
} else {
// there are 2+ elements, divide these up into regions
final ArrayList<ActiveRegion> returnList = new ArrayList<ActiveRegion>();
boolean isActive = isActiveList.get(0) > ACTIVE_PROB_THRESHOLD;
int curStart = 0;
for(int iii = 1; iii < isActiveList.size(); iii++ ) {
final boolean thisStatus = isActiveList.get(iii) > ACTIVE_PROB_THRESHOLD;
if( isActive != thisStatus || (iii-curStart) > MAX_ACTIVE_REGION ) {
returnList.add( createActiveRegion(isActive, curStart, iii-1, activeRegionExtension) );
if( isActive != thisStatus ) {
returnList.addAll(createActiveRegion(isActive, curStart, iii - 1, activeRegionExtension, maxRegionSize));
isActive = thisStatus;
curStart = iii;
}
}
returnList.add( createActiveRegion(isActive, curStart, isActiveList.size()-1, activeRegionExtension) ); // close out the current active region
return returnList;
returnList.addAll(createActiveRegion(isActive, curStart, isActiveList.size() - 1, activeRegionExtension, maxRegionSize)); // close out the current active region
}
return returnList;
}
/**
@ -141,8 +146,25 @@ public class ActivityProfile {
* @param activeRegionExtension
* @return a fully initialized ActiveRegion with the above properties
*/
private final ActiveRegion createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension) {
final GenomeLoc loc = parser.createGenomeLoc(regionStartLoc.getContig(), regionStartLoc.getStart() + curStart, regionStartLoc.getStart() + curEnd);
return new ActiveRegion( loc, isActive, parser, activeRegionExtension );
private final List<ActiveRegion> createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize) {
return createActiveRegion(isActive, curStart, curEnd, activeRegionExtension, maxRegionSize, new ArrayList<ActiveRegion>());
}
private final List<ActiveRegion> createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize, final List<ActiveRegion> returnList) {
if( !isActive || curEnd - curStart < maxRegionSize ) {
final GenomeLoc loc = parser.createGenomeLoc(regionStartLoc.getContig(), regionStartLoc.getStart() + curStart, regionStartLoc.getStart() + curEnd);
returnList.add(new ActiveRegion(loc, isActive, parser, activeRegionExtension));
return returnList;
}
// find the best place to break up the large active region
Double minProb = Double.MAX_VALUE;
int cutPoint = -1;
for( int iii = curStart + 45; iii < curEnd - 45; iii++ ) { // BUGBUG: assumes maxRegionSize >> 45
if( isActiveList.get(iii) < minProb ) { minProb = isActiveList.get(iii); cutPoint = iii; }
}
final List<ActiveRegion> leftList = createActiveRegion(isActive, curStart, cutPoint, activeRegionExtension, maxRegionSize, new ArrayList<ActiveRegion>());
final List<ActiveRegion> rightList = createActiveRegion(isActive, cutPoint, curEnd, activeRegionExtension, maxRegionSize, new ArrayList<ActiveRegion>());
returnList.addAll( leftList );
returnList.addAll( rightList );
return returnList;
}
}

View File

@ -210,7 +210,12 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
final List<Allele> alleles = parseAlleles(ref, alts, lineNo);
// find out our location
final int start = Integer.valueOf(locParts[1]);
int start = 0;
try {
start = Integer.valueOf(locParts[1]);
} catch (Exception e) {
generateException("the value in the POS field must be an integer but it was " + locParts[1], lineNo);
}
int stop = start;
// ref alleles don't need to be single bases for monomorphic sites

View File

@ -1,5 +1,28 @@
package org.broadinstitute.sting.utils.codecs.vcf;
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.vcf;
import org.broad.tribble.util.ParsingUtils;
@ -35,6 +58,11 @@ public class VCFHeader {
// the header string indicator
public static final String HEADER_INDICATOR = "#";
public static final String SOURCE_KEY = "source";
public static final String REFERENCE_KEY = "reference";
public static final String CONTIG_KEY = "contig";
public static final String INTERVALS_KEY = "intervals";
// were the input samples sorted originally (or are we sorting them)?
private boolean samplesWereAlreadySorted = true;
@ -42,6 +70,8 @@ public class VCFHeader {
protected ArrayList<String> sampleNamesInOrder = null;
protected HashMap<String, Integer> sampleNameToOffset = null;
private boolean writeEngineHeaders = true;
private boolean writeCommandLine = true;
/**
* create a VCF header, given a list of meta data and auxillary tags
@ -79,6 +109,7 @@ public class VCFHeader {
* using this header (i.e., read by the VCFCodec) will have genotypes
* occurring in the same order
*
* @param genotypeSampleNamesInAppearenceOrder genotype sample names
*/
protected void buildVCFReaderMaps(List<String> genotypeSampleNamesInAppearenceOrder) {
@ -144,10 +175,7 @@ public class VCFHeader {
* @return a set of the header fields, in order
*/
public Set<HEADER_FIELDS> getHeaderFields() {
Set<HEADER_FIELDS> fields = new LinkedHashSet<HEADER_FIELDS>();
for (HEADER_FIELDS field : HEADER_FIELDS.values())
fields.add(field);
return fields;
return new LinkedHashSet<HEADER_FIELDS>(Arrays.asList(HEADER_FIELDS.values()));
}
/**
@ -217,7 +245,36 @@ public class VCFHeader {
public VCFHeaderLine getOtherHeaderLine(String key) {
return mOtherMetaData.get(key);
}
/**
* If true additional engine headers will be written to the VCF, otherwise only the walker headers will be output.
* @return true if additional engine headers will be written to the VCF
*/
public boolean isWriteEngineHeaders() {
return writeEngineHeaders;
}
/**
* If true additional engine headers will be written to the VCF, otherwise only the walker headers will be output.
* @param writeEngineHeaders true if additional engine headers will be written to the VCF
*/
public void setWriteEngineHeaders(boolean writeEngineHeaders) {
this.writeEngineHeaders = writeEngineHeaders;
}
/**
* If true, and isWriteEngineHeaders also returns true, the command line will be written to the VCF.
* @return true if the command line will be written to the VCF
*/
public boolean isWriteCommandLine() {
return writeCommandLine;
}
/**
* If true, and isWriteEngineHeaders also returns true, the command line will be written to the VCF.
* @param writeCommandLine true if the command line will be written to the VCF
*/
public void setWriteCommandLine(boolean writeCommandLine) {
this.writeCommandLine = writeCommandLine;
}
}

View File

@ -677,11 +677,11 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PileupElementTracker<PE> filteredElements = tracker.getElements(sampleNames);
return filteredElements != null ? (RBP) createNewPileup(loc, filteredElements) : null;
} else {
HashSet<String> hashSampleNames = new HashSet<String>(sampleNames); // to speed up the "contains" access in the for loop
HashSet<String> hashSampleNames = new HashSet<String>(sampleNames); // to speed up the "contains" access in the for loop
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
for (PE p : pileupElementTracker) {
GATKSAMRecord read = p.getRead();
if (sampleNames != null) { // still checking on sampleNames because hashSampleNames will never be null. And empty means something else.
if (sampleNames != null) { // still checking on sampleNames because hashSampleNames will never be null. And empty means something else.
if (read.getReadGroup() != null && hashSampleNames.contains(read.getReadGroup().getSample()))
filteredTracker.add(p);
} else {
@ -693,6 +693,38 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
}
}
/**
 * Builds the per-sample subset pileups for every requested sample in a single pass.
 *
 * @param sampleNames the sample names to split the pileup by
 * @return a map from each requested sample name to its subset pileup; samples with
 *         no elements in a per-sample tracker are omitted
 */
@Override
public Map<String, ReadBackedPileup> getPileupsForSamples(Collection<String> sampleNames) {
    Map<String, ReadBackedPileup> result = new HashMap<String, ReadBackedPileup>();
    if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
        PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
        for (String sample : sampleNames) {
            // BUGFIX: fetch only this sample's elements. The previous code passed the whole
            // sampleNames collection here, so every sample was mapped to the combined pileup
            // of all requested samples instead of its own subset.
            PileupElementTracker<PE> filteredElements = tracker.getElements(sample);
            if (filteredElements != null)
                result.put(sample, createNewPileup(loc, filteredElements));
        }
    } else {
        Map<String, UnifiedPileupElementTracker<PE>> trackerMap = new HashMap<String, UnifiedPileupElementTracker<PE>>();
        for (String sample : sampleNames) { // initialize pileups for each sample
            UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
            trackerMap.put(sample, filteredTracker);
        }
        for (PE p : pileupElementTracker) { // go through all pileup elements only once and add them to the respective sample's pileup
            GATKSAMRecord read = p.getRead();
            if (read.getReadGroup() != null) {
                String sample = read.getReadGroup().getSample();
                UnifiedPileupElementTracker<PE> tracker = trackerMap.get(sample);
                if (tracker != null) // we only add the pileup to the requested samples. Completely ignore the rest
                    tracker.add(p);
            }
        }
        for (Map.Entry<String, UnifiedPileupElementTracker<PE>> entry : trackerMap.entrySet()) // create the RBP for each sample
            result.put(entry.getKey(), createNewPileup(loc, entry.getValue()));
    }
    return result;
}
@Override
public RBP getPileupForSample(String sampleName) {

View File

@ -32,8 +32,6 @@ public class PileupElement implements Comparable<PileupElement> {
protected final int eventLength; // what is the length of the event (insertion or deletion) *after* this base
protected final String eventBases; // if it is a deletion, we do not have information about the actual deleted bases in the read itself, so we fill the string with D's; for insertions we keep actual inserted bases
/**
* Creates a new pileup element.
*

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
/**
* A data retrieval interface for accessing parts of the pileup.
@ -159,6 +160,16 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
*/
public ReadBackedPileup getPileupForSamples(Collection<String> sampleNames);
/**
* Gets the particular subset of this pileup for each given sample name.
*
* Same as calling getPileupForSample for all samples, but in O(n) instead of O(n^2).
*
* @param sampleNames Names of the samples to use.
* @return A map from each requested sample name to the subset of this pileup containing only that sample's reads.
*/
public Map<String, ReadBackedPileup> getPileupsForSamples(Collection<String> sampleNames);
/**
* Gets the particular subset of this pileup with the given sample name.

View File

@ -34,9 +34,9 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.io.IOException;
import java.util.*;
import java.util.regex.Pattern;
/**
* A collection of convenience methods for working with list files.
@ -54,6 +54,7 @@ public class ListFileUtils {
* LIST_FILE_COMMENT_START are ignored.
*
* @param samFiles The sam files, in string format.
* @param parser Parser
* @return a flattened list of the bam files provided
*/
public static List<SAMReaderID> unpackBAMFileList(final List<String> samFiles, final ParsingEngine parser) {
@ -63,10 +64,8 @@ public class ListFileUtils {
inputFileName = expandFileName(inputFileName);
if (inputFileName.toLowerCase().endsWith(".list") ) {
try {
for ( String fileName : new XReadLines(new File(inputFileName), true) ) {
if ( fileName.length() > 0 && ! fileName.startsWith(LIST_FILE_COMMENT_START) ) {
unpackedReads.add(new SAMReaderID(fileName,parser.getTags(inputFileName)));
}
for ( String fileName : new XReadLines(new File(inputFileName), true, LIST_FILE_COMMENT_START) ) {
unpackedReads.add(new SAMReaderID(fileName,parser.getTags(inputFileName)));
}
}
catch( FileNotFoundException ex ) {
@ -91,9 +90,11 @@ public class ListFileUtils {
/**
* Convert command-line argument representation of ROD bindings to something more easily understandable by the engine.
* @param RODBindings a text equivale
* @param parser Parser
* @return a list of expanded, bound RODs.
*/
@Deprecated
@SuppressWarnings("unused") // TODO: Who is still using this? External walkers?
public static Collection<RMDTriplet> unpackRODBindingsOldStyle(final Collection<String> RODBindings, final ParsingEngine parser) {
// todo -- this is a strange home for this code. Move into ROD system
Collection<RMDTriplet> rodBindings = new ArrayList<RMDTriplet>();
@ -112,7 +113,7 @@ public class ListFileUtils {
String name = positionalTags.get(0);
String type = positionalTags.get(1);
RMDTriplet.RMDStorageType storageType = null;
RMDTriplet.RMDStorageType storageType;
if(tags.getValue("storage") != null)
storageType = Enum.valueOf(RMDTriplet.RMDStorageType.class,tags.getValue("storage"));
else if(fileName.toLowerCase().endsWith("stdin"))
@ -129,9 +130,11 @@ public class ListFileUtils {
/**
* Convert command-line argument representation of ROD bindings to something more easily understandable by the engine.
* @param RODBindings a text equivalent of the ROD bindings
* @param parser Parser
* @return a list of expanded, bound RODs.
*/
public static Collection<RMDTriplet> unpackRODBindings(final Collection<RodBinding> RODBindings, final ParsingEngine parser) {
@SuppressWarnings("unchecked")
public static Collection<RMDTriplet> unpackRODBindings(final Collection<RodBinding> RODBindings, @SuppressWarnings("unused") final ParsingEngine parser) {
// todo -- this is a strange home for this code. Move into ROD system
Collection<RMDTriplet> rodBindings = new ArrayList<RMDTriplet>();
FeatureManager builderForValidation = new FeatureManager();
@ -142,7 +145,7 @@ public class ListFileUtils {
String name = rodBinding.getName();
String type = rodBinding.getTribbleType();
RMDTriplet.RMDStorageType storageType = null;
RMDTriplet.RMDStorageType storageType;
if(rodBinding.getTags().getValue("storage") != null)
storageType = Enum.valueOf(RMDTriplet.RMDStorageType.class,rodBinding.getTags().getValue("storage"));
else if(fileName.toLowerCase().endsWith("stdin"))
@ -184,4 +187,157 @@ public class ListFileUtils {
return "/dev/stdin";
return argument;
}
/**
 * Expands a collection of values into a flat, de-duplicated set.
 * <p/>
 * Any entry ending in ".list" (case-insensitive) that names an existing file is
 * replaced by the trimmed, comment-stripped lines of that file; every other entry
 * is kept as-is. First-appearance insertion order is preserved.
 *
 * @param values Original values
 * @return entries from values or the files listed in values
 */
public static Set<String> unpackSet(Collection<String> values) {
    if (values == null)
        throw new NullPointerException("values cannot be null");
    final Set<String> expanded = new LinkedHashSet<String>();
    for (final String entry : values) {
        final File candidate = new File(entry);
        // Only treat the entry as a list file when it both looks like one and exists on disk;
        // otherwise it is taken literally, exactly as if typed on the command line.
        final boolean isListFile = entry.toLowerCase().endsWith(".list") && candidate.exists();
        if (!isListFile) {
            expanded.add(entry);
            continue;
        }
        try {
            expanded.addAll(new XReadLines(candidate, true, LIST_FILE_COMMENT_START).readLines());
        } catch (IOException e) {
            throw new UserException.CouldNotReadInputFile(candidate, e);
        }
    }
    return expanded;
}
/**
 * Returns a new set of String values including only those listed by filters.
 * <p/>
 * Each element of values may be a literal string or a file; files are expanded
 * line-by-line into unique names. Filters may likewise be files of filters.
 * Delegates to the generic overload using the identity converter, since the
 * values are already Strings.
 *
 * @param values Values or files with values
 * @param filters Filters or files with filters
 * @param exactMatch If true match filters exactly, otherwise use as both exact and regular expressions
 * @return entries from values or the files listed in values, filtered by filters
 */
public static Set<String> includeMatching(Collection<String> values, Collection<String> filters, boolean exactMatch) {
    return includeMatching(values, IDENTITY_STRING_CONVERTER, filters, exactMatch);
}
/**
 * Converts a type T to a String representation.
 * <p/>
 * Implementations should produce a distinct String for each distinct value so
 * that string-based filtering (includeMatching/excludeMatching) behaves like
 * set membership on the original values.
 *
 * @param <T> Type to convert to a String.
 */
public static interface StringConverter<T> {
    String convert(T value);
}
/**
 * Returns a new set containing only the elements of values whose string form
 * matches at least one filter.
 * <p/>
 * Filters may themselves be files of filters (expanded via unpackSet). When
 * exactMatch is false each filter is additionally applied as a regular
 * expression (matched with find(), i.e. anywhere in the string).
 * <p/>
 * The converter should convert T to a unique String for each value in the set.
 *
 * @param values Values or files with values
 * @param converter Converts values to strings
 * @param filters Filters or files with filters
 * @param exactMatch If true match filters exactly, otherwise use as both exact and regular expressions
 * @return entries from values including only values matching filters
 */
public static <T> Set<T> includeMatching(Collection<T> values, StringConverter<T> converter, Collection<String> filters, boolean exactMatch) {
    if (values == null)
        throw new NullPointerException("values cannot be null");
    if (converter == null)
        throw new NullPointerException("converter cannot be null");
    if (filters == null)
        throw new NullPointerException("filters cannot be null");

    final Set<String> expandedFilters = unpackSet(filters);
    // Regular-expression forms of the filters are only needed for fuzzy matching.
    final Collection<Pattern> patterns = exactMatch ? null : compilePatterns(expandedFilters);

    final Set<T> kept = new LinkedHashSet<T>();
    for (final T candidate : values) {
        final String key = converter.convert(candidate);
        boolean matched = expandedFilters.contains(key);
        if (!matched && !exactMatch) {
            for (final Pattern pattern : patterns) {
                if (pattern.matcher(key).find()) {
                    matched = true;
                    break;
                }
            }
        }
        if (matched)
            kept.add(candidate);
    }
    return kept;
}
/**
 * Returns a new set containing every element of values whose string form does
 * NOT match any filter.
 * <p/>
 * Filters may themselves be files of filters (expanded via unpackSet). When
 * exactMatch is false each filter is additionally applied as a regular
 * expression (matched with find(), i.e. anywhere in the string).
 * <p/>
 * The converter should convert T to a unique String for each value in the set.
 *
 * @param values Values or files with values
 * @param converter Converts values to strings
 * @param filters Filters or files with filters
 * @param exactMatch If true match filters exactly, otherwise use as both exact and regular expressions
 * @return entries from values excluding any values matching filters
 */
public static <T> Set<T> excludeMatching(Collection<T> values, StringConverter<T> converter, Collection<String> filters, boolean exactMatch) {
    if (values == null)
        throw new NullPointerException("values cannot be null");
    if (converter == null)
        throw new NullPointerException("converter cannot be null");
    if (filters == null)
        throw new NullPointerException("filters cannot be null");

    final Set<String> expandedFilters = unpackSet(filters);
    final Collection<Pattern> patterns = exactMatch ? null : compilePatterns(expandedFilters);

    // Start with everything, then drop each element that matches some filter.
    final Set<T> retained = new LinkedHashSet<T>(values);
    for (final T candidate : values) {
        final String key = converter.convert(candidate);
        if (expandedFilters.contains(key)) {
            retained.remove(candidate);
        } else if (!exactMatch) {
            for (final Pattern pattern : patterns) {
                if (pattern.matcher(key).find()) {
                    retained.remove(candidate);
                    break;
                }
            }
        }
    }
    return retained;
}
/**
 * Compiles each filter string into a java.util.regex.Pattern, preserving order.
 *
 * @param filters regular-expression source strings; must each be valid regex
 * @return the compiled patterns, one per filter
 */
private static Collection<Pattern> compilePatterns(Collection<String> filters) {
    final Collection<Pattern> compiled = new ArrayList<Pattern>(filters.size());
    for (final String expression : filters)
        compiled.add(Pattern.compile(expression));
    return compiled;
}
// Converter for collections whose elements are already Strings: returns the value unchanged.
// Used by the String-only includeMatching overload.
protected static final StringConverter<String> IDENTITY_STRING_CONVERTER = new StringConverter<String>() {
    @Override
    public String convert(String value) {
        return value;
    }
};
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The Broad Institute
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@ -12,15 +12,14 @@
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.text;
@ -48,75 +47,92 @@ import java.util.List;
* For the love of god, please use this system for reading lines in a file.
*/
public class XReadLines implements Iterator<String>, Iterable<String> {
private BufferedReader in; // The stream we're reading from
private String nextline = null; // Return value of next call to next()
private boolean trimWhitespace = true;
private final BufferedReader in; // The stream we're reading from
private String nextLine = null; // Return value of next call to next()
private final boolean trimWhitespace;
private final String commentPrefix;
/**
 * Creates a new XReadLines over filename with whitespace trimming enabled and
 * no comment prefix (no lines are skipped as comments).
 *
 * @param filename file to read
 * @throws FileNotFoundException when the file is not found
 */
public XReadLines(final File filename) throws FileNotFoundException {
    this(new FileReader(filename), true, null);
}

/**
 * Creates a new XReadLines over filename with no comment prefix.
 *
 * @param filename file to read
 * @param trimWhitespace whether each line is trimmed before being returned
 * @throws FileNotFoundException when the file is not found
 */
public XReadLines(final File filename, final boolean trimWhitespace) throws FileNotFoundException {
    this(new FileReader(filename), trimWhitespace, null);
}
/**
* Creates a new xReadLines object to read lines from filename
*
* @param filename
* @throws FileNotFoundException
* @param filename file name
* @param trimWhitespace trim whitespace
* @param commentPrefix prefix for comments or null if no prefix is set
* @throws FileNotFoundException when the file is not found
*/
public XReadLines(final File filename, final boolean trimWhitespace) throws FileNotFoundException {
this(new FileReader(filename), trimWhitespace);
public XReadLines(final File filename, final boolean trimWhitespace, final String commentPrefix) throws FileNotFoundException {
this(new FileReader(filename), trimWhitespace, commentPrefix);
}
public XReadLines(final File filename) throws FileNotFoundException {
this(filename, true);
public XReadLines(final InputStream inputStream) throws FileNotFoundException {
this(new InputStreamReader(inputStream), true, null);
}
/**
* Creates a new xReadLines object to read lines from fileReader
*
* @param fileReader
* @throws FileNotFoundException
*/
public XReadLines(final FileReader fileReader, final boolean trimWhitespace) throws FileNotFoundException {
this(new BufferedReader(fileReader), trimWhitespace);
}
public XReadLines(final FileReader fileReader) throws FileNotFoundException {
this(fileReader, true);
public XReadLines(final InputStream inputStream, final boolean trimWhitespace) {
this(new InputStreamReader(inputStream), trimWhitespace, null);
}
/**
* Creates a new xReadLines object to read lines from an input stream
*
* @param inputStream
* @param inputStream input stream
* @param trimWhitespace trim whitespace
* @param commentPrefix prefix for comments or null if no prefix is set
*/
public XReadLines(final InputStream inputStream, final boolean trimWhitespace) {
this(new BufferedReader(new InputStreamReader(inputStream)), trimWhitespace);
}
public XReadLines(final InputStream inputStream) throws FileNotFoundException {
this(inputStream, true);
public XReadLines(final InputStream inputStream, final boolean trimWhitespace, final String commentPrefix) {
this(new InputStreamReader(inputStream), trimWhitespace, commentPrefix);
}
/**
* Creates a new xReadLines object to read lines from an bufferedReader
* Creates a new xReadLines object to read lines from a reader
*
* @param reader
* @param reader reader
*/
public XReadLines(final Reader reader) {
this(reader, true, null);
}
/**
 * Creates a new XReadLines object to read lines from a reader, with no
 * comment prefix (no lines are skipped as comments).
 *
 * @param reader reader
 * @param trimWhitespace trim whitespace
 */
public XReadLines(final Reader reader, final boolean trimWhitespace) {
    this(reader, trimWhitespace, null);
}
/**
* Creates a new xReadLines object to read lines from an bufferedReader
*
* @param reader file name
* @param trimWhitespace trim whitespace
* @param commentPrefix prefix for comments or null if no prefix is set
*/
public XReadLines(final Reader reader, final boolean trimWhitespace, final String commentPrefix) {
this.in = (reader instanceof BufferedReader) ? (BufferedReader)reader : new BufferedReader(reader);
this.trimWhitespace = trimWhitespace;
this.commentPrefix = commentPrefix;
try {
this.in = new BufferedReader(reader);
nextline = readNextLine();
this.trimWhitespace = trimWhitespace;
this.nextLine = readNextLine();
} catch(IOException e) {
throw new IllegalArgumentException(e);
}
}
public XReadLines(final Reader reader) {
this(reader, true);
}
/**
* Reads all of the lines in the file, and returns them as a list of strings
*
* @return
* @return all of the lines in the file.
*/
public List<String> readLines() {
List<String> lines = new LinkedList<String>();
@ -128,38 +144,48 @@ public class XReadLines implements Iterator<String>, Iterable<String> {
/**
* I'm an iterator too...
* @return
* @return an iterator
*/
public Iterator<String> iterator() {
return this;
}
public boolean hasNext() {
return nextline != null;
return this.nextLine != null;
}
/**
* Actually reads the next line from the stream, not accessible publically
* @return
* Actually reads the next line from the stream, not accessible publicly
* @return the next line or null
* @throws IOException if an error occurs
*/
private String readNextLine() throws IOException {
String nextline = in.readLine(); // Read another line
if (nextline != null && trimWhitespace )
nextline = nextline.trim();
return nextline;
String nextLine;
while ((nextLine = this.in.readLine()) != null) {
if (this.trimWhitespace) {
nextLine = nextLine.trim();
if (nextLine.length() == 0)
continue;
}
if (this.commentPrefix != null)
if (nextLine.startsWith(this.commentPrefix))
continue;
break;
}
return nextLine;
}
/**
* Returns the next line (minus whitespace)
* @return
* Returns the next line (optionally minus whitespace)
* @return the next line
*/
public String next() {
try {
String result = nextline;
nextline = readNextLine();
String result = this.nextLine;
this.nextLine = readNextLine();
// If we haven't reached EOF yet
if (nextline == null) {
if (this.nextLine == null) {
in.close(); // And close on EOF
}

View File

@ -223,12 +223,12 @@ public class GenotypeLikelihoods {
/**
* The maximum number of alleles that we can represent as genotype likelihoods
*/
final static int MAX_ALLELES_THAT_CAN_BE_GENOTYPED = 50;
public final static int MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED = 50;
/*
* a cache of the PL index to the 2 alleles it represents over all possible numbers of alternate alleles
*/
private final static GenotypeLikelihoodsAllelePair[] PLIndexToAlleleIndex = calculatePLcache(MAX_ALLELES_THAT_CAN_BE_GENOTYPED);
private final static GenotypeLikelihoodsAllelePair[] PLIndexToAlleleIndex = calculatePLcache(MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED);
private static GenotypeLikelihoodsAllelePair[] calculatePLcache(final int altAlleles) {
final int numLikelihoods = calculateNumLikelihoods(1+altAlleles, 2);
@ -311,7 +311,7 @@ public class GenotypeLikelihoods {
public static GenotypeLikelihoodsAllelePair getAllelePair(final int PLindex) {
// make sure that we've cached enough data
if ( PLindex >= PLIndexToAlleleIndex.length )
throw new ReviewedStingException("GATK limitation: cannot genotype more than " + MAX_ALLELES_THAT_CAN_BE_GENOTYPED + " alleles");
throw new ReviewedStingException("GATK limitation: cannot genotype more than " + MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED + " alleles");
return PLIndexToAlleleIndex[PLindex];
}

View File

@ -26,18 +26,17 @@
package org.broadinstitute.sting;
import org.apache.commons.lang.StringUtils;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.Tribble;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.gatk.CommandLineExecutable;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
@ -315,9 +314,10 @@ public class WalkerTest extends BaseTest {
// it's the type we expected
System.out.println(String.format(" => %s PASSED", name));
} else {
e.printStackTrace();
Assert.fail(String.format("Test %s expected exception %s but got %s instead",
name, expectedException, e.getClass()));
if ( e.getCause() != null )
e.getCause().printStackTrace(System.out); // must print to stdout to see the message
Assert.fail(String.format("Test %s expected exception %s but instead got %s with error message %s",
name, expectedException, e.getClass(), e.getMessage()));
}
} else {
// we didn't expect an exception but we got one :-(

View File

@ -86,13 +86,15 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
// --------------------------------------------------------------------------------
private class EngineErrorHandlingTestProvider extends TestDataProvider {
Class expectedException;
boolean multiThreaded;
final Class expectedException;
final boolean multiThreaded;
final int iterationsToTest;
public EngineErrorHandlingTestProvider(Class exceptedException, final boolean multiThreaded) {
super(EngineErrorHandlingTestProvider.class);
this.expectedException = exceptedException;
this.multiThreaded = multiThreaded;
this.iterationsToTest = multiThreaded ? 10 : 1;
setName(String.format("Engine error handling: expected %s, is-multithreaded %b", exceptedException, multiThreaded));
}
}
@ -113,9 +115,11 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
//
@Test(dataProvider = "EngineErrorHandlingTestProvider")
public void testEngineErrorHandlingTestProvider(EngineErrorHandlingTestProvider cfg) {
final String root = "-T ErrorThrowing -R " + b37KGReference;
final String args = root + (cfg.multiThreaded ? " -nt 2" : "") + " -E " + cfg.expectedException.getSimpleName();
WalkerTestSpec spec = new WalkerTestSpec(args, 0, cfg.expectedException);
executeTest(cfg.toString(), spec);
for ( int i = 0; i < cfg.iterationsToTest; i++ ) {
final String root = "-T ErrorThrowing -R " + b37KGReference;
final String args = root + (cfg.multiThreaded ? " -nt 2" : "") + " -E " + cfg.expectedException.getSimpleName();
WalkerTestSpec spec = new WalkerTestSpec(args, 0, cfg.expectedException);
executeTest(cfg.toString(), spec);
}
}
}

View File

@ -42,13 +42,13 @@ public class GATKReportUnitTest extends BaseTest {
Assert.assertEquals(report.getTables().size(), 5);
GATKReportTable countVariants = report.getTable("CountVariants");
Object countVariantsPK = countVariants.getPrimaryKeyByData("dbsnp.eval.none.all");
Object countVariantsPK = countVariants.getPrimaryKeyByData("CountVariants", "dbsnp", "eval", "none", "all");
Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "63025520");
Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "0");
Assert.assertEquals(countVariants.get(countVariantsPK, "heterozygosity"), 4.73e-06);
GATKReportTable validationReport = report.getTable("ValidationReport");
Object validationReportPK = countVariants.getPrimaryKeyByData("dbsnp.eval.none.novel");
Object validationReportPK = countVariants.getPrimaryKeyByData("CountVariants", "dbsnp", "eval", "none", "novel");
Assert.assertEquals(validationReport.get(validationReportPK, "PPV"), Double.NaN);
}
@ -79,6 +79,49 @@ public class GATKReportUnitTest extends BaseTest {
Assert.assertEquals(GATKReportColumn.isRightAlign(value), expected, "right align of '" + value + "'");
}
/**
 * Builds a small two-column ("sample", "value") report table with two rows,
 * used by the primary-key lookup tests below.
 */
private GATKReportTable makeBasicTable() {
    final GATKReport report = GATKReport.newSimpleReport("TableName", "sample", "value");
    report.addRow("foo.1", "hello");
    report.addRow("foo.2", "world");
    return report.getTable("TableName");
}
/**
 * Sample names containing dots must round-trip through primary-key lookup
 * (dots previously clashed with the dotted-key parsing in getPrimaryKeyByData).
 */
@Test
public void testDottedSampleName() {
    final GATKReportTable table = makeBasicTable();

    final Object firstKey = table.getPrimaryKeyByData("foo.1");
    Assert.assertEquals(table.get(firstKey, "value"), "hello");

    final Object secondKey = table.getPrimaryKeyByData("foo.2");
    Assert.assertEquals(table.get(secondKey, "value"), "world");
}
/**
 * findPrimaryKeyByData returns a non-null key when the supplied data match a row,
 * and null (rather than throwing) when no row matches or when more data values
 * are supplied than the table has columns.
 */
@Test
public void testFindPrimaryKeyByData() {
    GATKReportTable table = makeBasicTable();
    // matches by primary key alone, or by primary key plus value
    Assert.assertNotNull(table.findPrimaryKeyByData("foo.1"));
    Assert.assertNotNull(table.findPrimaryKeyByData("foo.1", "hello"));
    Assert.assertNotNull(table.findPrimaryKeyByData("foo.2"));
    Assert.assertNotNull(table.findPrimaryKeyByData("foo.2", "world"));
    // more values than columns -> no match, returns null
    Assert.assertNull(table.findPrimaryKeyByData("list", "longer", "than", "column", "count"));
    // non-existent row -> null
    Assert.assertNull(table.findPrimaryKeyByData("short"));
}
/** Calling findPrimaryKeyByData with no arguments must raise IllegalArgumentException. */
@Test(expectedExceptions = IllegalArgumentException.class)
public void testEmptyFindPrimaryKeyByData() {
    GATKReportTable table = makeBasicTable();
    table.findPrimaryKeyByData();
}
/** Calling findPrimaryKeyByData with a null varargs array must raise NullPointerException. */
@Test(expectedExceptions = NullPointerException.class)
public void testNullFindPrimaryKeyByData() {
    GATKReportTable table = makeBasicTable();
    // cast disambiguates "one null argument" vs "null array"; we want the null array
    table.findPrimaryKeyByData((Object[]) null);
}
@Test
public void testSimpleGATKReport() {
// Create a new simple GATK report named "TableName" with columns: Roger, is, and Awesome

View File

@ -0,0 +1,20 @@
package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import java.util.Arrays;
/**
 * Integration test for the FlagStat walker.
 */
public class FlagStatIntegrationTest extends WalkerTest {
    /**
     * Runs FlagStat over a 50kb slice of the NA12878 test BAM and checks the
     * output against a known MD5.
     */
    @Test
    public void testFlagStat() {
        final String expectedMD5 = "9c4039662f24bfd23ccf67973cb5df29";
        final String command = "-T FlagStat -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s";
        executeTest("test flag stat", new WalkerTestSpec(command, 1, Arrays.asList(expectedMD5)));
    }
}

View File

@ -38,7 +38,7 @@ public class CountReadsInActiveRegionsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T CountReadsInActiveRegions -R " + b37KGReference + " -I " + b37GoodNA12878BAM + " -L 20:10,000,000-10,200,000 -o %s",
1,
Arrays.asList("fcd581aa6befe85c7297509fa7b34edf"));
Arrays.asList("1e9e8d637d2acde23fa99fe9dc07e3e2"));
executeTest("CountReadsInActiveRegions:", spec);
}
}

View File

@ -94,4 +94,18 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
}
}
/**
 * Genotype likelihood arrays with extremely large-magnitude entries must not
 * break the exact AF calculation: three samples sharing GLs {-2e7, -2e7, 0}
 * should still yield a MAP alternate allele count of 6.
 */
@Test
public void testLargeGLs() {
    final double[] hugeGLs = new double[]{-20000000.0, -20000000.0, 0.0};
    final GetGLsTest testCfg = new GetGLsTest("B6", 1, createGenotype("1", hugeGLs), createGenotype("2", hugeGLs), createGenotype("3", hugeGLs));

    final AlleleFrequencyCalculationResult afResult = new AlleleFrequencyCalculationResult(2);
    ExactAFCalculationModel.linearExactMultiAllelic(testCfg.GLs, testCfg.numAltAlleles, priors, afResult);

    final int mapAlleleCount = afResult.getAlleleCountsOfMAP()[0];
    Assert.assertEquals(mapAlleleCount, 6);
}
}

View File

@ -122,16 +122,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// --------------------------------------------------------------------------------------------------------------
@Test
public void testCallingParameters() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "--min_base_quality_score 26", "258c1b33349eb3b2d395ec4d69302725" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 " + entry.getKey(), 1,
Arrays.asList(entry.getValue()));
executeTest(String.format("test calling parameter[%s]", entry.getKey()), spec);
}
public void testMinBaseQualityScore() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
Arrays.asList("258c1b33349eb3b2d395ec4d69302725"));
executeTest("test min_base_quality_score 26", spec);
}
@Test
@ -142,6 +137,22 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
executeTest("test SLOD", spec);
}
/**
 * Exercises the --annotateNDA command-line flag over the standard NA12878 test
 * interval and checks the calls against a known MD5.
 */
@Test
public void testNDA() {
    WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
            baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
            Arrays.asList("443b2f8882393c4c65277c34cdb6060c"));
    executeTest("test NDA", spec);
}
/**
 * Exercises a named comparison track (-comp:FOO bound to dbSNP 129) with the
 * BOTH genotype likelihoods model and checks the calls against a known MD5.
 */
@Test
public void testCompTrack() {
    WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
            "-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
            Arrays.asList("71251d8893649ea9abd5d9aa65739ba1"));
    executeTest("test using comp track", spec);
}
@Test
public void testOutputParameter() {
HashMap<String, String> e = new HashMap<String, String>();

View File

@ -302,7 +302,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
" --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf";
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
1, Arrays.asList("4c00cfa0fd343fef62d19af0edeb4f65"));
1, Arrays.asList("8d4530e9cef8531c46bbb693b84d04c7"));
executeTestParallel("testSelect1", spec);
}
@ -330,7 +330,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
@Test
public void testCompVsEvalAC() {
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("4df6654860ad63b7e24e6bc5fbbbcb00"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("bb076f7239039191fde883c5e68483ea"));
executeTestParallel("testCompVsEvalAC",spec);
}
@ -360,7 +360,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --dbsnp " + b37dbSNP132 +
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("3b85cd0fa37539ff51d34e026f26fef2"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9d24f34d94d74417e00e3b7bcf84650f"));
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
}
@ -372,7 +372,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("bed8751c773b9568218f78c90f13348a"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("7329b0bc73c9ccaf5facd754f3410c38"));
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
}
@ -488,7 +488,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("9726c0c8f19d271cf680f5f16f0926b3")
Arrays.asList("aad01b26198b30da5d59a05c08d863bb")
);
executeTest("testModernVCFWithLargeIndels", spec);
}
@ -508,7 +508,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("c89705147ef4233d5de3a539469bd1d1")
Arrays.asList("4fa2557663ef8fb4cdeecd667791985c")
);
executeTest("testStandardIndelEval", spec);
}

View File

@ -0,0 +1,277 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
// our package
package org.broadinstitute.sting.gatk.walkers.varianteval;
// the imports for unit testing.
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manager.StratificationManager;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.EvaluationContext;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
public class VariantEvalWalkerUnitTest extends BaseTest {
VariantEvalWalker VEwalker;
VariantContext eval;
// Before each test: fresh walker and a minimal single-base VariantContext at chr1:1.
@BeforeMethod
public void init() {
    VEwalker = new VariantEvalWalker();
    eval = new VariantContextBuilder("x", "chr1", 1, 1, Collections.singleton(Allele.create("A", true))).make();
}
// --------------------------------------------------------------------------------
//
// Test stratifications / evaluations
//
// --------------------------------------------------------------------------------
/**
 * Test-case provider: pairs a list of IntegerStratifier stratifications with the
 * CounterEval evaluator, plus the counts expected for integers 1..maxI under
 * each combination of stratification states.
 */
private class StratifiedEvalTestProvider extends TestDataProvider {
    // stratifiers built from allStates, in order
    final List<VariantStratifier> stratificationObjects = new ArrayList<VariantStratifier>();
    // always contains just CounterEval
    final Set<Class<? extends VariantEvaluator>> evaluationObjects = new HashSet<Class<? extends VariantEvaluator>>();
    final List<Integer> expectedCounts;
    final int maxI;

    /**
     *
     * @param maxI test integers from 1 ... maxI
     * @param expectedCounts the expected number of integers from 1 ... maxI divisible by each combination, in order, of allStates
     * @param allStates all stratification tests, in order
     */
    public StratifiedEvalTestProvider(int maxI,
                                      final List<Integer> expectedCounts,
                                      final List<Integer> ... allStates) {
        super(StratifiedEvalTestProvider.class);
        this.maxI = maxI;
        this.expectedCounts = expectedCounts;
        this.evaluationObjects.add(CounterEval.class);

        // build one IntegerStratifier per state list, and a readable test name
        String stateName = "";
        for ( List<Integer> states : allStates ) {
            stratificationObjects.add(new IntegerStratifier(states));
            stateName = stateName + Utils.join(",", states) + " ";
        }

        setName(String.format("maxI=%d expectedCounts=%s states=%s", maxI, Utils.join(",", expectedCounts), stateName));
    }
}
/**
 * Test stratifier -> holds a list of integers, and the states are whether the
 * integer value of evalName is divisible by that number.
 */
public static class IntegerStratifier extends VariantStratifier {
    final List<Integer> integers;

    private IntegerStratifier(final List<Integer> integers) {
        this.integers = integers;
        initialize();
    }

    @Override
    public void initialize() {
        // each divisor is itself a stratification state
        states.addAll(integers);
    }

    @Override
    public List<Object> getRelevantStates(final ReferenceContext ref, final RefMetaDataTracker tracker, final VariantContext comp, final String compName, final VariantContext eval, final String evalName, final String sampleName) {
        int i = Integer.valueOf(evalName); // a terrible hack, but we can now provide accessible states
        // return every divisor of i that this stratifier tracks
        List<Object> states = new ArrayList<Object>();
        for ( int state : integers )
            if ( i % state == 0 )
                states.add(state);
        return states;
    }
}
/**
 * Trivial test evaluator: counts how many times update1 is invoked, and
 * supports combining by summing the counts of two evaluators.
 */
public static class CounterEval extends VariantEvaluator {
    public int count = 0; // number of update1 invocations seen (after any combine)

    @Override
    public int getComparisonOrder() {
        return 1;
    }

    @Override
    public void update1(final VariantContext eval, final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
        count += 1;
    }

    @Override
    public boolean supportsCombine() {
        return true;
    }

    @Override
    public void combine(final VariantEvaluator other) {
        final CounterEval that = (CounterEval) other;
        this.count += that.count;
    }
}
/**
 * Configures VEwalker with cfg's stratifications/evaluators, then feeds the
 * integers 1..cfg.maxI through the evaluation machinery.  The integer under
 * test is smuggled through as the eval name (a deliberate test hack matched
 * by IntegerStratifier.getRelevantStates).
 */
private void initialize(StratifiedEvalTestProvider cfg) {
    VEwalker.createStratificationStates(cfg.stratificationObjects, cfg.evaluationObjects);

    final RefMetaDataTracker tracker = new RefMetaDataTracker();
    final ReferenceContext ref = null;
    final VariantContext comp = null;
    final String compName = null;
    final String sampleName = null;

    // increment eval counts for each stratification of divisors of value from 1...maxI
    for ( int value = 1; value <= cfg.maxI; value++ ) {
        final String evalName = String.valueOf(value); // terrible hack to stratify by divisor
        for ( final EvaluationContext context : VEwalker.getEvaluationContexts(tracker, ref, eval, evalName, comp, compName, sampleName) ) {
            synchronized (context) {
                context.apply(tracker, ref, null, comp, eval);
            }
        }
    }
}
@DataProvider(name = "StratifiedEvalTestProvider")
public Object[][] makeStratifiedEvalTestProvider() {
    // Each provider self-registers with TestDataProvider on construction, so
    // the construction order below is the order of the returned test cases.
    new StratifiedEvalTestProvider(4, // test 1, 2, 3, 4
            Arrays.asList(4, 2), // 4 divisible by 1, 2 by 2
            Arrays.asList(1, 2));
    new StratifiedEvalTestProvider(6, // test 1, 2, 3, 4, 5, 6
            Arrays.asList(6, 3, 2), // 6 divisible by 1, 3 by 2, 2 by 3
            Arrays.asList(1, 2, 3));
    // test that some states can be empty -- does this work in VE?
    new StratifiedEvalTestProvider(6,
            Arrays.asList(3, 2),
            Arrays.asList(2, 3));
    // test a single stratification
    new StratifiedEvalTestProvider(6,
            Arrays.asList(3),
            Arrays.asList(2));
    // test a meaningless state
    new StratifiedEvalTestProvider(4, // test 1, 2, 3, 4
            Arrays.asList(4, 2), // 4 divisible by 1, 2 by 2
            Arrays.asList(1, 2), Arrays.asList(1));
    // test a adding a state that divides space in half
    new StratifiedEvalTestProvider(4,
            Arrays.asList(2, 2),
            Arrays.asList(1, 2), Arrays.asList(2));
    // test pairs of strats
    new StratifiedEvalTestProvider(12,
            Arrays.asList(4, 3, 2, 3),
            Arrays.asList(1, 2), Arrays.asList(3, 4));
    return StratifiedEvalTestProvider.getTests(StratifiedEvalTestProvider.class);
}
/**
 * Ensures that counting and stratifications all are working properly by iterating
 * over integers 1...cfg.maxI and stratifying according to cfg, and that the counts in
 * each bin are as expected.
 *
 * @param cfg the scenario to run: stratifications, maxI, and expected per-bin counts
 */
@Test(dataProvider = "StratifiedEvalTestProvider")
public void testBasicOperation(StratifiedEvalTestProvider cfg) {
    initialize(cfg);
    checkStratificationCountsAreExpected(VEwalker.stratManager, cfg.expectedCounts);
}
/**
 * Asserts that every stratification bin held by manager saw exactly the
 * expected number of update calls.
 *
 * @param manager        the stratification manager whose per-key evaluators are checked
 * @param expectedCounts expected CounterEval.count per key, indexed by manager key
 */
private final void checkStratificationCountsAreExpected(final StratificationManager<VariantStratifier, EvaluationContext> manager,
                                                        final List<Integer> expectedCounts) {
    // Fail fast with a clear message if the expectations don't cover every key;
    // otherwise a size mismatch surfaces as an opaque IndexOutOfBoundsException below
    Assert.assertEquals(manager.size(), expectedCounts.size(),
            "Number of stratification keys " + manager.size() + " does not match number of expected counts " + expectedCounts.size());
    for ( int key = 0; key < manager.size(); key++ ) {
        final String stratStateString = manager.getStratsAndStatesStringForKey(key);
        final EvaluationContext nec = manager.get(key);

        for ( final VariantEvaluator ve : nec.getVariantEvaluators() ) {
            // every evaluator registered by the tests is a CounterEval (see StratifiedEvalTestProvider)
            final CounterEval counterEval = (CounterEval)ve;
            final int expected = expectedCounts.get(key);
            Assert.assertEquals(counterEval.count, expected, "Count seen of " + counterEval.count + " not expected " + expected + " at " + stratStateString);
        }
    }
}
/**
 * A derived test on testBasicOperation that checks that combining stratifications
 * works as expected by ensuring the results are the same when the remapped
 * strats are the identity map (A -> A, B -> B, etc)
 */
@Test(dataProvider = "StratifiedEvalTestProvider", dependsOnMethods = {"testBasicOperation"})
public void testIdentityCombine(StratifiedEvalTestProvider cfg) {
    // combine each stratification with itself in turn; an identity remap must
    // leave every bin's count unchanged
    for ( int i = 0; i < cfg.stratificationObjects.size(); i++ ) {
        // re-run the full evaluation for each iteration so counts start fresh
        initialize(cfg);
        final VariantStratifier toReplace = cfg.stratificationObjects.get(i);
        final VariantStratifier newStrat = cfg.stratificationObjects.get(i);
        // identity map: every state maps to itself
        final Map<Object, Object> remappedStates = Utils.makeIdentityFunctionMap(newStrat.getAllStates());

        StratificationManager<VariantStratifier, EvaluationContext> combined =
                VEwalker.stratManager.combineStrats(toReplace, newStrat, EvaluationContext.COMBINER, remappedStates);

        checkStratificationCountsAreExpected(combined, cfg.expectedCounts);
    }
}
// /**
// * A derived test on testBasicOperation that checks that combining stratifications
// * works as expected. We look into cfg, and if there are multiple states we create
// * dynamically create a combinations of the stratifications, and ensure that the
// * combined results are as we expected.
// */
// @Test(dataProvider = "StratifiedEvalTestProvider", dependsOnMethods = {"testBasicOperation"})
// public void testCombinedEachStrat(StratifiedEvalTestProvider cfg) {
// for ( int i = 0; i < cfg.stratificationObjects.size(); i++ ) {
// initialize(cfg);
// final VariantStratifier toReplace = cfg.stratificationObjects.get(i);
//
// // TODO -- replace this code with something that combines values in strat
// final VariantStratifier newStrat = cfg.stratificationObjects.get(i);
// final Map<Object, Object> remappedStates = Utils.makeIdentityFunctionMap(newStrat.getAllStates());
// final List<Integer> expected = cfg.expectedCounts;
//
// StratificationManager<VariantStratifier, EvaluationContext> combined =
// VEwalker.stratManager.combineStrats(toReplace, newStrat, EvaluationContext.COMBINER, remappedStates);
// checkStratificationCountsAreExpected(combined, expected);
// }
// }
}

View File

@ -0,0 +1,64 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.R;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
/**
 * Unit tests for RUtils conversions of Java lists into R vector literals.
 */
public class RUtilsUnitTest {
    @DataProvider(name = "stringLists")
    public Object[][] getStringLists() {
        // rows: { input list, expected R expression }
        return new Object[][] {
                { null, "NA" },
                { Collections.EMPTY_LIST, "c()" },
                { Arrays.asList("1", "2", "3"), "c('1','2','3')" }
        };
    }

    @Test(dataProvider = "stringLists")
    public void testToStringList(List<? extends CharSequence> actual, String expected) {
        Assert.assertEquals(RUtils.toStringList(actual), expected);
    }

    @DataProvider(name = "numberLists")
    public Object[][] getNumberLists() {
        // rows: { input list, expected R expression }
        return new Object[][] {
                { null, "NA" },
                { Collections.EMPTY_LIST, "c()" },
                { Arrays.asList(1, 2, 3), "c(1,2,3)" },
                { Arrays.asList(1D, 2D, 3D), "c(1.0,2.0,3.0)" }
        };
    }

    @Test(dataProvider = "numberLists")
    public void testToNumberList(List<? extends Number> actual, String expected) {
        Assert.assertEquals(RUtils.toNumberList(actual), expected);
    }
}

View File

@ -130,7 +130,7 @@ public class ActivityProfileUnitTest extends BaseTest {
Assert.assertEquals(profile.size(), cfg.probs.size());
Assert.assertEquals(profile.isActiveList, cfg.probs);
assertRegionsAreEqual(profile.createActiveRegions(0), cfg.expectedRegions);
assertRegionsAreEqual(profile.createActiveRegions(0, 100), cfg.expectedRegions);
}
private void assertRegionsAreEqual(List<ActiveRegion> actual, List<ActiveRegion> expected) {

View File

@ -28,17 +28,14 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.ParsingEngine;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.*;
/**
* Tests selected functionality in the CommandLineExecutable class
@ -74,6 +71,76 @@ public class ListFileUtilsUnitTest extends BaseTest {
performBAMListFileUnpackingTest(tempListFile, expectedBAMFileListAfterUnpacking);
}
// Verifies ListFileUtils.unpackSet: a plain file name unpacks to itself, and a
// .list file unpacks to its entries with commented lines dropped.
@Test
public void testUnpackSet() throws Exception {
    Set<String> expected = new HashSet<String>(Arrays.asList("public/testdata/exampleBAM.bam"));
    Set<String> actual;

    // a bare file name passes straight through
    actual = ListFileUtils.unpackSet(Arrays.asList("public/testdata/exampleBAM.bam"));
    Assert.assertEquals(actual, expected);

    // a .list file: '#'-prefixed lines (optionally after whitespace) are ignored,
    // so only the one real entry should survive
    File tempListFile = createTempListFile("testUnpackSet",
                                           "#",
                                           "public/testdata/exampleBAM.bam",
                                           "#public/testdata/foo.bam",
                                           " # public/testdata/bar.bam"
                                          );
    actual = ListFileUtils.unpackSet(Arrays.asList(tempListFile.getAbsolutePath()));
    Assert.assertEquals(actual, expected);
}
// Data rows for testIncludeMatching: { values, filters, exactMatch, expected }
@DataProvider(name="includeMatchingTests")
public Object[][] getIncludeMatchingTests() {
    return new Object[][] {
            { asSet("a", "ab", "abc"), Arrays.asList("a"), true, asSet("a") },
            { asSet("a", "ab", "abc"), Arrays.asList("a"), false, asSet("a", "ab", "abc") },
            { asSet("a", "ab", "abc"), Arrays.asList("b"), true, Collections.EMPTY_SET },
            { asSet("a", "ab", "abc"), Arrays.asList("b"), false, asSet("ab", "abc") },
            { asSet("a", "ab", "abc"), Arrays.asList("a", "b"), true, asSet("a") },
            { asSet("a", "ab", "abc"), Arrays.asList("a", "b"), false, asSet("a", "ab", "abc") },
            { asSet("a", "ab", "abc"), Arrays.asList("a", "ab"), true, asSet("a", "ab") },
            { asSet("a", "ab", "abc"), Arrays.asList("a", "ab"), false, asSet("a", "ab", "abc") },
            { asSet("a", "ab", "abc"), Arrays.asList(".*b.*"), true, Collections.EMPTY_SET },
            { asSet("a", "ab", "abc"), Arrays.asList(".*b.*"), false, asSet("ab", "abc") },
            { asSet("a", "ab", "abc"), Arrays.asList(".*"), true, Collections.EMPTY_SET },
            { asSet("a", "ab", "abc"), Arrays.asList(".*"), false, asSet("a", "ab", "abc") }
    };
}
// Checks ListFileUtils.includeMatching against each row of includeMatchingTests:
// exactMatch=true keeps only literal matches, false treats filters as regexes.
@Test(dataProvider = "includeMatchingTests")
public void testIncludeMatching(Set<String> values, Collection<String> filters, boolean exactMatch, Set<String> expected) {
    Set<String> actual = ListFileUtils.includeMatching(values, ListFileUtils.IDENTITY_STRING_CONVERTER, filters, exactMatch);
    Assert.assertEquals(actual, expected);
}
// Data rows for testExcludeMatching: { values, filters, exactMatch, expected }
@DataProvider(name="excludeMatchingTests")
public Object[][] getExcludeMatchingTests() {
    return new Object[][] {
            { asSet("a", "ab", "abc"), Arrays.asList("a"), true, asSet("ab", "abc") },
            { asSet("a", "ab", "abc"), Arrays.asList("a"), false, Collections.EMPTY_SET },
            { asSet("a", "ab", "abc"), Arrays.asList("b"), true, asSet("a", "ab", "abc") },
            { asSet("a", "ab", "abc"), Arrays.asList("b"), false, asSet("a") },
            { asSet("a", "ab", "abc"), Arrays.asList("a", "b"), true, asSet("ab", "abc") },
            { asSet("a", "ab", "abc"), Arrays.asList("a", "b"), false, Collections.EMPTY_SET },
            { asSet("a", "ab", "abc"), Arrays.asList("a", "ab"), true, asSet("abc") },
            { asSet("a", "ab", "abc"), Arrays.asList("a", "ab"), false, Collections.EMPTY_SET },
            { asSet("a", "ab", "abc"), Arrays.asList(".*b.*"), true, asSet("a", "ab", "abc") },
            { asSet("a", "ab", "abc"), Arrays.asList(".*b.*"), false, asSet("a") },
            { asSet("a", "ab", "abc"), Arrays.asList(".*"), true, asSet("a", "ab", "abc") },
            { asSet("a", "ab", "abc"), Arrays.asList(".*"), false, Collections.EMPTY_SET }
    };
}
// Checks ListFileUtils.excludeMatching against each row of excludeMatchingTests:
// the complement of includeMatching under the same exactMatch semantics.
@Test(dataProvider = "excludeMatchingTests")
public void testExcludeMatching(Set<String> values, Collection<String> filters, boolean exactMatch, Set<String> expected) {
    Set<String> actual = ListFileUtils.excludeMatching(values, ListFileUtils.IDENTITY_STRING_CONVERTER, filters, exactMatch);
    Assert.assertEquals(actual, expected);
}
// Convenience: build a mutable HashSet from varargs.
private static <T> Set<T> asSet(T... args){
    final Set<T> result = new HashSet<T>();
    Collections.addAll(result, args);
    return result;
}
private File createTempListFile( String tempFilePrefix, String... lines ) throws Exception {
File tempListFile = File.createTempFile(tempFilePrefix, ".list");
tempListFile.deleteOnExit();

View File

@ -0,0 +1,47 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.queue.qscripts.examples
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.extensions.gatk._
/**
 * Example Queue script demonstrating a mixed-in read filter: runs the
 * UnifiedGenotyper with the BadMate filter applied to its input reads.
 * (Original note said "used for testing output to /dev/null" — no output
 * redirection is visible here; NOTE(review): confirm that claim.)
 */
class ExampleReadFilter extends QScript {
  @Input(doc="The reference file for the bam files.", shortName="R")
  var referenceFile: File = _

  @Input(doc="Bam file to genotype.", shortName="I")
  var bamFile: File = _

  def script() {
    // Mix in the BadMate trait so the genotyper runs with that read filter
    val genotyper = new UnifiedGenotyper with BadMate
    genotyper.reference_sequence = referenceFile
    genotyper.memoryLimit = 2
    genotyper.input_file :+= bamFile
    add(genotyper)
  }
}

View File

@ -49,7 +49,6 @@ case class GATKIntervals(reference: File, intervals: Seq[String]) {
else
IntervalUtils.parseIntervalArguments(parser, intervals)
Collections.sort(parsedLocs)
Collections.unmodifiableList(parsedLocs)
val mergedLocs = IntervalUtils.mergeIntervalLocations(parsedLocs, IntervalMergingRule.OVERLAPPING_ONLY)
Collections.unmodifiableList(mergedLocs)
}

View File

@ -32,6 +32,8 @@ import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor
* Merges a vcf text file.
*/
class VcfGatherFunction extends CombineVariants with GatherFunction {
this.assumeIdenticalSamples = true
this.suppressCommandLineHeader = true
private lazy val originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK]
@ -43,7 +45,6 @@ class VcfGatherFunction extends CombineVariants with GatherFunction {
this.variant = this.gatherParts.zipWithIndex map { case (input, index) => new TaggedFile(input, "input"+index) }
this.out = this.originalOutput
this.assumeIdenticalSamples = true
// NO_HEADER and sites_only from VCFWriterArgumentTypeDescriptor
// are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK

View File

@ -136,7 +136,7 @@ object PipelineTest extends BaseTest with Logging {
println(" value (min,target,max) table key metric")
for (validation <- evalSpec.validations) {
val table = report.getTable(validation.table)
val key = table.getPrimaryKeyByData(validation.key)
val key = table.getPrimaryKeyByData(validation.table +: validation.key.split('.') : _*)
val value = String.valueOf(table.get(key, validation.metric))
val inRange = if (value == null) false else validation.inRange(value)
val flag = if (!inRange) "*" else " "

View File

@ -0,0 +1,90 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.queue.pipeline.examples
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.testng.annotations.Test
import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
import org.broadinstitute.sting.BaseTest
/**
 * Pipeline test that runs the ExampleReadFilter QScript end-to-end against
 * the example FASTA/BAM test data via PipelineTest.
 */
class ExampleReadFilterPipelineTest {
  @Test
  def testExampleReadFilter() {
    val spec = new PipelineTestSpec
    spec.name = "examplereadfilter"
    // Each argument string carries its own leading space because the array is
    // concatenated with mkString (no separator).
    spec.args = Array(
      " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleReadFilter.scala",
      " -R " + BaseTest.testDir + "exampleFASTA.fasta",
      " -I " + BaseTest.testDir + "exampleBAM.bam").mkString
    PipelineTest.executeTest(spec)
  }
}