From 44d0cb6cde447e3a78bd9a394073ae3b09447cc0 Mon Sep 17 00:00:00 2001 From: depristo Date: Sat, 13 Nov 2010 16:19:56 +0000 Subject: [PATCH] New version of cutting routines for VQSR. Old code removed. Working unit tests. Best practice with testng integration test (everyone look at it). Walker test now allows you to not specify no. input files, if it can infer input counts from MD5s git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4664 348d0f76-0448-11de-a6fe-93d51630548a --- R/plot_Tranches.R | 33 +-- .../ApplyVariantCuts.java | 9 +- .../walkers/variantrecalibration/Tranche.java | 99 +++++++-- .../variantrecalibration/VariantDatum.java | 1 + .../VariantGaussianMixtureModel.java | 170 ++------------ .../VariantRecalibrator.java | 42 ++-- .../org/broadinstitute/sting/WalkerTest.java | 14 +- .../VariantEvalIntegrationTest.java | 3 +- .../VariantGaussianMixtureModelUnitTest.java | 116 +++++++--- ...ntRecalibrationWalkersIntegrationTest.java | 209 ++++++++---------- 10 files changed, 327 insertions(+), 369 deletions(-) diff --git a/R/plot_Tranches.R b/R/plot_Tranches.R index 343d48e28..858d2ddf3 100755 --- a/R/plot_Tranches.R +++ b/R/plot_Tranches.R @@ -3,7 +3,7 @@ args <- commandArgs(TRUE) verbose = TRUE -input = args[1] +tranchesFile = args[1] targetTITV = as.numeric(args[2]) suppressLegend = ! is.na(args[3]) @@ -37,12 +37,15 @@ leftShift <- function(x, leftValue = 0) { # ----------------------------------------------------------------------------------------------- # Tranches plot # ----------------------------------------------------------------------------------------------- -data2 = read.table(paste(input,".tranches",sep=""),sep=",",head=T) +data2 = read.table(tranchesFile,sep=",",head=T) +data2 = data2[order(data2$FDRtranche, decreasing=T),] cols = c("cornflowerblue", "cornflowerblue", "darkorange", "darkorange") density=c(20, -1, -1, 20) -outfile = paste(input, ".FDRtranches.pdf", sep="") +outfile = paste(tranchesFile, ".pdf", sep="") pdf(outfile, height=5, width=8) -alpha = 1 - titvFPEstV(targetTITV, data2$novelTITV) +par(mar = c(5, 5, 4, 2) + 0.1) +novelTiTv = c(data2$novelTITV,data2$novelTiTv) +alpha = 1 - titvFPEstV(targetTITV, novelTiTv) #print(alpha) numGood = round(alpha * data2$numNovel); @@ -61,26 +64,10 @@ barplot(d/1000,horiz=TRUE,col=cols,space=0.2,xlab="Number of Novel Variants (100 #abline(v= d[2,dim(d)[2]], lty=2) #abline(v= d[1,3], lty=2) if ( ! suppressLegend ) - legend(5, 2.25, c('Cumulative TPs','Tranch-specific TPs', 'Tranch-specific FPs', 'Cumulative FPs' ), fill=cols, density=density, bg='white', cex=1.25) + legend("topright", c('Cumulative TPs','Tranch-specific TPs', 'Tranch-specific FPs', 'Cumulative FPs' ), fill=cols, density=density, bg='white', cex=1.25) mtext("Ti/Tv",2,line=2.25,at=length(data2$FDRtranche)*1.2,las=1, cex=1) mtext("FDR",2,line=0,at=length(data2$FDRtranche)*1.2,las=1, cex=1) -axis(2,line=-1,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$FDRtranche, las=1, cex.axis=1.25) -axis(2,line=1,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$novelTITV, las=1, cex.axis=1.25) +axis(2,line=-1,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$FDRtranche, las=1, cex.axis=1.0) +axis(2,line=1,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=round(novelTiTv,3), las=1, cex.axis=1.0) dev.off() - - -# -#data2 = read.table(paste(input,".tranches",sep=""),sep=",",head=T) -#cols = c("steelblue","orange") -#outfile = paste(input, ".FDRtranches.pdf", sep="") -#pdf(outfile, height=7, width=8) -#alpha = (data2$novelTITV - 0.5) / (targetTITV - 0.5); -#numGood = round(alpha * data2$numNovel); -#numBad = data2$numNovel - numGood; -#d=matrix(c(numGood,numBad),2,byrow=TRUE) -#barplot(d,horiz=TRUE,col=cols,space=0.2,xlab="Number of Novel Variants",ylab="Novel Ti/Tv --> FDR (%)") -#legend('topright',c('implied TP','implied FP'),col=cols,lty=1,lwd=16) -#axis(2,line=-1,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$FDRtranche) -#axis(2,line=0.4,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$novelTITV) -#dev.off() diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java index eab6b76b9..d4e0153d6 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.vcf.VCFUtils; import java.io.File; @@ -86,9 +87,9 @@ public class ApplyVariantCuts extends RodWalker { tranches.add(t); //statusMsg = "Keeping, above FDR threshold"; } - logger.info(String.format("Tranche %s with %.2f FDR, TsTv %.2f and pCut %.2f, threshold %.2f", - t.name, t.fdr, t.novelTiTv, t.pCut, FDR_FILTER_LEVEL)); + logger.info(String.format("Read tranche " + t)); } + Collections.reverse(tranches); // this algorithm wants the tranches ordered from worst to best // setup the header fields final Set hInfo = new HashSet(); @@ -104,8 +105,12 @@ public class ApplyVariantCuts extends RodWalker { } if( tranches.size() >= 1 ) { hInfo.add(new VCFFilterHeaderLine(tranches.get(0).name + "+", String.format("FDR tranche level at qual < " + tranches.get(0).pCut))); + } else { + throw new UserException("No tranches were found in the file or were above the FDR Filter level " + FDR_FILTER_LEVEL); } + logger.info("Keeping all variants in tranche " + tranches.get(tranches.size()-1)); + final VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java index 63a7faafb..b4ffbfeff 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java @@ -36,6 +36,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.vcf.VCFUtils; import org.broadinstitute.sting.utils.text.XReadLines; @@ -45,39 +46,69 @@ import java.util.*; /** */ -public class Tranche { - public double fdr, pCut, knownTiTv, novelTiTv; +public class Tranche implements Comparable { + private static final int CURRENT_VERSION = 2; + + public double fdr, pCut, targetTiTv, knownTiTv, novelTiTv; public int numKnown,numNovel; public String name; - public Tranche(double fdr, double pCut, int numKnown, double knownTiTv, int numNovel, double novelTiTv) { - this(fdr, pCut, numKnown, knownTiTv, numNovel, novelTiTv, "anonymous"); + public Tranche(double fdr, double targetTiTv, double pCut, int numKnown, double knownTiTv, int numNovel, double novelTiTv) { + this(fdr, targetTiTv, pCut, numKnown, knownTiTv, numNovel, novelTiTv, "anonymous"); } - public Tranche(double fdr, double pCut, int numKnown, double knownTiTv, int numNovel, double novelTiTv, String name) { + public Tranche(double fdr, double targetTiTv, double pCut, int numKnown, double knownTiTv, int numNovel, double novelTiTv, String name) { this.fdr = fdr; + this.targetTiTv = targetTiTv; this.pCut = pCut; this.novelTiTv = novelTiTv; this.numNovel = numNovel; this.knownTiTv = knownTiTv; this.numKnown = numKnown; this.name = name; + +// if ( targetTiTv < 0.5 || targetTiTv > 10 ) +// throw new UserException("Target Ti/Tv ratio is unreasonable " + targetTiTv); +// +// if ( numKnown < 0 || numNovel < 0) +// throw new ReviewedStingException("Invalid tranch - no. variants is < 0 : known " + numKnown + " novel " + numNovel); + + if ( name == null ) + throw new ReviewedStingException("BUG -- name cannot be null"); + + } + + public int compareTo(Tranche other) { + return Double.compare(this.fdr, other.fdr); } public String toString() { - return String.format("[Tranche cut = %.3f with %d novels @ %.2f]", pCut, numNovel, novelTiTv); + return String.format("Tranche fdr=%.2f minQual=%.2f known=(%d @ %.2f) novel=(%d @ %.2f) name=%s]", + fdr, pCut, numKnown, knownTiTv, numNovel, novelTiTv, name); } - public static String tranchesString(List tranches) { + /** + * Returns an appropriately formated string representing the raw tranches file on disk. + * + * @param rawTranches + * @return + */ + public static String tranchesString(List rawTranches) { ByteArrayOutputStream bytes = new ByteArrayOutputStream(); PrintStream stream = new PrintStream(bytes); - stream.println("FDRtranche,numKnown,numNovel,knownTiTv,novelTiTv,pCut,filterName"); + List tranches = new ArrayList(); + tranches.addAll(rawTranches); + Collections.sort(tranches); + + stream.println("# Variant quality score tranches file"); + stream.println("# Version number " + CURRENT_VERSION); + stream.println("FDRtranche,targetTiTv,numKnown,numNovel,knownTiTv,novelTiTv,pCut,filterName"); Tranche prev = null; for ( Tranche t : tranches ) { - stream.printf("%.2f,%d,%d,%.4f,%.4f,%.4f,FDRtranche%.2fto%.2f%n", - t.fdr,t.numKnown,t.numNovel,t.knownTiTv,t.novelTiTv, t.pCut, + stream.printf("%.2f,%.2f,%d,%d,%.4f,%.4f,%.2f,FDRtranche%.2fto%.2f%n", + t.fdr,t.targetTiTv,t.numKnown,t.numNovel,t.knownTiTv,t.novelTiTv, t.pCut, (prev == null ? 0.0 : prev.fdr), t.fdr); prev = t; } @@ -85,39 +116,65 @@ public class Tranche { return bytes.toString(); } - private static double getDouble(Map bindings, String key) { - return bindings.containsKey(key) ? Double.valueOf(bindings.get(key)) : -1.0; + private static double getDouble(Map bindings, String key, boolean required) { + if ( bindings.containsKey(key) ) + return Double.valueOf(bindings.get(key)); + else if ( required ) { + throw new UserException("Malformed tranches file. Missing required key " + key); + } + else + return -1; } - private static int getInteger(Map bindings, String key) { - return bindings.containsKey(key) ? Integer.valueOf(bindings.get(key)) : -1; + private static int getInteger(Map bindings, String key, boolean required) { + if ( bindings.containsKey(key) ) + return Integer.valueOf(bindings.get(key)); + else if ( required ) { + throw new UserException("Malformed tranches file. Missing required key " + key); + } + else + return -1; } + /** + * Returns a list of tranches, sorted from most to least specific, read in from file f + * + * @param f + * @return + */ public static List readTraches(File f) { String[] header = null; List tranches = new ArrayList(); try { for( final String line : new XReadLines(f) ) { + if ( line.startsWith("#") ) + continue; + final String[] vals = line.split(","); if( header == null ) { header = vals; } else { + if ( header.length != vals.length ) + throw new UserException.MalformedFile(f, "Line had too few/many fields. Header = " + header.length + " vals " + vals.length + " line " + line); + Map bindings = new HashMap(); for ( int i = 0; i < vals.length; i++ ) bindings.put(header[i], vals[i]); - tranches.add(new Tranche(getDouble(bindings,"FDRtranche"), - getDouble(bindings,"pCut"), - getInteger(bindings,"numKnown"), - getDouble(bindings,"knownTiTv"), - getInteger(bindings,"numNovel"), - Math.max(getDouble(bindings,"novelTiTv"), getDouble(bindings,"novelTITV")), + tranches.add(new Tranche(getDouble(bindings,"FDRtranche", true), + getDouble(bindings,"targetTiTv", false), + getDouble(bindings,"pCut", true), + getInteger(bindings,"numKnown", false), + getDouble(bindings,"knownTiTv", false), + getInteger(bindings,"numNovel", true), + Math.max(getDouble(bindings,"novelTiTv", false), getDouble(bindings,"novelTITV", false)), bindings.get("filterName"))); } } + Collections.sort(tranches); // sort this in the standard order return tranches; } catch( FileNotFoundException e ) { - throw new UserException.CouldNotCreateOutputFile(f, e); + throw new UserException.CouldNotReadInputFile(f, e); } } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java index d8f283093..dc2ba54da 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java @@ -32,6 +32,7 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; */ public class VariantDatum implements Comparable { + public int pos; public double[] annotations; public boolean isTransition; public boolean isKnown; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModel.java index e80aae9d3..67376edca 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModel.java @@ -474,6 +474,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel List tranches = new ArrayList(); for ( double fdr : FDRtranches ) { Tranche t = findTranche(tranchesData, runningTiTv, fdr, targetTiTv); + // todo -- should abort early when t's qual is 0 -- that's the lowest we'll get to if ( t == null ) { if ( tranches.size() == 0 ) @@ -514,7 +515,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel for ( int i = 0; i < runningTiTv.length; i++ ) { if ( runningTiTv[i] >= titvThreshold ) { // we've found the largest group of variants with Ti/Tv >= our target titv - return trancheOfVariants(data, i, desiredFDR); + return trancheOfVariants(data, i, desiredFDR, targetTiTv); } } @@ -522,173 +523,36 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel return null; } - public final static Tranche trancheOfVariants( final List data, int minI, double fdr ) { + public final static Tranche trancheOfVariants( final List data, int minI, double fdr, double targetTiTv ) { int numKnown = 0, numNovel = 0, knownTi = 0, knownTv = 0, novelTi = 0, novelTv = 0; - for ( int i = minI; i < data.size(); i++ ) { - VariantDatum datum = data.get(i); - if ( datum.isKnown ) { - numKnown++; - if ( datum.isTransition ) { knownTi++; } else { knownTv++; } - } else { - numNovel++; - if ( datum.isTransition ) { novelTi++; } else { novelTv++; } + double qualThreshold = data.get(minI).qual; + VariantDatum last = null; + for ( VariantDatum datum : data ) { + if ( datum.qual >= qualThreshold ) { + //if( ! datum.isKnown ) System.out.println(datum.pos); + if ( datum.isKnown ) { + numKnown++; + if ( datum.isTransition ) { knownTi++; } else { knownTv++; } + } else { + numNovel++; + if ( datum.isTransition ) { novelTi++; } else { novelTv++; } + } } + last = datum; } double knownTiTv = knownTi / Math.max(1.0 * knownTv, 1.0); double novelTiTv = novelTi / Math.max(1.0 * novelTv, 1.0); - return new Tranche(fdr, data.get(minI).qual, numKnown, knownTiTv, numNovel, novelTiTv); + return new Tranche(fdr, targetTiTv, qualThreshold, numKnown, knownTiTv, numNovel, novelTiTv); } public final static double fdrToTiTv(double desiredFDR, double targetTiTv) { return (1.0 - desiredFDR / 100.0) * (targetTiTv - 0.5) + 0.5; } - public final void outputOptimizationCurve( final VariantDatum[] data, final PrintStream outputReportDatFile, final PrintStream tranchesOutputFile, - final int desiredNumVariants, final Double[] FDRtranches, final double MAX_QUAL ) { - final int numVariants = data.length; - final boolean[] markedVariant = new boolean[numVariants]; - - int NUM_BINS = 400 * 2; - double QUAL_STEP1 = (MAX_QUAL * 9.0 / 10.0 ) / ((double) NUM_BINS / 2.0); - if( QUAL_STEP1 < 0.01 ) { QUAL_STEP1 = 0.01; } // QUAL field in VCF file is rounded to two decimal places - double QUAL_STEP2 = (MAX_QUAL * 1.0 / 10.0) / ((double) NUM_BINS / 2.0); - if( QUAL_STEP2 < 0.01 ) { QUAL_STEP2 = 0.01; } // QUAL field in VCF file is rounded to two decimal places - - final int numKnownAtCut[] = new int[NUM_BINS+2]; - final int numNovelAtCut[] = new int[NUM_BINS+2]; - final double knownTiTvAtCut[] = new double[NUM_BINS+2]; - final double novelTiTvAtCut[] = new double[NUM_BINS+2]; - final double theCut[] = new double[NUM_BINS+2]; - - final double fdrCutAsTiTv[] = new double[FDRtranches.length]; - for( int iii = 0; iii < FDRtranches.length; iii++ ) { - fdrCutAsTiTv[iii] = (1.0 - FDRtranches[iii] / 100.0) * (targetTITV - 0.5) + 0.5; - } - - for( int iii = 0; iii < numVariants; iii++ ) { - markedVariant[iii] = false; - } - - tranchesOutputFile.println("FDRtranche,novelTITV,pCut,numNovel,filterName"); - - int numKnown = 0; - int numNovel = 0; - int numKnownTi = 0; - int numKnownTv = 0; - int numNovelTi = 0; - int numNovelTv = 0; - boolean foundDesiredNumVariants = false; - int jjj = 0; - outputReportDatFile.println("pCut,numKnown,numNovel,knownTITV,novelTITV"); - double qCut = MAX_QUAL; - - while( qCut >= 0.0 - 1E-2 + 1E-8 ) { - if( qCut < 1E-2 ) { qCut = 0.0; } - - for( int iii = 0; iii < numVariants; iii++ ) { - if( !markedVariant[iii] ) { - if( data[iii].qual >= qCut ) { - markedVariant[iii] = true; - if( data[iii].isKnown ) { // known - numKnown++; - if( data[iii].isTransition ) { // transition - numKnownTi++; - } else { // transversion - numKnownTv++; - } - } else { // novel - numNovel++; - if( data[iii].isTransition ) { // transition - numNovelTi++; - } else { // transversion - numNovelTv++; - } - } - } - } - } - if( desiredNumVariants != 0 && !foundDesiredNumVariants && (numKnown + numNovel) >= desiredNumVariants ) { - logger.info( "Keeping variants with QUAL >= " + String.format("%.2f",qCut) + " results in a filtered set with: " ); - logger.info("\t" + numKnown + " known variants"); - logger.info("\t" + numNovel + " novel variants, (dbSNP rate = " + String.format("%.2f",((double) numKnown * 100.0) / ((double) numKnown + numNovel) ) + "%)"); - logger.info("\t" + String.format("%.4f known Ti/Tv ratio", ((double)numKnownTi) / ((double)numKnownTv))); - logger.info("\t" + String.format("%.4f novel Ti/Tv ratio", ((double)numNovelTi) / ((double)numNovelTv))); - foundDesiredNumVariants = true; - } - outputReportDatFile.println( String.format("%.2f,%d,%d,%.4f,%.4f", qCut, numKnown, numNovel, - ( numKnownTv == 0 ? 0.0 : ( ((double)numKnownTi) / ((double)numKnownTv) ) ), - ( numNovelTv == 0 ? 0.0 : ( ((double)numNovelTi) / ((double)numNovelTv) ) ))); - - numKnownAtCut[jjj] = numKnown; - numNovelAtCut[jjj] = numNovel; - knownTiTvAtCut[jjj] = ( numKnownTi == 0 || numKnownTv == 0 ? 0.0 : ( ((double)numKnownTi) / ((double)numKnownTv) ) ); - novelTiTvAtCut[jjj] = ( numNovelTi == 0 || numNovelTv == 0 ? 0.0 : ( ((double)numNovelTi) / ((double)numNovelTv) ) ); - theCut[jjj] = qCut; - jjj++; - - if( qCut >= (MAX_QUAL / 10.0) ) { - qCut -= QUAL_STEP1; - } else { - qCut -= QUAL_STEP2; - } - } - - // loop back through the data points looking for appropriate places to cut the data to get the target novel titv ratio - int checkQuantile = 0; - int tranche = FDRtranches.length - 1; - for( ; jjj >= 0; jjj-- ) { - - if( tranche >= 0 && novelTiTvAtCut[jjj] >= fdrCutAsTiTv[tranche] ) { - tranchesOutputFile.println(String.format("%.2f,%.4f,%.4f,%d,FDRtranche%.2fto%.2f", - FDRtranches[tranche],novelTiTvAtCut[jjj],theCut[jjj],numNovelAtCut[jjj], - (tranche == 0 ? 0.0 : FDRtranches[tranche-1]) ,FDRtranches[tranche])); - tranche--; - } - - boolean foundCut = false; - if( checkQuantile == 0 ) { - if( novelTiTvAtCut[jjj] >= 0.9 * targetTITV ) { - foundCut = true; - checkQuantile++; - } - } else if( checkQuantile == 1 ) { - if( novelTiTvAtCut[jjj] >= 0.95 * targetTITV ) { - foundCut = true; - checkQuantile++; - } - } else if( checkQuantile == 2 ) { - if( novelTiTvAtCut[jjj] >= 0.98 * targetTITV ) { - foundCut = true; - checkQuantile++; - } - } else if( checkQuantile == 3 ) { - if( novelTiTvAtCut[jjj] >= targetTITV ) { - foundCut = true; - checkQuantile++; - } - } else if( checkQuantile == 4 ) { - break; // break out - } - - if( foundCut ) { - logger.info( "Keeping variants with QUAL >= " + String.format("%.2f",theCut[jjj]) + " results in a filtered set with: " ); - logger.info("\t" + numKnownAtCut[jjj] + " known variants"); - logger.info("\t" + numNovelAtCut[jjj] + " novel variants, (dbSNP rate = " + - String.format("%.2f",((double) numKnownAtCut[jjj] * 100.0) / ((double) numKnownAtCut[jjj] + numNovelAtCut[jjj]) ) + "%)"); - logger.info("\t" + String.format("%.4f known Ti/Tv ratio", knownTiTvAtCut[jjj])); - logger.info("\t" + String.format("%.4f novel Ti/Tv ratio", novelTiTvAtCut[jjj])); - logger.info("\t" + String.format("--> with an implied novel FDR of %.2f percent", Math.abs(100.0 * (1.0-((novelTiTvAtCut[jjj] - 0.5) / (targetTITV - 0.5)))))); - } - } - if( tranche >= 0 ) { // Didn't find all the tranches - throw new UserException.BadInput("Couldn't find appropriate cuts for all the requested tranches. Please ask for fewer tranches with higher false discovery rates using the --FDRtranche argument"); - } - } - private double evaluateGaussians( final VariantDatum[] data, final double[][] pVarInCluster, final int startCluster, final int stopCluster ) { final int numAnnotations = data[0].annotations.length; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index b20a1eacc..defe1ceb4 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -68,9 +68,7 @@ public class VariantRecalibrator extends RodWalker maxQualObserved ) { maxQualObserved = variantDatum.qual; } @@ -329,25 +333,17 @@ public class VariantRecalibrator extends RodWalker tranches = VariantGaussianMixtureModel.findTranches( dataManager.data, FDR_TRANCHES, TARGET_TITV ); + tranchesStream.print(Tranche.tranchesString(tranches)); // Execute Rscript command to plot the optimization curve // Print out the command line to make it clear to the user what is being executed and how one might modify it - final String rScriptOptimizationCurveCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_OptimizationCurve.R" + " " + REPORT_DAT_FILE.getName() + " " + TARGET_TITV; - final String rScriptTranchesCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Tranches.R" + " " + REPORT_DAT_FILE.getName() + " " + TARGET_TITV; - logger.info( rScriptOptimizationCurveCommandLine ); + final String rScriptTranchesCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Tranches.R" + " " + TRANCHES_FILE.getAbsolutePath() + " " + TARGET_TITV; logger.info( rScriptTranchesCommandLine ); // Execute the RScript command to plot the table of truth values try { Process p; - p = Runtime.getRuntime().exec( rScriptOptimizationCurveCommandLine ); - p.waitFor(); p = Runtime.getRuntime().exec( rScriptTranchesCommandLine ); p.waitFor(); } catch ( Exception e ) { diff --git a/java/test/org/broadinstitute/sting/WalkerTest.java b/java/test/org/broadinstitute/sting/WalkerTest.java index 22a5180d5..9349fe0b4 100755 --- a/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/java/test/org/broadinstitute/sting/WalkerTest.java @@ -71,7 +71,7 @@ public class WalkerTest extends BaseTest { } } - private static File getFileForMD5(final String md5) { + protected static File getFileForMD5(final String md5) { final String basename = String.format("%s.integrationtest", md5); return new File(MD5_FILE_DB_SUBDIR + "/" + basename); } @@ -208,15 +208,23 @@ public class WalkerTest extends BaseTest { protected Map auxillaryFiles = new HashMap(); + public WalkerTestSpec(String args, List md5s) { + this(args, -1, md5s); + } + public WalkerTestSpec(String args, int nOutputFiles, List md5s) { this.args = args; - this.nOutputFiles = nOutputFiles; + this.nOutputFiles = md5s.size(); this.md5s = md5s; } + public WalkerTestSpec(String args, List exts, List md5s) { + this(args, -1, exts, md5s); + } + public WalkerTestSpec(String args, int nOutputFiles, List exts, List md5s) { this.args = args; - this.nOutputFiles = nOutputFiles; + this.nOutputFiles = md5s.size(); this.md5s = md5s; this.exts = exts; } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 3b39767b1..9f84a3e65 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -7,8 +7,7 @@ import java.util.Arrays; import java.util.HashMap; import java.util.Map; -public class - VariantEvalIntegrationTest extends WalkerTest { +public class VariantEvalIntegrationTest extends WalkerTest { private static String cmdRoot = "-T VariantEval" + " -R " + b36KGReference; diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModelUnitTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModelUnitTest.java index 075af3006..b0933735c 100644 --- a/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModelUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModelUnitTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 The Broad Institute + * Copyright (c) 2010, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -12,15 +12,14 @@ * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. */ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; @@ -34,6 +33,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.BaseTest; import org.testng.annotations.Test; @@ -58,35 +58,93 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest { private static int N_VARIANTS = 100; VariantDatum[] variantData1 = new VariantDatum[N_VARIANTS]; - @BeforeTest(enabled=false) - public void beforeTest() { - for ( int i = 0; i < N_VARIANTS; i++ ) { - variantData1[i].isKnown = i % 2 == 0; // every other is know - variantData1[i].qual = (N_VARIANTS - i) * 1.0; + private final File QUAL_DATA = new File(testDir + "tranches.raw.dat"); + private final double[] FDRS = new double[]{0.1, 1, 10, 100}; + private final double TARGET_TITV = 2.8; + private final File EXPECTED_TRANCHES_NEW = new File(testDir + "tranches.6.txt"); + private final File EXPECTED_TRANCHES_OLD = new File(testDir + "tranches.4.txt"); + + private List readData() { + List vd = new ArrayList(); + try { + for ( String line : new XReadLines(QUAL_DATA, true) ) { + String[] parts = line.split(" "); + if ( ! parts[0].equals("QUAL") ) { + VariantDatum datum = new VariantDatum(); + datum.qual = Double.valueOf(parts[0]); + datum.isTransition = parts[1].equals("1"); + datum.isKnown = parts[2].equals("1"); + vd.add(datum); + } + } + } catch (FileNotFoundException e) { + throw new StingException("foo", e); } - // first 25 are tv, 25-75 are 50/50, and 75+ are all transitions - int i = 0; - for ( ; i < (N_VARIANTS * 0.25); i++ ) { variantData1[i].isTransition = true; } - for ( ; i < (N_VARIANTS * 0.75); i++ ) { variantData1[i].isTransition = i % 2 == 0; } - for ( ; i < N_VARIANTS; i++ ) { variantData1[i].isTransition = false; } + return vd; } - @Test(enabled=false) + @Test(expectedExceptions = {UserException.MalformedFile.class}) + public final void readBadFormat() { + Tranche.readTraches(QUAL_DATA); + } + + @Test + public final void readNewFormat() { + read(EXPECTED_TRANCHES_NEW); + } + + @Test + public final void readOldFormat() { + read(EXPECTED_TRANCHES_OLD); + } + + public final List read(File f) { + return Tranche.readTraches(f); + } + + @Test + public final void testNewAndOldAretheSame() { + List newFormat = read(EXPECTED_TRANCHES_NEW); + List oldFormat = read(EXPECTED_TRANCHES_OLD); + assertTranchesAreTheSame(newFormat, oldFormat, false, true); + } + + private static void assertTranchesAreTheSame(List newFormat, List oldFormat, boolean completeP, boolean includeName) { + Assert.assertEquals(oldFormat.size(), newFormat.size()); + for ( int i = 0; i < newFormat.size(); i++ ) { + Tranche n = newFormat.get(i); + Tranche o = oldFormat.get(i); + Assert.assertEquals(n.fdr, o.fdr, 1e-3); + Assert.assertEquals(n.numNovel, o.numNovel); + Assert.assertEquals(n.novelTiTv, o.novelTiTv, 1e-3); + if ( includeName ) + Assert.assertEquals(n.name, o.name); + if ( completeP ) { + Assert.assertEquals(n.numKnown, o.numKnown); + Assert.assertEquals(n.knownTiTv, o.knownTiTv, 1e-3); + Assert.assertEquals(n.targetTiTv, o.targetTiTv, 1e-3); + } + } + } + + @Test public final void testFindTranches1() { - List tranches = VariantGaussianMixtureModel.findTranches(variantData1, new double[]{0.1, 20}, 2.0); - Assert.assertEquals( tranches.size(), 2 ); - - Tranche t1 = tranches.get(0); - Assert.assertEquals( t1.fdr, 0.1 ); - Assert.assertEquals( t1.pCut, 26 ); - Assert.assertEquals( t1.numKnown, 37 ); - Assert.assertEquals( t1.numNovel, 37 ); - - Tranche t2 = tranches.get(1); - Assert.assertEquals( t2.fdr, 20 ); - Assert.assertEquals( t2.pCut, 21 ); - Assert.assertEquals( t2.numKnown, 37 ); - Assert.assertEquals( t2.numNovel, 37 ); + List vd = readData(); + List tranches = VariantGaussianMixtureModel.findTranches(vd.toArray(new VariantDatum[0]), FDRS, TARGET_TITV); + System.out.printf(Tranche.tranchesString(tranches)); + assertTranchesAreTheSame(read(EXPECTED_TRANCHES_NEW), tranches, true, false); } + +// @Test(expectedExceptions = {UserException.class}) +// public final void testBadFDR() { +// List vd = readData(); +// VariantGaussianMixtureModel.findTranches(vd.toArray(new VariantDatum[0]), new double[]{-1}, TARGET_TITV); +// } +// +// @Test(expectedExceptions = {UserException.class}) +// public final void testBadTargetTiTv() { +// List vd = readData(); +// VariantGaussianMixtureModel.findTranches(vd.toArray(new VariantDatum[0]), FDRS, 0.1); +// } } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index 8e2768aa3..52f6958ee 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.annotations.Test; +import org.testng.annotations.DataProvider; import java.util.*; import java.io.File; @@ -12,130 +13,112 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { static HashMap tranchesFiles = new HashMap(); static HashMap inputVCFFiles = new HashMap(); - @Test(enabled = true) - public void testGenerateVariantClusters() { - HashMap e = new HashMap(); - e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "ab2629d67e378fd3aceb8318f0fbfe04" ); - e.put( validationDataLocation + "lowpass.N3.chr1.raw.vcf", "725489156426e4ddd8d623ab3d4b1023" ); - - for ( Map.Entry entry : e.entrySet() ) { - String vcf = entry.getKey(); - String md5 = entry.getValue(); - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " -NO_HEADER" + - " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + - " -B:hapmap,VCF " + comparisonDataLocation + "Validated/HapMap/3.2/genotypes_r27_nr.b36_fwd.vcf" + - " -weightDBSNP 0.2 -weightHapMap 1.0" + - " -T GenerateVariantClusters" + - " -B:input,VCF " + vcf + - " -L 1:50,000,000-200,000,000" + - " -qual 50.0" + - " --ignore_filter GATK_STANDARD" + - " -an QD -an HRun -an SB" + - " -clusterFile %s", - 1, // just one output file - Arrays.asList(md5)); - List result = executeTest("testGenerateVariantClusters", spec).getFirst(); - clusterFiles.put(vcf, result.get(0).getAbsolutePath()); + private static class VRTest { + String inVCF; + String clusterMD5; + String tranchesMD5; + String recalVCFMD5; + String cutVCFMD5; + public VRTest(String inVCF, String clusterMD5, String tranchesMD5, String recalVCFMD5, String cutVCFMD5) { + this.inVCF = validationDataLocation + inVCF; + this.clusterMD5 = clusterMD5; + this.tranchesMD5 = tranchesMD5; + this.recalVCFMD5 = recalVCFMD5; + this.cutVCFMD5 = cutVCFMD5; } } - @Test(enabled = true) - public void testVariantRecalibrator() { - HashMap> e = new HashMap>(); - e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", - Arrays.asList("4e893672230fca625f70b0491f3b36cb", "40b3fb6632304cebc56a9ed5853cc72e","0cfbeb9f2db7edc1d69b5b43ea17670c")); // Each test checks the md5 of three output files - e.put( validationDataLocation + "lowpass.N3.chr1.raw.vcf", - Arrays.asList("d52e4f511c9c00f8c21dffea81c47103", "780920fcfa3009c66ebcf18265edaa75","010023ddb95fac071e0f208e6bb40c61")); // Each test checks the md5 of three output files + VRTest yriTrio = new VRTest("yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", + "ab2629d67e378fd3aceb8318f0fbfe04", // in vcf + "4dd95e9d8e5d21a6ab73de67d9663492", // tranches + "4e893672230fca625f70b0491f3b36cb", // recalVCF + "371b0e2796982485ae050e46892f6660"); // cut VCF - for ( Map.Entry> entry : e.entrySet() ) { - String vcf = entry.getKey(); - List md5s = entry.getValue(); - String clusterFile = clusterFiles.get(vcf); - System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile); - if ( clusterFile != null ) { - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " -NO_HEADER" + - " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + - " -B:hapmap,VCF " + comparisonDataLocation + "Validated/HapMap/3.2/genotypes_r27_nr.b36_fwd.vcf" + - " -T VariantRecalibrator" + - " -B:input,VCF " + vcf + - " -L 1:20,000,000-100,000,000" + - " --ignore_filter GATK_STANDARD" + - " --ignore_filter HARD_TO_VALIDATE" + - " -clusterFile " + clusterFile + - " -titv 2.07" + - " -o %s" + - " -tranchesFile %s" + - " -reportDatFile %s", - 3, // three output files - md5s); - List result = executeTest("testVariantRecalibrator", spec).getFirst(); - inputVCFFiles.put(vcf, result.get(0).getAbsolutePath()); - tranchesFiles.put(vcf, result.get(1).getAbsolutePath()); - } - } + VRTest lowPass = new VRTest("lowpass.N3.chr1.raw.vcf", + "725489156426e4ddd8d623ab3d4b1023", // in vcf + "dfc07132f592a811d0d6c25a4cb67a09", // tranches + "d52e4f511c9c00f8c21dffea81c47103", // recalVCF + "39716b2f03b50e88855d7975dd1b9b3e"); // cut VCF + + @DataProvider(name = "VRTest") + public Object[][] createData1() { + return new Object[][]{ {yriTrio}, {lowPass} }; } - @Test - public void testApplyVariantCuts() { - HashMap e = new HashMap(); - e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "646805c4619a15d3f9f4976b96b89ecd" ); - e.put( validationDataLocation + "lowpass.N3.chr1.raw.vcf", "df9e37af16610ccd44d375eae2c4479c" ); + @Test(dataProvider = "VRTest", enabled = true) + public void testGenerateVariantClusters(VRTest params) { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b36KGReference + + " -NO_HEADER" + + " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + + " -B:hapmap,VCF " + comparisonDataLocation + "Validated/HapMap/3.2/genotypes_r27_nr.b36_fwd.vcf" + + " -weightDBSNP 0.2 -weightHapMap 1.0" + + " -T GenerateVariantClusters" + + " -B:input,VCF " + params.inVCF + + " -L 1:50,000,000-200,000,000" + + " -qual 50.0" + + " --ignore_filter GATK_STANDARD" + + " -an QD -an HRun -an SB" + + " -clusterFile %s", + Arrays.asList(params.clusterMD5)); + executeTest("testGenerateVariantClusters-"+params.inVCF, spec).getFirst(); + } - for ( Map.Entry entry : e.entrySet() ) { - String vcf = entry.getKey(); - String md5 = entry.getValue(); - String inputVCFFile = inputVCFFiles.get(vcf); - String tranchesFile = tranchesFiles.get(vcf); - System.out.printf("PARAMS FOR %s is %s%n", vcf, inputVCFFile); - System.out.printf("PARAMS FOR %s is %s%n", vcf, tranchesFile); - if ( inputVCFFile != null && tranchesFile != null ) { - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " -NO_HEADER" + - " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + - " -T ApplyVariantCuts" + - " -L 1:20,000,000-100,000,000" + - " -B:input,VCF " + inputVCFFile + - " -o %s" + - " -tranchesFile " + tranchesFile, - 1, // just one output file - Arrays.asList(md5)); - executeTest("testApplyVariantCuts", spec); - } - } + @Test(dataProvider = "VRTest", enabled = true) + public void testVariantRecalibrator(VRTest params) { + //System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile); + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b36KGReference + + " -NO_HEADER" + + " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + + " -B:hapmap,VCF " + comparisonDataLocation + "Validated/HapMap/3.2/sites_r27_nr.b36_fwd.vcf" + + " -T VariantRecalibrator" + + " -B:input,VCF " + params.inVCF + + " -L 1:20,000,000-100,000,000" + + " --ignore_filter GATK_STANDARD" + + " --ignore_filter HARD_TO_VALIDATE" + + " -clusterFile " + getFileForMD5(params.clusterMD5) + + " -titv 2.07" + + " -o %s" + + " -tranchesFile %s", + Arrays.asList(params.recalVCFMD5, params.tranchesMD5)); + executeTest("testVariantRecalibrator-"+params.inVCF, spec).getFirst(); + } + + @Test(dataProvider = "VRTest", enabled = true) + public void testApplyVariantCuts(VRTest params) { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b36KGReference + + " -NO_HEADER" + + " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + + " -T ApplyVariantCuts" + + " -L 1:20,000,000-100,000,000" + + " -B:input,VCF " + getFileForMD5(params.recalVCFMD5) + + " -o %s" + + " -tranchesFile " + getFileForMD5(params.tranchesMD5), + Arrays.asList(params.cutVCFMD5)); + executeTest("testApplyVariantCuts-"+params.inVCF, spec); } - @Test + @Test() public void testFailWithBadAnnotation() { - HashMap e = new HashMap(); - e.put( validationDataLocation + "lowpass.N3.chr1.raw.vcf", "" ); - - for ( Map.Entry entry : e.entrySet() ) { - String vcf = entry.getKey(); - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " -NO_HEADER" + - " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + - " -B:hapmap,VCF " + comparisonDataLocation + "Validated/HapMap/3.2/genotypes_r27_nr.b36_fwd.vcf" + - " -weightDBSNP 0.2 -weightHapMap 1.0" + - " -T GenerateVariantClusters" + - " -B:input,VCF " + vcf + - " -L 1:50,000,000-200,000,000" + - " -qual 50.0" + - " --ignore_filter GATK_STANDARD" + - " -an QD -an HRun -an ThisAnnotationIsBAD" + // There is a bad annotation here - " -clusterFile %s", - 1, // just one output file - UserException.MalformedFile.class); - executeTest("testFailWithBadAnnotation", spec); - } + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b36KGReference + + " -NO_HEADER" + + " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + + " -B:hapmap,VCF " + comparisonDataLocation + "Validated/HapMap/3.2/sites_r27_nr.b36_fwd.vcf" + + " -weightDBSNP 0.2 -weightHapMap 1.0" + + " -T GenerateVariantClusters" + + " -B:input,VCF " + lowPass.inVCF + + " -L 1:50,000,000-200,000,000" + + " -qual 50.0" + + " --ignore_filter GATK_STANDARD" + + " -an QD -an HRun -an ThisAnnotationIsBAD" + // There is a bad annotation here + " -clusterFile %s", + 1, // just one output file + UserException.MalformedFile.class); + executeTest("testFailWithBadAnnotation", spec); } }