Added a utility method to retrieve the contig lengths for WG chunking.

Added a rudimentary GATKReportParser for parsing VE3 results.
Re-enabled the FCPTest (FullCallingPipelineTest) using VE3, the GATKReportParser, and the PicardAggregationUtils.
The tag type for .rod files is DBSNP, not ROD.
More explicit return types on implicit methods.
Added null checks for implicit string to/from file conversions.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5668 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kshakir 2011-04-20 19:22:21 +00:00
parent 59dd79faab
commit 8619f49d20
15 changed files with 483 additions and 179 deletions

View File

@ -0,0 +1,83 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
/**
 * Rudimentary reader for GATKReport v0.1 text: collects every table found in
 * the input and answers value lookups by table name, row key and column.
 */
public class GATKReportParser {
    /** Every table seen so far, in the order encountered. */
    private List<GATKReportTableParser> tables = new ArrayList<GATKReportTableParser>();

    /**
     * Opens the file, parses every table in it, and closes the stream again.
     * @param file the report file to read.
     * @throws IOException if the file cannot be opened or read.
     */
    public void parse(File file) throws IOException {
        InputStream reportStream = FileUtils.openInputStream(file);
        try {
            parse(reportStream);
        } finally {
            IOUtils.closeQuietly(reportStream);
        }
    }

    /**
     * Parses every table found in the stream and appends it to this parser.
     * A line starting with the v0.1 marker opens a new table, a blank line
     * ends the current table, and lines outside of any table are ignored.
     * @param input the report contents.
     * @throws IOException if the stream cannot be read.
     */
    public void parse(InputStream input) throws IOException {
        GATKReportTableParser current = null;
        for (String reportLine : new XReadLines(input)) {
            if (reportLine.startsWith("##:GATKReport.v0.1 ")) {
                // The marker line itself is the first line handed to the new table.
                current = newTableParser(reportLine);
                tables.add(current);
                current.parse(reportLine);
                continue;
            }
            if (current == null) {
                continue;
            }
            if (reportLine.trim().length() == 0) {
                current = null;
            } else {
                current.parse(reportLine);
            }
        }
    }

    /**
     * Looks up a value in the first table with the given name.
     * @param tableName name of the table.
     * @param key the row key, one element per key column.
     * @param column name of the column to read.
     * @return the value, or null if no table has that name or the table has no such value.
     */
    public String getValue(String tableName, String[] key, String column) {
        GATKReportTableParser match = findTable(tableName);
        if (match == null) {
            return null;
        }
        return match.getValue(key, column);
    }

    /**
     * Looks up a value in the first table with the given name.
     * @param tableName name of the table.
     * @param key the row key in the single-string form accepted by the table parser.
     * @param column name of the column to read.
     * @return the value, or null if no table has that name or the table has no such value.
     */
    public String getValue(String tableName, String key, String column) {
        GATKReportTableParser match = findTable(tableName);
        if (match == null) {
            return null;
        }
        return match.getValue(key, column);
    }

    /** Returns the first parsed table whose name equals tableName, or null if there is none. */
    private GATKReportTableParser findTable(String tableName) {
        for (GATKReportTableParser candidate : tables) {
            if (candidate.getTableName().equals(tableName)) {
                return candidate;
            }
        }
        return null;
    }

    /** Creates the parser for a new table; the header line is currently not inspected. */
    private GATKReportTableParser newTableParser(String header) {
        return new GATKReportTableParser();
    }
}

View File

@ -0,0 +1,75 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.apache.commons.lang.StringUtils;
import java.util.*;
/**
 * Parses one table of a GATKReport v0.1 file, fed one line at a time.
 *
 * The first line given to {@link #parse(String)} is the table description,
 * the second is the column header row, and every later line is a data row.
 * Lines are split on runs of spaces. The first token of a row is skipped when
 * matching keys; the row key is made of the tokens that follow it.
 */
public class GATKReportTableParser {
    /** Number of lines handed to parse() so far. */
    private int lineNum = 0;
    /** Tokens of the description line; the token at index 1 is the table name. */
    private String[] descriptions;
    /** Column name to column index, taken from the header row only. */
    private Map<String, Integer> headers = new HashMap<String, Integer>();
    /** Data rows only (description and header lines are not stored here). */
    private List<String[]> values = new ArrayList<String[]>();

    /**
     * Consumes the next line of the table.
     * @param line the raw line; the caller is expected to filter out blank lines.
     */
    public void parse(String line) {
        lineNum++;
        switch (lineNum) {
            case 1:
                descriptions = parseLine(line);
                // Without this break the description tokens were also registered
                // as column headers and stored as a data row.
                break;
            case 2:
                String[] columnHeaders = parseLine(line);
                for (int i = 0; i < columnHeaders.length; i++)
                    headers.put(columnHeaders[i], i);
                // Without this break the header row was also stored as a data row.
                break;
            default:
                values.add(parseLine(line));
                break;
        }
    }

    /**
     * Returns the table name, the second token of the description line.
     * Only valid after the description line has been parsed.
     * @return the table name.
     */
    public String getTableName() {
        return descriptions[1];
    }

    /**
     * Returns the value of a column in the first data row matching the key.
     * @param key the expected tokens of the row, starting at the second token.
     * @param column the column name as it appears in the header row.
     * @return the value, or null if the column is unknown, no row matches,
     *         or the matching row is too short to contain the column.
     */
    public String getValue(String[] key, String column) {
        Integer columnIndex = headers.get(column);
        if (columnIndex == null)
            return null;
        for (String[] row : values) {
            if (matchesKey(row, key))
                return columnIndex < row.length ? row[columnIndex] : null;
        }
        return null;
    }

    /**
     * Returns the value of a column in the first data row matching the key.
     * @param key the row key with its parts joined by '.'; key parts that
     *            themselves contain a dot need the array overload instead.
     * @param column the column name as it appears in the header row.
     * @return the value, or null if the column is unknown or no row matches.
     */
    public String getValue(String key, String column) {
        return getValue(key.split("\\."), column);
    }

    /** Tests whether row[1..key.length] equals key; rows too short to hold the key never match. */
    private static boolean matchesKey(String[] row, String[] key) {
        if (row.length <= key.length)
            return false;
        for (int i = 0; i < key.length; i++) {
            String expected = key[i];
            String actual = row[i + 1];
            if (expected == null ? actual != null : !expected.equals(actual))
                return false;
        }
        return true;
    }

    /** Splits a report line into tokens on runs of one or more spaces. */
    private String[] parseLine(String line) {
        return line.split(" +");
    }
}

View File

@ -110,16 +110,15 @@ public enum ReferenceData {
}
/**
* Returns the dbsnp type for the version, "VCF" or "ROD".
* Returns the dbsnp type for the version, "VCF" or "DBSNP".
* @param version version from getDbsnpVersions()
* @return the dbsnp type for the version, "VCF" or "ROD".
* @return the dbsnp type for the version, "VCF" or "DBSNP".
*/
public String getDbsnpType(int version) {
String dbsnp = getDbsnp(version);
if (dbsnp == null)
return null;
int len = dbsnp.length();
return dbsnp.substring(len - 3, len).toUpperCase();
return dbsnp.toLowerCase().endsWith(".vcf") ? "VCF" : "DBSNP";
}
/**

View File

@ -9,10 +9,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.LinkedList;
import java.util.List;
import java.util.ArrayList;
import java.util.Collections;
import java.util.*;
import java.io.File;
/**
@ -240,6 +237,20 @@ public class IntervalUtils {
return contigs;
}
/**
 * Builds a map from contig name to contig length for a reference.
 * The map iterates in the order the contigs were returned for the reference.
 * @param reference The reference for the intervals.
 * @return A map of contig names with their lengths.
 */
public static Map<String, Integer> getContigLengths(File reference) {
    final ReferenceDataSource source = new ReferenceDataSource(reference);
    final Map<String, Integer> contigLengths = new LinkedHashMap<String, Integer>();
    // NOTE(review): relies on parseIntervalArguments returning one whole-contig
    // interval per contig when given no intervals, so each stop is the length - confirm.
    for (GenomeLoc contigLoc : parseIntervalArguments(source, Collections.<String>emptyList())) {
        contigLengths.put(contigLoc.getContig(), contigLoc.getStop());
    }
    return contigLengths;
}
/**
* Counts the number of interval files an interval list can be split into using scatterIntervalArguments.
* @param reference The reference for the intervals.

View File

@ -237,7 +237,7 @@ public class WalkerTest extends BaseTest {
String[] command = Utils.escapeExpressions(args);
// add the logging level to each of the integration test commands
command = Utils.appendArray(command, "-l", "WARN", "-et", ENABLE_REPORTING ? "STANDARD" : "NO_ET");
command = Utils.appendArray(command, "-et", ENABLE_REPORTING ? "STANDARD" : "NO_ET");
// run the executable
boolean gotAnException = false;

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.io.File;
/**
 * Checks GATKReportParser lookups against the example report kept in the
 * validation data directory.
 */
public class GATKReportParserUnitTest extends BaseTest {
    @Test
    public void testParse() throws Exception {
        final File exampleReport = new File(validationDataLocation + "exampleGATKReport.eval");
        final GATKReportParser reportParser = new GATKReportParser();
        reportParser.parse(exampleReport);

        // Values that exist in the example report.
        final String countVariantsKey = "none.eval.none.all";
        Assert.assertEquals(reportParser.getValue("CountVariants", countVariantsKey, "nProcessedLoci"), "100000");
        Assert.assertEquals(reportParser.getValue("CountVariants", countVariantsKey, "nNoCalls"), "99872");

        final String metricsTable = "SimpleMetricsByAC.metrics";
        final String metricsKey = "none.eval.none.novel.ac2";
        Assert.assertEquals(reportParser.getValue(metricsTable, metricsKey, "AC"), "2");

        // An unknown key, column or table each yields null.
        Assert.assertNull(reportParser.getValue(metricsTable, "none.eval.none.novel.ac2.bad", "AC"));
        Assert.assertNull(reportParser.getValue(metricsTable, metricsKey, "AC.bad"));
        Assert.assertNull(reportParser.getValue("SimpleMetricsByAC.metrics.bad", metricsKey, "AC"));

        // Non-numeric cell text is returned verbatim.
        Assert.assertEquals(reportParser.getValue("ValidationReport", "none.eval.none.known", "sensitivity"), "NaN");
    }
}

View File

@ -34,7 +34,7 @@ public class ReferenceDataUnitTest {
@Test
public void testDbsnpTypes() {
Assert.assertEquals(ReferenceData.HG18.getDbsnpType(129), "ROD");
Assert.assertEquals(ReferenceData.HG18.getDbsnpType(129), "DBSNP");
Assert.assertEquals(ReferenceData.HG19.getDbsnpType(129), "VCF");
Assert.assertEquals(ReferenceData.HG19.getDbsnpType(132), "VCF");
Assert.assertNull(ReferenceData.HG19.getDbsnpType(130));

View File

@ -14,10 +14,7 @@ import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.*;
/**
* test out the interval utility methods
@ -119,6 +116,16 @@ public class IntervalUtilsUnitTest extends BaseTest {
Assert.assertEquals(IntervalUtils.distinctContigs(reference, Arrays.asList("chr2:1-1", "chr1:1-1", "chr3:2-2")), Arrays.asList("chr1","chr2","chr3"));
}
/** Spot-checks the lengths reported for a handful of contigs of the test reference. */
@Test
public void testGetContigLengths() {
    final Map<String, Integer> contigLengths = IntervalUtils.getContigLengths(reference);
    Assert.assertEquals(contigLengths.get("chr1").intValue(), 247249719);
    Assert.assertEquals(contigLengths.get("chr2").intValue(), 242951149);
    Assert.assertEquals(contigLengths.get("chr3").intValue(), 199501827);
    Assert.assertEquals(contigLengths.get("chr20").intValue(), 62435964);
    Assert.assertEquals(contigLengths.get("chrX").intValue(), 154913754);
}
@Test
public void testCountIntervals() {
Assert.assertEquals(IntervalUtils.countIntervalArguments(reference, Collections.<String>emptyList(), false), 45);

View File

@ -16,7 +16,7 @@ class FullCallingPipeline extends QScript {
@Argument(doc="the YAML file specifying inputs, interval lists, reference sequence, etc.", shortName="Y")
var yamlFile: File = _
@Input(doc="path to GATK jar", shortName="G")
@Input(doc="path to GATK jar", shortName="G", required=false)
var gatkJar: File = _
@Input(doc="level of parallelism for IndelRealigner. By default is set to 1.", shortName="cleanerScatter", required=false)
@ -212,6 +212,7 @@ class FullCallingPipeline extends QScript {
snps.jobOutputFile = new File(".queue/logs/SNPCalling/UnifiedGenotyper.snps.out")
snps.memoryLimit = 6
snps.downsample_to_coverage = 600
snps.genotype_likelihoods_model = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.SNP
snps.input_file = bamFiles
snps.genotype_likelihoods_model = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.SNP
snps.rodBind :+= RodBind("dbsnp", qscript.pipeline.getProject.getGenotypeDbsnpType, qscript.pipeline.getProject.getGenotypeDbsnp)

View File

@ -30,55 +30,55 @@ package org.broadinstitute.sting.queue.util
object PrimitiveOptionConversions {
// Conversion from Option
implicit def byteOption2byte(x: Option[Byte]) = x.get
implicit def shortOption2short(x: Option[Short]) = x.get
implicit def charOption2char(x: Option[Char]) = x.get
implicit def intOption2int(x: Option[Int]) = x.get
implicit def longOption2long(x: Option[Long]) = x.get
implicit def floatOption2float(x: Option[Float]) = x.get
implicit def doubleOption2double(x: Option[Double]) = x.get
implicit def booleanOption2boolean(x: Option[Boolean]) = x.get
implicit def byteOption2byte(x: Option[Byte]): Byte = x.get
implicit def shortOption2short(x: Option[Short]): Short = x.get
implicit def charOption2char(x: Option[Char]): Char = x.get
implicit def intOption2int(x: Option[Int]): Int = x.get
implicit def longOption2long(x: Option[Long]): Long = x.get
implicit def floatOption2float(x: Option[Float]): Float = x.get
implicit def doubleOption2double(x: Option[Double]): Double = x.get
implicit def booleanOption2boolean(x: Option[Boolean]): Boolean = x.get
// Conversion to Option
implicit def byte2byteOption(x: Byte) = Some(x)
implicit def short2shortOption(x: Short) = Some(x)
implicit def char2charOption(x: Char) = Some(x)
implicit def int2intOption(x: Int) = Some(x)
implicit def long2longOption(x: Long) = Some(x)
implicit def float2floatOption(x: Float) = Some(x)
implicit def double2doubleOption(x: Double) = Some(x)
implicit def boolean2booleanOption(x: Boolean) = Some(x)
implicit def byte2byteOption(x: Byte): Option[Byte] = Some(x)
implicit def short2shortOption(x: Short): Option[Short] = Some(x)
implicit def char2charOption(x: Char): Option[Char] = Some(x)
implicit def int2intOption(x: Int): Option[Int] = Some(x)
implicit def long2longOption(x: Long): Option[Long] = Some(x)
implicit def float2floatOption(x: Float): Option[Float] = Some(x)
implicit def double2doubleOption(x: Double): Option[Double] = Some(x)
implicit def boolean2booleanOption(x: Boolean): Option[Boolean] = Some(x)
// Narrowing for constants to byte, short, and float
implicit def int2byteOption(x: Int) = Some(x.toByte)
implicit def int2shortOption(x: Int) = Some(x.toShort)
implicit def double2floatOption(x: Float) = Some(x.toFloat)
implicit def int2byteOption(x: Int): Option[Byte] = Some(x.toByte)
implicit def int2shortOption(x: Int): Option[Short] = Some(x.toShort)
implicit def double2floatOption(x: Float): Option[Float] = Some(x.toFloat)
// Widening
implicit def byte2shortOption(x: Byte) = Some(x.toShort)
implicit def byte2intOption(x: Byte) = Some(x.toInt)
implicit def byte2longOption(x: Byte) = Some(x.toLong)
implicit def byte2floatOption(x: Byte) = Some(x.toFloat)
implicit def byte2doubleOption(x: Byte) = Some(x.toDouble)
implicit def byte2shortOption(x: Byte): Option[Short] = Some(x.toShort)
implicit def byte2intOption(x: Byte): Option[Int] = Some(x.toInt)
implicit def byte2longOption(x: Byte): Option[Long] = Some(x.toLong)
implicit def byte2floatOption(x: Byte): Option[Float] = Some(x.toFloat)
implicit def byte2doubleOption(x: Byte): Option[Double] = Some(x.toDouble)
implicit def short2intOption(x: Short) = Some(x.toInt)
implicit def short2longOption(x: Short) = Some(x.toLong)
implicit def short2floatOption(x: Short) = Some(x.toFloat)
implicit def short2doubleOption(x: Short) = Some(x.toDouble)
implicit def short2intOption(x: Short): Option[Int] = Some(x.toInt)
implicit def short2longOption(x: Short): Option[Long] = Some(x.toLong)
implicit def short2floatOption(x: Short): Option[Float] = Some(x.toFloat)
implicit def short2doubleOption(x: Short): Option[Double] = Some(x.toDouble)
implicit def char2intOption(x: Char) = Some(x.toInt)
implicit def char2longOption(x: Char) = Some(x.toLong)
implicit def char2floatOption(x: Char) = Some(x.toFloat)
implicit def char2doubleOption(x: Char) = Some(x.toDouble)
implicit def char2intOption(x: Char): Option[Int] = Some(x.toInt)
implicit def char2longOption(x: Char): Option[Long] = Some(x.toLong)
implicit def char2floatOption(x: Char): Option[Float] = Some(x.toFloat)
implicit def char2doubleOption(x: Char): Option[Double] = Some(x.toDouble)
implicit def int2longOption(x: Int) = Some(x.toLong)
implicit def int2floatOption(x: Int) = Some(x.toFloat)
implicit def int2doubleOption(x: Int) = Some(x.toDouble)
implicit def int2longOption(x: Int): Option[Long] = Some(x.toLong)
implicit def int2floatOption(x: Int): Option[Float] = Some(x.toFloat)
implicit def int2doubleOption(x: Int): Option[Double] = Some(x.toDouble)
implicit def long2floatOption(x: Long) = Some(x.toFloat)
implicit def long2doubleOption(x: Long) = Some(x.toDouble)
implicit def long2floatOption(x: Long): Option[Float] = Some(x.toFloat)
implicit def long2doubleOption(x: Long): Option[Double] = Some(x.toDouble)
implicit def float2doubleOption(x: Float) = Some(x.toDouble)
implicit def float2doubleOption(x: Float): Option[Double] = Some(x.toDouble)
}
@ -90,50 +90,50 @@ trait PrimitiveOptionConversions {
// import PrimitiveOptionConversion._ inside of a trait does not seem to work?
// Declaring them in a trait like this does work but does not seem scala-ish.
implicit def byteOption2byte(x: Option[Byte]) = PrimitiveOptionConversions.byteOption2byte(x)
implicit def shortOption2short(x: Option[Short]) = PrimitiveOptionConversions.shortOption2short(x)
implicit def charOption2char(x: Option[Char]) = PrimitiveOptionConversions.charOption2char(x)
implicit def intOption2int(x: Option[Int]) = PrimitiveOptionConversions.intOption2int(x)
implicit def longOption2long(x: Option[Long]) = PrimitiveOptionConversions.longOption2long(x)
implicit def floatOption2float(x: Option[Float]) = PrimitiveOptionConversions.floatOption2float(x)
implicit def doubleOption2double(x: Option[Double]) = PrimitiveOptionConversions.doubleOption2double(x)
implicit def booleanOption2boolean(x: Option[Boolean]) = PrimitiveOptionConversions.booleanOption2boolean(x)
implicit def byteOption2byte(x: Option[Byte]): Byte = PrimitiveOptionConversions.byteOption2byte(x)
implicit def shortOption2short(x: Option[Short]): Short = PrimitiveOptionConversions.shortOption2short(x)
implicit def charOption2char(x: Option[Char]): Char = PrimitiveOptionConversions.charOption2char(x)
implicit def intOption2int(x: Option[Int]): Int = PrimitiveOptionConversions.intOption2int(x)
implicit def longOption2long(x: Option[Long]): Long = PrimitiveOptionConversions.longOption2long(x)
implicit def floatOption2float(x: Option[Float]): Float = PrimitiveOptionConversions.floatOption2float(x)
implicit def doubleOption2double(x: Option[Double]): Double = PrimitiveOptionConversions.doubleOption2double(x)
implicit def booleanOption2boolean(x: Option[Boolean]): Boolean = PrimitiveOptionConversions.booleanOption2boolean(x)
implicit def byte2byteOption(x: Byte) = PrimitiveOptionConversions.byte2byteOption(x)
implicit def short2shortOption(x: Short) = PrimitiveOptionConversions.short2shortOption(x)
implicit def char2charOption(x: Char) = PrimitiveOptionConversions.char2charOption(x)
implicit def int2intOption(x: Int) = PrimitiveOptionConversions.int2intOption(x)
implicit def long2longOption(x: Long) = PrimitiveOptionConversions.long2longOption(x)
implicit def float2floatOption(x: Float) = PrimitiveOptionConversions.float2floatOption(x)
implicit def double2doubleOption(x: Double) = PrimitiveOptionConversions.double2doubleOption(x)
implicit def boolean2booleanOption(x: Boolean) = PrimitiveOptionConversions.boolean2booleanOption(x)
implicit def byte2byteOption(x: Byte): Option[Byte] = PrimitiveOptionConversions.byte2byteOption(x)
implicit def short2shortOption(x: Short): Option[Short] = PrimitiveOptionConversions.short2shortOption(x)
implicit def char2charOption(x: Char): Option[Char] = PrimitiveOptionConversions.char2charOption(x)
implicit def int2intOption(x: Int): Option[Int] = PrimitiveOptionConversions.int2intOption(x)
implicit def long2longOption(x: Long): Option[Long] = PrimitiveOptionConversions.long2longOption(x)
implicit def float2floatOption(x: Float): Option[Float] = PrimitiveOptionConversions.float2floatOption(x)
implicit def double2doubleOption(x: Double): Option[Double] = PrimitiveOptionConversions.double2doubleOption(x)
implicit def boolean2booleanOption(x: Boolean): Option[Boolean] = PrimitiveOptionConversions.boolean2booleanOption(x)
implicit def int2byteOption(x: Int) = PrimitiveOptionConversions.int2byteOption(x)
implicit def int2shortOption(x: Int) = PrimitiveOptionConversions.int2shortOption(x)
implicit def double2floatOption(x: Float) = PrimitiveOptionConversions.double2floatOption(x)
implicit def int2byteOption(x: Int): Option[Byte] = PrimitiveOptionConversions.int2byteOption(x)
implicit def int2shortOption(x: Int): Option[Short] = PrimitiveOptionConversions.int2shortOption(x)
implicit def double2floatOption(x: Float): Option[Float] = PrimitiveOptionConversions.double2floatOption(x)
implicit def byte2shortOption(x: Byte) = PrimitiveOptionConversions.byte2shortOption(x)
implicit def byte2intOption(x: Byte) = PrimitiveOptionConversions.byte2intOption(x)
implicit def byte2longOption(x: Byte) = PrimitiveOptionConversions.byte2longOption(x)
implicit def byte2floatOption(x: Byte) = PrimitiveOptionConversions.byte2floatOption(x)
implicit def byte2doubleOption(x: Byte) = PrimitiveOptionConversions.byte2doubleOption(x)
implicit def byte2shortOption(x: Byte): Option[Short] = PrimitiveOptionConversions.byte2shortOption(x)
implicit def byte2intOption(x: Byte): Option[Int] = PrimitiveOptionConversions.byte2intOption(x)
implicit def byte2longOption(x: Byte): Option[Long] = PrimitiveOptionConversions.byte2longOption(x)
implicit def byte2floatOption(x: Byte): Option[Float] = PrimitiveOptionConversions.byte2floatOption(x)
implicit def byte2doubleOption(x: Byte): Option[Double] = PrimitiveOptionConversions.byte2doubleOption(x)
implicit def short2intOption(x: Short) = PrimitiveOptionConversions.short2intOption(x)
implicit def short2longOption(x: Short) = PrimitiveOptionConversions.short2longOption(x)
implicit def short2floatOption(x: Short) = PrimitiveOptionConversions.short2floatOption(x)
implicit def short2doubleOption(x: Short) = PrimitiveOptionConversions.short2doubleOption(x)
implicit def short2intOption(x: Short): Option[Int] = PrimitiveOptionConversions.short2intOption(x)
implicit def short2longOption(x: Short): Option[Long] = PrimitiveOptionConversions.short2longOption(x)
implicit def short2floatOption(x: Short): Option[Float] = PrimitiveOptionConversions.short2floatOption(x)
implicit def short2doubleOption(x: Short): Option[Double] = PrimitiveOptionConversions.short2doubleOption(x)
implicit def char2intOption(x: Char) = PrimitiveOptionConversions.char2intOption(x)
implicit def char2longOption(x: Char) = PrimitiveOptionConversions.char2longOption(x)
implicit def char2floatOption(x: Char) = PrimitiveOptionConversions.char2floatOption(x)
implicit def char2doubleOption(x: Char) = PrimitiveOptionConversions.char2doubleOption(x)
implicit def char2intOption(x: Char): Option[Int] = PrimitiveOptionConversions.char2intOption(x)
implicit def char2longOption(x: Char): Option[Long] = PrimitiveOptionConversions.char2longOption(x)
implicit def char2floatOption(x: Char): Option[Float] = PrimitiveOptionConversions.char2floatOption(x)
implicit def char2doubleOption(x: Char): Option[Double] = PrimitiveOptionConversions.char2doubleOption(x)
implicit def int2longOption(x: Int) = PrimitiveOptionConversions.int2longOption(x)
implicit def int2floatOption(x: Int) = PrimitiveOptionConversions.int2floatOption(x)
implicit def int2doubleOption(x: Int) = PrimitiveOptionConversions.int2doubleOption(x)
implicit def int2longOption(x: Int): Option[Long] = PrimitiveOptionConversions.int2longOption(x)
implicit def int2floatOption(x: Int): Option[Float] = PrimitiveOptionConversions.int2floatOption(x)
implicit def int2doubleOption(x: Int): Option[Double] = PrimitiveOptionConversions.int2doubleOption(x)
implicit def long2floatOption(x: Long) = PrimitiveOptionConversions.long2floatOption(x)
implicit def long2doubleOption(x: Long) = PrimitiveOptionConversions.long2doubleOption(x)
implicit def long2floatOption(x: Long): Option[Float] = PrimitiveOptionConversions.long2floatOption(x)
implicit def long2doubleOption(x: Long): Option[Double] = PrimitiveOptionConversions.long2doubleOption(x)
implicit def float2doubleOption(x: Float) = PrimitiveOptionConversions.float2doubleOption(x)
implicit def float2doubleOption(x: Float): Option[Double] = PrimitiveOptionConversions.float2doubleOption(x)
}

View File

@ -48,6 +48,7 @@ object StringFileConversions {
x.map(_ match {
case string: String => stringAsFile(string)
case file: File => file
case null => null
})
}
@ -55,6 +56,7 @@ object StringFileConversions {
x.map(_ match {
case file: File => fileAsString(file)
case string: String => string
case null => null
})
}
@ -62,6 +64,7 @@ object StringFileConversions {
x.map(_ match {
case string: String => stringAsFile(string)
case file: File => file
case null => null
})
}
@ -69,6 +72,7 @@ object StringFileConversions {
x.map(_ match {
case file: File => fileAsString(file)
case string: String => string
case null => null
})
}
}

View File

@ -30,11 +30,14 @@ import org.testng.Assert
import org.broadinstitute.sting.commandline.CommandLineProgram
import java.util.Date
import java.text.SimpleDateFormat
import org.broadinstitute.sting.{WalkerTest, BaseTest}
import org.broadinstitute.sting.BaseTest
import org.broadinstitute.sting.queue.{QException, QCommandLine}
import org.broadinstitute.sting.datasources.pipeline.{Pipeline, PipelineProject, PipelineSample}
import org.broadinstitute.sting.utils.broad.PicardAggregationUtils
import org.broadinstitute.sting.queue.util.{Logging, ProcessController}
import java.io.{FileNotFoundException, File}
import org.broadinstitute.sting.gatk.report.GATKReportParser
import org.apache.commons.io.FileUtils
object PipelineTest extends BaseTest with Logging {
@ -135,7 +138,6 @@ object PipelineTest extends BaseTest with Logging {
val sample = new PipelineSample
sample.setId(idPrefix + "_" + k1gBam.sampleId)
sample.setBamFiles(Map("cleaned" -> getPicardBam(k1gBam)))
sample.setTags(Map("SQUIDProject" -> k1gBam.squidId, "CollaboratorID" -> k1gBam.sampleId))
sample
}
@ -146,7 +148,7 @@ object PipelineTest extends BaseTest with Logging {
def executeTest(pipelineTest: PipelineTestSpec) {
val name = pipelineTest.name
if (name == null)
throw new QException("PipelineTestSpec.name is null.")
Assert.fail("PipelineTestSpec.name is null.")
println(Utils.dupString('-', 80));
executeTest(name, pipelineTest.args, pipelineTest.jobQueue, pipelineTest.expectedException)
if (run) {
@ -174,27 +176,28 @@ object PipelineTest extends BaseTest with Logging {
// write the report to the shared validation data location
val formatter = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss")
val reportLocation = "%s%s/validation.%s.eval".format(validationReportsDataLocation, name, formatter.format(new Date))
new File(reportLocation).getParentFile.mkdirs
val report = new File(reportLocation)
// Run variant eval generating the report and validating the pipeline vcf.
var walkerCommand = "-T VariantEval -R %s -B:eval,VCF %s -E %s -reportType R -reportLocation %s -L %s"
.format(evalSpec.reference, evalSpec.vcf, evalSpec.evalModules.mkString(" -E "), reportLocation, evalSpec.intervals)
FileUtils.copyFile(new File(runDir(name) + evalSpec.evalReport), report);
if (evalSpec.dbsnp != null) {
val dbsnpArg = if (evalSpec.dbsnp.getName.toLowerCase.endsWith(".vcf")) "-B:dbsnp,VCF" else "-D"
walkerCommand += " %s %s".format(dbsnpArg, evalSpec.dbsnp)
}
val parser = new GATKReportParser
parser.parse(report)
if (evalSpec.intervals != null)
walkerCommand += " -L %s".format(evalSpec.intervals)
var allInRange = true
println()
println(name + " validation values:")
println(" value (min,target,max) table key metric")
for (validation <- evalSpec.validations) {
walkerCommand += " -summary %s".format(validation.metric)
walkerCommand += " -validate '%1$s >= %2$s' -validate '%1$s <= %3$s'".format(
validation.metric, validation.min, validation.max)
val value = parser.getValue(validation.table, validation.key, validation.metric)
val inRange = validation.inRange(value)
val flag = if (!inRange) "*" else " "
println(" %s %s (%s,%s,%s) %s %s %s".format(flag, value, validation.min, validation.target, validation.max, validation.table, validation.key, validation.metric))
allInRange &= inRange
}
WalkerTest.executeTest(name + "-validate", walkerCommand, null)
if (!allInRange)
Assert.fail("Eval outside of expected range")
}
/**
@ -209,7 +212,7 @@ object PipelineTest extends BaseTest with Logging {
// add the logging level to each of the integration test commands
command = Utils.appendArray(command, "-bsub", "-l", "WARN", "-tempDir", tempDir(name), "-runDir", runDir(name))
command = Utils.appendArray(command, "-bsub", "-tempDir", tempDir(name), "-runDir", runDir(name))
if (jobQueue == null)
command = Utils.appendArray(command, "-jobQueue", "hour")
@ -281,29 +284,16 @@ object PipelineTest extends BaseTest with Logging {
}
private def getPicardBam(k1gBam: K1gBam): File =
getPicardBam(k1gBam.squidId, k1gBam.sampleId, k1gBam.version)
private def getPicardBam(squidId: String, sampleId: String, version: Int): File =
new File(getPicardDir(squidId, sampleId, version), sampleId + ".bam")
private def getPicardDir(squidId: String, sampleId: String, version: Int) =
new File("/seq/picard_aggregation/%1$s/%2$s/v%3$s/".format(squidId, sampleId, version))
new File(PicardAggregationUtils.getSampleBam(k1gBam.squidId, k1gBam.sampleId, k1gBam.version))
private def getLatestVersion(k1gBam: K1gBam): Int =
getLatestVersion(k1gBam.squidId, k1gBam.sampleId, k1gBam.version)
private def getLatestVersion(squidId: String, sampleId: String, startVersion: Int): Int = {
var version = startVersion
while (new File(getPicardDir(squidId, sampleId, version + 1), "finished.txt").exists)
version += 1
version
}
PicardAggregationUtils.getLatestVersion(k1gBam.squidId, k1gBam.sampleId, k1gBam.version)
private var runningCommandLines = Set.empty[QCommandLine]
Runtime.getRuntime.addShutdownHook(new Thread {
/** Cleanup as the JVM shuts down. */
override def run {
override def run() {
try {
ProcessController.shutdown()
} catch {

View File

@ -1,44 +1,34 @@
package org.broadinstitute.sting.queue.pipeline
import java.io.File
/**
* Data validations to evaluate on a VCF using VariantEval.
* Data validations to evaluate on a GATKReport.
*/
class PipelineTestEvalSpec {
// TODO: Reuse the Project "YAML" object for reference, intervals, etc.
/** VCF to eval */
var vcf: File = _
/** Reference for the VCF */
var reference: File = _
/** Intervals for the VCF */
var intervals: File = _
/** DBSNP to use for comparisons, via -B:dbsnp,VCF or -D */
var dbsnp: File = _
/** List of eval modules to output. */
var evalModules = List("CompOverlap", "CountFunctionalClasses", "CountVariants", "SimpleMetricsBySample", "TiTvVariantEvaluator")
var evalReport: String = _
/** Validations to assert. */
var validations: List[PipelineValidation] = Nil
var validations: List[PipelineValidation[_]] = Nil
}
/** A VariantEval JEXL and range of values to validate. */
class PipelineValidation(val metric: String, val min: String, val max: String) {
abstract class PipelineValidation[T <: AnyVal](val table: String, val key: String, val metric: String, val target: T, val min: T, val max: T) {
/** Converts the string form of a value into the validation's value type T. */
def parse(x: String): T
/** Reports whether the value given in string form is acceptable; the implementations in this file test the parsed value against min and max inclusively. */
def inRange(x: String): Boolean
}
/** A VariantEval JEXL and target to validate within a 1% tolerance. */
class IntegerValidation(metric: String, target: Int)
extends PipelineValidation(metric,
(target * .99).floor.toInt.toString, (target * 1.01).ceil.toInt.toString) {
/**
 * Validates that an integer metric is within a 1% tolerance of a target.
 *
 * The lower bound is the tolerance band's low edge rounded down and the upper
 * bound is its high edge rounded up. The band edges are ordered with
 * math.min / math.max so that a negative target still yields min <= max;
 * for targets >= 0 the bounds are exactly floor(target * .99) and
 * ceil(target * 1.01).
 *
 * @param table  name of the report table holding the metric
 * @param key    key identifying the row within the table
 * @param metric name of the metric column
 * @param target expected value of the metric
 */
class IntegerValidation(table: String, key: String, metric: String, target: Int)
  extends PipelineValidation[Int](table, key, metric, target,
    math.min(target * .99, target * 1.01).floor.toInt,
    math.max(target * .99, target * 1.01).ceil.toInt) {

  /** Parses the string as an Int; throws NumberFormatException when it is not one. */
  def parse(x: String): Int = x.toInt

  /** True when the parsed value lies in [min, max]; the string is parsed only once. */
  def inRange(x: String): Boolean = {
    val value = parse(x)
    value >= min && value <= max
  }
}
/** A VariantEval JEXL and target to validate within a 1% tolerance. */
class DoubleValidation(metric: String, target: Double)
extends PipelineValidation(metric,
"%.2f".format((target * 99).floor / 100), "%.2f".format((target * 101).ceil / 100)) {
/**
 * Validates that a floating point metric is within a 1% tolerance of a target,
 * with the bounds widened outward to two decimal places.
 *
 * The band edges are ordered with math.min / math.max so that a negative
 * target still yields min <= max; for targets >= 0 the bounds are exactly
 * floor(target * 99) / 100 and ceil(target * 101) / 100.
 *
 * @param table  name of the report table holding the metric
 * @param key    key identifying the row within the table
 * @param metric name of the metric column
 * @param target expected value of the metric
 */
class DoubleValidation(table: String, key: String, metric: String, target: Double)
  extends PipelineValidation[Double](table, key, metric, target,
    math.min(target * 99, target * 101).floor / 100,
    math.max(target * 99, target * 101).ceil / 100) {

  /** Parses the string as a Double; throws NumberFormatException when it is not one. */
  def parse(x: String): Double = x.toDouble

  /** True when the parsed value lies in [min, max]; the string is parsed only once. */
  def inRange(x: String): Boolean = {
    val value = parse(x)
    value >= min && value <= max
  }
}

View File

@ -37,12 +37,12 @@ class FullCallingPipelineTest {
// Dataset built by newK1gDataset for "Barcoded_1000G_WEx_chr20", with the
// nCalledLoci and tiTvRatio validations appended before it is returned.
val k1gChr20Dataset = {
val dataset = newK1gDataset("Barcoded_1000G_WEx_chr20", true)
dataset.validations :+= new IntegerValidation("eval.dbsnp.all.called.all.counter.nCalledLoci", 1348)
dataset.validations :+= new IntegerValidation("eval.dbsnp.all.called.known.counter.nCalledLoci", 1124)
dataset.validations :+= new IntegerValidation("eval.dbsnp.all.called.novel.counter.nCalledLoci", 224)
dataset.validations :+= new DoubleValidation("eval.dbsnp.all.called.all.titv.tiTvRatio", 3.6644)
dataset.validations :+= new DoubleValidation("eval.dbsnp.all.called.known.titv.tiTvRatio", 3.7426)
dataset.validations :+= new DoubleValidation("eval.dbsnp.all.called.novel.titv.tiTvRatio", 3.3077)
dataset.validations :+= new IntegerValidation("CountVariants", "dbsnp.eval.all", "nCalledLoci", 1398)
dataset.validations :+= new IntegerValidation("CountVariants", "dbsnp.eval.known", "nCalledLoci", 1143)
dataset.validations :+= new IntegerValidation("CountVariants", "dbsnp.eval.novel", "nCalledLoci", 255)
dataset.validations :+= new DoubleValidation("TiTvVariantEvaluator", "dbsnp.eval.all", "tiTvRatio", 3.6250)
dataset.validations :+= new DoubleValidation("TiTvVariantEvaluator", "dbsnp.eval.known", "tiTvRatio", 3.7190)
dataset.validations :+= new DoubleValidation("TiTvVariantEvaluator", "dbsnp.eval.novel", "tiTvRatio", 3.2037)
dataset
}
@ -50,12 +50,12 @@ class FullCallingPipelineTest {
val k1gExomeDataset = {
val dataset = newK1gDataset("Barcoded_1000G_WEx", false)
dataset.validations :+= new IntegerValidation("eval.dbsnp.all.called.all.counter.nCalledLoci", 50755)
dataset.validations :+= new IntegerValidation("eval.dbsnp.all.called.known.counter.nCalledLoci", 40894)
dataset.validations :+= new IntegerValidation("eval.dbsnp.all.called.novel.counter.nCalledLoci", 9861)
dataset.validations :+= new DoubleValidation("eval.dbsnp.all.called.all.titv.tiTvRatio", 3.2820)
dataset.validations :+= new DoubleValidation("eval.dbsnp.all.called.known.titv.tiTvRatio", 3.3384)
dataset.validations :+= new DoubleValidation("eval.dbsnp.all.called.novel.titv.tiTvRatio", 3.0630)
dataset.validations :+= new IntegerValidation("CountVariants", "dbsnp.eval.all", "nCalledLoci", 52668)
dataset.validations :+= new IntegerValidation("CountVariants", "dbsnp.eval.known", "nCalledLoci", 41248)
dataset.validations :+= new IntegerValidation("CountVariants", "dbsnp.eval.novel", "nCalledLoci", 11420)
dataset.validations :+= new DoubleValidation("TiTvVariantEvaluator", "dbsnp.eval.all", "tiTvRatio", 3.271)
dataset.validations :+= new DoubleValidation("TiTvVariantEvaluator", "dbsnp.eval.known", "tiTvRatio", 3.3299)
dataset.validations :+= new DoubleValidation("TiTvVariantEvaluator", "dbsnp.eval.novel", "tiTvRatio", 3.0487)
dataset.jobQueue = "gsa"
@ -74,18 +74,16 @@ class FullCallingPipelineTest {
/** Wraps every dataset in its own single-element AnyRef array and returns the rows as an array. */
final def convertDatasets: Array[Array[AnyRef]] = {
  val rows = for (entry <- datasets) yield Array(entry.asInstanceOf[AnyRef])
  rows.toArray
}
@Test(dataProvider="datasets", enabled=false)
def testFullCallingPipeline(dataset: PipelineDataset) = {
@Test(dataProvider="datasets")
def testFullCallingPipeline(dataset: PipelineDataset) {
val projectName = dataset.pipeline.getProject.getName
val testName = "FullCallingPipeline-" + projectName
val yamlFile = writeYaml(testName, dataset.pipeline)
// Run the pipeline with the expected inputs.
val pipelineCommand = ("-retry 1 -S scala/qscript/playground/FullCallingPipeline.q" +
" -jobProject %s -Y %s" +
" -tearScript %s/R/DataProcessingReport/GetTearsheetStats.R" +
" --gatkjar %s")
.format(projectName, yamlFile, PipelineTest.currentStingDir, PipelineTest.currentGATK)
val pipelineCommand =
"-retry 1 -S scala/qscript/playground/FullCallingPipeline.q -jobProject %s -Y %s"
.format(projectName, yamlFile)
val pipelineSpec = new PipelineTestSpec
pipelineSpec.name = testName
@ -93,10 +91,7 @@ class FullCallingPipelineTest {
pipelineSpec.jobQueue = dataset.jobQueue
pipelineSpec.evalSpec = new PipelineTestEvalSpec
pipelineSpec.evalSpec.vcf = new File(PipelineTest.runDir(testName) + "SnpCalls/%s.cleaned.annotated.handfiltered.vcf".format(projectName))
pipelineSpec.evalSpec.reference = dataset.pipeline.getProject.getReferenceFile
pipelineSpec.evalSpec.intervals = dataset.pipeline.getProject.getIntervalList
pipelineSpec.evalSpec.dbsnp = dataset.pipeline.getProject.getEvalDbsnp
pipelineSpec.evalSpec.evalReport = projectName + ".cleaned.snps_and_indels.filtered.annotated.eval"
pipelineSpec.evalSpec.validations = dataset.validations
PipelineTest.executeTest(pipelineSpec)
@ -111,7 +106,7 @@ class FullCallingPipelineTest {
}
/** Bundles a pipeline with the validations to assert on it and the job queue to run it on. */
class PipelineDataset(var pipeline: Pipeline = null,
var validations: List[PipelineValidation] = Nil,
var validations: List[PipelineValidation[_]] = Nil,
var jobQueue: String = null) {
// Uses the project name as the string form.
// NOTE: pipeline defaults to null, in which case this dereference throws a NullPointerException.
override def toString = pipeline.getProject.getName
}

View File

@ -32,16 +32,20 @@ import StringFileConversions._
class StringFileConversionsUnitTest {
@Test
def testStringToFile() {
// A File assigned to a String-typed variable; this relies on the implicit
// conversions brought in by the StringFileConversions._ import.
val file = new File("foo")
val string: String = file
var string: String = new File("foo")
Assert.assertEquals(string, "foo")
// A null File is expected to convert to a null String.
string = null.asInstanceOf[File]
Assert.assertNull(string)
}
@Test
def testFileToString() {
// A String assigned to a File-typed variable; this relies on the implicit
// conversions brought in by the StringFileConversions._ import.
val string = "foo"
val file: File = string
var file: File = "foo"
Assert.assertEquals(file, new File("foo"))
// A null String is expected to convert to a null File.
file = null.asInstanceOf[String]
Assert.assertNull(file)
}
@Test
@ -49,6 +53,18 @@ class StringFileConversionsUnitTest {
var files = List(new File("foo"))
files :+= "bar"
Assert.assertEquals(files, List(new File("foo"), new File("bar")))
files = List(new File("foo"))
files :+= null.asInstanceOf[String]
Assert.assertEquals(files, List(new File("foo"), null))
files = List[File](null)
files :+= "foo"
Assert.assertEquals(files, List(null, new File("foo")))
files = List[File](null)
files :+= null.asInstanceOf[String]
Assert.assertEquals(files, List(null, null))
}
@Test
@ -56,6 +72,18 @@ class StringFileConversionsUnitTest {
var strings = List("foo")
strings :+= new File("bar")
Assert.assertEquals(strings, List("foo", "bar"))
strings = List("foo")
strings :+= null.asInstanceOf[File]
Assert.assertEquals(strings, List("foo", null))
strings = List[String](null)
strings :+= new File("foo")
Assert.assertEquals(strings, List(null, "foo"))
strings = List[String](null)
strings :+= null.asInstanceOf[File]
Assert.assertEquals(strings, List(null, null))
}
@Test
@ -63,6 +91,18 @@ class StringFileConversionsUnitTest {
var files = Set(new File("foo"))
files += "bar"
Assert.assertEquals(files, Set(new File("foo"), new File("bar")))
files = Set(new File("foo"))
files += null.asInstanceOf[String]
Assert.assertEquals(files, Set(new File("foo"), null))
files = Set[File](null)
files += "foo"
Assert.assertEquals(files, Set(new File("foo"), null))
files = Set[File](null)
files += null.asInstanceOf[String]
Assert.assertEquals(files, Set(null))
}
@Test
@ -70,6 +110,18 @@ class StringFileConversionsUnitTest {
var strings = Set("foo")
strings += new File("bar")
Assert.assertEquals(strings, Set("foo", "bar"))
strings = Set("foo")
strings += null.asInstanceOf[File]
Assert.assertEquals(strings, Set("foo", null))
strings = Set[String](null)
strings += new File("foo")
Assert.assertEquals(strings, Set("foo", null))
strings = Set[String](null)
strings += null.asInstanceOf[File]
Assert.assertEquals(strings, Set(null))
}
@Test
@ -77,6 +129,18 @@ class StringFileConversionsUnitTest {
var files = List(new File("foo"))
files ++= List("bar")
Assert.assertEquals(files, List(new File("foo"), new File("bar")))
files = List(new File("foo"))
files ++= List[String](null)
Assert.assertEquals(files, List(new File("foo"), null))
files = List[File](null)
files ++= List("foo")
Assert.assertEquals(files, List(null, new File("foo")))
files = List[File](null)
files ++= List[String](null)
Assert.assertEquals(files, List(null, null))
}
@Test
@ -84,6 +148,18 @@ class StringFileConversionsUnitTest {
var strings = List("foo")
strings ++= List(new File("bar"))
Assert.assertEquals(strings, List("foo", "bar"))
strings = List("foo")
strings ++= List[File](null)
Assert.assertEquals(strings, List("foo", null))
strings = List[String](null)
strings ++= List(new File("foo"))
Assert.assertEquals(strings, List(null, "foo"))
strings = List[String](null)
strings ++= List[File](null)
Assert.assertEquals(strings, List(null, null))
}
@Test
@ -91,6 +167,18 @@ class StringFileConversionsUnitTest {
var files = Set(new File("foo"))
files ++= Set("bar")
Assert.assertEquals(files, Set(new File("foo"), new File("bar")))
files = Set(new File("foo"))
files ++= Set[String](null)
Assert.assertEquals(files, Set(new File("foo"), null))
files = Set[File](null)
files ++= Set("foo")
Assert.assertEquals(files, Set(new File("foo"), null))
files = Set[File](null)
files ++= Set[String](null)
Assert.assertEquals(files, Set(null))
}
@Test
@ -98,5 +186,17 @@ class StringFileConversionsUnitTest {
var strings = Set("foo")
strings ++= Set(new File("bar"))
Assert.assertEquals(strings, Set("foo", "bar"))
strings = Set("foo")
strings ++= Set[File](null)
Assert.assertEquals(strings, Set("foo", null))
strings = Set[String](null)
strings ++= Set(new File("foo"))
Assert.assertEquals(strings, Set("foo", null))
strings = Set[String](null)
strings ++= Set[File](null)
Assert.assertEquals(strings, Set(null))
}
}