Cleanup of the utils.broad package.

Using Picard IoUtils on sample names.
This commit is contained in:
Khalid Shakir 2011-07-01 20:47:03 -04:00
parent 444eae316c
commit b6bc64a0c8
14 changed files with 7 additions and 844 deletions

View File

@ -22,7 +22,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.datasources.pipeline;
package org.broadinstitute.sting.pipeline;
import java.util.ArrayList;
import java.util.List;

View File

@ -22,7 +22,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.datasources.pipeline;
package org.broadinstitute.sting.pipeline;
import java.io.File;
import java.util.Map;

View File

@ -22,7 +22,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.datasources.pipeline;
package org.broadinstitute.sting.pipeline;
import java.io.File;
import java.util.Map;

View File

@ -1,149 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.broad;
import org.apache.commons.io.filefilter.RegexFileFilter;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.util.Arrays;
/**
 * Locates Picard aggregation output for a project sample under {@link #PICARD_AGGREGATION_DIR}.
 * Sample directories are laid out as {@code project/sample/vN/}, and a version is treated as
 * finished when its directory contains a finished.txt file.
 */
public class PicardAggregationUtils {
    public static final String PICARD_AGGREGATION_DIR = "/seq/picard_aggregation/";

    /**
     * Returns the path to the sample BAM.
     * @param project Project
     * @param sample Sample
     * @param version Version
     * @return The path to the sample BAM.
     */
    public static String getSampleBam(String project, String sample, int version) {
        return getSampleDir(project, sample, version) + sample + ".bam";
    }

    /**
     * Returns the path to the latest BAM.
     * @param project Project
     * @param sample Sample
     * @return The path to the latest BAM.
     * @throws FileNotFoundException If a finished directory cannot be found for a sample.
     */
    public static String getSampleBam(String project, String sample) throws FileNotFoundException {
        return getSampleDir(project, sample) + sample + ".bam";
    }

    /**
     * Returns the sample directory.
     * @param project Project
     * @param sample Sample
     * @param version Version
     * @return the sample directory, always ending with a slash.
     */
    public static String getSampleDir(String project, String sample, int version) {
        return PICARD_AGGREGATION_DIR + String.format("%s/%s/v%d/", project, sample, version);
    }

    /**
     * Returns the latest finished directory for this project sample.
     * @param project Project
     * @param sample Sample
     * @return The path to the latest finished directory.
     * @throws FileNotFoundException If a finished directory cannot be found for a sample.
     */
    public static String getSampleDir(String project, String sample) throws FileNotFoundException {
        int latestVersion = getLatestVersion(project, sample);
        if (latestVersion == 0)
            throw new FileNotFoundException("Unable to find a finished directory for project sample " + project + "/" + sample);
        return getSampleDir(project, sample, latestVersion);
    }

    /**
     * Returns the latest finished version directory.
     * Because isilon metadata operations are relatively slow this method
     * tries not to look for every possible versioned directory.
     * @param project Project
     * @param sample Sample
     * @return The highest finished version directory or 0 if a finished directory was not found.
     */
    public static int getLatestVersion(String project, String sample) {
        return getLatestVersion(project, sample, 0);
    }

    /**
     * Returns the latest finished version directory after startVersion.
     * Because isilon metadata operations are relatively slow this method
     * tries not to look for every possible versioned directory.
     * @param project Project
     * @param sample Sample
     * @param startVersion minimum version to return
     * @return The highest finished version found by probing forward from startVersion,
     *         or startVersion itself when no finished version was found.
     */
    public static int getLatestVersion(String project, String sample, int startVersion) {
        int version = Math.max(0, startVersion);
        boolean nextExists = true;
        // Probe up to three versions ahead of the current one, jumping to the furthest finished
        // version found, and stop once none of the next three versions is finished.
        while (nextExists) {
            nextExists = false;
            for (int next = 3; next > 0; next--)
                if (isFinished(project, sample, version + next)) {
                    version += next;
                    nextExists = true;
                    break;
                }
        }
        // Special case when the version is 0
        // Because isilon storage takes a while to do meta data operations only look through every file if we have to.
        if (version == 0) {
            File sampleDir = new File(PICARD_AGGREGATION_DIR + project + "/" + sample);
            if (sampleDir.exists()) {
                FileFilter filter = new RegexFileFilter("v\\d+");
                File[] files = sampleDir.listFiles(filter);
                // listFiles returns null when the path is not a directory or an I/O error occurs;
                // treat that the same as "no versioned directories found".
                if (files != null) {
                    int[] versions = new int[files.length];
                    for (int i = 0; i < files.length; i++)
                        versions[i] = Integer.parseInt(files[i].getName().substring(1));
                    Arrays.sort(versions);
                    // Walk from the highest version down and take the first finished one.
                    for (int i = versions.length - 1; i >= 0; i--) {
                        if (isFinished(project, sample, versions[i])) {
                            version = versions[i];
                            break;
                        }
                    }
                }
            }
        }
        return version == 0 ? startVersion : version;
    }

    /**
     * Returns true if the project sample directory contains a finished.txt
     * @param project Project
     * @param sample Sample
     * @param version Version
     * @return true if the project sample directory contains a finished.txt
     */
    public static boolean isFinished(String project, String sample, int version) {
        return new File(getSampleDir(project, sample, version), "finished.txt").exists();
    }
}

View File

@ -1,108 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.broad;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
 * Reads a tab separated Picard analysis_files.txt and collects the distinct values found in the
 * REFERENCE_SEQUENCE, TARGET_INTERVALS and BAIT_INTERVALS columns.
 * Lines starting with "#" are skipped and the first remaining line is used as the header.
 */
public class PicardAnalysisFiles {
    private static final String REFERENCE_SEQUENCE_HEADER = "REFERENCE_SEQUENCE";
    private static final String TARGET_INTERVALS_HEADER = "TARGET_INTERVALS";
    private static final String BAIT_INTERVALS_HEADER = "BAIT_INTERVALS";
    private static final String[] ANALYSIS_HEADERS = {REFERENCE_SEQUENCE_HEADER, TARGET_INTERVALS_HEADER, BAIT_INTERVALS_HEADER};
    private static final String ANALYSIS_FILES = "analysis_files.txt";

    private String path;
    private Map<String,Set<String>> headerValues = new HashMap<String,Set<String>>();

    /**
     * Parses the analysis files for the latest finished version of the project sample.
     * @param project Project
     * @param sample Sample
     * @throws FileNotFoundException If a finished directory or the analysis file cannot be found.
     */
    public PicardAnalysisFiles(String project, String sample) throws FileNotFoundException {
        this(PicardAggregationUtils.getSampleDir(project, sample) + ANALYSIS_FILES);
    }

    /**
     * Parses the analysis files for a specific version of the project sample.
     * @param project Project
     * @param sample Sample
     * @param version Version
     * @throws FileNotFoundException If the analysis file cannot be found.
     */
    public PicardAnalysisFiles(String project, String sample, int version) throws FileNotFoundException {
        this(PicardAggregationUtils.getSampleDir(project, sample, version) + ANALYSIS_FILES);
    }

    /**
     * Parses the analysis file at the path.
     * @param path Path to the analysis file.
     * @throws FileNotFoundException If the analysis file cannot be found.
     */
    public PicardAnalysisFiles(String path) throws FileNotFoundException {
        this.path = path;
        HashMap<String,Integer> headerIndexes = null;
        for (String line: new XReadLines(new File(path))) {
            if (line.startsWith("#"))
                continue;
            String[] values = line.split("\t");
            if (headerIndexes == null) {
                // First non-comment line: remember which column each header lives in.
                headerIndexes = new HashMap<String,Integer>();
                for (String header: ANALYSIS_HEADERS) {
                    headerIndexes.put(header, ArrayUtils.indexOf(values, header));
                    headerValues.put(header, new HashSet<String>());
                }
            } else {
                for (String header: ANALYSIS_HEADERS) {
                    int index = headerIndexes.get(header);
                    // indexOf reports a missing header as a negative index; fail with a clear
                    // message instead of an ArrayIndexOutOfBoundsException on values[index].
                    if (index < 0)
                        throw new StingException(String.format("Unable to find column %s in %s", header, path));
                    if (values.length <= index)
                        throw new StingException(String.format("Unable to parse line in %s: %n%s", path, line));
                    String value = values[index];
                    headerValues.get(header).add(value);
                }
            }
        }
    }

    /**
     * @return the path of the analysis file that was parsed.
     */
    public String getPath() {
        return path;
    }

    /**
     * @return the single reference sequence listed in the file, or null if none was listed.
     */
    public String getReferenceSequence() {
        return getSingle(REFERENCE_SEQUENCE_HEADER);
    }

    /**
     * @return the single target intervals value listed in the file, or null if none was listed.
     */
    public String getTargetIntervals() {
        return getSingle(TARGET_INTERVALS_HEADER);
    }

    /**
     * @return the single bait intervals value listed in the file, or null if none was listed.
     */
    public String getBaitIntervals() {
        return getSingle(BAIT_INTERVALS_HEADER);
    }

    /**
     * Returns the one value collected for the header.
     * @param header Column header.
     * @return the value, or null when no value was collected or the value is the text "null".
     * @throws UnsupportedOperationException when more than one distinct value was collected.
     */
    private String getSingle(String header) {
        Set<String> values = headerValues.get(header);
        // No header line was read (for example an empty file), so nothing was collected.
        if (values == null || values.isEmpty()) {
            return null;
        } else if (values.size() > 1) {
            throw new UnsupportedOperationException(path + " contains more than one value for " + header + ": " + values);
        } else {
            String value = values.iterator().next();
            return "null".equals(value) ? null : value;
        }
    }
}

View File

@ -1,123 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.broad;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang.NullArgumentException;
import org.broadinstitute.sting.datasources.pipeline.Pipeline;
import org.broadinstitute.sting.datasources.pipeline.PipelineProject;
import org.broadinstitute.sting.datasources.pipeline.PipelineSample;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.utils.yaml.YamlUtils;
import java.io.File;
import java.io.FileNotFoundException;
/**
* Automatically gets the latest version using PicardAggregationUtils.
*/
public class PicardPipeline {
protected static final String PROJECT_TAG = "SQUIDProject";
protected static final String SAMPLE_TAG = "CollaboratorID";
protected static final String PICARD_BAM_TYPE = "cleaned";
private PicardPipeline() {}
/**
* Creates a new PicardPipeline
* @param path Path to a tsv with project [tab] sample on each line or a pipeline yaml.
* @return a new Picard
* @throws FileNotFoundException when unable to find the file or any supporting files.
*/
public static Pipeline parse(File path) throws FileNotFoundException {
if (path == null)
throw new NullArgumentException("path");
Pipeline pipeline;
if (path.getName().endsWith(".tsv")) {
pipeline = new Pipeline();
pipeline.getProject().setName(FilenameUtils.getBaseName(path.getPath()));
for (String line: new XReadLines(path)) {
String[] projectSample = line.split("\t");
addSample(pipeline, projectSample[0], projectSample[1]);
}
} else if (path.getName().endsWith(".yaml")) {
pipeline = YamlUtils.load(Pipeline.class, path);
} else {
throw new UserException.BadInput("Path does not end with .tsv or .yaml: " + path.getPath());
}
update(pipeline);
return pipeline;
}
private static void update(Pipeline pipeline) throws FileNotFoundException {
for (PipelineSample sample: pipeline.getSamples())
updateSample(pipeline.getProject(), sample);
}
private static void addSample(Pipeline pipeline, String project, String sample) {
PipelineSample pipelineSample = new PipelineSample();
pipelineSample.getTags().put(PROJECT_TAG, project);
pipelineSample.getTags().put(SAMPLE_TAG, sample);
pipeline.getSamples().add(pipelineSample);
}
private static void updateSample(PipelineProject pipelineProject, PipelineSample pipelineSample) throws FileNotFoundException {
if (!pipelineSample.getTags().containsKey(PROJECT_TAG) && !pipelineSample.getTags().containsKey(SAMPLE_TAG))
return;
String project = pipelineSample.getTags().get(PROJECT_TAG);
String sample = pipelineSample.getTags().get(SAMPLE_TAG);
int version = PicardAggregationUtils.getLatestVersion(project, sample);
if (version <= 0)
throw new UserException.BadInput("Project sample not found: " + project + "/" + sample);
String bam = PicardAggregationUtils.getSampleBam(project, sample, version);
if (pipelineSample.getId() == null)
pipelineSample.setId(project + "_" + sample);
pipelineSample.getBamFiles().put(PICARD_BAM_TYPE, new File(bam));
PicardAnalysisFiles analysis = new PicardAnalysisFiles(project, sample, version);
if (pipelineProject.getReferenceFile() == null) {
String referenceSequence = analysis.getReferenceSequence();
ReferenceData referenceData = ReferenceData.getByReference(referenceSequence);
pipelineProject.setReferenceFile(new File(referenceData.getReference()));
pipelineProject.setRefseqTable(new File(referenceData.getRefseq()));
if (analysis.getTargetIntervals() != null)
pipelineProject.setIntervalList(new File(analysis.getTargetIntervals()));
pipelineProject.setEvalDbsnp(new File(referenceData.getDbsnp(129)));
if (referenceData.getDbsnpVersions().contains(132)) {
pipelineProject.setGenotypeDbsnp(new File(referenceData.getDbsnp(132)));
} else {
pipelineProject.setGenotypeDbsnp(new File(referenceData.getDbsnp(129)));
}
} else {
String referenceSequence = analysis.getReferenceSequence();
if (!pipelineProject.getReferenceFile().getPath().equals(referenceSequence))
throw new UserException.BadInput("Samples sequenced with different references");
}
}
}

View File

@ -1,135 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.broad;
import java.util.Collections;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
/**
 * Known reference builds at the Broad together with the paths to their
 * fasta, refseq table and dbsnp files.
 */
public enum ReferenceData {
    /** Reference data for hg18. */
    HG18("hg18"),

    /** Reference data for hg19. */
    HG19("hg19");

    private static final String REFSEQ_DIR = "/humgen/gsa-hpprojects/GATK/data/Annotations/refseq/";
    private static final String DBSNP_DIR = "/humgen/gsa-hpprojects/GATK/data/";

    private final String name;
    private final String reference;
    private final String refseq;
    private final Map<Integer,String> dbsnps;

    ReferenceData(String name) {
        final boolean isHg18 = "hg18".equals(name);
        final boolean isHg19 = "hg19".equals(name);
        if (!isHg18 && !isHg19)
            throw new UnsupportedOperationException("Unknown reference: " + name);

        this.name = name;

        // Keyed by dbsnp version; a TreeMap keeps the versions in ascending order.
        final Map<Integer,String> pathsByVersion = new TreeMap<Integer,String>();
        if (isHg18) {
            this.reference = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta";
            this.refseq = REFSEQ_DIR + "refGene-big-table-hg18.txt";
            pathsByVersion.put(129, DBSNP_DIR + "dbsnp_129_hg18.rod");
        } else {
            this.reference = "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta";
            this.refseq = REFSEQ_DIR + "refGene-big-table-hg19.txt";
            pathsByVersion.put(129, DBSNP_DIR + "dbsnp_129_b37.vcf");
            pathsByVersion.put(132, DBSNP_DIR + "dbsnp_132_b37.vcf");
        }
        this.dbsnps = Collections.unmodifiableMap(pathsByVersion);
    }

    /**
     * @return the short name of this reference, for example "hg19".
     */
    public String getName() {
        return this.name;
    }

    /**
     * @return the path to the reference fasta.
     */
    public String getReference() {
        return this.reference;
    }

    /**
     * @return the path to the refseq table.
     */
    public String getRefseq() {
        return this.refseq;
    }

    /**
     * @return the dbsnp versions that have a path for this reference.
     */
    public Set<Integer> getDbsnpVersions() {
        return this.dbsnps.keySet();
    }

    /**
     * Looks up the dbsnp path for a version.
     * @param version version from getDbsnpVersions()
     * @return the dbsnp path, or null when the version is not available.
     */
    public String getDbsnp(int version) {
        return this.dbsnps.get(version);
    }

    /**
     * Reports the dbsnp file type for a version based on the file extension.
     * @param version version from getDbsnpVersions()
     * @return "VCF" for paths ending in .vcf, "DBSNP" for any other path,
     *         or null when the version is not available.
     */
    public String getDbsnpType(int version) {
        final String path = getDbsnp(version);
        if (path != null) {
            if (path.toLowerCase().endsWith(".vcf"))
                return "VCF";
            return "DBSNP";
        }
        return null;
    }

    /**
     * Finds the reference data whose fasta path equals the given path.
     * @param reference path to the reference
     * @return the matching reference data, or null when no fasta path matches.
     */
    public static ReferenceData getByReference(String reference) {
        ReferenceData match = null;
        for (ReferenceData candidate: ReferenceData.values()) {
            if (candidate.reference.equals(reference)) {
                match = candidate;
                break;
            }
        }
        return match;
    }
}

View File

@ -22,8 +22,10 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.datasources.pipeline;
package org.broadinstitute.sting.pipeline;
import org.broadinstitute.sting.pipeline.Pipeline;
import org.broadinstitute.sting.pipeline.PipelineSample;
import org.testng.Assert;
import org.broadinstitute.sting.utils.yaml.YamlUtils;

View File

@ -1,61 +0,0 @@
package org.broadinstitute.sting.utils.broad;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.io.FileNotFoundException;
/**
 * Tests PicardAggregationUtils against a known project sample and a project sample that does not exist.
 */
public class PicardAggregationUtilsUnitTest {
    public static final String PROJECT = "C474";
    public static final String SAMPLE = "NA19651";
    public static final String MISSING_PROJECT = "C0";
    public static final String MISSING_SAMPLE = "0";

    // Set by testGetLatestVersion and read by the tests that depend on it.
    private int latestVersion = -1;

    @Test
    public void testGetLatestVersion() {
        latestVersion = PicardAggregationUtils.getLatestVersion(PROJECT, SAMPLE);
        final String message = String.format("Latest version for %s %s is %d", PROJECT, SAMPLE, latestVersion);
        System.out.println(message);
        Assert.assertTrue(latestVersion > 0);

        // Starting the search at the latest version must find that same version.
        final int fromLatest = PicardAggregationUtils.getLatestVersion(PROJECT, SAMPLE, latestVersion);
        Assert.assertEquals(fromLatest, latestVersion);
    }

    @Test(dependsOnMethods = "testGetLatestVersion")
    public void testGetSampleBam() throws Exception {
        final String implicitLatest = PicardAggregationUtils.getSampleBam(PROJECT, SAMPLE);
        final String explicitLatest = PicardAggregationUtils.getSampleBam(PROJECT, SAMPLE, latestVersion);
        Assert.assertEquals(implicitLatest, explicitLatest);
    }

    @Test(dependsOnMethods = "testGetLatestVersion")
    public void testGetSampleDir() throws Exception {
        final String implicitLatest = PicardAggregationUtils.getSampleDir(PROJECT, SAMPLE);
        final String explicitLatest = PicardAggregationUtils.getSampleDir(PROJECT, SAMPLE, latestVersion);
        Assert.assertEquals(implicitLatest, explicitLatest);
    }

    @Test(dependsOnMethods = "testGetLatestVersion")
    public void testIsFinished() {
        final boolean latestFinished = PicardAggregationUtils.isFinished(PROJECT, SAMPLE, latestVersion);
        Assert.assertTrue(latestFinished);
        final boolean nextFinished = PicardAggregationUtils.isFinished(PROJECT, SAMPLE, latestVersion + 1);
        Assert.assertFalse(nextFinished);
    }

    @Test(expectedExceptions = FileNotFoundException.class)
    public void testMissingSampleBam() throws Exception {
        PicardAggregationUtils.getSampleBam(MISSING_PROJECT, MISSING_SAMPLE);
    }

    @Test(expectedExceptions = FileNotFoundException.class)
    public void testMissingSampleDir() throws Exception {
        PicardAggregationUtils.getSampleDir(MISSING_PROJECT, MISSING_SAMPLE);
    }

    @Test
    public void testLatestVersionMissing() {
        Assert.assertEquals(PicardAggregationUtils.getLatestVersion(MISSING_PROJECT, MISSING_SAMPLE), 0);
        // For a missing project sample the start version is handed back unchanged.
        for (int startVersion = -1; startVersion <= 2; startVersion++) {
            final int found = PicardAggregationUtils.getLatestVersion(MISSING_PROJECT, MISSING_SAMPLE, startVersion);
            Assert.assertEquals(found, startVersion);
        }
    }
}

View File

@ -1,56 +0,0 @@
package org.broadinstitute.sting.utils.broad;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.io.FileNotFoundException;
import static org.broadinstitute.sting.utils.broad.PicardAggregationUtilsUnitTest.*;
/**
 * Tests parsing of Picard analysis files, from the aggregation directories and from validation data files.
 */
public class PicardAnalysisFilesUnitTest extends BaseTest {
    private static final String EXPECTED_REFERENCE = "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta";
    private static final String EXPECTED_TARGETS = "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list";
    private static final String EXPECTED_BAITS = "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.baits.interval_list";

    @Test
    public void testParseLatest() throws Exception {
        PicardAnalysisFiles implicitLatest = new PicardAnalysisFiles(PROJECT, SAMPLE);
        Assert.assertNotNull(implicitLatest.getPath());

        int latestVersion = PicardAggregationUtils.getLatestVersion(PROJECT, SAMPLE);
        PicardAnalysisFiles explicitLatest = new PicardAnalysisFiles(PROJECT, SAMPLE, latestVersion);
        Assert.assertNotNull(explicitLatest.getPath());
    }

    @Test
    public void testParseValid() throws Exception {
        assertExpectedValues(new PicardAnalysisFiles(BaseTest.validationDataLocation + "picard_analysis_file.txt"));
    }

    @Test
    public void testParseValidWithComments() throws Exception {
        assertExpectedValues(new PicardAnalysisFiles(BaseTest.validationDataLocation + "picard_analysis_file_with_comments.txt"));
    }

    @Test(expectedExceptions = FileNotFoundException.class)
    public void testParseBadPath() throws Exception {
        new PicardAnalysisFiles(BaseTest.validationDataLocation + "non_existent_picard_analysis_file.txt");
    }

    @Test(expectedExceptions = FileNotFoundException.class)
    public void testParseMissingLatest() throws Exception {
        new PicardAnalysisFiles(MISSING_PROJECT, MISSING_SAMPLE);
    }

    @Test(expectedExceptions = FileNotFoundException.class)
    public void testParseMissingVersion() throws Exception {
        int missingVersion = PicardAggregationUtils.getLatestVersion(PROJECT, SAMPLE) + 2;
        new PicardAnalysisFiles(PROJECT, SAMPLE, missingVersion);
    }

    @Test(expectedExceptions = UnsupportedOperationException.class)
    public void testParseMultipleReferences() throws Exception {
        PicardAnalysisFiles mixedReferences = new PicardAnalysisFiles(BaseTest.validationDataLocation + "picard_analysis_file_with_different_references.txt");
        mixedReferences.getReferenceSequence();
    }

    /**
     * Checks the reference, target and bait values expected from the valid analysis files.
     */
    private static void assertExpectedValues(PicardAnalysisFiles parsed) {
        Assert.assertEquals(parsed.getReferenceSequence(), EXPECTED_REFERENCE);
        Assert.assertEquals(parsed.getTargetIntervals(), EXPECTED_TARGETS);
        Assert.assertEquals(parsed.getBaitIntervals(), EXPECTED_BAITS);
    }
}

View File

@ -1,71 +0,0 @@
package org.broadinstitute.sting.utils.broad;
import org.testng.Assert;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.datasources.pipeline.Pipeline;
import org.broadinstitute.sting.datasources.pipeline.PipelineSample;
import org.broadinstitute.sting.utils.yaml.YamlUtils;
import org.testng.annotations.Test;
import static org.broadinstitute.sting.utils.broad.PicardAggregationUtilsUnitTest.*;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
/**
 * Tests PicardPipeline.parse with tsv and yaml inputs written to temp files.
 */
public class PicardPipelineUnitTest {
    @Test
    public void testParseTsv() throws IOException {
        final File tsvFile = writeTsv(PROJECT, SAMPLE);
        final String expectedName = FilenameUtils.getBaseName(tsvFile.getPath());
        final Pipeline parsed = PicardPipeline.parse(tsvFile);
        validatePipeline(parsed, expectedName);
    }

    @Test
    public void testParseTsvWithPicardComments() throws Exception {
        // Only checks that parsing this project sample completes without throwing.
        PicardPipeline.parse(writeTsv("C460", "HG01359"));
    }

    @Test
    public void testParseYaml() throws IOException {
        final File yamlFile = writeYaml("project_name", PROJECT, SAMPLE);
        final Pipeline parsed = PicardPipeline.parse(yamlFile);
        validatePipeline(parsed, "project_name");
    }

    /**
     * Checks the project name, that every project file exists and that the single sample is filled in.
     */
    private void validatePipeline(Pipeline pipeline, String name) {
        Assert.assertEquals(pipeline.getProject().getName(), name);

        final File reference = pipeline.getProject().getReferenceFile();
        Assert.assertTrue(reference.exists(), "reference not found");
        final File intervals = pipeline.getProject().getIntervalList();
        Assert.assertTrue(intervals.exists(), "intervals not found");
        final File refseq = pipeline.getProject().getRefseqTable();
        Assert.assertTrue(refseq.exists(), "refseq not found");
        final File genotypeDbsnp = pipeline.getProject().getGenotypeDbsnp();
        Assert.assertTrue(genotypeDbsnp.exists(), "genotype dbsnp not found");
        final File evalDbsnp = pipeline.getProject().getEvalDbsnp();
        Assert.assertTrue(evalDbsnp.exists(), "eval dbsnp not found");

        Assert.assertEquals(pipeline.getSamples().size(), 1);
        final String expectedId = PROJECT + "_" + SAMPLE;
        for (PipelineSample pipelineSample: pipeline.getSamples()) {
            Assert.assertEquals(pipelineSample.getId(), expectedId);
            Assert.assertTrue(pipelineSample.getBamFiles().get(PicardPipeline.PICARD_BAM_TYPE).exists(), "bam not found");
            Assert.assertEquals(pipelineSample.getTags().get(PicardPipeline.PROJECT_TAG), PROJECT);
            Assert.assertEquals(pipelineSample.getTags().get(PicardPipeline.SAMPLE_TAG), SAMPLE);
        }
    }

    /**
     * Writes a temp tsv with a single project [tab] sample line.
     */
    private File writeTsv(String project, String sample) throws IOException {
        final String line = project + "\t" + sample;
        final File tsvFile = BaseTest.createTempFile("pipeline", ".tsv");
        FileUtils.writeLines(tsvFile, Collections.singletonList(line));
        return tsvFile;
    }

    /**
     * Writes a temp yaml for a pipeline with the project name and one sample tagged with the project and sample.
     */
    private File writeYaml(String projectName, String project, String sample) throws IOException {
        final File yamlFile = BaseTest.createTempFile("pipeline", ".yaml");

        final PipelineSample taggedSample = new PipelineSample();
        taggedSample.getTags().put(PicardPipeline.PROJECT_TAG, project);
        taggedSample.getTags().put(PicardPipeline.SAMPLE_TAG, sample);

        final Pipeline toDump = new Pipeline();
        toDump.getProject().setName(projectName);
        toDump.getSamples().add(taggedSample);

        YamlUtils.dump(toDump, yamlFile);
        return yamlFile;
    }
}

View File

@ -1,49 +0,0 @@
package org.broadinstitute.sting.utils.broad;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.io.File;
/**
 * Tests the names, file paths and lookups exposed by ReferenceData.
 */
public class ReferenceDataUnitTest {
    @Test
    public void testNames() {
        Assert.assertEquals(ReferenceData.HG18.getName(), "hg18");
        Assert.assertEquals(ReferenceData.HG19.getName(), "hg19");
    }

    @Test
    public void testFilesExist() {
        for (ReferenceData referenceData: ReferenceData.values()) {
            assertPathExists(referenceData.getReference());
            assertPathExists(referenceData.getRefseq());
            for (int dbsnpVersion: referenceData.getDbsnpVersions()) {
                assertPathExists(referenceData.getDbsnp(dbsnpVersion));
            }
        }
    }

    @Test
    public void testDbsnps() {
        assertPathExists(ReferenceData.HG18.getDbsnp(129));
        assertPathExists(ReferenceData.HG19.getDbsnp(129));
        assertPathExists(ReferenceData.HG19.getDbsnp(132));
        // There is no dbsnp 130 for hg19.
        Assert.assertNull(ReferenceData.HG19.getDbsnp(130));
    }

    @Test
    public void testDbsnpTypes() {
        Assert.assertEquals(ReferenceData.HG18.getDbsnpType(129), "DBSNP");
        Assert.assertEquals(ReferenceData.HG19.getDbsnpType(129), "VCF");
        Assert.assertEquals(ReferenceData.HG19.getDbsnpType(132), "VCF");
        Assert.assertNull(ReferenceData.HG19.getDbsnpType(130));
    }

    @Test
    public void testGetByReference() {
        final ReferenceData forHg18 = ReferenceData.getByReference(BaseTest.hg18Reference);
        Assert.assertEquals(forHg18, ReferenceData.HG18);
        final ReferenceData forHg19 = ReferenceData.getByReference(BaseTest.hg19Reference);
        Assert.assertEquals(forHg19, ReferenceData.HG19);
        Assert.assertNull(ReferenceData.getByReference("none"));
    }

    /**
     * Asserts that a file exists at the path.
     */
    private static void assertPathExists(String path) {
        Assert.assertTrue(new File(path).exists());
    }
}

View File

@ -33,7 +33,7 @@
<package name="org.broadinstitute.sting.queue.**" />
<!-- Pipeline + Utils -->
<package name="org.broadinstitute.sting.datasources.pipeline.**" />
<package name="org.broadinstitute.sting.pipeline.**" />
<package name="org.broadinstitute.sting.utils.**" />
<!-- Scala -->

View File

@ -32,8 +32,6 @@ import java.util.Date
import java.text.SimpleDateFormat
import org.broadinstitute.sting.BaseTest
import org.broadinstitute.sting.queue.QCommandLine
import org.broadinstitute.sting.datasources.pipeline.{Pipeline, PipelineProject, PipelineSample}
import org.broadinstitute.sting.utils.broad.PicardAggregationUtils
import org.broadinstitute.sting.queue.util.{Logging, ProcessController}
import java.io.{FileNotFoundException, File}
import org.broadinstitute.sting.gatk.report.GATKReportParser
@ -42,23 +40,6 @@ import org.broadinstitute.sting.queue.engine.CommandLinePluginManager
object PipelineTest extends BaseTest with Logging {
case class K1gBam(squidId: String, sampleId: String, version: Int)

/** 1000G BAMs used for validation, listed as (squidId, sampleId, version). */
val k1gBams = List(
  ("C474", "NA19651", 2),
  ("C474", "NA19655", 2),
  ("C474", "NA19669", 2),
  ("C454", "NA19834", 2),
  ("C460", "HG01440", 2),
  ("C456", "NA12342", 2),
  ("C456", "NA12748", 2),
  ("C474", "NA19649", 2),
  ("C474", "NA19652", 2),
  ("C474", "NA19654", 2)
).map { case (squidId, sampleId, version) => K1gBam(squidId, sampleId, version) }

// Check the bams above as soon as this object is initialized.
validateK1gBams()
private val validationReportsDataLocation = "/humgen/gsa-hpprojects/GATK/validationreports/submitted/"
val run = System.getProperty("pipeline.run") == "run"
@ -92,49 +73,6 @@ object PipelineTest extends BaseTest with Logging {
*/
private def tempDir(testName: String, jobRunner: String) = testDir(testName, jobRunner) + "temp/"
/**
 * Builds a pipeline from a project and its samples.
 * @param project Pipeline project info.
 * @param samples List of samples.
 * @return a new pipeline holding the project and the samples.
 */
def createPipeline(project: PipelineProject, samples: List[PipelineSample]) = {
  val created = new Pipeline
  created.setProject(project)
  created.setSamples(samples)
  created
}
/**
 * Builds an hg19 pipeline project that genotypes with the b37 132 dbsnp and evaluates with the b37 129 dbsnp.
 * @param projectName Name of the project.
 * @param intervals The intervals file to use.
 * @return a new pipeline project.
 */
def createHg19Project(projectName: String, intervals: String) = {
  val hg19Project = new PipelineProject
  hg19Project.setName(projectName)
  // Reference, dbsnp and refseq paths all come from the shared BaseTest constants.
  hg19Project.setReferenceFile(new File(BaseTest.hg19Reference))
  hg19Project.setGenotypeDbsnp(new File(BaseTest.b37dbSNP132))
  hg19Project.setEvalDbsnp(new File(BaseTest.b37dbSNP129))
  hg19Project.setRefseqTable(new File(BaseTest.hg19Refseq))
  hg19Project.setIntervalList(new File(intervals))
  hg19Project
}
/**
 * Builds a pipeline sample for one of the 1000G bams.
 * @param idPrefix Text to prepend to the sample name.
 * @param k1gBam bam to create the sample for.
 * @return the created pipeline sample, with the bam registered under "cleaned".
 */
def createK1gSample(idPrefix: String, k1gBam: K1gBam) = {
  val k1gSample = new PipelineSample
  val sampleId = idPrefix + "_" + k1gBam.sampleId
  k1gSample.setId(sampleId)
  val bamFiles = Map("cleaned" -> getPicardBam(k1gBam))
  k1gSample.setBamFiles(bamFiles)
  k1gSample
}
/**
* Runs the pipelineTest.
* @param pipelineTest test to run.
@ -267,31 +205,6 @@ object PipelineTest extends BaseTest with Logging {
}
}
/**
 * Warns about every 1000G bam that is not at its latest version, then throws a
 * FileNotFoundException listing all of the 1000G bams that do not exist.
 */
private def validateK1gBams() {
  // Visit the bams in order, warning as we go and keeping only the ones that are missing.
  val missingBams: List[File] = k1gBams.flatMap { k1gBam =>
    val latest = getLatestVersion(k1gBam)
    val bam = getPicardBam(k1gBam)
    if (k1gBam.version != latest)
      logger.warn("1000G bam is not the latest version %d: %s".format(latest, k1gBam))
    if (bam.exists) None else Some(bam)
  }
  if (!missingBams.isEmpty) {
    val nl = "%n".format()
    val message = "The following 1000G bam files are missing.%n%s".format(missingBams.mkString(nl))
    throw new FileNotFoundException(message)
  }
}
/** Returns the Picard bam file for the squid id, sample id and version of the 1000G bam. */
private def getPicardBam(k1gBam: K1gBam): File = {
  val bamPath = PicardAggregationUtils.getSampleBam(k1gBam.squidId, k1gBam.sampleId, k1gBam.version)
  new File(bamPath)
}
/** Looks up the latest version for the 1000G bam, starting the search at the version recorded on it. */
private def getLatestVersion(k1gBam: K1gBam): Int = {
  val startVersion = k1gBam.version
  PicardAggregationUtils.getLatestVersion(k1gBam.squidId, k1gBam.sampleId, startVersion)
}
private var runningCommandLines = Set.empty[QCommandLine]
Runtime.getRuntime.addShutdownHook(new Thread {