Centralizing a bunch of Broad specific utility functions from code scattered in GSA-Firehose, PipelineTest, custom QScripts, etc.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5631 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kshakir 2011-04-13 21:29:02 +00:00
parent 91d308fc6d
commit 4bb573b1f5
8 changed files with 733 additions and 0 deletions

View File

@ -0,0 +1,149 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.broad;
import org.apache.commons.io.filefilter.RegexFileFilter;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.util.Arrays;
import java.util.regex.Pattern;
/**
 * Static helpers for building paths into the Picard aggregation directory tree
 * and for finding the latest finished version of a project sample.
 */
public class PicardAggregationUtils {
    /** Root of the Picard aggregation directory tree, including the trailing slash. */
    public static final String PICARD_AGGREGATION_DIR = "/seq/picard_aggregation/";

    /** Matches the name of a versioned sample directory: "v" followed by one or more digits. */
    private static final Pattern VERSION_DIR_PATTERN = Pattern.compile("v\\d+");

    /** Accepts only files whose whole name matches {@link #VERSION_DIR_PATTERN}. */
    private static final FileFilter VERSION_DIR_FILTER = new FileFilter() {
        @Override
        public boolean accept(File file) {
            return VERSION_DIR_PATTERN.matcher(file.getName()).matches();
        }
    };

    /** How many versions ahead of the current one are probed directly for a finished.txt. */
    private static final int MAX_LOOK_AHEAD = 3;

    /**
     * Returns the path to the sample BAM.
     * @param project Project
     * @param sample Sample
     * @param version Version
     * @return The path to the sample BAM.
     */
    public static String getSampleBam(String project, String sample, int version) {
        return getSampleDir(project, sample, version) + sample + ".bam";
    }

    /**
     * Returns the path to the latest BAM.
     * @param project Project
     * @param sample Sample
     * @return The path to the latest BAM.
     * @throws FileNotFoundException If a finished directory cannot be found for a sample.
     */
    public static String getSampleBam(String project, String sample) throws FileNotFoundException {
        return getSampleDir(project, sample) + sample + ".bam";
    }

    /**
     * Returns the sample directory. The path is only formatted; nothing on disk is checked.
     * @param project Project
     * @param sample Sample
     * @param version Version
     * @return the sample directory, ending with a slash.
     */
    public static String getSampleDir(String project, String sample, int version) {
        return PICARD_AGGREGATION_DIR + String.format("%s/%s/v%d/", project, sample, version);
    }

    /**
     * Returns the latest finished directory for this project sample.
     * @param project Project
     * @param sample Sample
     * @return The path to the latest finished directory.
     * @throws FileNotFoundException If a finished directory cannot be found for a sample.
     */
    public static String getSampleDir(String project, String sample) throws FileNotFoundException {
        int latestVersion = getLatestVersion(project, sample);
        if (latestVersion == 0) {
            throw new FileNotFoundException("Unable to find a finished directory for project sample " + project + "/" + sample);
        }
        return getSampleDir(project, sample, latestVersion);
    }

    /**
     * Returns the latest finished version directory.
     * Because isilon metadata operations are relatively slow this method
     * tries not to look for every possible versioned directory.
     * @param project Project
     * @param sample Sample
     * @return The highest finished version directory or 0 if a finished directory was not found.
     */
    public static int getLatestVersion(String project, String sample) {
        return getLatestVersion(project, sample, 0);
    }

    /**
     * Returns the latest finished version directory after startVersion.
     * Because isilon metadata operations are relatively slow this method
     * tries not to look for every possible versioned directory: it repeatedly
     * probes up to three versions ahead of the last finished version found, and
     * only lists the sample directory when that search ends at version 0.
     * @param project Project
     * @param sample Sample
     * @param startVersion version to start searching after; values below 0 search from 0
     * @return The highest finished version found by the search. When no finished
     *         version is found, startVersion is returned unchanged (even if it is
     *         negative); startVersion itself is never checked for a finished.txt.
     */
    public static int getLatestVersion(String project, String sample, int startVersion) {
        int version = Math.max(0, startVersion);
        boolean advanced = true;
        while (advanced) {
            advanced = false;
            // Probe the furthest candidate first so that a single hit skips the nearer ones.
            for (int next = MAX_LOOK_AHEAD; next > 0; next--) {
                if (isFinished(project, sample, version + next)) {
                    version += next;
                    advanced = true;
                    break;
                }
            }
        }
        // Special case when the version is 0
        // Because isilon storage takes a while to do meta data operations only look through every file if we have to.
        if (version == 0) {
            version = findLatestFinishedByListing(project, sample);
        }
        return version == 0 ? startVersion : version;
    }

    /**
     * Lists the versioned directories of a project sample and returns the highest finished one.
     * @param project Project
     * @param sample Sample
     * @return The highest finished version, or 0 if the sample directory cannot be listed
     *         or contains no finished version.
     */
    private static int findLatestFinishedByListing(String project, String sample) {
        File sampleDir = new File(PICARD_AGGREGATION_DIR + project + "/" + sample);
        // listFiles returns null when the path does not denote a directory or an I/O error occurs.
        File[] versionDirs = sampleDir.listFiles(VERSION_DIR_FILTER);
        if (versionDirs == null) {
            return 0;
        }
        int[] versions = new int[versionDirs.length];
        int count = 0;
        for (File versionDir : versionDirs) {
            try {
                // Drop the leading "v" and parse the remaining digits.
                versions[count] = Integer.parseInt(versionDir.getName().substring(1));
                count++;
            } catch (NumberFormatException ignored) {
                // More digits than fit in an int; such a directory cannot be addressed by an int version.
            }
        }
        Arrays.sort(versions, 0, count);
        for (int i = count - 1; i >= 0; i--) {
            if (isFinished(project, sample, versions[i])) {
                return versions[i];
            }
        }
        return 0;
    }

    /**
     * Returns true if the project sample directory contains a finished.txt
     * @param project Project
     * @param sample Sample
     * @param version Version
     * @return true if the project sample directory contains a finished.txt
     */
    public static boolean isFinished(String project, String sample, int version) {
        return new File(getSampleDir(project, sample, version), "finished.txt").exists();
    }
}

View File

@ -0,0 +1,102 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.broad;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
 * Reads a tab separated Picard analysis_files.txt and collects the distinct values
 * found under the REFERENCE_SEQUENCE, TARGET_INTERVALS and BAIT_INTERVALS columns.
 */
public class PicardAnalysisFiles {
    private static final String REFERENCE_SEQUENCE_HEADER = "REFERENCE_SEQUENCE";
    private static final String TARGET_INTERVALS_HEADER = "TARGET_INTERVALS";
    private static final String BAIT_INTERVALS_HEADER = "BAIT_INTERVALS";
    private static final String[] ANALYSIS_HEADERS = {REFERENCE_SEQUENCE_HEADER, TARGET_INTERVALS_HEADER, BAIT_INTERVALS_HEADER};
    private static final String ANALYSIS_FILES = "analysis_files.txt";

    /** Path of the analysis file that was parsed. */
    private String path;
    /** Distinct values seen for each tracked header; always holds an entry for every tracked header. */
    private Map<String,Set<String>> headerValues = new HashMap<String,Set<String>>();

    /**
     * Parses the analysis file in the latest finished directory of the project sample.
     * @param project Project
     * @param sample Sample
     * @throws FileNotFoundException If a finished directory or the analysis file cannot be found.
     */
    public PicardAnalysisFiles(String project, String sample) throws FileNotFoundException {
        this(PicardAggregationUtils.getSampleDir(project, sample) + ANALYSIS_FILES);
    }

    /**
     * Parses the analysis file in a specific version directory of the project sample.
     * @param project Project
     * @param sample Sample
     * @param version Version
     * @throws FileNotFoundException If the analysis file cannot be found.
     */
    public PicardAnalysisFiles(String project, String sample, int version) throws FileNotFoundException {
        this(PicardAggregationUtils.getSampleDir(project, sample, version) + ANALYSIS_FILES);
    }

    /**
     * Parses the analysis file at the path. The first line is treated as the header
     * row and every following line as a data row.
     * @param path Path to the analysis file.
     * @throws FileNotFoundException If the analysis file cannot be found.
     */
    public PicardAnalysisFiles(String path) throws FileNotFoundException {
        this.path = path;
        // Register every tracked header up front so lookups work even when the file has no lines.
        for (String header: ANALYSIS_HEADERS) {
            headerValues.put(header, new HashSet<String>());
        }
        Map<String,Integer> headerIndexes = null;
        for (String line: new XReadLines(new File(path))) {
            String[] values = line.split("\t");
            if (headerIndexes == null) {
                headerIndexes = new HashMap<String,Integer>();
                for (String header: ANALYSIS_HEADERS) {
                    // indexOf yields -1 when the header row does not contain the column.
                    headerIndexes.put(header, ArrayUtils.indexOf(values, header));
                }
            } else {
                for (String header: ANALYSIS_HEADERS) {
                    int index = headerIndexes.get(header);
                    // Skip columns missing from the header row, and rows too short to reach
                    // the column (split drops trailing empty fields).
                    if (index >= 0 && index < values.length) {
                        headerValues.get(header).add(values[index]);
                    }
                }
            }
        }
    }

    /**
     * Returns the path to the parsed analysis file.
     * @return the path to the parsed analysis file.
     */
    public String getPath() {
        return path;
    }

    /**
     * Returns the single REFERENCE_SEQUENCE value.
     * @return the value, or null if there is none or it is the text "null".
     * @throws UnsupportedOperationException if the file contains more than one distinct value.
     */
    public String getReferenceSequence() {
        return getSingle(REFERENCE_SEQUENCE_HEADER);
    }

    /**
     * Returns the single TARGET_INTERVALS value.
     * @return the value, or null if there is none or it is the text "null".
     * @throws UnsupportedOperationException if the file contains more than one distinct value.
     */
    public String getTargetIntervals() {
        return getSingle(TARGET_INTERVALS_HEADER);
    }

    /**
     * Returns the single BAIT_INTERVALS value.
     * @return the value, or null if there is none or it is the text "null".
     * @throws UnsupportedOperationException if the file contains more than one distinct value.
     */
    public String getBaitIntervals() {
        return getSingle(BAIT_INTERVALS_HEADER);
    }

    /**
     * Returns the only value recorded for a header.
     * @param header One of the tracked headers.
     * @return the value, or null if no value was recorded or the value is the text "null".
     * @throws UnsupportedOperationException if more than one distinct value was recorded.
     */
    private String getSingle(String header) {
        Set<String> values = headerValues.get(header);
        if (values == null || values.isEmpty()) {
            return null;
        }
        if (values.size() > 1) {
            throw new UnsupportedOperationException(path + " contains more than one value for " + header + ": " + values);
        }
        String value = values.iterator().next();
        return "null".equals(value) ? null : value;
    }
}

View File

@ -0,0 +1,123 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.broad;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang.NullArgumentException;
import org.broadinstitute.sting.datasources.pipeline.Pipeline;
import org.broadinstitute.sting.datasources.pipeline.PipelineProject;
import org.broadinstitute.sting.datasources.pipeline.PipelineSample;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.utils.yaml.YamlUtils;
import java.io.File;
import java.io.FileNotFoundException;
/**
* Automatically gets the latest version using PicardAggregationUtils.
*/
public class PicardPipeline {
protected static final String PROJECT_TAG = "SQUIDProject";
protected static final String SAMPLE_TAG = "CollaboratorID";
protected static final String PICARD_BAM_TYPE = "cleaned";
private PicardPipeline() {}
/**
* Creates a new PicardPipeline
* @param path Path to a tsv with project [tab] sample on each line or a pipeline yaml.
* @return a new Picard
* @throws FileNotFoundException when unable to find the file or any supporting files.
*/
public static Pipeline parse(File path) throws FileNotFoundException {
if (path == null)
throw new NullArgumentException("path");
Pipeline pipeline;
if (path.getName().endsWith(".tsv")) {
pipeline = new Pipeline();
pipeline.getProject().setName(FilenameUtils.getBaseName(path.getPath()));
for (String line: new XReadLines(path)) {
String[] projectSample = line.split("\t");
addSample(pipeline, projectSample[0], projectSample[1]);
}
} else if (path.getName().endsWith(".yaml")) {
pipeline = YamlUtils.load(Pipeline.class, path);
} else {
throw new UserException.BadInput("Path does not end with .tsv or .yaml: " + path.getPath());
}
update(pipeline);
return pipeline;
}
private static void update(Pipeline pipeline) throws FileNotFoundException {
for (PipelineSample sample: pipeline.getSamples())
updateSample(pipeline.getProject(), sample);
}
private static void addSample(Pipeline pipeline, String project, String sample) {
PipelineSample pipelineSample = new PipelineSample();
pipelineSample.getTags().put(PROJECT_TAG, project);
pipelineSample.getTags().put(SAMPLE_TAG, sample);
pipeline.getSamples().add(pipelineSample);
}
private static void updateSample(PipelineProject pipelineProject, PipelineSample pipelineSample) throws FileNotFoundException {
if (!pipelineSample.getTags().containsKey(PROJECT_TAG) && !pipelineSample.getTags().containsKey(SAMPLE_TAG))
return;
String project = pipelineSample.getTags().get(PROJECT_TAG);
String sample = pipelineSample.getTags().get(SAMPLE_TAG);
int version = PicardAggregationUtils.getLatestVersion(project, sample);
if (version <= 0)
throw new UserException.BadInput("Project sample not found: " + project + "/" + sample);
String bam = PicardAggregationUtils.getSampleBam(project, sample, version);
if (pipelineSample.getId() == null)
pipelineSample.setId(project + "_" + sample);
pipelineSample.getBamFiles().put(PICARD_BAM_TYPE, new File(bam));
PicardAnalysisFiles analysis = new PicardAnalysisFiles(project, sample, version);
if (pipelineProject.getReferenceFile() == null) {
String referenceSequence = analysis.getReferenceSequence();
ReferenceData referenceData = ReferenceData.getByReference(referenceSequence);
pipelineProject.setReferenceFile(new File(referenceData.getReference()));
pipelineProject.setRefseqTable(new File(referenceData.getRefseq()));
if (analysis.getTargetIntervals() != null)
pipelineProject.setIntervalList(new File(analysis.getTargetIntervals()));
pipelineProject.setEvalDbsnp(new File(referenceData.getDbsnp(129)));
if (referenceData.getDbsnpVersions().contains(132)) {
pipelineProject.setGenotypeDbsnp(new File(referenceData.getDbsnp(132)));
} else {
pipelineProject.setGenotypeDbsnp(new File(referenceData.getDbsnp(129)));
}
} else {
String referenceSequence = analysis.getReferenceSequence();
if (!pipelineProject.getReferenceFile().getPath().equals(referenceSequence))
throw new UserException.BadInput("Samples sequenced with different references");
}
}
}

View File

@ -0,0 +1,136 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.broad;
import java.util.Collections;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
/**
 * Known reference builds at the Broad, each with the paths to its fasta,
 * refseq table and dbsnp files.
 */
public enum ReferenceData {
    /**
     * Reference data for the hg18 build.
     */
    HG18("hg18"),

    /**
     * Reference data for the hg19 build.
     */
    HG19("hg19");

    private static final String REFSEQ_DIR = "/humgen/gsa-hpprojects/GATK/data/Annotations/refseq/";
    private static final String DBSNP_DIR = "/humgen/gsa-hpprojects/GATK/data/";

    private final String name;
    private final String reference;
    private final String refseq;
    /** dbsnp version number mapped to the dbsnp file path; read-only. */
    private final Map<Integer,String> dbsnps;

    /**
     * Looks up the file paths that belong to the named build.
     * @param name "hg18" or "hg19"
     */
    ReferenceData(String name) {
        final Map<Integer,String> versionedDbsnps = new TreeMap<Integer,String>();
        final String fasta;
        final String refseqTable;

        if ("hg18".equals(name)) {
            fasta = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta";
            refseqTable = REFSEQ_DIR + "refGene-big-table-hg18.txt";
            versionedDbsnps.put(129, DBSNP_DIR + "dbsnp_129_hg18.rod");
        } else if ("hg19".equals(name)) {
            fasta = "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta";
            refseqTable = REFSEQ_DIR + "refGene-big-table-hg19.txt";
            versionedDbsnps.put(129, DBSNP_DIR + "dbsnp_129_b37.vcf");
            versionedDbsnps.put(132, DBSNP_DIR + "dbsnp_132_b37.vcf");
        } else {
            throw new UnsupportedOperationException("Unknown reference: " + name);
        }

        this.name = name;
        this.reference = fasta;
        this.refseq = refseqTable;
        this.dbsnps = Collections.unmodifiableMap(versionedDbsnps);
    }

    /**
     * Gets the name of this reference.
     * @return the reference name.
     */
    public String getName() {
        return this.name;
    }

    /**
     * Gets the location of the fasta.
     * @return the fasta path.
     */
    public String getReference() {
        return this.reference;
    }

    /**
     * Gets the location of the refseq table.
     * @return the refseq table path.
     */
    public String getRefseq() {
        return this.refseq;
    }

    /**
     * Gets the dbsnp versions known for this reference.
     * @return the available dbsnp versions.
     */
    public Set<Integer> getDbsnpVersions() {
        return this.dbsnps.keySet();
    }

    /**
     * Gets the dbsnp file for a version.
     * @param version version from getDbsnpVersions()
     * @return the dbsnp path, or null when the version is not available.
     */
    public String getDbsnp(int version) {
        return this.dbsnps.get(Integer.valueOf(version));
    }

    /**
     * Gets the dbsnp file type for a version, "VCF" or "ROD".
     * The type is the upper-cased last three characters of the dbsnp path.
     * @param version version from getDbsnpVersions()
     * @return the dbsnp type, or null when the version is not available.
     */
    public String getDbsnpType(int version) {
        final String dbsnpPath = getDbsnp(version);
        return dbsnpPath == null ? null : dbsnpPath.substring(dbsnpPath.length() - 3).toUpperCase();
    }

    /**
     * Finds the reference data whose fasta path equals the given path.
     * @param reference path to the reference
     * @return the matching reference data, or null when no fasta path matches.
     */
    public static ReferenceData getByReference(String reference) {
        final ReferenceData[] candidates = ReferenceData.values();
        for (int i = 0; i < candidates.length; i++) {
            if (candidates[i].reference.equals(reference)) {
                return candidates[i];
            }
        }
        return null;
    }
}

View File

@ -0,0 +1,61 @@
package org.broadinstitute.sting.utils.broad;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.io.FileNotFoundException;
/**
 * Tests PicardAggregationUtils against one project sample that is expected to exist
 * in the aggregation directory and one that is expected to be missing.
 */
public class PicardAggregationUtilsUnitTest {
    public static final String PROJECT = "C474";
    public static final String SAMPLE = "NA19651";
    public static final String MISSING_PROJECT = "C0";
    public static final String MISSING_SAMPLE = "0";

    /** Set by testGetLatestVersion and read by the tests that depend on it. */
    private int latestVersion = -1;

    /** The existing sample has a positive latest version, and searching from it finds nothing newer. */
    @Test
    public void testGetLatestVersion() {
        latestVersion = PicardAggregationUtils.getLatestVersion(PROJECT, SAMPLE);
        String message = String.format("Latest version for %s %s is %d", PROJECT, SAMPLE, latestVersion);
        System.out.println(message);
        Assert.assertTrue(latestVersion > 0);

        int fromLatest = PicardAggregationUtils.getLatestVersion(PROJECT, SAMPLE, latestVersion);
        Assert.assertEquals(fromLatest, latestVersion);
    }

    /** The BAM path without a version matches the BAM path for the latest version. */
    @Test(dependsOnMethods = "testGetLatestVersion")
    public void testGetSampleBam() throws Exception {
        String unversioned = PicardAggregationUtils.getSampleBam(PROJECT, SAMPLE);
        String versioned = PicardAggregationUtils.getSampleBam(PROJECT, SAMPLE, latestVersion);
        Assert.assertEquals(unversioned, versioned);
    }

    /** The directory without a version matches the directory for the latest version. */
    @Test(dependsOnMethods = "testGetLatestVersion")
    public void testGetSampleDir() throws Exception {
        String unversioned = PicardAggregationUtils.getSampleDir(PROJECT, SAMPLE);
        String versioned = PicardAggregationUtils.getSampleDir(PROJECT, SAMPLE, latestVersion);
        Assert.assertEquals(unversioned, versioned);
    }

    /** The latest version is finished and the one after it is not. */
    @Test(dependsOnMethods = "testGetLatestVersion")
    public void testIsFinished() {
        boolean latestFinished = PicardAggregationUtils.isFinished(PROJECT, SAMPLE, latestVersion);
        Assert.assertTrue(latestFinished);

        boolean nextFinished = PicardAggregationUtils.isFinished(PROJECT, SAMPLE, latestVersion + 1);
        Assert.assertFalse(nextFinished);
    }

    /** Asking for the latest BAM of a missing sample fails. */
    @Test(expectedExceptions = FileNotFoundException.class)
    public void testMissingSampleBam() throws Exception {
        PicardAggregationUtils.getSampleBam(MISSING_PROJECT, MISSING_SAMPLE);
    }

    /** Asking for the latest directory of a missing sample fails. */
    @Test(expectedExceptions = FileNotFoundException.class)
    public void testMissingSampleDir() throws Exception {
        PicardAggregationUtils.getSampleDir(MISSING_PROJECT, MISSING_SAMPLE);
    }

    /** For a missing sample the latest version is 0, or the start version when one is given. */
    @Test
    public void testLatestVersionMissing() {
        Assert.assertEquals(PicardAggregationUtils.getLatestVersion(MISSING_PROJECT, MISSING_SAMPLE), 0);

        final int[] startVersions = {-1, 0, 1, 2};
        for (int startVersion : startVersions) {
            int found = PicardAggregationUtils.getLatestVersion(MISSING_PROJECT, MISSING_SAMPLE, startVersion);
            Assert.assertEquals(found, startVersion);
        }
    }
}

View File

@ -0,0 +1,48 @@
package org.broadinstitute.sting.utils.broad;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.io.FileNotFoundException;
import static org.broadinstitute.sting.utils.broad.PicardAggregationUtilsUnitTest.*;
/**
 * Tests PicardAnalysisFiles against the live aggregation directory and against
 * analysis files stored in the validation data location.
 */
public class PicardAnalysisFilesUnitTest extends BaseTest {
    /** Builds the path of a file in the validation data location. */
    private static String validationFile(String fileName) {
        return BaseTest.validationDataLocation + fileName;
    }

    /** The latest analysis file and the explicitly versioned one both yield a path. */
    @Test
    public void testParseLatest() throws Exception {
        PicardAnalysisFiles latest = new PicardAnalysisFiles(PROJECT, SAMPLE);
        Assert.assertNotNull(latest.getPath());

        int latestVersion = PicardAggregationUtils.getLatestVersion(PROJECT, SAMPLE);
        PicardAnalysisFiles versioned = new PicardAnalysisFiles(PROJECT, SAMPLE, latestVersion);
        Assert.assertNotNull(versioned.getPath());
    }

    /** A valid analysis file yields its reference, target intervals and bait intervals. */
    @Test
    public void testParseValid() throws Exception {
        String expectedReference = "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta";
        String expectedTargets = "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list";
        String expectedBaits = "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.baits.interval_list";

        PicardAnalysisFiles parsed = new PicardAnalysisFiles(validationFile("picard_analysis_file.txt"));
        Assert.assertEquals(parsed.getReferenceSequence(), expectedReference);
        Assert.assertEquals(parsed.getTargetIntervals(), expectedTargets);
        Assert.assertEquals(parsed.getBaitIntervals(), expectedBaits);
    }

    /** A path that does not exist fails. */
    @Test(expectedExceptions = FileNotFoundException.class)
    public void testParseBadPath() throws Exception {
        new PicardAnalysisFiles(validationFile("non_existent_picard_analysis_file.txt"));
    }

    /** A missing project sample fails. */
    @Test(expectedExceptions = FileNotFoundException.class)
    public void testParseMissingLatest() throws Exception {
        new PicardAnalysisFiles(MISSING_PROJECT, MISSING_SAMPLE);
    }

    /** A version beyond the latest one fails. */
    @Test(expectedExceptions = FileNotFoundException.class)
    public void testParseMissingVersion() throws Exception {
        int beyondLatest = PicardAggregationUtils.getLatestVersion(PROJECT, SAMPLE) + 2;
        new PicardAnalysisFiles(PROJECT, SAMPLE, beyondLatest);
    }

    /** A file listing different references fails when the single reference is requested. */
    @Test(expectedExceptions = UnsupportedOperationException.class)
    public void testParseMultipleReferences() throws Exception {
        PicardAnalysisFiles parsed = new PicardAnalysisFiles(validationFile("picard_analysis_file_with_different_references.txt"));
        parsed.getReferenceSequence();
    }
}

View File

@ -0,0 +1,65 @@
package org.broadinstitute.sting.utils.broad;
import junit.framework.Assert;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.datasources.pipeline.Pipeline;
import org.broadinstitute.sting.datasources.pipeline.PipelineSample;
import org.broadinstitute.sting.utils.yaml.YamlUtils;
import org.testng.annotations.Test;
import static org.broadinstitute.sting.utils.broad.PicardAggregationUtilsUnitTest.*;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
/**
 * Tests PicardPipeline.parse with a generated tsv and a generated yaml
 * that both describe the same project sample.
 */
public class PicardPipelineUnitTest {
    /** A tsv pipeline is named after the tsv file and is filled in with the Picard files. */
    @Test
    public void testParseTsv() throws IOException {
        File tsv = writeTsv(PROJECT, SAMPLE);
        Pipeline pipeline = PicardPipeline.parse(tsv);
        validatePipeline(pipeline, FilenameUtils.getBaseName(tsv.getPath()));
    }

    /** A yaml pipeline keeps its own project name and is filled in with the Picard files. */
    @Test
    public void testParseYaml() throws IOException {
        File yaml = writeYaml("project_name", PROJECT, SAMPLE);
        Pipeline pipeline = PicardPipeline.parse(yaml);
        validatePipeline(pipeline, "project_name");
    }

    /**
     * Checks the project name, that the project files exist, and the single sample.
     * junit.framework.Assert.assertEquals takes the expected value first, so the
     * expected value is passed first to keep failure messages the right way round.
     * @param pipeline Parsed pipeline.
     * @param name Expected project name.
     */
    private void validatePipeline(Pipeline pipeline, String name) {
        Assert.assertEquals(name, pipeline.getProject().getName());
        Assert.assertTrue("reference not found", pipeline.getProject().getReferenceFile().exists());
        Assert.assertTrue("intervals not found", pipeline.getProject().getIntervalList().exists());
        Assert.assertTrue("refseq not found", pipeline.getProject().getRefseqTable().exists());
        Assert.assertTrue("genotype dbsnp not found", pipeline.getProject().getGenotypeDbsnp().exists());
        Assert.assertTrue("eval dbsnp not found", pipeline.getProject().getEvalDbsnp().exists());

        Assert.assertEquals(1, pipeline.getSamples().size());
        for (PipelineSample sample: pipeline.getSamples()) {
            Assert.assertEquals(PROJECT + "_" + SAMPLE, sample.getId());
            Assert.assertTrue("bam not found", sample.getBamFiles().get(PicardPipeline.PICARD_BAM_TYPE).exists());
            Assert.assertEquals(PROJECT, sample.getTags().get(PicardPipeline.PROJECT_TAG));
            Assert.assertEquals(SAMPLE, sample.getTags().get(PicardPipeline.SAMPLE_TAG));
        }
    }

    /**
     * Writes a temporary tsv holding a single project [tab] sample line.
     * @param project Project
     * @param sample Sample
     * @return the temporary tsv.
     */
    private File writeTsv(String project, String sample) throws IOException {
        File tsv = BaseTest.createTempFile("pipeline", ".tsv");
        FileUtils.writeLines(tsv, Collections.singletonList(project + "\t" + sample));
        return tsv;
    }

    /**
     * Writes a temporary yaml pipeline with one sample tagged with the project and sample.
     * @param projectName Name of the pipeline project.
     * @param project Project
     * @param sample Sample
     * @return the temporary yaml.
     */
    private File writeYaml(String projectName, String project, String sample) throws IOException {
        File yaml = BaseTest.createTempFile("pipeline", ".yaml");
        PipelineSample pipelineSample = new PipelineSample();
        pipelineSample.getTags().put(PicardPipeline.PROJECT_TAG, project);
        pipelineSample.getTags().put(PicardPipeline.SAMPLE_TAG, sample);
        Pipeline pipeline = new Pipeline();
        pipeline.getProject().setName(projectName);
        pipeline.getSamples().add(pipelineSample);
        YamlUtils.dump(pipeline, yaml);
        return yaml;
    }
}

View File

@ -0,0 +1,49 @@
package org.broadinstitute.sting.utils.broad;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.io.File;
/**
 * Tests the names, file paths, dbsnp versions and lookup of ReferenceData.
 */
public class ReferenceDataUnitTest {
    /** Asserts that a file exists at the path. */
    private static void assertExists(String path) {
        Assert.assertTrue(new File(path).exists());
    }

    /** Each reference reports its lower case name. */
    @Test
    public void testNames() {
        Assert.assertEquals(ReferenceData.HG18.getName(), "hg18");
        Assert.assertEquals(ReferenceData.HG19.getName(), "hg19");
    }

    /** Every file listed for every reference exists. */
    @Test
    public void testFilesExist() {
        for (ReferenceData referenceData: ReferenceData.values()) {
            assertExists(referenceData.getReference());
            assertExists(referenceData.getRefseq());
            for (int dbsnpVersion: referenceData.getDbsnpVersions()) {
                assertExists(referenceData.getDbsnp(dbsnpVersion));
            }
        }
    }

    /** The known dbsnp versions exist and an unknown version yields null. */
    @Test
    public void testDbsnps() {
        assertExists(ReferenceData.HG18.getDbsnp(129));
        assertExists(ReferenceData.HG19.getDbsnp(129));
        assertExists(ReferenceData.HG19.getDbsnp(132));
        Assert.assertNull(ReferenceData.HG19.getDbsnp(130));
    }

    /** The dbsnp type follows the file extension and is null for an unknown version. */
    @Test
    public void testDbsnpTypes() {
        Assert.assertEquals(ReferenceData.HG18.getDbsnpType(129), "ROD");
        Assert.assertEquals(ReferenceData.HG19.getDbsnpType(129), "VCF");
        Assert.assertEquals(ReferenceData.HG19.getDbsnpType(132), "VCF");
        Assert.assertNull(ReferenceData.HG19.getDbsnpType(130));
    }

    /** The reference paths from BaseTest map back to their reference data; an unknown path yields null. */
    @Test
    public void testGetByReference() {
        ReferenceData hg18 = ReferenceData.getByReference(BaseTest.hg18Reference);
        Assert.assertEquals(hg18, ReferenceData.HG18);

        ReferenceData hg19 = ReferenceData.getByReference(BaseTest.hg19Reference);
        Assert.assertEquals(hg19, ReferenceData.HG19);

        Assert.assertNull(ReferenceData.getByReference("none"));
    }
}