diff --git a/java/src/org/broadinstitute/sting/utils/broad/PicardAggregationUtils.java b/java/src/org/broadinstitute/sting/utils/broad/PicardAggregationUtils.java new file mode 100755 index 000000000..8f3ffd741 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/broad/PicardAggregationUtils.java @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.broad; + +import org.apache.commons.io.filefilter.RegexFileFilter; + +import java.io.File; +import java.io.FileFilter; +import java.io.FileNotFoundException; +import java.util.Arrays; + +public class PicardAggregationUtils { + public static final String PICARD_AGGREGATION_DIR = "/seq/picard_aggregation/"; + + /** + * Returns the path to the sample BAM. + * @param project Project + * @param sample Sample + * @param version Version + * @return The path to the sample BAM. + */ + public static String getSampleBam(String project, String sample, int version) { + return getSampleDir(project, sample, version) + sample + ".bam"; + } + + /** + * Returns the path to the latest BAM. + * @param project Project + * @param sample Sample + * @return The path to the latest BAM. + * @throws FileNotFoundException If a finished directory cannot be found for a sample. + */ + public static String getSampleBam(String project, String sample) throws FileNotFoundException { + return getSampleDir(project, sample) + sample + ".bam"; + } + + /** + * Returns the sample directory. + * @param project Project + * @param sample Sample + * @param version Version + * @return the sample directory. + */ + public static String getSampleDir(String project, String sample, int version) { + return PICARD_AGGREGATION_DIR + String.format("%s/%s/v%d/", project, sample, version); + } + + /** + * Returns the latest finished directory for this project sample. + * @param project Project + * @param sample Sample + * @return The path to the latest finished directory. + * @throws FileNotFoundException If a finished directory cannot be found for a sample. + */ + public static String getSampleDir(String project, String sample) throws FileNotFoundException { + int latestVersion = getLatestVersion(project, sample); + if (latestVersion == 0) + throw new FileNotFoundException("Unable to find a finished directory for project sample " + project + "/" + sample); + return getSampleDir(project, sample, latestVersion); + } + + /** + * Returns the latest finished version directory. + * Because isilon metadata operations are relatively slow this method + * tries not to look for every possible versioned directory. + * @param project Project + * @param sample Sample + * @return The highest finished version directory or 0 if a finished directory was not found. + */ + public static int getLatestVersion(String project, String sample) { + return getLatestVersion(project, sample, 0); + } + + /** + * Returns the latest finished version directory after startVersion. + * Because isilon metadata operations are relatively slow this method + * tries not to look for every possible versioned directory. + * @param project Project + * @param sample Sample + * @param startVersion minimum version to return + * @return The highest finished version directory after startVersion + */ + public static int getLatestVersion(String project, String sample, int startVersion) { + int version = Math.max(0, startVersion); + boolean nextExists = true; + while (nextExists) { + nextExists = false; + for (int next = 3; next > 0; next--) + if (isFinished(project, sample, version + next)) { + version += next; + nextExists = true; + break; + } + } + // Special case when the version is 0 + // Because isilon storage takes a while to do meta data operations only look through every file if we have to. + if (version == 0) { + File sampleDir = new File(PICARD_AGGREGATION_DIR + project + "/" + sample); + if (sampleDir.exists()) { + FileFilter filter = new RegexFileFilter("v\\d+"); + File[] files = sampleDir.listFiles(filter); + int[] versions = new int[files.length]; + for (int i = 0; i < files.length; i++) + versions[i] = Integer.parseInt(files[i].getName().substring(1)); + Arrays.sort(versions); + for (int i = versions.length - 1; i >= 0; i--) { + if (isFinished(project, sample, versions[i])) { + version = versions[i]; + break; + } + } + } + } + return version == 0 ? startVersion : version; + } + + /** + * Returns true if the project sample directory contains a finished.txt + * @param project Project + * @param sample Sample + * @param version Version + * @return true if the project sample directory contains a finished.txt + */ + public static boolean isFinished(String project, String sample, int version) { + return new File(getSampleDir(project, sample, version), "finished.txt").exists(); + } +} diff --git a/java/src/org/broadinstitute/sting/utils/broad/PicardAnalysisFiles.java b/java/src/org/broadinstitute/sting/utils/broad/PicardAnalysisFiles.java new file mode 100755 index 000000000..ef01e7e63 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/broad/PicardAnalysisFiles.java @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.broad; + +import org.apache.commons.lang.ArrayUtils; +import org.broadinstitute.sting.utils.text.XReadLines; + +import java.io.File; +import java.io.FileNotFoundException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +public class PicardAnalysisFiles { + private static final String REFERENCE_SEQUENCE_HEADER = "REFERENCE_SEQUENCE"; + private static final String TARGET_INTERVALS_HEADER = "TARGET_INTERVALS"; + private static final String BAIT_INTERVALS_HEADER = "BAIT_INTERVALS"; + private static final String[] ANALYSIS_HEADERS = {REFERENCE_SEQUENCE_HEADER, TARGET_INTERVALS_HEADER, BAIT_INTERVALS_HEADER}; + private static final String ANALYSIS_FILES = "analysis_files.txt"; + + private String path; + private Map> headerValues = new HashMap>(); + + public PicardAnalysisFiles(String project, String sample) throws FileNotFoundException { + this(PicardAggregationUtils.getSampleDir(project, sample) + ANALYSIS_FILES); + } + + public PicardAnalysisFiles(String project, String sample, int version) throws FileNotFoundException { + this(PicardAggregationUtils.getSampleDir(project, sample, version) + ANALYSIS_FILES); + } + + public PicardAnalysisFiles(String path) throws FileNotFoundException { + this.path = path; + HashMap headerIndexes = null; + for (String line: new XReadLines(new File(path))) { + String[] values = line.split("\t"); + if (headerIndexes == null) { + headerIndexes = new HashMap(); + for (String header: ANALYSIS_HEADERS) { + headerIndexes.put(header, ArrayUtils.indexOf(values, header)); + headerValues.put(header, new HashSet()); + } + } else { + for (String header: ANALYSIS_HEADERS) { + String value = values[headerIndexes.get(header)]; + headerValues.get(header).add(value); + } + } + } + } + + public String getPath() { + return path; + } + + public String getReferenceSequence() { + return getSingle(REFERENCE_SEQUENCE_HEADER); + } + + public String getTargetIntervals() { + return getSingle(TARGET_INTERVALS_HEADER); + } + + public String getBaitIntervals() { + return getSingle(BAIT_INTERVALS_HEADER); + } + + private String getSingle(String header) { + Set values = headerValues.get(header); + if (values.size() > 1) { + throw new UnsupportedOperationException(path + " contains more than one value for " + header + ": " + values); + } else if (values.size() == 0) { + return null; + } else { + String value = values.iterator().next(); + return "null".equals(value) ? null : value; + } + } +} diff --git a/java/src/org/broadinstitute/sting/utils/broad/PicardPipeline.java b/java/src/org/broadinstitute/sting/utils/broad/PicardPipeline.java new file mode 100755 index 000000000..96bbb2455 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/broad/PicardPipeline.java @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.broad; + +import org.apache.commons.io.FilenameUtils; +import org.apache.commons.lang.NullArgumentException; +import org.broadinstitute.sting.datasources.pipeline.Pipeline; +import org.broadinstitute.sting.datasources.pipeline.PipelineProject; +import org.broadinstitute.sting.datasources.pipeline.PipelineSample; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.text.XReadLines; +import org.broadinstitute.sting.utils.yaml.YamlUtils; + +import java.io.File; +import java.io.FileNotFoundException; + +/** + * Automatically gets the latest version using PicardAggregationUtils. + */ +public class PicardPipeline { + + protected static final String PROJECT_TAG = "SQUIDProject"; + protected static final String SAMPLE_TAG = "CollaboratorID"; + protected static final String PICARD_BAM_TYPE = "cleaned"; + + private PicardPipeline() {} + + /** + * Creates a new PicardPipeline + * @param path Path to a tsv with project [tab] sample on each line or a pipeline yaml. + * @return a new Picard + * @throws FileNotFoundException when unable to find the file or any supporting files. + */ + public static Pipeline parse(File path) throws FileNotFoundException { + if (path == null) + throw new NullArgumentException("path"); + + Pipeline pipeline; + if (path.getName().endsWith(".tsv")) { + pipeline = new Pipeline(); + pipeline.getProject().setName(FilenameUtils.getBaseName(path.getPath())); + for (String line: new XReadLines(path)) { + String[] projectSample = line.split("\t"); + addSample(pipeline, projectSample[0], projectSample[1]); + } + } else if (path.getName().endsWith(".yaml")) { + pipeline = YamlUtils.load(Pipeline.class, path); + } else { + throw new UserException.BadInput("Path does not end with .tsv or .yaml: " + path.getPath()); + } + + update(pipeline); + return pipeline; + } + + private static void update(Pipeline pipeline) throws FileNotFoundException { + for (PipelineSample sample: pipeline.getSamples()) + updateSample(pipeline.getProject(), sample); + } + + private static void addSample(Pipeline pipeline, String project, String sample) { + PipelineSample pipelineSample = new PipelineSample(); + pipelineSample.getTags().put(PROJECT_TAG, project); + pipelineSample.getTags().put(SAMPLE_TAG, sample); + pipeline.getSamples().add(pipelineSample); + } + + private static void updateSample(PipelineProject pipelineProject, PipelineSample pipelineSample) throws FileNotFoundException { + if (!pipelineSample.getTags().containsKey(PROJECT_TAG) && !pipelineSample.getTags().containsKey(SAMPLE_TAG)) + return; + String project = pipelineSample.getTags().get(PROJECT_TAG); + String sample = pipelineSample.getTags().get(SAMPLE_TAG); + int version = PicardAggregationUtils.getLatestVersion(project, sample); + if (version <= 0) + throw new UserException.BadInput("Project sample not found: " + project + "/" + sample); + String bam = PicardAggregationUtils.getSampleBam(project, sample, version); + if (pipelineSample.getId() == null) + pipelineSample.setId(project + "_" + sample); + pipelineSample.getBamFiles().put(PICARD_BAM_TYPE, new File(bam)); + + PicardAnalysisFiles analysis = new PicardAnalysisFiles(project, sample, version); + if (pipelineProject.getReferenceFile() == null) { + String referenceSequence = analysis.getReferenceSequence(); + ReferenceData referenceData = ReferenceData.getByReference(referenceSequence); + pipelineProject.setReferenceFile(new File(referenceData.getReference())); + pipelineProject.setRefseqTable(new File(referenceData.getRefseq())); + if (analysis.getTargetIntervals() != null) + pipelineProject.setIntervalList(new File(analysis.getTargetIntervals())); + pipelineProject.setEvalDbsnp(new File(referenceData.getDbsnp(129))); + if (referenceData.getDbsnpVersions().contains(132)) { + pipelineProject.setGenotypeDbsnp(new File(referenceData.getDbsnp(132))); + } else { + pipelineProject.setGenotypeDbsnp(new File(referenceData.getDbsnp(129))); + } + } else { + String referenceSequence = analysis.getReferenceSequence(); + if (!pipelineProject.getReferenceFile().getPath().equals(referenceSequence)) + throw new UserException.BadInput("Samples sequenced with different references"); + } + } +} diff --git a/java/src/org/broadinstitute/sting/utils/broad/ReferenceData.java b/java/src/org/broadinstitute/sting/utils/broad/ReferenceData.java new file mode 100755 index 000000000..4c908f430 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/broad/ReferenceData.java @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.broad; + +import java.util.Collections; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; + +/** + * Tracks data related to reference files at the Broad. + */ +public enum ReferenceData { + /** + * HG18 reference data + */ + HG18("hg18"), + + /** + * HG19 reference data + */ + HG19("hg19"); + + private static final String REFSEQ_DIR = "/humgen/gsa-hpprojects/GATK/data/Annotations/refseq/"; + private static final String DBSNP_DIR = "/humgen/gsa-hpprojects/GATK/data/"; + + private final String name; + private final String reference; + private final String refseq; + private final Map dbsnps; + + ReferenceData(String name) { + this.name = name; + Map dbsnps = new TreeMap(); + if ("hg18".equals(name)) { + this.reference = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"; + this.refseq = REFSEQ_DIR + "refGene-big-table-hg18.txt"; + dbsnps.put(129, DBSNP_DIR + "dbsnp_129_hg18.rod"); + } else if ("hg19".equals(name)) { + this.reference = "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta"; + this.refseq = REFSEQ_DIR + "refGene-big-table-hg19.txt"; + dbsnps.put(129, DBSNP_DIR + "dbsnp_129_b37.vcf"); + dbsnps.put(132, DBSNP_DIR + "dbsnp_132_b37.vcf"); + } else + throw new UnsupportedOperationException("Unknown reference: " + name); + this.dbsnps = Collections.unmodifiableMap(dbsnps); + } + + /** + * Returns the name of the reference. + * @return the name of the reference. + */ + public String getName() { + return name; + } + + /** + * Returns the path to the fasta. + * @return the path to the fasta. + */ + public String getReference() { + return reference; + } + + /** + * Returns the path to the refseq table. + * @return the path to the refseq table. + */ + public String getRefseq() { + return refseq; + } + + /** + * Returns the dbsnp versions available. + * @return the dbsnp versions available. + */ + public Set getDbsnpVersions() { + return dbsnps.keySet(); + } + + /** + * Returns the dbsnp path for the version. + * @param version version from getDbsnpVersions() + * @return the dbsnp path for the version. + */ + public String getDbsnp(int version) { + return dbsnps.get(version); + } + + /** + * Returns the dbsnp type for the version, "VCF" or "ROD". + * @param version version from getDbsnpVersions() + * @return the dbsnp type for the version, "VCF" or "ROD". + */ + public String getDbsnpType(int version) { + String dbsnp = getDbsnp(version); + if (dbsnp == null) + return null; + int len = dbsnp.length(); + return dbsnp.substring(len - 3, len).toUpperCase(); + } + + /** + * Returns the reference data based on the path or null. + * @param reference path to the reference + * @return the reference data based on the path or null. + */ + public static ReferenceData getByReference(String reference) { + for (ReferenceData data: ReferenceData.values()) + if (data.reference.equals(reference)) + return data; + return null; + } +} diff --git a/java/test/org/broadinstitute/sting/utils/broad/PicardAggregationUtilsUnitTest.java b/java/test/org/broadinstitute/sting/utils/broad/PicardAggregationUtilsUnitTest.java new file mode 100755 index 000000000..77a8a1ac8 --- /dev/null +++ b/java/test/org/broadinstitute/sting/utils/broad/PicardAggregationUtilsUnitTest.java @@ -0,0 +1,61 @@ +package org.broadinstitute.sting.utils.broad; + +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.FileNotFoundException; + +public class PicardAggregationUtilsUnitTest { + public static final String PROJECT = "C474"; + public static final String SAMPLE = "NA19651"; + public static final String MISSING_PROJECT = "C0"; + public static final String MISSING_SAMPLE = "0"; + private int latestVersion = -1; + + @Test + public void testGetLatestVersion() { + latestVersion = PicardAggregationUtils.getLatestVersion(PROJECT, SAMPLE); + System.out.println(String.format("Latest version for %s %s is %d", PROJECT, SAMPLE, latestVersion)); + Assert.assertTrue(latestVersion > 0); + Assert.assertEquals(PicardAggregationUtils.getLatestVersion(PROJECT, SAMPLE, latestVersion), latestVersion); + } + + @Test(dependsOnMethods = "testGetLatestVersion") + public void testGetSampleBam() throws Exception { + String test = PicardAggregationUtils.getSampleBam(PROJECT, SAMPLE); + String latest = PicardAggregationUtils.getSampleBam(PROJECT, SAMPLE, latestVersion); + Assert.assertEquals(test, latest); + } + + @Test(dependsOnMethods = "testGetLatestVersion") + public void testGetSampleDir() throws Exception { + String test = PicardAggregationUtils.getSampleDir(PROJECT, SAMPLE); + String latest = PicardAggregationUtils.getSampleDir(PROJECT, SAMPLE, latestVersion); + Assert.assertEquals(test, latest); + } + + @Test(dependsOnMethods = "testGetLatestVersion") + public void testIsFinished() { + Assert.assertTrue(PicardAggregationUtils.isFinished(PROJECT, SAMPLE, latestVersion)); + Assert.assertFalse(PicardAggregationUtils.isFinished(PROJECT, SAMPLE, latestVersion + 1)); + } + + @Test(expectedExceptions = FileNotFoundException.class) + public void testMissingSampleBam() throws Exception { + PicardAggregationUtils.getSampleBam(MISSING_PROJECT, MISSING_SAMPLE); + } + + @Test(expectedExceptions = FileNotFoundException.class) + public void testMissingSampleDir() throws Exception { + PicardAggregationUtils.getSampleDir(MISSING_PROJECT, MISSING_SAMPLE); + } + + @Test + public void testLatestVersionMissing() { + Assert.assertEquals(PicardAggregationUtils.getLatestVersion(MISSING_PROJECT, MISSING_SAMPLE), 0); + Assert.assertEquals(PicardAggregationUtils.getLatestVersion(MISSING_PROJECT, MISSING_SAMPLE, -1), -1); + Assert.assertEquals(PicardAggregationUtils.getLatestVersion(MISSING_PROJECT, MISSING_SAMPLE, 0), 0); + Assert.assertEquals(PicardAggregationUtils.getLatestVersion(MISSING_PROJECT, MISSING_SAMPLE, 1), 1); + Assert.assertEquals(PicardAggregationUtils.getLatestVersion(MISSING_PROJECT, MISSING_SAMPLE, 2), 2); + } +} diff --git a/java/test/org/broadinstitute/sting/utils/broad/PicardAnalysisFilesUnitTest.java b/java/test/org/broadinstitute/sting/utils/broad/PicardAnalysisFilesUnitTest.java new file mode 100755 index 000000000..f1e44000c --- /dev/null +++ b/java/test/org/broadinstitute/sting/utils/broad/PicardAnalysisFilesUnitTest.java @@ -0,0 +1,48 @@ +package org.broadinstitute.sting.utils.broad; + +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.FileNotFoundException; + +import static org.broadinstitute.sting.utils.broad.PicardAggregationUtilsUnitTest.*; + +public class PicardAnalysisFilesUnitTest extends BaseTest { + @Test + public void testParseLatest() throws Exception { + PicardAnalysisFiles files = new PicardAnalysisFiles(PROJECT, SAMPLE); + Assert.assertNotNull(files.getPath()); + files = new PicardAnalysisFiles(PROJECT, SAMPLE, PicardAggregationUtils.getLatestVersion(PROJECT, SAMPLE)); + Assert.assertNotNull(files.getPath()); + } + + @Test + public void testParseValid() throws Exception { + PicardAnalysisFiles file = new PicardAnalysisFiles(BaseTest.validationDataLocation + "picard_analysis_file.txt"); + Assert.assertEquals(file.getReferenceSequence(), "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta"); + Assert.assertEquals(file.getTargetIntervals(), "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list"); + Assert.assertEquals(file.getBaitIntervals(), "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.baits.interval_list"); + } + + @Test(expectedExceptions = FileNotFoundException.class) + public void testParseBadPath() throws Exception { + new PicardAnalysisFiles(BaseTest.validationDataLocation + "non_existent_picard_analysis_file.txt"); + } + + @Test(expectedExceptions = FileNotFoundException.class) + public void testParseMissingLatest() throws Exception { + new PicardAnalysisFiles(MISSING_PROJECT, MISSING_SAMPLE); + } + + @Test(expectedExceptions = FileNotFoundException.class) + public void testParseMissingVersion() throws Exception { + new PicardAnalysisFiles(PROJECT, SAMPLE, PicardAggregationUtils.getLatestVersion(PROJECT, SAMPLE) + 2); + } + + @Test(expectedExceptions = UnsupportedOperationException.class) + public void testParseMultipleReferences() throws Exception { + PicardAnalysisFiles file = new PicardAnalysisFiles(BaseTest.validationDataLocation + "picard_analysis_file_with_different_references.txt"); + file.getReferenceSequence(); + } +} diff --git a/java/test/org/broadinstitute/sting/utils/broad/PicardPipelineUnitTest.java b/java/test/org/broadinstitute/sting/utils/broad/PicardPipelineUnitTest.java new file mode 100755 index 000000000..a17074b01 --- /dev/null +++ b/java/test/org/broadinstitute/sting/utils/broad/PicardPipelineUnitTest.java @@ -0,0 +1,65 @@ +package org.broadinstitute.sting.utils.broad; + +import junit.framework.Assert; +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.FilenameUtils; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.datasources.pipeline.Pipeline; +import org.broadinstitute.sting.datasources.pipeline.PipelineSample; +import org.broadinstitute.sting.utils.yaml.YamlUtils; +import org.testng.annotations.Test; +import static org.broadinstitute.sting.utils.broad.PicardAggregationUtilsUnitTest.*; + +import java.io.File; +import java.io.IOException; +import java.util.Collections; + +public class PicardPipelineUnitTest { + @Test + public void testParseTsv() throws IOException { + File tsv = writeTsv(PROJECT, SAMPLE); + Pipeline pipeline = PicardPipeline.parse(tsv); + validatePipeline(pipeline, FilenameUtils.getBaseName(tsv.getPath())); + } + + @Test + public void testParseYaml() throws IOException { + File yaml = writeYaml("project_name", PROJECT, SAMPLE); + Pipeline pipeline = PicardPipeline.parse(yaml); + validatePipeline(pipeline, "project_name"); + } + + private void validatePipeline(Pipeline pipeline, String name) { + Assert.assertEquals(pipeline.getProject().getName(), name); + Assert.assertTrue("reference not found", pipeline.getProject().getReferenceFile().exists()); + Assert.assertTrue("intervals not found", pipeline.getProject().getIntervalList().exists()); + Assert.assertTrue("refseq not found", pipeline.getProject().getRefseqTable().exists()); + Assert.assertTrue("genotype dbsnp not found", pipeline.getProject().getGenotypeDbsnp().exists()); + Assert.assertTrue("eval dbsnp not found", pipeline.getProject().getEvalDbsnp().exists()); + Assert.assertEquals(pipeline.getSamples().size(), 1); + for (PipelineSample sample: pipeline.getSamples()) { + Assert.assertEquals(sample.getId(), PROJECT + "_" + SAMPLE); + Assert.assertTrue("bam not found", sample.getBamFiles().get(PicardPipeline.PICARD_BAM_TYPE).exists()); + Assert.assertEquals(sample.getTags().get(PicardPipeline.PROJECT_TAG), PROJECT); + Assert.assertEquals(sample.getTags().get(PicardPipeline.SAMPLE_TAG), SAMPLE); + } + } + + private File writeTsv(String project, String sample) throws IOException { + File tsv = BaseTest.createTempFile("pipeline", ".tsv"); + FileUtils.writeLines(tsv, Collections.singletonList(project + "\t" + sample)); + return tsv; + } + + private File writeYaml(String projectName, String project, String sample) throws IOException { + File yaml = BaseTest.createTempFile("pipeline", ".yaml"); + PipelineSample pipelineSample = new PipelineSample(); + pipelineSample.getTags().put(PicardPipeline.PROJECT_TAG, project); + pipelineSample.getTags().put(PicardPipeline.SAMPLE_TAG, sample); + Pipeline pipeline = new Pipeline(); + pipeline.getProject().setName(projectName); + pipeline.getSamples().add(pipelineSample); + YamlUtils.dump(pipeline, yaml); + return yaml; + } +} diff --git a/java/test/org/broadinstitute/sting/utils/broad/ReferenceDataUnitTest.java b/java/test/org/broadinstitute/sting/utils/broad/ReferenceDataUnitTest.java new file mode 100755 index 000000000..e34f0d67e --- /dev/null +++ b/java/test/org/broadinstitute/sting/utils/broad/ReferenceDataUnitTest.java @@ -0,0 +1,49 @@ +package org.broadinstitute.sting.utils.broad; + +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; + +public class ReferenceDataUnitTest { + @Test + public void testNames() { + Assert.assertEquals(ReferenceData.HG18.getName(), "hg18"); + Assert.assertEquals(ReferenceData.HG19.getName(), "hg19"); + } + + @Test + public void testFilesExist() { + for (ReferenceData data: ReferenceData.values()) { + Assert.assertTrue(new File(data.getReference()).exists()); + Assert.assertTrue(new File(data.getRefseq()).exists()); + for (int version: data.getDbsnpVersions()) { + Assert.assertTrue(new File(data.getDbsnp(version)).exists()); + } + } + } + + @Test + public void testDbsnps() { + Assert.assertTrue(new File(ReferenceData.HG18.getDbsnp(129)).exists()); + Assert.assertTrue(new File(ReferenceData.HG19.getDbsnp(129)).exists()); + Assert.assertTrue(new File(ReferenceData.HG19.getDbsnp(132)).exists()); + Assert.assertNull(ReferenceData.HG19.getDbsnp(130)); + } + + @Test + public void testDbsnpTypes() { + Assert.assertEquals(ReferenceData.HG18.getDbsnpType(129), "ROD"); + Assert.assertEquals(ReferenceData.HG19.getDbsnpType(129), "VCF"); + Assert.assertEquals(ReferenceData.HG19.getDbsnpType(132), "VCF"); + Assert.assertNull(ReferenceData.HG19.getDbsnpType(130)); + } + + @Test + public void testGetByReference() { + Assert.assertEquals(ReferenceData.getByReference(BaseTest.hg18Reference), ReferenceData.HG18); + Assert.assertEquals(ReferenceData.getByReference(BaseTest.hg19Reference), ReferenceData.HG19); + Assert.assertEquals(ReferenceData.getByReference("none"), null); + } +}