Cleanup of the utils.broad package.
Using Picard IoUtils on sample names.
This commit is contained in:
parent
444eae316c
commit
b6bc64a0c8
|
|
@ -22,7 +22,7 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.datasources.pipeline;
|
||||
package org.broadinstitute.sting.pipeline;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
|
@ -22,7 +22,7 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.datasources.pipeline;
|
||||
package org.broadinstitute.sting.pipeline;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Map;
|
||||
|
|
@ -22,7 +22,7 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.datasources.pipeline;
|
||||
package org.broadinstitute.sting.pipeline;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Map;
|
||||
|
|
@ -1,149 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.broad;
|
||||
|
||||
import org.apache.commons.io.filefilter.RegexFileFilter;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileFilter;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
 * Locates per-sample output inside the Picard aggregation directory tree.
 *
 * <p>Paths are built as {@code PICARD_AGGREGATION_DIR + project/sample/v<version>/}. A version
 * is treated as finished when its directory contains a {@code finished.txt} file.
 */
public class PicardAggregationUtils {
    public static final String PICARD_AGGREGATION_DIR = "/seq/picard_aggregation/";

    /** Marker file whose presence means a version directory is finished. */
    private static final String FINISHED_MARKER = "finished.txt";

    /** How many versions ahead of the current one are probed in each round. */
    private static final int PROBE_WINDOW = 3;

    /** Matches version directory names such as {@code v12}; compiled once and reused. */
    private static final java.util.regex.Pattern VERSION_DIR_NAME =
            java.util.regex.Pattern.compile("v\\d+");

    /** Accepts files whose whole name matches {@link #VERSION_DIR_NAME}. */
    private static final FileFilter VERSION_DIR_FILTER = new FileFilter() {
        @Override
        public boolean accept(File file) {
            return VERSION_DIR_NAME.matcher(file.getName()).matches();
        }
    };

    /**
     * Returns the path to the sample BAM for an explicit version.
     * @param project Project
     * @param sample Sample
     * @param version Version
     * @return The versioned sample directory followed by {@code <sample>.bam}.
     */
    public static String getSampleBam(String project, String sample, int version) {
        return getSampleDir(project, sample, version) + sample + ".bam";
    }

    /**
     * Returns the path to the BAM in the latest finished version directory.
     * @param project Project
     * @param sample Sample
     * @return The path to the latest BAM.
     * @throws FileNotFoundException If a finished directory cannot be found for a sample.
     */
    public static String getSampleBam(String project, String sample) throws FileNotFoundException {
        return getSampleDir(project, sample) + sample + ".bam";
    }

    /**
     * Returns the sample directory for an explicit version. The path is only formatted;
     * the file system is not consulted.
     * @param project Project
     * @param sample Sample
     * @param version Version
     * @return the sample directory, ending with a path separator.
     */
    public static String getSampleDir(String project, String sample, int version) {
        return PICARD_AGGREGATION_DIR + String.format("%s/%s/v%d/", project, sample, version);
    }

    /**
     * Returns the latest finished directory for this project sample.
     * @param project Project
     * @param sample Sample
     * @return The path to the latest finished directory.
     * @throws FileNotFoundException If a finished directory cannot be found for a sample.
     */
    public static String getSampleDir(String project, String sample) throws FileNotFoundException {
        int latestVersion = getLatestVersion(project, sample);
        if (latestVersion == 0)
            throw new FileNotFoundException("Unable to find a finished directory for project sample " + project + "/" + sample);
        return getSampleDir(project, sample, latestVersion);
    }

    /**
     * Returns the latest finished version directory.
     * Because isilon metadata operations are relatively slow this method
     * tries not to look for every possible versioned directory.
     * @param project Project
     * @param sample Sample
     * @return The highest finished version directory or 0 if a finished directory was not found.
     */
    public static int getLatestVersion(String project, String sample) {
        return getLatestVersion(project, sample, 0);
    }

    /**
     * Returns the latest finished version directory after startVersion.
     * Because isilon metadata operations are relatively slow this method
     * tries not to look for every possible versioned directory: starting from
     * {@code max(0, startVersion)} it probes the next three versions (furthest first),
     * jumps to the first finished one, and repeats until none of the three is finished.
     * Only when that search ends at version 0 is the sample directory actually listed.
     * @param project Project
     * @param sample Sample
     * @param startVersion minimum version to return
     * @return The highest finished version found, or startVersion itself when nothing
     *         finished was found (including when startVersion is negative).
     */
    public static int getLatestVersion(String project, String sample, int startVersion) {
        int version = Math.max(0, startVersion);
        boolean nextExists = true;
        while (nextExists) {
            nextExists = false;
            for (int next = PROBE_WINDOW; next > 0; next--) {
                if (isFinished(project, sample, version + next)) {
                    version += next;
                    nextExists = true;
                    break;
                }
            }
        }
        // Special case when the version is 0
        // Because isilon storage takes a while to do meta data operations only look through every file if we have to.
        if (version == 0)
            version = findLatestFinishedByListing(project, sample);
        return version == 0 ? startVersion : version;
    }

    /**
     * Returns true if the project sample directory contains a finished.txt
     * @param project Project
     * @param sample Sample
     * @param version Version
     * @return true if the project sample directory contains a finished.txt
     */
    public static boolean isFinished(String project, String sample, int version) {
        return new File(getSampleDir(project, sample, version), FINISHED_MARKER).exists();
    }

    /**
     * Lists the sample directory and returns the highest finished version among its
     * {@code v<digits>} entries.
     * @param project Project
     * @param sample Sample
     * @return the highest finished version, or 0 when the directory is missing, is not a
     *         directory, cannot be read, or contains no finished version.
     */
    private static int findLatestFinishedByListing(String project, String sample) {
        File sampleDir = new File(PICARD_AGGREGATION_DIR + project + "/" + sample);
        // listFiles returns null (not an empty array) when the path does not exist,
        // is not a directory, or an I/O error occurs.
        File[] files = sampleDir.listFiles(VERSION_DIR_FILTER);
        if (files == null)
            return 0;

        int[] versions = new int[files.length];
        int count = 0;
        for (File file : files) {
            try {
                versions[count] = Integer.parseInt(file.getName().substring(1));
                count++;
            } catch (NumberFormatException ignored) {
                // The digits do not fit in an int, so this cannot be a usable version; skip it.
            }
        }
        Arrays.sort(versions, 0, count);
        for (int i = count - 1; i >= 0; i--) {
            if (isFinished(project, sample, versions[i]))
                return versions[i];
        }
        return 0;
    }
}
|
||||
|
|
@ -1,108 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.broad;
|
||||
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
public class PicardAnalysisFiles {
|
||||
private static final String REFERENCE_SEQUENCE_HEADER = "REFERENCE_SEQUENCE";
|
||||
private static final String TARGET_INTERVALS_HEADER = "TARGET_INTERVALS";
|
||||
private static final String BAIT_INTERVALS_HEADER = "BAIT_INTERVALS";
|
||||
private static final String[] ANALYSIS_HEADERS = {REFERENCE_SEQUENCE_HEADER, TARGET_INTERVALS_HEADER, BAIT_INTERVALS_HEADER};
|
||||
private static final String ANALYSIS_FILES = "analysis_files.txt";
|
||||
|
||||
private String path;
|
||||
private Map<String,Set<String>> headerValues = new HashMap<String,Set<String>>();
|
||||
|
||||
public PicardAnalysisFiles(String project, String sample) throws FileNotFoundException {
|
||||
this(PicardAggregationUtils.getSampleDir(project, sample) + ANALYSIS_FILES);
|
||||
}
|
||||
|
||||
public PicardAnalysisFiles(String project, String sample, int version) throws FileNotFoundException {
|
||||
this(PicardAggregationUtils.getSampleDir(project, sample, version) + ANALYSIS_FILES);
|
||||
}
|
||||
|
||||
public PicardAnalysisFiles(String path) throws FileNotFoundException {
|
||||
this.path = path;
|
||||
HashMap<String,Integer> headerIndexes = null;
|
||||
for (String line: new XReadLines(new File(path))) {
|
||||
if (line.startsWith("#"))
|
||||
continue;
|
||||
String[] values = line.split("\t");
|
||||
if (headerIndexes == null) {
|
||||
headerIndexes = new HashMap<String,Integer>();
|
||||
for (String header: ANALYSIS_HEADERS) {
|
||||
headerIndexes.put(header, ArrayUtils.indexOf(values, header));
|
||||
headerValues.put(header, new HashSet<String>());
|
||||
}
|
||||
} else {
|
||||
for (String header: ANALYSIS_HEADERS) {
|
||||
int index = headerIndexes.get(header);
|
||||
if (values.length <= index)
|
||||
throw new StingException(String.format("Unable to parse line in %s: %n%s", path, line));
|
||||
String value = values[index];
|
||||
headerValues.get(header).add(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String getPath() {
|
||||
return path;
|
||||
}
|
||||
|
||||
public String getReferenceSequence() {
|
||||
return getSingle(REFERENCE_SEQUENCE_HEADER);
|
||||
}
|
||||
|
||||
public String getTargetIntervals() {
|
||||
return getSingle(TARGET_INTERVALS_HEADER);
|
||||
}
|
||||
|
||||
public String getBaitIntervals() {
|
||||
return getSingle(BAIT_INTERVALS_HEADER);
|
||||
}
|
||||
|
||||
private String getSingle(String header) {
|
||||
Set<String> values = headerValues.get(header);
|
||||
if (values.size() > 1) {
|
||||
throw new UnsupportedOperationException(path + " contains more than one value for " + header + ": " + values);
|
||||
} else if (values.size() == 0) {
|
||||
return null;
|
||||
} else {
|
||||
String value = values.iterator().next();
|
||||
return "null".equals(value) ? null : value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,123 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.broad;
|
||||
|
||||
import org.apache.commons.io.FilenameUtils;
|
||||
import org.apache.commons.lang.NullArgumentException;
|
||||
import org.broadinstitute.sting.datasources.pipeline.Pipeline;
|
||||
import org.broadinstitute.sting.datasources.pipeline.PipelineProject;
|
||||
import org.broadinstitute.sting.datasources.pipeline.PipelineSample;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
import org.broadinstitute.sting.utils.yaml.YamlUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
/**
|
||||
* Automatically gets the latest version using PicardAggregationUtils.
|
||||
*/
|
||||
public class PicardPipeline {
|
||||
|
||||
protected static final String PROJECT_TAG = "SQUIDProject";
|
||||
protected static final String SAMPLE_TAG = "CollaboratorID";
|
||||
protected static final String PICARD_BAM_TYPE = "cleaned";
|
||||
|
||||
private PicardPipeline() {}
|
||||
|
||||
/**
|
||||
* Creates a new PicardPipeline
|
||||
* @param path Path to a tsv with project [tab] sample on each line or a pipeline yaml.
|
||||
* @return a new Picard
|
||||
* @throws FileNotFoundException when unable to find the file or any supporting files.
|
||||
*/
|
||||
public static Pipeline parse(File path) throws FileNotFoundException {
|
||||
if (path == null)
|
||||
throw new NullArgumentException("path");
|
||||
|
||||
Pipeline pipeline;
|
||||
if (path.getName().endsWith(".tsv")) {
|
||||
pipeline = new Pipeline();
|
||||
pipeline.getProject().setName(FilenameUtils.getBaseName(path.getPath()));
|
||||
for (String line: new XReadLines(path)) {
|
||||
String[] projectSample = line.split("\t");
|
||||
addSample(pipeline, projectSample[0], projectSample[1]);
|
||||
}
|
||||
} else if (path.getName().endsWith(".yaml")) {
|
||||
pipeline = YamlUtils.load(Pipeline.class, path);
|
||||
} else {
|
||||
throw new UserException.BadInput("Path does not end with .tsv or .yaml: " + path.getPath());
|
||||
}
|
||||
|
||||
update(pipeline);
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
private static void update(Pipeline pipeline) throws FileNotFoundException {
|
||||
for (PipelineSample sample: pipeline.getSamples())
|
||||
updateSample(pipeline.getProject(), sample);
|
||||
}
|
||||
|
||||
private static void addSample(Pipeline pipeline, String project, String sample) {
|
||||
PipelineSample pipelineSample = new PipelineSample();
|
||||
pipelineSample.getTags().put(PROJECT_TAG, project);
|
||||
pipelineSample.getTags().put(SAMPLE_TAG, sample);
|
||||
pipeline.getSamples().add(pipelineSample);
|
||||
}
|
||||
|
||||
private static void updateSample(PipelineProject pipelineProject, PipelineSample pipelineSample) throws FileNotFoundException {
|
||||
if (!pipelineSample.getTags().containsKey(PROJECT_TAG) && !pipelineSample.getTags().containsKey(SAMPLE_TAG))
|
||||
return;
|
||||
String project = pipelineSample.getTags().get(PROJECT_TAG);
|
||||
String sample = pipelineSample.getTags().get(SAMPLE_TAG);
|
||||
int version = PicardAggregationUtils.getLatestVersion(project, sample);
|
||||
if (version <= 0)
|
||||
throw new UserException.BadInput("Project sample not found: " + project + "/" + sample);
|
||||
String bam = PicardAggregationUtils.getSampleBam(project, sample, version);
|
||||
if (pipelineSample.getId() == null)
|
||||
pipelineSample.setId(project + "_" + sample);
|
||||
pipelineSample.getBamFiles().put(PICARD_BAM_TYPE, new File(bam));
|
||||
|
||||
PicardAnalysisFiles analysis = new PicardAnalysisFiles(project, sample, version);
|
||||
if (pipelineProject.getReferenceFile() == null) {
|
||||
String referenceSequence = analysis.getReferenceSequence();
|
||||
ReferenceData referenceData = ReferenceData.getByReference(referenceSequence);
|
||||
pipelineProject.setReferenceFile(new File(referenceData.getReference()));
|
||||
pipelineProject.setRefseqTable(new File(referenceData.getRefseq()));
|
||||
if (analysis.getTargetIntervals() != null)
|
||||
pipelineProject.setIntervalList(new File(analysis.getTargetIntervals()));
|
||||
pipelineProject.setEvalDbsnp(new File(referenceData.getDbsnp(129)));
|
||||
if (referenceData.getDbsnpVersions().contains(132)) {
|
||||
pipelineProject.setGenotypeDbsnp(new File(referenceData.getDbsnp(132)));
|
||||
} else {
|
||||
pipelineProject.setGenotypeDbsnp(new File(referenceData.getDbsnp(129)));
|
||||
}
|
||||
} else {
|
||||
String referenceSequence = analysis.getReferenceSequence();
|
||||
if (!pipelineProject.getReferenceFile().getPath().equals(referenceSequence))
|
||||
throw new UserException.BadInput("Samples sequenced with different references");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,135 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.broad;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
|
||||
/**
 * Tracks data related to reference files at the Broad: for each known reference, the
 * fasta path, the refseq table path, and the dbsnp files available by version.
 */
public enum ReferenceData {
    /**
     * HG18 reference data
     */
    HG18("hg18"),

    /**
     * HG19 reference data
     */
    HG19("hg19");

    private static final String REFSEQ_DIR = "/humgen/gsa-hpprojects/GATK/data/Annotations/refseq/";
    private static final String DBSNP_DIR = "/humgen/gsa-hpprojects/GATK/data/";

    /** File suffix that identifies a dbsnp file as VCF. */
    private static final String VCF_SUFFIX = ".vcf";

    private final String name;
    private final String reference;
    private final String refseq;
    /** Unmodifiable, version-sorted map from dbsnp version to file path. */
    private final Map<Integer,String> dbsnps;

    /**
     * Fills in the paths for the named reference.
     * @param name "hg18" or "hg19"
     * @throws UnsupportedOperationException for any other name.
     */
    ReferenceData(String name) {
        this.name = name;
        Map<Integer,String> dbsnps = new TreeMap<Integer,String>();
        if ("hg18".equals(name)) {
            this.reference = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta";
            this.refseq = REFSEQ_DIR + "refGene-big-table-hg18.txt";
            dbsnps.put(129, DBSNP_DIR + "dbsnp_129_hg18.rod");
        } else if ("hg19".equals(name)) {
            this.reference = "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta";
            this.refseq = REFSEQ_DIR + "refGene-big-table-hg19.txt";
            dbsnps.put(129, DBSNP_DIR + "dbsnp_129_b37.vcf");
            dbsnps.put(132, DBSNP_DIR + "dbsnp_132_b37.vcf");
        } else {
            throw new UnsupportedOperationException("Unknown reference: " + name);
        }
        this.dbsnps = Collections.unmodifiableMap(dbsnps);
    }

    /**
     * Returns the name of the reference.
     * @return the name of the reference.
     */
    public String getName() {
        return name;
    }

    /**
     * Returns the path to the fasta.
     * @return the path to the fasta.
     */
    public String getReference() {
        return reference;
    }

    /**
     * Returns the path to the refseq table.
     * @return the path to the refseq table.
     */
    public String getRefseq() {
        return refseq;
    }

    /**
     * Returns the dbsnp versions available.
     * @return the dbsnp versions available, as an unmodifiable set.
     */
    public Set<Integer> getDbsnpVersions() {
        return dbsnps.keySet();
    }

    /**
     * Returns the dbsnp path for the version.
     * @param version version from getDbsnpVersions()
     * @return the dbsnp path for the version, or null when the version is not available.
     */
    public String getDbsnp(int version) {
        return dbsnps.get(version);
    }

    /**
     * Returns the dbsnp type for the version, "VCF" or "DBSNP".
     * @param version version from getDbsnpVersions()
     * @return "VCF" when the path ends with ".vcf" in any letter case, "DBSNP" otherwise,
     *         or null when the version is not available.
     */
    public String getDbsnpType(int version) {
        String dbsnp = getDbsnp(version);
        if (dbsnp == null)
            return null;
        // Case-insensitive suffix test that does not depend on the default locale and does
        // not allocate a lower-cased copy; a negative offset (short path) simply yields false.
        boolean isVcf = dbsnp.regionMatches(true, dbsnp.length() - VCF_SUFFIX.length(),
                VCF_SUFFIX, 0, VCF_SUFFIX.length());
        return isVcf ? "VCF" : "DBSNP";
    }

    /**
     * Returns the reference data based on the path or null.
     * @param reference path to the reference
     * @return the reference data whose fasta path equals the argument, or null.
     */
    public static ReferenceData getByReference(String reference) {
        for (ReferenceData data: ReferenceData.values())
            if (data.reference.equals(reference))
                return data;
        return null;
    }
}
|
||||
|
|
@ -22,8 +22,10 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.datasources.pipeline;
|
||||
package org.broadinstitute.sting.pipeline;
|
||||
|
||||
import org.broadinstitute.sting.pipeline.Pipeline;
|
||||
import org.broadinstitute.sting.pipeline.PipelineSample;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.utils.yaml.YamlUtils;
|
||||
|
||||
|
|
@ -1,61 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.broad;
|
||||
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
public class PicardAggregationUtilsUnitTest {
|
||||
public static final String PROJECT = "C474";
|
||||
public static final String SAMPLE = "NA19651";
|
||||
public static final String MISSING_PROJECT = "C0";
|
||||
public static final String MISSING_SAMPLE = "0";
|
||||
private int latestVersion = -1;
|
||||
|
||||
@Test
|
||||
public void testGetLatestVersion() {
|
||||
latestVersion = PicardAggregationUtils.getLatestVersion(PROJECT, SAMPLE);
|
||||
System.out.println(String.format("Latest version for %s %s is %d", PROJECT, SAMPLE, latestVersion));
|
||||
Assert.assertTrue(latestVersion > 0);
|
||||
Assert.assertEquals(PicardAggregationUtils.getLatestVersion(PROJECT, SAMPLE, latestVersion), latestVersion);
|
||||
}
|
||||
|
||||
@Test(dependsOnMethods = "testGetLatestVersion")
|
||||
public void testGetSampleBam() throws Exception {
|
||||
String test = PicardAggregationUtils.getSampleBam(PROJECT, SAMPLE);
|
||||
String latest = PicardAggregationUtils.getSampleBam(PROJECT, SAMPLE, latestVersion);
|
||||
Assert.assertEquals(test, latest);
|
||||
}
|
||||
|
||||
@Test(dependsOnMethods = "testGetLatestVersion")
|
||||
public void testGetSampleDir() throws Exception {
|
||||
String test = PicardAggregationUtils.getSampleDir(PROJECT, SAMPLE);
|
||||
String latest = PicardAggregationUtils.getSampleDir(PROJECT, SAMPLE, latestVersion);
|
||||
Assert.assertEquals(test, latest);
|
||||
}
|
||||
|
||||
@Test(dependsOnMethods = "testGetLatestVersion")
|
||||
public void testIsFinished() {
|
||||
Assert.assertTrue(PicardAggregationUtils.isFinished(PROJECT, SAMPLE, latestVersion));
|
||||
Assert.assertFalse(PicardAggregationUtils.isFinished(PROJECT, SAMPLE, latestVersion + 1));
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = FileNotFoundException.class)
|
||||
public void testMissingSampleBam() throws Exception {
|
||||
PicardAggregationUtils.getSampleBam(MISSING_PROJECT, MISSING_SAMPLE);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = FileNotFoundException.class)
|
||||
public void testMissingSampleDir() throws Exception {
|
||||
PicardAggregationUtils.getSampleDir(MISSING_PROJECT, MISSING_SAMPLE);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLatestVersionMissing() {
|
||||
Assert.assertEquals(PicardAggregationUtils.getLatestVersion(MISSING_PROJECT, MISSING_SAMPLE), 0);
|
||||
Assert.assertEquals(PicardAggregationUtils.getLatestVersion(MISSING_PROJECT, MISSING_SAMPLE, -1), -1);
|
||||
Assert.assertEquals(PicardAggregationUtils.getLatestVersion(MISSING_PROJECT, MISSING_SAMPLE, 0), 0);
|
||||
Assert.assertEquals(PicardAggregationUtils.getLatestVersion(MISSING_PROJECT, MISSING_SAMPLE, 1), 1);
|
||||
Assert.assertEquals(PicardAggregationUtils.getLatestVersion(MISSING_PROJECT, MISSING_SAMPLE, 2), 2);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,56 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.broad;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
import static org.broadinstitute.sting.utils.broad.PicardAggregationUtilsUnitTest.*;
|
||||
|
||||
public class PicardAnalysisFilesUnitTest extends BaseTest {
|
||||
@Test
|
||||
public void testParseLatest() throws Exception {
|
||||
PicardAnalysisFiles files = new PicardAnalysisFiles(PROJECT, SAMPLE);
|
||||
Assert.assertNotNull(files.getPath());
|
||||
files = new PicardAnalysisFiles(PROJECT, SAMPLE, PicardAggregationUtils.getLatestVersion(PROJECT, SAMPLE));
|
||||
Assert.assertNotNull(files.getPath());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseValid() throws Exception {
|
||||
PicardAnalysisFiles file = new PicardAnalysisFiles(BaseTest.validationDataLocation + "picard_analysis_file.txt");
|
||||
Assert.assertEquals(file.getReferenceSequence(), "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta");
|
||||
Assert.assertEquals(file.getTargetIntervals(), "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list");
|
||||
Assert.assertEquals(file.getBaitIntervals(), "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.baits.interval_list");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseValidWithComments() throws Exception {
|
||||
PicardAnalysisFiles file = new PicardAnalysisFiles(BaseTest.validationDataLocation + "picard_analysis_file_with_comments.txt");
|
||||
Assert.assertEquals(file.getReferenceSequence(), "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta");
|
||||
Assert.assertEquals(file.getTargetIntervals(), "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list");
|
||||
Assert.assertEquals(file.getBaitIntervals(), "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.baits.interval_list");
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = FileNotFoundException.class)
|
||||
public void testParseBadPath() throws Exception {
|
||||
new PicardAnalysisFiles(BaseTest.validationDataLocation + "non_existent_picard_analysis_file.txt");
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = FileNotFoundException.class)
|
||||
public void testParseMissingLatest() throws Exception {
|
||||
new PicardAnalysisFiles(MISSING_PROJECT, MISSING_SAMPLE);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = FileNotFoundException.class)
|
||||
public void testParseMissingVersion() throws Exception {
|
||||
new PicardAnalysisFiles(PROJECT, SAMPLE, PicardAggregationUtils.getLatestVersion(PROJECT, SAMPLE) + 2);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = UnsupportedOperationException.class)
|
||||
public void testParseMultipleReferences() throws Exception {
|
||||
PicardAnalysisFiles file = new PicardAnalysisFiles(BaseTest.validationDataLocation + "picard_analysis_file_with_different_references.txt");
|
||||
file.getReferenceSequence();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,71 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.broad;
|
||||
|
||||
import org.testng.Assert;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.io.FilenameUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.datasources.pipeline.Pipeline;
|
||||
import org.broadinstitute.sting.datasources.pipeline.PipelineSample;
|
||||
import org.broadinstitute.sting.utils.yaml.YamlUtils;
|
||||
import org.testng.annotations.Test;
|
||||
import static org.broadinstitute.sting.utils.broad.PicardAggregationUtilsUnitTest.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
|
||||
public class PicardPipelineUnitTest {
|
||||
@Test
|
||||
public void testParseTsv() throws IOException {
|
||||
File tsv = writeTsv(PROJECT, SAMPLE);
|
||||
Pipeline pipeline = PicardPipeline.parse(tsv);
|
||||
validatePipeline(pipeline, FilenameUtils.getBaseName(tsv.getPath()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseTsvWithPicardComments() throws Exception {
|
||||
File tsv = writeTsv("C460", "HG01359");
|
||||
PicardPipeline.parse(tsv);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseYaml() throws IOException {
|
||||
File yaml = writeYaml("project_name", PROJECT, SAMPLE);
|
||||
Pipeline pipeline = PicardPipeline.parse(yaml);
|
||||
validatePipeline(pipeline, "project_name");
|
||||
}
|
||||
|
||||
private void validatePipeline(Pipeline pipeline, String name) {
|
||||
Assert.assertEquals(pipeline.getProject().getName(), name);
|
||||
Assert.assertTrue(pipeline.getProject().getReferenceFile().exists(), "reference not found");
|
||||
Assert.assertTrue(pipeline.getProject().getIntervalList().exists(), "intervals not found");
|
||||
Assert.assertTrue(pipeline.getProject().getRefseqTable().exists(), "refseq not found");
|
||||
Assert.assertTrue(pipeline.getProject().getGenotypeDbsnp().exists(), "genotype dbsnp not found");
|
||||
Assert.assertTrue(pipeline.getProject().getEvalDbsnp().exists(), "eval dbsnp not found");
|
||||
Assert.assertEquals(pipeline.getSamples().size(), 1);
|
||||
for (PipelineSample sample: pipeline.getSamples()) {
|
||||
Assert.assertEquals(sample.getId(), PROJECT + "_" + SAMPLE);
|
||||
Assert.assertTrue(sample.getBamFiles().get(PicardPipeline.PICARD_BAM_TYPE).exists(), "bam not found");
|
||||
Assert.assertEquals(sample.getTags().get(PicardPipeline.PROJECT_TAG), PROJECT);
|
||||
Assert.assertEquals(sample.getTags().get(PicardPipeline.SAMPLE_TAG), SAMPLE);
|
||||
}
|
||||
}
|
||||
|
||||
private File writeTsv(String project, String sample) throws IOException {
|
||||
File tsv = BaseTest.createTempFile("pipeline", ".tsv");
|
||||
FileUtils.writeLines(tsv, Collections.singletonList(project + "\t" + sample));
|
||||
return tsv;
|
||||
}
|
||||
|
||||
private File writeYaml(String projectName, String project, String sample) throws IOException {
|
||||
File yaml = BaseTest.createTempFile("pipeline", ".yaml");
|
||||
PipelineSample pipelineSample = new PipelineSample();
|
||||
pipelineSample.getTags().put(PicardPipeline.PROJECT_TAG, project);
|
||||
pipelineSample.getTags().put(PicardPipeline.SAMPLE_TAG, sample);
|
||||
Pipeline pipeline = new Pipeline();
|
||||
pipeline.getProject().setName(projectName);
|
||||
pipeline.getSamples().add(pipelineSample);
|
||||
YamlUtils.dump(pipeline, yaml);
|
||||
return yaml;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,49 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.broad;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
public class ReferenceDataUnitTest {
|
||||
@Test
|
||||
public void testNames() {
|
||||
Assert.assertEquals(ReferenceData.HG18.getName(), "hg18");
|
||||
Assert.assertEquals(ReferenceData.HG19.getName(), "hg19");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFilesExist() {
|
||||
for (ReferenceData data: ReferenceData.values()) {
|
||||
Assert.assertTrue(new File(data.getReference()).exists());
|
||||
Assert.assertTrue(new File(data.getRefseq()).exists());
|
||||
for (int version: data.getDbsnpVersions()) {
|
||||
Assert.assertTrue(new File(data.getDbsnp(version)).exists());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDbsnps() {
|
||||
Assert.assertTrue(new File(ReferenceData.HG18.getDbsnp(129)).exists());
|
||||
Assert.assertTrue(new File(ReferenceData.HG19.getDbsnp(129)).exists());
|
||||
Assert.assertTrue(new File(ReferenceData.HG19.getDbsnp(132)).exists());
|
||||
Assert.assertNull(ReferenceData.HG19.getDbsnp(130));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDbsnpTypes() {
|
||||
Assert.assertEquals(ReferenceData.HG18.getDbsnpType(129), "DBSNP");
|
||||
Assert.assertEquals(ReferenceData.HG19.getDbsnpType(129), "VCF");
|
||||
Assert.assertEquals(ReferenceData.HG19.getDbsnpType(132), "VCF");
|
||||
Assert.assertNull(ReferenceData.HG19.getDbsnpType(130));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetByReference() {
|
||||
Assert.assertEquals(ReferenceData.getByReference(BaseTest.hg18Reference), ReferenceData.HG18);
|
||||
Assert.assertEquals(ReferenceData.getByReference(BaseTest.hg19Reference), ReferenceData.HG19);
|
||||
Assert.assertEquals(ReferenceData.getByReference("none"), null);
|
||||
}
|
||||
}
|
||||
|
|
@ -33,7 +33,7 @@
|
|||
<package name="org.broadinstitute.sting.queue.**" />
|
||||
|
||||
<!-- Pipeline + Utils -->
|
||||
<package name="org.broadinstitute.sting.datasources.pipeline.**" />
|
||||
<package name="org.broadinstitute.sting.pipeline.**" />
|
||||
<package name="org.broadinstitute.sting.utils.**" />
|
||||
|
||||
<!-- Scala -->
|
||||
|
|
|
|||
|
|
@ -32,8 +32,6 @@ import java.util.Date
|
|||
import java.text.SimpleDateFormat
|
||||
import org.broadinstitute.sting.BaseTest
|
||||
import org.broadinstitute.sting.queue.QCommandLine
|
||||
import org.broadinstitute.sting.datasources.pipeline.{Pipeline, PipelineProject, PipelineSample}
|
||||
import org.broadinstitute.sting.utils.broad.PicardAggregationUtils
|
||||
import org.broadinstitute.sting.queue.util.{Logging, ProcessController}
|
||||
import java.io.{FileNotFoundException, File}
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportParser
|
||||
|
|
@ -42,23 +40,6 @@ import org.broadinstitute.sting.queue.engine.CommandLinePluginManager
|
|||
|
||||
object PipelineTest extends BaseTest with Logging {
|
||||
|
||||
/** Identifies one 1000G BAM by its squid id, its sample id and the BAM version. */
case class K1gBam(squidId: String, sampleId: String, version: Int)
|
||||
|
||||
/** 1000G BAMs used for validation */
|
||||
val k1gBams = List(
  K1gBam("C474", "NA19651", 2),
  K1gBam("C474", "NA19655", 2),
  K1gBam("C474", "NA19669", 2),
  K1gBam("C454", "NA19834", 2),
  K1gBam("C460", "HG01440", 2),
  K1gBam("C456", "NA12342", 2),
  K1gBam("C456", "NA12748", 2),
  K1gBam("C474", "NA19649", 2),
  K1gBam("C474", "NA19652", 2),
  K1gBam("C474", "NA19654", 2)
)

// Runs while this object initializes: a missing bam throws, an outdated version only logs a warning.
validateK1gBams()
|
||||
|
||||
// NOTE(review): presumably the directory that submitted validation reports are written to; its use is not visible here - confirm.
private val validationReportsDataLocation = "/humgen/gsa-hpprojects/GATK/validationreports/submitted/"
|
||||
|
||||
/** True only when the "pipeline.run" system property is set to exactly "run". */
val run = System.getProperty("pipeline.run") == "run"
|
||||
|
|
@ -92,49 +73,6 @@ object PipelineTest extends BaseTest with Logging {
|
|||
*/
|
||||
private def tempDir(testName: String, jobRunner: String) = testDir(testName, jobRunner) + "temp/"
|
||||
|
||||
/**
|
||||
* Creates a new pipeline from a project.
|
||||
* @param project Pipeline project info.
|
||||
* @param samples List of samples.
|
||||
* @return a new pipeline project.
|
||||
*/
|
||||
def createPipeline(project: PipelineProject, samples: List[PipelineSample]): Pipeline = {
  // Start from an empty pipeline and attach the project first, then its samples.
  val assembled = new Pipeline
  assembled.setProject(project)
  assembled.setSamples(samples)
  assembled
}
|
||||
|
||||
/**
|
||||
* Creates a new pipeline project for hg19 with b37 132 dbsnp for genotyping, and b37 129 dbsnp for eval.
|
||||
* @param projectName Name of the project.
|
||||
* @param intervals The intervals file to use.
|
||||
* @return a new pipeline project.
|
||||
*/
|
||||
def createHg19Project(projectName: String, intervals: String): PipelineProject = {
  val hg19Project = new PipelineProject
  hg19Project.setName(projectName)

  // Fixed hg19 resources taken from BaseTest.
  hg19Project.setReferenceFile(new File(BaseTest.hg19Reference))
  // dbsnp 132 is used for genotyping, dbsnp 129 for evaluation.
  hg19Project.setGenotypeDbsnp(new File(BaseTest.b37dbSNP132))
  hg19Project.setEvalDbsnp(new File(BaseTest.b37dbSNP129))
  hg19Project.setRefseqTable(new File(BaseTest.hg19Refseq))

  // The intervals are the only caller-supplied file.
  hg19Project.setIntervalList(new File(intervals))
  hg19Project
}
|
||||
|
||||
/**
|
||||
* Creates a 1000G pipeline sample from one of the bams.
|
||||
* @param idPrefix Text to prepend to the sample name.
|
||||
* @param k1gBam bam to create the sample for.
|
||||
* @return the created pipeline sample.
|
||||
*/
|
||||
def createK1gSample(idPrefix: String, k1gBam: K1gBam): PipelineSample = {
  val k1gSample = new PipelineSample
  // The sample id is the prefix and the 1000G sample id joined by an underscore.
  val prefixedId = idPrefix + "_" + k1gBam.sampleId
  k1gSample.setId(prefixedId)
  // The Picard bam is registered under the "cleaned" bam type.
  k1gSample.setBamFiles(Map("cleaned" -> getPicardBam(k1gBam)))
  k1gSample
}
|
||||
|
||||
/**
|
||||
* Runs the pipelineTest.
|
||||
* @param pipelineTest test to run.
|
||||
|
|
@ -267,31 +205,6 @@ object PipelineTest extends BaseTest with Logging {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Throws an exception if any of the 1000G bams do not exist and warns if they are out of date.
|
||||
*/
|
||||
private def validateK1gBams(): Unit = {
  // Visit the bams in declaration order: warn about outdated versions, keep the files that are absent.
  val missingBams: List[File] = k1gBams.flatMap { candidate =>
    val newestVersion = getLatestVersion(candidate)
    val bamFile = getPicardBam(candidate)
    if (candidate.version != newestVersion)
      logger.warn("1000G bam is not the latest version %d: %s".format(newestVersion, candidate))
    if (bamFile.exists) Nil else List(bamFile)
  }

  // Report every missing bam at once, one per line.
  if (!missingBams.isEmpty) {
    val lineSeparator = "%n".format()
    throw new FileNotFoundException("The following 1000G bam files are missing.%n%s".format(missingBams.mkString(lineSeparator)))
  }
}
|
||||
|
||||
private def getPicardBam(k1gBam: K1gBam): File = {
  // Look the bam up through PicardAggregationUtils, then wrap the result in a File.
  val bamPath = PicardAggregationUtils.getSampleBam(k1gBam.squidId, k1gBam.sampleId, k1gBam.version)
  new File(bamPath)
}
|
||||
|
||||
private def getLatestVersion(k1gBam: K1gBam): Int = {
  // Delegates to PicardAggregationUtils, passing the recorded version as the third argument.
  val squid = k1gBam.squidId
  val sample = k1gBam.sampleId
  PicardAggregationUtils.getLatestVersion(squid, sample, k1gBam.version)
}
|
||||
|
||||
private var runningCommandLines = Set.empty[QCommandLine]
|
||||
|
||||
Runtime.getRuntime.addShutdownHook(new Thread {
|
||||
|
|
|
|||
Loading…
Reference in New Issue