Simple change to allow a list of samples or regular expressions to be provided in a text file (one line per sample).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4074 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
165dc6d3b0
commit
121b4f23b6
|
|
@ -40,6 +40,8 @@ import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
|
||||||
|
|
@ -47,6 +49,8 @@ import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Takes a VCF file, selects variants based on sample(s) in which it was found and/or on various annotation criteria,
|
* Takes a VCF file, selects variants based on sample(s) in which it was found and/or on various annotation criteria,
|
||||||
|
|
@ -54,7 +58,7 @@ import java.util.regex.Pattern;
|
||||||
*/
|
*/
|
||||||
@Requires(value={},referenceMetaData=@RMD(name="variant", type=ReferenceOrderedDatum.class))
|
@Requires(value={},referenceMetaData=@RMD(name="variant", type=ReferenceOrderedDatum.class))
|
||||||
public class SelectVariants extends RodWalker<Integer, Integer> {
|
public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
@Argument(fullName="sample", shortName="sn", doc="Sample(s) to include. Can be a single sample, specified multiple times for many samples, or a regular expression to select many samples.", required=false)
|
@Argument(fullName="sample", shortName="sn", doc="Sample(s) to include. Can be a single sample, specified multiple times for many samples, a file containing sample names, a regular expression to select many samples, or any combination thereof.", required=false)
|
||||||
public Set<String> SAMPLE_EXPRESSIONS;
|
public Set<String> SAMPLE_EXPRESSIONS;
|
||||||
|
|
||||||
@Argument(shortName="select", doc="One or more criteria to use when selecting the data. Evaluated *after* the specified samples are extracted and the INFO-field annotations are updated.", required=false)
|
@Argument(shortName="select", doc="One or more criteria to use when selecting the data. Evaluated *after* the specified samples are extracted and the INFO-field annotations are updated.", required=false)
|
||||||
|
|
@ -88,16 +92,38 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||||
|
|
||||||
if (SAMPLE_EXPRESSIONS != null) {
|
if (SAMPLE_EXPRESSIONS != null) {
|
||||||
// Let's first assume that the values in SAMPLE_EXPRESSIONS are literal sample names and not regular
|
// Let's first go through the list and see if we were given any files. We'll add every entry in the file to our
|
||||||
|
// sample list set, and treat the entries as if they had been specified on the command line.
|
||||||
|
Set<String> samplesFromFiles = new HashSet<String>();
|
||||||
|
for (String SAMPLE_EXPRESSION : SAMPLE_EXPRESSIONS) {
|
||||||
|
File sampleFile = new File(SAMPLE_EXPRESSION);
|
||||||
|
|
||||||
|
try {
|
||||||
|
XReadLines reader = new XReadLines(sampleFile);
|
||||||
|
|
||||||
|
List<String> lines = reader.readLines();
|
||||||
|
for (String line : lines) {
|
||||||
|
samplesFromFiles.add(line);
|
||||||
|
}
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
// Not a readable file: leave this entry to be handled below as a literal sample name or regular expression.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
SAMPLE_EXPRESSIONS.addAll(samplesFromFiles);
|
||||||
|
|
||||||
|
// Let's now assume that the values in SAMPLE_EXPRESSIONS are literal sample names and not regular
|
||||||
// expressions. Extract those samples specifically so we don't make the mistake of selecting more
|
// expressions. Extract those samples specifically so we don't make the mistake of selecting more
|
||||||
// than what the user really wants.
|
// than what the user really wants.
|
||||||
for (String SAMPLE_EXPRESSION : SAMPLE_EXPRESSIONS) {
|
for (String SAMPLE_EXPRESSION : SAMPLE_EXPRESSIONS) {
|
||||||
|
if (!(new File(SAMPLE_EXPRESSION).exists())) {
|
||||||
if (vcfSamples.contains(SAMPLE_EXPRESSION)) {
|
if (vcfSamples.contains(SAMPLE_EXPRESSION)) {
|
||||||
samples.add(SAMPLE_EXPRESSION);
|
samples.add(SAMPLE_EXPRESSION);
|
||||||
} else {
|
} else {
|
||||||
possibleSampleRegexs.add(SAMPLE_EXPRESSION);
|
possibleSampleRegexs.add(SAMPLE_EXPRESSION);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Now, check the expressions that weren't used in the previous step, and use them as if they're regular expressions
|
// Now, check the expressions that weren't used in the previous step, and use them as if they're regular expressions
|
||||||
for (String sampleRegex : possibleSampleRegexs) {
|
for (String sampleRegex : possibleSampleRegexs) {
|
||||||
|
|
|
||||||
|
|
@ -13,9 +13,10 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testComplexSelection() {
|
public void testComplexSelection() {
|
||||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||||
|
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||||
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString(" -sn A -sn '[CDEH]' -env -ef -select 'AF < 0.2' -B variant,VCF," + testfile),
|
baseTestString(" -sn A -sn '[CDH]' -sn " + samplesFile + " -env -ef -select 'AF < 0.2' -B variant,VCF," + testfile),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("3a15628b5980031c629c0c33e7e60b40")
|
Arrays.asList("3a15628b5980031c629c0c33e7e60b40")
|
||||||
);
|
);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue