Change default behavior of SelectVariants to trim remaining alleles when samples are subset. -noTrim argument preserves original alleles. Add test for trimming.
This commit is contained in:
parent
31cb47b9e6
commit
783a4fd651
|
|
@ -378,4 +378,12 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
UserException.class);
|
UserException.class);
|
||||||
executeTest("InvalidJexl", spec);
|
executeTest("InvalidJexl", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAlleleTrimming() {
|
||||||
|
final String testFile = privateTestDir + "forHardLeftAlignVariantsTest.vcf";
|
||||||
|
final String cmd = "-T SelectVariants -R " + b36KGReference + " -sn NA12878 -env "
|
||||||
|
+ testFile + " -o %s --no_cmdline_in_header";
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("69c3f59c132418ec10117aa395addfea"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -100,7 +100,7 @@ public class SelectVariantsParallelIntegrationTest extends WalkerTest {
|
||||||
{ // AD and PL decoding race condition
|
{ // AD and PL decoding race condition
|
||||||
final String testfile = privateTestDir + "race_condition.vcf";
|
final String testfile = privateTestDir + "race_condition.vcf";
|
||||||
final String args = "-env -sn SAMPLE -L 1:1-10,000,000 -V " + testfile;
|
final String args = "-env -sn SAMPLE -L 1:1-10,000,000 -V " + testfile;
|
||||||
new ParallelSelectTestProvider(b37KGReference, args, "62e6156387d6e91bd2b08ef649cb1129", nt);
|
new ParallelSelectTestProvider(b37KGReference, args, "e86c6eb105ecdd3ff026999ffc692821", nt);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -104,7 +104,7 @@ import java.util.*;
|
||||||
* -se 'SAMPLE.+PARC'
|
* -se 'SAMPLE.+PARC'
|
||||||
* -select "QD > 10.0"
|
* -select "QD > 10.0"
|
||||||
*
|
*
|
||||||
* Select a sample and exclude non-variant loci and filtered loci:
|
* Select a sample and exclude non-variant loci and filtered loci (trim remaining alleles by default):
|
||||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||||
* -R ref.fasta \
|
* -R ref.fasta \
|
||||||
* -T SelectVariants \
|
* -T SelectVariants \
|
||||||
|
|
@ -114,6 +114,16 @@ import java.util.*;
|
||||||
* -env \
|
* -env \
|
||||||
* -ef
|
* -ef
|
||||||
*
|
*
|
||||||
|
* Select a sample, subset remaining alleles, but don't trim:
|
||||||
|
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||||
|
* -R ref.fasta \
|
||||||
|
* -T SelectVariants \
|
||||||
|
* --variant input.vcf \
|
||||||
|
* -o output.vcf \
|
||||||
|
* -sn SAMPLE_1_ACTG \
|
||||||
|
* -env \
|
||||||
|
* -noTrim
|
||||||
|
*
|
||||||
* Select a sample and restrict the output vcf to a set of intervals:
|
* Select a sample and restrict the output vcf to a set of intervals:
|
||||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||||
* -R ref.fasta \
|
* -R ref.fasta \
|
||||||
|
|
@ -234,6 +244,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
@Argument(fullName="excludeFiltered", shortName="ef", doc="Don't include filtered loci in the analysis", required=false)
|
@Argument(fullName="excludeFiltered", shortName="ef", doc="Don't include filtered loci in the analysis", required=false)
|
||||||
protected boolean EXCLUDE_FILTERED = false;
|
protected boolean EXCLUDE_FILTERED = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default is to remove bases common to all remaining alleles, leaving only their minimal representation.
|
||||||
|
* If this argument is set, original alleles from input VCF will be preserved.
|
||||||
|
*/
|
||||||
|
@Argument(fullName="preserveAlleles", shortName="noTrim", doc="Preserve original alleles, do not trim", required=false)
|
||||||
|
protected boolean preserveAlleles = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* When this argument is used, we can choose to include only multiallelic or biallelic sites, depending on how many alleles are listed in the ALT column of a vcf.
|
* When this argument is used, we can choose to include only multiallelic or biallelic sites, depending on how many alleles are listed in the ALT column of a vcf.
|
||||||
* For example, a multiallelic record such as:
|
* For example, a multiallelic record such as:
|
||||||
|
|
@ -509,7 +526,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
if ( containsIndelLargerThan(vc, maxIndelSize) )
|
if ( containsIndelLargerThan(vc, maxIndelSize) )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
VariantContext sub = subsetRecord(vc, EXCLUDE_NON_VARIANTS);
|
VariantContext sub = subsetRecord(vc, EXCLUDE_NON_VARIANTS, preserveAlleles);
|
||||||
|
|
||||||
if ( (!EXCLUDE_NON_VARIANTS || sub.isPolymorphicInSamples()) && (!EXCLUDE_FILTERED || !sub.isFiltered()) ) {
|
if ( (!EXCLUDE_NON_VARIANTS || sub.isPolymorphicInSamples()) && (!EXCLUDE_FILTERED || !sub.isFiltered()) ) {
|
||||||
boolean failedJexlMatch = false;
|
boolean failedJexlMatch = false;
|
||||||
|
|
@ -665,7 +682,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
* @param excludeNonVariants should we exclude sites that have AC=0 for any alternate alleles?
|
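* @param excludeNonVariants should we exclude sites that have AC=0 for any alternate alleles?
* @param preserveAlleles should we keep the original alleles (true) rather than trimming the remaining alleles to their minimal representation (false)?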
|
||||||
* @return the subsetted VariantContext
|
* @return the subsetted VariantContext
|
||||||
*/
|
*/
|
||||||
private VariantContext subsetRecord(final VariantContext vc, final boolean excludeNonVariants) {
|
private VariantContext subsetRecord(final VariantContext vc, final boolean excludeNonVariants, final boolean preserveAlleles) {
|
||||||
if ( NO_SAMPLES_SPECIFIED || samples.isEmpty() )
|
if ( NO_SAMPLES_SPECIFIED || samples.isEmpty() )
|
||||||
return vc;
|
return vc;
|
||||||
|
|
||||||
|
|
@ -702,7 +719,11 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
|
|
||||||
addAnnotations(builder, vc, sub.getSampleNames());
|
addAnnotations(builder, vc, sub.getSampleNames());
|
||||||
|
|
||||||
return builder.make();
|
final VariantContext subset = builder.make();
|
||||||
|
|
||||||
|
final VariantContext trimmed = preserveAlleles? subset : GATKVariantContextUtils.trimAlleles(subset,true,true);
|
||||||
|
|
||||||
|
return trimmed;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue