Allow LeftAlignAndTrimVariants to handle alleles longer than the default processing window
This commit is contained in:
parent
0d936b28c4
commit
36ca9fe898
|
|
@ -51,9 +51,14 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.variantutils;
|
package org.broadinstitute.gatk.tools.walkers.variantutils;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.apache.log4j.Level;
|
||||||
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
||||||
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -70,6 +75,53 @@ public class LeftAlignAndTrimVariantsIntegrationTest extends WalkerTest {
|
||||||
executeTest("test left alignment", spec);
|
executeTest("test left alignment", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testLeftAlignmentLongAllelesError() throws IOException {
|
||||||
|
|
||||||
|
// Need to see log INFO messages
|
||||||
|
Level level = logger.getLevel();
|
||||||
|
logger.setLevel(Level.INFO);
|
||||||
|
|
||||||
|
File logFile = createTempFile("testLargeReferenceAlleleError.log", ".tmp");
|
||||||
|
String logFileName = logFile.getAbsolutePath();
|
||||||
|
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T LeftAlignAndTrimVariants -o %s -R " + b37KGReference + " --variant:vcf " + privateTestDir + "longAlleles.vcf --no_cmdline_in_header -log " + logFileName,
|
||||||
|
1,
|
||||||
|
Arrays.asList("136f88a5bd07a022a3404089359cb8ee"));
|
||||||
|
executeTest("test left alignment with long alleles with an error", spec);
|
||||||
|
|
||||||
|
// Make sure the "reference allele too long" message is in the log
|
||||||
|
Assert.assertTrue(FileUtils.readFileToString(logFile).contains(ValidateVariants.REFERENCE_ALLELE_TOO_LONG_MSG));
|
||||||
|
|
||||||
|
// Set the log level back
|
||||||
|
logger.setLevel(level);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testLeftAlignmentLongAllelesFix() throws IOException {
|
||||||
|
|
||||||
|
// Need to see log INFO messages
|
||||||
|
Level level = logger.getLevel();
|
||||||
|
logger.setLevel(Level.INFO);
|
||||||
|
|
||||||
|
File logFile = createTempFile("testLargeReferenceAlleleError.log", ".tmp");
|
||||||
|
String logFileName = logFile.getAbsolutePath();
|
||||||
|
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T LeftAlignAndTrimVariants -o %s -R " + b37KGReference + " --variant:vcf " + privateTestDir +
|
||||||
|
"longAlleles.vcf --no_cmdline_in_header --reference_window_stop 208 -log " + logFileName,
|
||||||
|
1,
|
||||||
|
Arrays.asList("c4ca5520ee499da171053059e3717b2f"));
|
||||||
|
executeTest("test left alignment with long alleles fix", spec);
|
||||||
|
|
||||||
|
// Make sure the "reference allele too long" message is in the log
|
||||||
|
Assert.assertFalse(FileUtils.readFileToString(logFile).contains(ValidateVariants.REFERENCE_ALLELE_TOO_LONG_MSG));
|
||||||
|
|
||||||
|
// Set the log level back
|
||||||
|
logger.setLevel(level);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testLeftAlignmentDontTrim() {
|
public void testLeftAlignmentDontTrim() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
|
|
||||||
|
|
@ -94,17 +94,27 @@ import java.util.*;
|
||||||
* -T LeftAlignAndTrimVariants \
|
* -T LeftAlignAndTrimVariants \
|
||||||
* -R reference.fasta \
|
* -R reference.fasta \
|
||||||
* --variant input.vcf \
|
* --variant input.vcf \
|
||||||
* -o output.vcf
|
* -o output.vcf \
|
||||||
* --dontTrimAlleles
|
* --dontTrimAlleles
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
|
* <h4>Left align and trim alleles, process alleles <= 208 bases</h4>
|
||||||
|
* <pre>
|
||||||
|
* java -jar GenomeAnalysisTK.jar \
|
||||||
|
* -T LeftAlignAndTrimVariants \
|
||||||
|
* -R reference.fasta \
|
||||||
|
* --variant input.vcf \
|
||||||
|
* -o output.vcf \
|
||||||
|
* --reference_window_stop 208
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
* <h4>Split multiallics into biallelics, left align and trim alleles</h4>
|
* <h4>Split multiallics into biallelics, left align and trim alleles</h4>
|
||||||
* <pre>
|
* <pre>
|
||||||
* java -jar GenomeAnalysisTK.jar \
|
* java -jar GenomeAnalysisTK.jar \
|
||||||
* -T LeftAlignAndTrimVariants \
|
* -T LeftAlignAndTrimVariants \
|
||||||
* -R reference.fasta \
|
* -R reference.fasta \
|
||||||
* --variant input.vcf \
|
* --variant input.vcf \
|
||||||
* -o output.vcf
|
* -o output.vcf \
|
||||||
* --splitMultiallelics
|
* --splitMultiallelics
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
|
|
@ -114,8 +124,8 @@ import java.util.*;
|
||||||
* -T LeftAlignAndTrimVariants \
|
* -T LeftAlignAndTrimVariants \
|
||||||
* -R reference.fasta \
|
* -R reference.fasta \
|
||||||
* --variant input.vcf \
|
* --variant input.vcf \
|
||||||
* -o output.vcf
|
* -o output.vcf \
|
||||||
* --splitMultiallelics
|
* --splitMultiallelics \
|
||||||
* --dontTrimAlleles
|
* --dontTrimAlleles
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
|
|
@ -124,6 +134,9 @@ import java.util.*;
|
||||||
@Reference(window=@Window(start=-200,stop=200)) // WARNING: if this changes,MAX_INDEL_LENGTH needs to change as well!
|
@Reference(window=@Window(start=-200,stop=200)) // WARNING: if this changes,MAX_INDEL_LENGTH needs to change as well!
|
||||||
public class LeftAlignAndTrimVariants extends RodWalker<Integer, Integer> {
|
public class LeftAlignAndTrimVariants extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
|
// Log message for a reference allele that is too long
|
||||||
|
protected static final String REFERENCE_ALLELE_TOO_LONG_MSG = "Reference allele is too long";
|
||||||
|
|
||||||
@ArgumentCollection
|
@ArgumentCollection
|
||||||
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||||
|
|
||||||
|
|
@ -147,6 +160,10 @@ public class LeftAlignAndTrimVariants extends RodWalker<Integer, Integer> {
|
||||||
private VariantContextWriter writer;
|
private VariantContextWriter writer;
|
||||||
|
|
||||||
private static final int MAX_INDEL_LENGTH = 200; // needs to match reference window size!
|
private static final int MAX_INDEL_LENGTH = 200; // needs to match reference window size!
|
||||||
|
|
||||||
|
// Stop of the expanded window for which the reference context should be provided, relative to the locus.
|
||||||
|
private int referenceWindowStop;
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
String trackName = variantCollection.variants.getName();
|
String trackName = variantCollection.variants.getName();
|
||||||
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
|
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
|
||||||
|
|
@ -155,7 +172,9 @@ public class LeftAlignAndTrimVariants extends RodWalker<Integer, Integer> {
|
||||||
Set<VCFHeaderLine> headerLines = vcfHeaders.get(trackName).getMetaDataInSortedOrder();
|
Set<VCFHeaderLine> headerLines = vcfHeaders.get(trackName).getMetaDataInSortedOrder();
|
||||||
baseWriter.writeHeader(new VCFHeader(headerLines, samples));
|
baseWriter.writeHeader(new VCFHeader(headerLines, samples));
|
||||||
|
|
||||||
writer = VariantContextWriterFactory.sortOnTheFly(baseWriter, 200);
|
writer = VariantContextWriterFactory.sortOnTheFly(baseWriter, MAX_INDEL_LENGTH);
|
||||||
|
|
||||||
|
referenceWindowStop = getToolkit().getArguments().reference_window_stop;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
|
@ -203,6 +222,15 @@ public class LeftAlignAndTrimVariants extends RodWalker<Integer, Integer> {
|
||||||
@Requires("vc != null")
|
@Requires("vc != null")
|
||||||
protected int trimAlignWrite(final VariantContext vc, final ReferenceContext ref, final int numBiallelics ){
|
protected int trimAlignWrite(final VariantContext vc, final ReferenceContext ref, final int numBiallelics ){
|
||||||
|
|
||||||
|
final int refLength = vc.getReference().length();
|
||||||
|
|
||||||
|
// ignore if the reference length is greater than the reference window stop before and after expansion
|
||||||
|
if ( refLength > MAX_INDEL_LENGTH && refLength > referenceWindowStop ) {
|
||||||
|
logger.info(String.format("%s (%d) at position %s:%d; skipping that record. Set --referenceWindowStop >= %d",
|
||||||
|
REFERENCE_ALLELE_TOO_LONG_MSG, refLength, vc.getChr(), vc.getStart(), refLength));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
// optionally don't trim VC
|
// optionally don't trim VC
|
||||||
final VariantContext v = dontTrimAlleles ? vc : GATKVariantContextUtils.trimAlleles(vc, true, true);
|
final VariantContext v = dontTrimAlleles ? vc : GATKVariantContextUtils.trimAlleles(vc, true, true);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -91,7 +91,7 @@ import java.util.*;
|
||||||
* --dbsnp dbsnp.vcf
|
* --dbsnp dbsnp.vcf
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
* <h4>To perform VCF format tests and all strict validations with the VCFs containing alleles > 208 bases</h4>
|
* <h4>To perform VCF format tests and all strict validations with the VCFs containing alleles <= 208 bases</h4>
|
||||||
* <pre>
|
* <pre>
|
||||||
* java -jar GenomeAnalysisTK.jar \
|
* java -jar GenomeAnalysisTK.jar \
|
||||||
* -T ValidateVariants \
|
* -T ValidateVariants \
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue