Allow overriding ValidateVariants' hard-coded cutoff for allele length
This commit is contained in:
parent
5870225f83
commit
3ecabf7e45
|
|
@ -51,20 +51,25 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.variantutils;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.log4j.Level;
|
||||
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
public class ValidateVariantsIntegrationTest extends WalkerTest {
|
||||
|
||||
protected static final String emptyMd5 = "d41d8cd98f00b204e9800998ecf8427e";
|
||||
protected static final String defaultRegion = "1:10001292-10001303";
|
||||
protected static final String EMPTY_MD5 = "d41d8cd98f00b204e9800998ecf8427e";
|
||||
protected static final String DEFAULT_REGION = "1:10001292-10001303";
|
||||
|
||||
|
||||
public static String baseTestString(final String file, String type) {
|
||||
return baseTestString(file,type,defaultRegion,b36KGReference);
|
||||
return baseTestString(file,type,DEFAULT_REGION,b36KGReference);
|
||||
}
|
||||
|
||||
public static String baseTestString(String file, String type, String region, String reference) {
|
||||
|
|
@ -88,7 +93,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("validationExampleGood.vcf", "ALL"),
|
||||
0,
|
||||
Arrays.asList(emptyMd5)
|
||||
Arrays.asList(EMPTY_MD5)
|
||||
);
|
||||
|
||||
executeTest("test good file", spec);
|
||||
|
|
@ -175,7 +180,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("validationExampleBad.vcf", "-ALL"),
|
||||
0,
|
||||
Arrays.asList(emptyMd5)
|
||||
Arrays.asList(EMPTY_MD5)
|
||||
);
|
||||
|
||||
executeTest("test no validation", spec);
|
||||
|
|
@ -186,7 +191,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("complexEvents.vcf", "ALL"),
|
||||
0,
|
||||
Arrays.asList(emptyMd5)
|
||||
Arrays.asList(EMPTY_MD5)
|
||||
);
|
||||
|
||||
executeTest("test validating complex events", spec);
|
||||
|
|
@ -195,7 +200,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
|||
@Test(description = "Fixes '''bug''' reported in story https://www.pivotaltracker.com/story/show/68725164")
|
||||
public void testUnusedAlleleFix() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("validationUnusedAllelesBugFix.vcf","-ALLELES","1:1-739000",b37KGReference),0,Arrays.asList(emptyMd5));
|
||||
baseTestString("validationUnusedAllelesBugFix.vcf","-ALLELES","1:1-739000",b37KGReference),0,Arrays.asList(EMPTY_MD5));
|
||||
executeTest("test unused allele bug fix", spec);
|
||||
}
|
||||
|
||||
|
|
@ -205,4 +210,49 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
|||
baseTestString("validationUnusedAllelesBugFix.vcf","ALLELES","1:1-739000",b37KGReference),0, UserException.FailsStrictValidation.class);
|
||||
executeTest("test unused allele bug fix", spec);
|
||||
}
|
||||
|
||||
@Test(description = "Checks '''bug''' reported in issue https://github.com/broadinstitute/gsa-unstable/issues/963")
|
||||
public void testLargeReferenceAlleleError() throws IOException {
|
||||
// Need to see log INFO messages
|
||||
Level level = logger.getLevel();
|
||||
logger.setLevel(Level.INFO);
|
||||
|
||||
File logFile = createTempFile("testLargeReferenceAlleleError.log", ".tmp");
|
||||
String logFileName = logFile.getAbsolutePath();
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("longAlleles.vcf", "ALL", "1", b37KGReference) + " -log " + logFileName,
|
||||
0, Arrays.asList(EMPTY_MD5));
|
||||
executeTest("test long reference allele bug error", spec);
|
||||
|
||||
// Make sure the "reference allele too long" message is in the log
|
||||
Assert.assertTrue(FileUtils.readFileToString(logFile).contains(ValidateVariants.REFERENCE_ALLELE_TOO_LONG_MSG));
|
||||
|
||||
// Set the log level back
|
||||
logger.setLevel(level);
|
||||
}
|
||||
|
||||
@Test(description = "Checks '''bug''' is fixed, reported in issue https://github.com/broadinstitute/gsa-unstable/issues/963")
|
||||
public void testLargeReferenceAlleleFix() throws IOException {
|
||||
// Need to see log INFO messages
|
||||
Level level = logger.getLevel();
|
||||
logger.setLevel(Level.INFO);
|
||||
|
||||
File logFile = createTempFile("testLargeReferenceAllele.log", ".tmp");
|
||||
String logFileName = logFile.getAbsolutePath();
|
||||
|
||||
// expand window for the large reference allele
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("longAlleles.vcf","ALL","1",b37KGReference) + " --reference_window_stop 208 -log " + logFileName,
|
||||
0, Arrays.asList(EMPTY_MD5));
|
||||
executeTest("test long reference allele bug fix", spec);
|
||||
|
||||
// Make sure the "reference allele too long" message is not in the log
|
||||
Assert.assertFalse(FileUtils.readFileToString(logFile).contains(ValidateVariants.REFERENCE_ALLELE_TOO_LONG_MSG));
|
||||
|
||||
// All of the validation tests have passed since UserException.FailsStrictValidation is not thrown.
|
||||
|
||||
// Set the log level back
|
||||
logger.setLevel(level);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -50,6 +50,9 @@ import java.util.concurrent.TimeUnit;
|
|||
*/
|
||||
public class GATKArgumentCollection {
|
||||
|
||||
// the default value of the stop of the expanded window
|
||||
public static final int DEFAULT_REFERENCE_WINDOW_STOP = 0;
|
||||
|
||||
/** the constructor */
|
||||
public GATKArgumentCollection() {
|
||||
}
|
||||
|
|
@ -663,5 +666,17 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName="variant_index_parameter",shortName = "variant_index_parameter",doc="Parameter to pass to the VCF/BCF IndexCreator",required=false)
|
||||
@Advanced
|
||||
public int variant_index_parameter = GATKVCFUtils.DEFAULT_INDEX_PARAMETER;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Window arguments
|
||||
//
|
||||
// -------------------------------------------------------------------------------------------------------------
|
||||
/**
|
||||
* Stop of the expanded window for which the reference context should be provided, relative to the locus.
|
||||
*/
|
||||
@Argument(fullName = "reference_window_stop", shortName = "ref_win_stop", doc = "Reference window stop", minValue = 0, required = false)
|
||||
@Advanced
|
||||
public int reference_window_stop = DEFAULT_REFERENCE_WINDOW_STOP;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.gatk.engine.datasources.providers;
|
||||
|
||||
import htsjdk.samtools.reference.ReferenceSequence;
|
||||
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.walkers.Reference;
|
||||
import org.broadinstitute.gatk.engine.walkers.Walker;
|
||||
|
|
@ -74,7 +75,7 @@ public class LocusReferenceView extends ReferenceView {
|
|||
|
||||
|
||||
/**
|
||||
* Start of the expanded window for which the reference context should be provided,
|
||||
* Stop of the expanded window for which the reference context should be provided,
|
||||
* relative to the locus in question.
|
||||
*/
|
||||
private final int windowStop;
|
||||
|
|
@ -99,10 +100,11 @@ public class LocusReferenceView extends ReferenceView {
|
|||
|
||||
/**
|
||||
* Create a new locus reference view.
|
||||
* @param walker input walker
|
||||
* @param provider source for locus data.
|
||||
*/
|
||||
public LocusReferenceView( Walker walker, LocusShardDataProvider provider ) {
|
||||
super( provider );
|
||||
super(provider);
|
||||
initializeBounds(provider);
|
||||
|
||||
// Retrieve information about the window being accessed.
|
||||
|
|
@ -113,11 +115,22 @@ public class LocusReferenceView extends ReferenceView {
|
|||
if( window.stop() < 0 ) throw new ReviewedGATKException( "Reference window ends before current locus" );
|
||||
|
||||
windowStart = window.start();
|
||||
windowStop = window.stop();
|
||||
|
||||
if ( walker.getArguments() == null ){
|
||||
windowStop = window.stop();
|
||||
} else {
|
||||
// Use reference arguments if set, otherwise use the annotation
|
||||
windowStop = walker.getArguments().reference_window_stop != GATKArgumentCollection.DEFAULT_REFERENCE_WINDOW_STOP ?
|
||||
walker.getArguments().reference_window_stop : window.stop();
|
||||
}
|
||||
}
|
||||
else {
|
||||
windowStart = 0;
|
||||
windowStop = 0;
|
||||
if ( walker.getArguments() == null ){
|
||||
windowStop = 0;
|
||||
} else {
|
||||
windowStop = walker.getArguments().reference_window_stop;
|
||||
}
|
||||
}
|
||||
|
||||
if(bounds != null) {
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ import org.broadinstitute.gatk.utils.baq.BAQ;
|
|||
import org.broadinstitute.gatk.utils.collections.Pair;
|
||||
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.gatk.engine.recalibration.BQSRMode;
|
||||
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -85,18 +86,49 @@ public abstract class Walker<MapType, ReduceType> {
|
|||
/**
|
||||
* Gets the master sequence dictionary for this walker
|
||||
* @link GenomeAnalysisEngine.getMasterSequenceDictionary
|
||||
* @return
|
||||
* @return the master sequence dictionary or null if no genome analysis toolkit.
|
||||
*/
|
||||
protected SAMSequenceDictionary getMasterSequenceDictionary() {
|
||||
return getToolkit().getMasterSequenceDictionary();
|
||||
if ( toolkit == null )
|
||||
return null;
|
||||
else
|
||||
return toolkit.getMasterSequenceDictionary();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the GATK argument collection
|
||||
* @link GenomeAnalysisEngine.getArguments
|
||||
* @return the GATK argument collection or null if no genome analysis toolkit.
|
||||
*/
|
||||
public GATKArgumentCollection getArguments(){
|
||||
if ( toolkit == null )
|
||||
return null;
|
||||
else
|
||||
return toolkit.getArguments();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the GATK samples database
|
||||
* @link GenomeAnalysisEngine.getSampleDB
|
||||
* @return the GATK samples database or null if no genome analysis toolkit.
|
||||
*/
|
||||
public SampleDB getSampleDB() {
|
||||
return getToolkit().getSampleDB();
|
||||
if ( toolkit == null )
|
||||
return null;
|
||||
else
|
||||
return toolkit.getSampleDB();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a sample from the GATK samples database
|
||||
* @param id the sample ID
|
||||
* @return the sample from the GATK samples database or null if no genome analysis toolkit or samples database.
|
||||
*/
|
||||
protected Sample getSample(final String id) {
|
||||
return getToolkit().getSampleDB().getSample(id);
|
||||
if ( getSampleDB() == null )
|
||||
return null;
|
||||
else
|
||||
return getSampleDB().getSample(id);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -91,6 +91,16 @@ import java.util.*;
|
|||
* --dbsnp dbsnp.vcf
|
||||
* </pre>
|
||||
*
|
||||
* <h4>To perform VCF format tests and all strict validations with the VCFs containing alleles > 208 bases</h4>
|
||||
* <pre>
|
||||
* java -jar GenomeAnalysisTK.jar \
|
||||
* -T ValidateVariants \
|
||||
* -R reference.fasta \
|
||||
* -V input.vcf \
|
||||
* --dbsnp dbsnp.vcf
|
||||
* --reference_window_stop 208
|
||||
* </pre>
|
||||
*
|
||||
* <h4>To perform only VCF format tests</h4>
|
||||
* <pre>
|
||||
* java -jar GenomeAnalysisTK.jar \
|
||||
|
|
@ -114,6 +124,9 @@ import java.util.*;
|
|||
@Reference(window=@Window(start=0,stop=100))
|
||||
public class ValidateVariants extends RodWalker<Integer, Integer> {
|
||||
|
||||
// Log message for a reference allele that is too long
|
||||
protected static final String REFERENCE_ALLELE_TOO_LONG_MSG = "Reference allele is too long";
|
||||
|
||||
@ArgumentCollection
|
||||
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||
|
||||
|
|
@ -181,6 +194,9 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
private File file = null;
|
||||
|
||||
// Stop of the expanded window for which the reference context should be provided, relative to the locus.
|
||||
private int referenceWindowStop;
|
||||
|
||||
/**
|
||||
* Contains final set of validation to apply.
|
||||
*/
|
||||
|
|
@ -189,6 +205,7 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
|||
public void initialize() {
|
||||
file = new File(variantCollection.variants.getSource());
|
||||
validationTypes = calculateValidationTypesToApply(excludeTypes);
|
||||
referenceWindowStop = getToolkit().getArguments().reference_window_stop;
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
|
@ -220,8 +237,11 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
|||
// get the true reference allele
|
||||
final Allele reportedRefAllele = vc.getReference();
|
||||
final int refLength = reportedRefAllele.length();
|
||||
if ( refLength > 100 ) {
|
||||
logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", refLength, vc.getChr(), vc.getStart()));
|
||||
|
||||
// reference length is greater than the reference window stop before and after expansion
|
||||
if ( refLength > 100 && refLength > referenceWindowStop ) {
|
||||
logger.info(String.format("%s (%d) at position %s:%d; skipping that record. Set --referenceWindowStop >= %d",
|
||||
REFERENCE_ALLELE_TOO_LONG_MSG, refLength, vc.getChr(), vc.getStart(), refLength));
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -259,7 +279,7 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
|||
* @return never {@code null} but perhaps an empty set.
|
||||
*/
|
||||
private Collection<ValidationType> calculateValidationTypesToApply(final List<ValidationType> excludeTypes) {
|
||||
if (excludeTypes.size() == 0)
|
||||
if (excludeTypes.isEmpty())
|
||||
return Collections.singleton(ValidationType.ALL);
|
||||
final Set<ValidationType> excludeTypeSet = new LinkedHashSet<>(excludeTypes);
|
||||
if (excludeTypes.size() != excludeTypeSet.size())
|
||||
|
|
|
|||
Loading…
Reference in New Issue