Merge pull request #1097 from broadinstitute/rhl_validatevariants_al_cutoff
Allow overriding ValidateVariants' hard-coded cutoff for allele length
This commit is contained in:
commit
aadf6645f6
|
|
@ -51,20 +51,25 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.variantutils;
|
package org.broadinstitute.gatk.tools.walkers.variantutils;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.apache.log4j.Level;
|
||||||
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||||
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
public class ValidateVariantsIntegrationTest extends WalkerTest {
|
public class ValidateVariantsIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
protected static final String emptyMd5 = "d41d8cd98f00b204e9800998ecf8427e";
|
protected static final String EMPTY_MD5 = "d41d8cd98f00b204e9800998ecf8427e";
|
||||||
protected static final String defaultRegion = "1:10001292-10001303";
|
protected static final String DEFAULT_REGION = "1:10001292-10001303";
|
||||||
|
|
||||||
|
|
||||||
public static String baseTestString(final String file, String type) {
|
public static String baseTestString(final String file, String type) {
|
||||||
return baseTestString(file,type,defaultRegion,b36KGReference);
|
return baseTestString(file,type,DEFAULT_REGION,b36KGReference);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String baseTestString(String file, String type, String region, String reference) {
|
public static String baseTestString(String file, String type, String region, String reference) {
|
||||||
|
|
@ -88,7 +93,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString("validationExampleGood.vcf", "ALL"),
|
baseTestString("validationExampleGood.vcf", "ALL"),
|
||||||
0,
|
0,
|
||||||
Arrays.asList(emptyMd5)
|
Arrays.asList(EMPTY_MD5)
|
||||||
);
|
);
|
||||||
|
|
||||||
executeTest("test good file", spec);
|
executeTest("test good file", spec);
|
||||||
|
|
@ -175,7 +180,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString("validationExampleBad.vcf", "-ALL"),
|
baseTestString("validationExampleBad.vcf", "-ALL"),
|
||||||
0,
|
0,
|
||||||
Arrays.asList(emptyMd5)
|
Arrays.asList(EMPTY_MD5)
|
||||||
);
|
);
|
||||||
|
|
||||||
executeTest("test no validation", spec);
|
executeTest("test no validation", spec);
|
||||||
|
|
@ -186,7 +191,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString("complexEvents.vcf", "ALL"),
|
baseTestString("complexEvents.vcf", "ALL"),
|
||||||
0,
|
0,
|
||||||
Arrays.asList(emptyMd5)
|
Arrays.asList(EMPTY_MD5)
|
||||||
);
|
);
|
||||||
|
|
||||||
executeTest("test validating complex events", spec);
|
executeTest("test validating complex events", spec);
|
||||||
|
|
@ -195,7 +200,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
||||||
@Test(description = "Fixes '''bug''' reported in story https://www.pivotaltracker.com/story/show/68725164")
|
@Test(description = "Fixes '''bug''' reported in story https://www.pivotaltracker.com/story/show/68725164")
|
||||||
public void testUnusedAlleleFix() {
|
public void testUnusedAlleleFix() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString("validationUnusedAllelesBugFix.vcf","-ALLELES","1:1-739000",b37KGReference),0,Arrays.asList(emptyMd5));
|
baseTestString("validationUnusedAllelesBugFix.vcf","-ALLELES","1:1-739000",b37KGReference),0,Arrays.asList(EMPTY_MD5));
|
||||||
executeTest("test unused allele bug fix", spec);
|
executeTest("test unused allele bug fix", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -205,4 +210,49 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
||||||
baseTestString("validationUnusedAllelesBugFix.vcf","ALLELES","1:1-739000",b37KGReference),0, UserException.FailsStrictValidation.class);
|
baseTestString("validationUnusedAllelesBugFix.vcf","ALLELES","1:1-739000",b37KGReference),0, UserException.FailsStrictValidation.class);
|
||||||
executeTest("test unused allele bug fix", spec);
|
executeTest("test unused allele bug fix", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(description = "Checks '''bug''' reported in issue https://github.com/broadinstitute/gsa-unstable/issues/963")
|
||||||
|
public void testLargeReferenceAlleleError() throws IOException {
|
||||||
|
// Need to see log INFO messages
|
||||||
|
Level level = logger.getLevel();
|
||||||
|
logger.setLevel(Level.INFO);
|
||||||
|
|
||||||
|
File logFile = createTempFile("testLargeReferenceAlleleError.log", ".tmp");
|
||||||
|
String logFileName = logFile.getAbsolutePath();
|
||||||
|
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseTestString("longAlleles.vcf", "ALL", "1", b37KGReference) + " -log " + logFileName,
|
||||||
|
0, Arrays.asList(EMPTY_MD5));
|
||||||
|
executeTest("test long reference allele bug error", spec);
|
||||||
|
|
||||||
|
// Make sure the "reference allele too long" message is in the log
|
||||||
|
Assert.assertTrue(FileUtils.readFileToString(logFile).contains(ValidateVariants.REFERENCE_ALLELE_TOO_LONG_MSG));
|
||||||
|
|
||||||
|
// Set the log level back
|
||||||
|
logger.setLevel(level);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(description = "Checks '''bug''' is fixed, reported in issue https://github.com/broadinstitute/gsa-unstable/issues/963")
|
||||||
|
public void testLargeReferenceAlleleFix() throws IOException {
|
||||||
|
// Need to see log INFO messages
|
||||||
|
Level level = logger.getLevel();
|
||||||
|
logger.setLevel(Level.INFO);
|
||||||
|
|
||||||
|
File logFile = createTempFile("testLargeReferenceAllele.log", ".tmp");
|
||||||
|
String logFileName = logFile.getAbsolutePath();
|
||||||
|
|
||||||
|
// expand window for the large reference allele
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseTestString("longAlleles.vcf","ALL","1",b37KGReference) + " --reference_window_stop 208 -log " + logFileName,
|
||||||
|
0, Arrays.asList(EMPTY_MD5));
|
||||||
|
executeTest("test long reference allele bug fix", spec);
|
||||||
|
|
||||||
|
// Make sure the "reference allele too long" message is not in the log
|
||||||
|
Assert.assertFalse(FileUtils.readFileToString(logFile).contains(ValidateVariants.REFERENCE_ALLELE_TOO_LONG_MSG));
|
||||||
|
|
||||||
|
// All of the validation tests have passed since UserException.FailsStrictValidation is not thrown.
|
||||||
|
|
||||||
|
// Set the log level back
|
||||||
|
logger.setLevel(level);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -50,6 +50,9 @@ import java.util.concurrent.TimeUnit;
|
||||||
*/
|
*/
|
||||||
public class GATKArgumentCollection {
|
public class GATKArgumentCollection {
|
||||||
|
|
||||||
|
// the default value of the stop of the expanded window
|
||||||
|
public static final int DEFAULT_REFERENCE_WINDOW_STOP = 0;
|
||||||
|
|
||||||
/** the constructor */
|
/** the constructor */
|
||||||
public GATKArgumentCollection() {
|
public GATKArgumentCollection() {
|
||||||
}
|
}
|
||||||
|
|
@ -663,5 +666,17 @@ public class GATKArgumentCollection {
|
||||||
@Argument(fullName="variant_index_parameter",shortName = "variant_index_parameter",doc="Parameter to pass to the VCF/BCF IndexCreator",required=false)
|
@Argument(fullName="variant_index_parameter",shortName = "variant_index_parameter",doc="Parameter to pass to the VCF/BCF IndexCreator",required=false)
|
||||||
@Advanced
|
@Advanced
|
||||||
public int variant_index_parameter = GATKVCFUtils.DEFAULT_INDEX_PARAMETER;
|
public int variant_index_parameter = GATKVCFUtils.DEFAULT_INDEX_PARAMETER;
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Window arguments
|
||||||
|
//
|
||||||
|
// -------------------------------------------------------------------------------------------------------------
|
||||||
|
/**
|
||||||
|
* Stop of the expanded window for which the reference context should be provided, relative to the locus.
|
||||||
|
*/
|
||||||
|
@Argument(fullName = "reference_window_stop", shortName = "ref_win_stop", doc = "Reference window stop", minValue = 0, required = false)
|
||||||
|
@Advanced
|
||||||
|
public int reference_window_stop = DEFAULT_REFERENCE_WINDOW_STOP;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@
|
||||||
package org.broadinstitute.gatk.engine.datasources.providers;
|
package org.broadinstitute.gatk.engine.datasources.providers;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.ReferenceSequence;
|
import htsjdk.samtools.reference.ReferenceSequence;
|
||||||
|
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
|
||||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.gatk.engine.walkers.Reference;
|
import org.broadinstitute.gatk.engine.walkers.Reference;
|
||||||
import org.broadinstitute.gatk.engine.walkers.Walker;
|
import org.broadinstitute.gatk.engine.walkers.Walker;
|
||||||
|
|
@ -74,7 +75,7 @@ public class LocusReferenceView extends ReferenceView {
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Start of the expanded window for which the reference context should be provided,
|
* Stop of the expanded window for which the reference context should be provided,
|
||||||
* relative to the locus in question.
|
* relative to the locus in question.
|
||||||
*/
|
*/
|
||||||
private final int windowStop;
|
private final int windowStop;
|
||||||
|
|
@ -99,10 +100,11 @@ public class LocusReferenceView extends ReferenceView {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new locus reference view.
|
* Create a new locus reference view.
|
||||||
|
* @param walker input walker
|
||||||
* @param provider source for locus data.
|
* @param provider source for locus data.
|
||||||
*/
|
*/
|
||||||
public LocusReferenceView( Walker walker, LocusShardDataProvider provider ) {
|
public LocusReferenceView( Walker walker, LocusShardDataProvider provider ) {
|
||||||
super( provider );
|
super(provider);
|
||||||
initializeBounds(provider);
|
initializeBounds(provider);
|
||||||
|
|
||||||
// Retrieve information about the window being accessed.
|
// Retrieve information about the window being accessed.
|
||||||
|
|
@ -113,11 +115,22 @@ public class LocusReferenceView extends ReferenceView {
|
||||||
if( window.stop() < 0 ) throw new ReviewedGATKException( "Reference window ends before current locus" );
|
if( window.stop() < 0 ) throw new ReviewedGATKException( "Reference window ends before current locus" );
|
||||||
|
|
||||||
windowStart = window.start();
|
windowStart = window.start();
|
||||||
windowStop = window.stop();
|
|
||||||
|
if ( walker.getArguments() == null ){
|
||||||
|
windowStop = window.stop();
|
||||||
|
} else {
|
||||||
|
// Use reference arguments if set, otherwise use the annotation
|
||||||
|
windowStop = walker.getArguments().reference_window_stop != GATKArgumentCollection.DEFAULT_REFERENCE_WINDOW_STOP ?
|
||||||
|
walker.getArguments().reference_window_stop : window.stop();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
windowStart = 0;
|
windowStart = 0;
|
||||||
windowStop = 0;
|
if ( walker.getArguments() == null ){
|
||||||
|
windowStop = 0;
|
||||||
|
} else {
|
||||||
|
windowStop = walker.getArguments().reference_window_stop;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(bounds != null) {
|
if(bounds != null) {
|
||||||
|
|
|
||||||
|
|
@ -40,6 +40,7 @@ import org.broadinstitute.gatk.utils.baq.BAQ;
|
||||||
import org.broadinstitute.gatk.utils.collections.Pair;
|
import org.broadinstitute.gatk.utils.collections.Pair;
|
||||||
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
||||||
import org.broadinstitute.gatk.engine.recalibration.BQSRMode;
|
import org.broadinstitute.gatk.engine.recalibration.BQSRMode;
|
||||||
|
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
@ -85,18 +86,49 @@ public abstract class Walker<MapType, ReduceType> {
|
||||||
/**
|
/**
|
||||||
* Gets the master sequence dictionary for this walker
|
* Gets the master sequence dictionary for this walker
|
||||||
* @link GenomeAnalysisEngine.getMasterSequenceDictionary
|
* @link GenomeAnalysisEngine.getMasterSequenceDictionary
|
||||||
* @return
|
* @return the master sequence dictionary or null if no genome analysis toolkit.
|
||||||
*/
|
*/
|
||||||
protected SAMSequenceDictionary getMasterSequenceDictionary() {
|
protected SAMSequenceDictionary getMasterSequenceDictionary() {
|
||||||
return getToolkit().getMasterSequenceDictionary();
|
if ( toolkit == null )
|
||||||
|
return null;
|
||||||
|
else
|
||||||
|
return toolkit.getMasterSequenceDictionary();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the GATK argument collection
|
||||||
|
* @link GenomeAnalysisEngine.getArguments
|
||||||
|
* @return the GATK argument collection or null if no genome analysis toolkit.
|
||||||
|
*/
|
||||||
|
public GATKArgumentCollection getArguments(){
|
||||||
|
if ( toolkit == null )
|
||||||
|
return null;
|
||||||
|
else
|
||||||
|
return toolkit.getArguments();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the GATK samples database
|
||||||
|
* @link GenomeAnalysisEngine.getSampleDB
|
||||||
|
* @return the GATK samples database or null if no genome analysis toolkit.
|
||||||
|
*/
|
||||||
public SampleDB getSampleDB() {
|
public SampleDB getSampleDB() {
|
||||||
return getToolkit().getSampleDB();
|
if ( toolkit == null )
|
||||||
|
return null;
|
||||||
|
else
|
||||||
|
return toolkit.getSampleDB();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a sample from the GATK samples database
|
||||||
|
* @param id the sample ID
|
||||||
|
* @return the sample from the GATK samples database or null if no genome analysis toolkit or samples database.
|
||||||
|
*/
|
||||||
protected Sample getSample(final String id) {
|
protected Sample getSample(final String id) {
|
||||||
return getToolkit().getSampleDB().getSample(id);
|
if ( getSampleDB() == null )
|
||||||
|
return null;
|
||||||
|
else
|
||||||
|
return getSampleDB().getSample(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -91,6 +91,16 @@ import java.util.*;
|
||||||
* --dbsnp dbsnp.vcf
|
* --dbsnp dbsnp.vcf
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
|
* <h4>To perform VCF format tests and all strict validations with the VCFs containing alleles > 208 bases</h4>
|
||||||
|
* <pre>
|
||||||
|
* java -jar GenomeAnalysisTK.jar \
|
||||||
|
* -T ValidateVariants \
|
||||||
|
* -R reference.fasta \
|
||||||
|
* -V input.vcf \
|
||||||
|
* --dbsnp dbsnp.vcf
|
||||||
|
* --reference_window_stop 208
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
* <h4>To perform only VCF format tests</h4>
|
* <h4>To perform only VCF format tests</h4>
|
||||||
* <pre>
|
* <pre>
|
||||||
* java -jar GenomeAnalysisTK.jar \
|
* java -jar GenomeAnalysisTK.jar \
|
||||||
|
|
@ -114,6 +124,9 @@ import java.util.*;
|
||||||
@Reference(window=@Window(start=0,stop=100))
|
@Reference(window=@Window(start=0,stop=100))
|
||||||
public class ValidateVariants extends RodWalker<Integer, Integer> {
|
public class ValidateVariants extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
|
// Log message for a reference allele that is too long
|
||||||
|
protected static final String REFERENCE_ALLELE_TOO_LONG_MSG = "Reference allele is too long";
|
||||||
|
|
||||||
@ArgumentCollection
|
@ArgumentCollection
|
||||||
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||||
|
|
||||||
|
|
@ -181,6 +194,9 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
private File file = null;
|
private File file = null;
|
||||||
|
|
||||||
|
// Stop of the expanded window for which the reference context should be provided, relative to the locus.
|
||||||
|
private int referenceWindowStop;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Contains final set of validation to apply.
|
* Contains final set of validation to apply.
|
||||||
*/
|
*/
|
||||||
|
|
@ -189,6 +205,7 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
file = new File(variantCollection.variants.getSource());
|
file = new File(variantCollection.variants.getSource());
|
||||||
validationTypes = calculateValidationTypesToApply(excludeTypes);
|
validationTypes = calculateValidationTypesToApply(excludeTypes);
|
||||||
|
referenceWindowStop = getToolkit().getArguments().reference_window_stop;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
|
@ -220,8 +237,11 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
||||||
// get the true reference allele
|
// get the true reference allele
|
||||||
final Allele reportedRefAllele = vc.getReference();
|
final Allele reportedRefAllele = vc.getReference();
|
||||||
final int refLength = reportedRefAllele.length();
|
final int refLength = reportedRefAllele.length();
|
||||||
if ( refLength > 100 ) {
|
|
||||||
logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", refLength, vc.getChr(), vc.getStart()));
|
// reference length is greater than the reference window stop before and after expansion
|
||||||
|
if ( refLength > 100 && refLength > referenceWindowStop ) {
|
||||||
|
logger.info(String.format("%s (%d) at position %s:%d; skipping that record. Set --referenceWindowStop >= %d",
|
||||||
|
REFERENCE_ALLELE_TOO_LONG_MSG, refLength, vc.getChr(), vc.getStart(), refLength));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -259,7 +279,7 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
||||||
* @return never {@code null} but perhaps an empty set.
|
* @return never {@code null} but perhaps an empty set.
|
||||||
*/
|
*/
|
||||||
private Collection<ValidationType> calculateValidationTypesToApply(final List<ValidationType> excludeTypes) {
|
private Collection<ValidationType> calculateValidationTypesToApply(final List<ValidationType> excludeTypes) {
|
||||||
if (excludeTypes.size() == 0)
|
if (excludeTypes.isEmpty())
|
||||||
return Collections.singleton(ValidationType.ALL);
|
return Collections.singleton(ValidationType.ALL);
|
||||||
final Set<ValidationType> excludeTypeSet = new LinkedHashSet<>(excludeTypes);
|
final Set<ValidationType> excludeTypeSet = new LinkedHashSet<>(excludeTypes);
|
||||||
if (excludeTypes.size() != excludeTypeSet.size())
|
if (excludeTypes.size() != excludeTypeSet.size())
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue