Allow overriding ValidateVariants' hard-coded cutoff for allele length

This commit is contained in:
Ron Levine 2015-07-27 09:27:16 -04:00
parent 5870225f83
commit 3ecabf7e45
5 changed files with 148 additions and 18 deletions

View File

@ -51,20 +51,25 @@
package org.broadinstitute.gatk.tools.walkers.variantutils;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Level;
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
public class ValidateVariantsIntegrationTest extends WalkerTest {
protected static final String emptyMd5 = "d41d8cd98f00b204e9800998ecf8427e";
protected static final String defaultRegion = "1:10001292-10001303";
protected static final String EMPTY_MD5 = "d41d8cd98f00b204e9800998ecf8427e";
protected static final String DEFAULT_REGION = "1:10001292-10001303";
public static String baseTestString(final String file, String type) {
return baseTestString(file,type,defaultRegion,b36KGReference);
return baseTestString(file,type,DEFAULT_REGION,b36KGReference);
}
public static String baseTestString(String file, String type, String region, String reference) {
@ -88,7 +93,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("validationExampleGood.vcf", "ALL"),
0,
Arrays.asList(emptyMd5)
Arrays.asList(EMPTY_MD5)
);
executeTest("test good file", spec);
@ -175,7 +180,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("validationExampleBad.vcf", "-ALL"),
0,
Arrays.asList(emptyMd5)
Arrays.asList(EMPTY_MD5)
);
executeTest("test no validation", spec);
@ -186,7 +191,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("complexEvents.vcf", "ALL"),
0,
Arrays.asList(emptyMd5)
Arrays.asList(EMPTY_MD5)
);
executeTest("test validating complex events", spec);
@ -195,7 +200,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
@Test(description = "Fixes '''bug''' reported in story https://www.pivotaltracker.com/story/show/68725164")
public void testUnusedAlleleFix() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("validationUnusedAllelesBugFix.vcf","-ALLELES","1:1-739000",b37KGReference),0,Arrays.asList(emptyMd5));
baseTestString("validationUnusedAllelesBugFix.vcf","-ALLELES","1:1-739000",b37KGReference),0,Arrays.asList(EMPTY_MD5));
executeTest("test unused allele bug fix", spec);
}
@ -205,4 +210,49 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
baseTestString("validationUnusedAllelesBugFix.vcf","ALLELES","1:1-739000",b37KGReference),0, UserException.FailsStrictValidation.class);
executeTest("test unused allele bug fix", spec);
}
@Test(description = "Checks '''bug''' reported in issue https://github.com/broadinstitute/gsa-unstable/issues/963")
public void testLargeReferenceAlleleError() throws IOException {
// Need to see log INFO messages
Level level = logger.getLevel();
logger.setLevel(Level.INFO);
File logFile = createTempFile("testLargeReferenceAlleleError.log", ".tmp");
String logFileName = logFile.getAbsolutePath();
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("longAlleles.vcf", "ALL", "1", b37KGReference) + " -log " + logFileName,
0, Arrays.asList(EMPTY_MD5));
executeTest("test long reference allele bug error", spec);
// Make sure the "reference allele too long" message is in the log
Assert.assertTrue(FileUtils.readFileToString(logFile).contains(ValidateVariants.REFERENCE_ALLELE_TOO_LONG_MSG));
// Set the log level back
logger.setLevel(level);
}
@Test(description = "Checks '''bug''' is fixed, reported in issue https://github.com/broadinstitute/gsa-unstable/issues/963")
public void testLargeReferenceAlleleFix() throws IOException {
// Need to see log INFO messages
Level level = logger.getLevel();
logger.setLevel(Level.INFO);
File logFile = createTempFile("testLargeReferenceAllele.log", ".tmp");
String logFileName = logFile.getAbsolutePath();
// expand window for the large reference allele
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("longAlleles.vcf","ALL","1",b37KGReference) + " --reference_window_stop 208 -log " + logFileName,
0, Arrays.asList(EMPTY_MD5));
executeTest("test long reference allele bug fix", spec);
// Make sure the "reference allele too long" message is not in the log
Assert.assertFalse(FileUtils.readFileToString(logFile).contains(ValidateVariants.REFERENCE_ALLELE_TOO_LONG_MSG));
// All of the validation tests have passed since UserException.FailsStrictValidation is not thrown.
// Set the log level back
logger.setLevel(level);
}
}

View File

@ -50,6 +50,9 @@ import java.util.concurrent.TimeUnit;
*/
public class GATKArgumentCollection {
// the default value of the stop of the expanded window
public static final int DEFAULT_REFERENCE_WINDOW_STOP = 0;
/** the constructor */
public GATKArgumentCollection() {
}
@ -663,5 +666,17 @@ public class GATKArgumentCollection {
@Argument(fullName="variant_index_parameter",shortName = "variant_index_parameter",doc="Parameter to pass to the VCF/BCF IndexCreator",required=false)
@Advanced
public int variant_index_parameter = GATKVCFUtils.DEFAULT_INDEX_PARAMETER;
// --------------------------------------------------------------------------------------------------------------
//
// Window arguments
//
// -------------------------------------------------------------------------------------------------------------
/**
* Stop of the expanded window for which the reference context should be provided, relative to the locus.
*/
@Argument(fullName = "reference_window_stop", shortName = "ref_win_stop", doc = "Reference window stop", minValue = 0, required = false)
@Advanced
public int reference_window_stop = DEFAULT_REFERENCE_WINDOW_STOP;
}

View File

@ -26,6 +26,7 @@
package org.broadinstitute.gatk.engine.datasources.providers;
import htsjdk.samtools.reference.ReferenceSequence;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.walkers.Reference;
import org.broadinstitute.gatk.engine.walkers.Walker;
@ -74,7 +75,7 @@ public class LocusReferenceView extends ReferenceView {
/**
* Start of the expanded window for which the reference context should be provided,
* Stop of the expanded window for which the reference context should be provided,
* relative to the locus in question.
*/
private final int windowStop;
@ -99,10 +100,11 @@ public class LocusReferenceView extends ReferenceView {
/**
* Create a new locus reference view.
* @param walker input walker
* @param provider source for locus data.
*/
public LocusReferenceView( Walker walker, LocusShardDataProvider provider ) {
super( provider );
super(provider);
initializeBounds(provider);
// Retrieve information about the window being accessed.
@ -113,11 +115,22 @@ public class LocusReferenceView extends ReferenceView {
if( window.stop() < 0 ) throw new ReviewedGATKException( "Reference window ends before current locus" );
windowStart = window.start();
windowStop = window.stop();
if ( walker.getArguments() == null ){
windowStop = window.stop();
} else {
// Use reference arguments if set, otherwise use the annotation
windowStop = walker.getArguments().reference_window_stop != GATKArgumentCollection.DEFAULT_REFERENCE_WINDOW_STOP ?
walker.getArguments().reference_window_stop : window.stop();
}
}
else {
windowStart = 0;
windowStop = 0;
if ( walker.getArguments() == null ){
windowStop = 0;
} else {
windowStop = walker.getArguments().reference_window_stop;
}
}
if(bounds != null) {

View File

@ -40,6 +40,7 @@ import org.broadinstitute.gatk.utils.baq.BAQ;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
import org.broadinstitute.gatk.engine.recalibration.BQSRMode;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import java.util.List;
@ -85,18 +86,49 @@ public abstract class Walker<MapType, ReduceType> {
/**
* Gets the master sequence dictionary for this walker
* @link GenomeAnalysisEngine.getMasterSequenceDictionary
* @return
* @return the master sequence dictionary or null if no genome analysis toolkit.
*/
protected SAMSequenceDictionary getMasterSequenceDictionary() {
return getToolkit().getMasterSequenceDictionary();
if ( toolkit == null )
return null;
else
return toolkit.getMasterSequenceDictionary();
}
/**
* Gets the GATK argument collection
* @link GenomeAnalysisEngine.getArguments
* @return the GATK argument collection or null if no genome analysis toolkit.
*/
public GATKArgumentCollection getArguments(){
if ( toolkit == null )
return null;
else
return toolkit.getArguments();
}
/**
* Gets the GATK samples database
* @link GenomeAnalysisEngine.getSampleDB
* @return the GATK samples database or null if no genome analysis toolkit.
*/
public SampleDB getSampleDB() {
return getToolkit().getSampleDB();
if ( toolkit == null )
return null;
else
return toolkit.getSampleDB();
}
/**
* Gets a sample from the GATK samples database
* @param id the sample ID
* @return the sample from the GATK samples database or null if no genome analysis toolkit or samples database.
*/
protected Sample getSample(final String id) {
return getToolkit().getSampleDB().getSample(id);
if ( getSampleDB() == null )
return null;
else
return getSampleDB().getSample(id);
}
/**

View File

@ -91,6 +91,16 @@ import java.util.*;
* --dbsnp dbsnp.vcf
* </pre>
*
* <h4>To perform VCF format tests and all strict validations with the VCFs containing alleles > 208 bases</h4>
* <pre>
* java -jar GenomeAnalysisTK.jar \
* -T ValidateVariants \
* -R reference.fasta \
* -V input.vcf \
* --dbsnp dbsnp.vcf
* --reference_window_stop 208
* </pre>
*
* <h4>To perform only VCF format tests</h4>
* <pre>
* java -jar GenomeAnalysisTK.jar \
@ -114,6 +124,9 @@ import java.util.*;
@Reference(window=@Window(start=0,stop=100))
public class ValidateVariants extends RodWalker<Integer, Integer> {
// Log message for a reference allele that is too long
protected static final String REFERENCE_ALLELE_TOO_LONG_MSG = "Reference allele is too long";
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
@ -181,6 +194,9 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
private File file = null;
// Stop of the expanded window for which the reference context should be provided, relative to the locus.
private int referenceWindowStop;
/**
* Contains final set of validation to apply.
*/
@ -189,6 +205,7 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
public void initialize() {
file = new File(variantCollection.variants.getSource());
validationTypes = calculateValidationTypesToApply(excludeTypes);
referenceWindowStop = getToolkit().getArguments().reference_window_stop;
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
@ -220,8 +237,11 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
// get the true reference allele
final Allele reportedRefAllele = vc.getReference();
final int refLength = reportedRefAllele.length();
if ( refLength > 100 ) {
logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", refLength, vc.getChr(), vc.getStart()));
// reference length is greater than the reference window stop before and after expansion
if ( refLength > 100 && refLength > referenceWindowStop ) {
logger.info(String.format("%s (%d) at position %s:%d; skipping that record. Set --referenceWindowStop >= %d",
REFERENCE_ALLELE_TOO_LONG_MSG, refLength, vc.getChr(), vc.getStart(), refLength));
return;
}
@ -259,7 +279,7 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
* @return never {@code null} but perhaps an empty set.
*/
private Collection<ValidationType> calculateValidationTypesToApply(final List<ValidationType> excludeTypes) {
if (excludeTypes.size() == 0)
if (excludeTypes.isEmpty())
return Collections.singleton(ValidationType.ALL);
final Set<ValidationType> excludeTypeSet = new LinkedHashSet<>(excludeTypes);
if (excludeTypes.size() != excludeTypeSet.size())