Consolidate the check of the RMD sequence dictionary against the reference into a single function, and add an integration test verifying that empty VCFs pass both indexing and sequence dictionary validation.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4750 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
abc13d0a90
commit
b03ac61e9d
|
|
@ -334,7 +334,7 @@ public abstract class AbstractGenomeAnalysisEngine {
|
|||
validateSuppliedReferenceOrderedData(tracks);
|
||||
|
||||
// validate all the sequence dictionaries against the reference
|
||||
validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), tracks);
|
||||
validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), tracks, manager);
|
||||
|
||||
rodDataSources = getReferenceOrderedDataSources(tracks);
|
||||
}
|
||||
|
|
@ -505,7 +505,7 @@ public abstract class AbstractGenomeAnalysisEngine {
|
|||
* @param reference Reference data source.
|
||||
* @param tracks a collection of the reference ordered data tracks
|
||||
*/
|
||||
private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<RMDTrack> tracks) {
|
||||
private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<RMDTrack> tracks, RMDTrackBuilder manager) {
|
||||
if ((reads.isEmpty() && (tracks == null || tracks.isEmpty())) || reference == null )
|
||||
return;
|
||||
|
||||
|
|
@ -531,21 +531,8 @@ public abstract class AbstractGenomeAnalysisEngine {
|
|||
}
|
||||
|
||||
// compare the tracks to the reference, if they have a sequence dictionary
|
||||
for (RMDTrack track : tracks) {
|
||||
SAMSequenceDictionary trackDict = track.getSequenceDictionary();
|
||||
|
||||
// hack: if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
|
||||
if (trackDict == null || trackDict.size() == 0) {
|
||||
logger.info("Track " + track.getName() + " doesn't have a sequence dictionary built in, skipping dictionary validation");
|
||||
continue;
|
||||
}
|
||||
|
||||
Set<String> trackSequences = new TreeSet<String>();
|
||||
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
|
||||
trackSequences.add(dictionaryEntry.getSequenceName());
|
||||
SequenceDictionaryUtils.validateDictionaries(logger, getArguments().unsafe, track.getName(), trackDict, "reference", referenceDictionary);
|
||||
}
|
||||
|
||||
for (RMDTrack track : tracks)
|
||||
manager.validateTrackSequenceDictionary(track.getName(),track.getSequenceDictionary(),referenceDictionary);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -503,7 +503,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
if (dict == null) return;
|
||||
|
||||
SAMSequenceDictionary currentDict = createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
|
||||
SequenceDictionaryUtils.validateDictionaries(logger,validationExclusionType,"GATK",dict,inputFile.getAbsolutePath(),currentDict);
|
||||
validateTrackSequenceDictionary(inputFile.getAbsolutePath(),currentDict,dict);
|
||||
|
||||
// check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set
|
||||
for (SAMSequenceRecord seq : currentDict.getSequences()) {
|
||||
|
|
@ -518,4 +518,17 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void validateTrackSequenceDictionary(String trackName, SAMSequenceDictionary trackDict, SAMSequenceDictionary referenceDict) {
|
||||
// if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
|
||||
if (trackDict == null || trackDict.size() == 0)
|
||||
logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
|
||||
else {
|
||||
Set<String> trackSequences = new TreeSet<String>();
|
||||
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
|
||||
trackSequences.add(dictionaryEntry.getSequenceName());
|
||||
SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,10 +3,13 @@ package org.broadinstitute.sting.playground.gatk.walkers.validation;
|
|||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* The pile-up tests, that test any changes to the underlying ROD system
|
||||
* tests for the ROD system in general; from rod system validation to empty VCF files
|
||||
*/
|
||||
public class RodSystemValidationIntegrationTest extends WalkerTest {
|
||||
|
||||
|
|
@ -31,6 +34,18 @@ public class RodSystemValidationIntegrationTest extends WalkerTest {
|
|||
executeTest("testSimpleVCFPileup", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEmptyVCF() {
|
||||
File vcf = new File(validationDataLocation + "justHeader.vcf.idx");
|
||||
if (vcf.exists()) vcf.delete();
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString1KG() + " -B:eval,VCF " + validationDataLocation + "justHeader.vcf", 1,
|
||||
Arrays.asList("b3e80d77f8c05edf2a385b0a87708670"));
|
||||
executeTest("testEmptyVCF", spec);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testComplexVCFPileup() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
|
|
|
|||
Loading…
Reference in New Issue