consolidating the checking of the RMD sequence dictionary against the reference into a single function, and adding an integration test to test that empty VCFs pass (both the indexing and the seq dictionary validation).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4750 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-11-30 00:01:56 +00:00
parent abc13d0a90
commit b03ac61e9d
3 changed files with 34 additions and 19 deletions

View File

@ -334,7 +334,7 @@ public abstract class AbstractGenomeAnalysisEngine {
validateSuppliedReferenceOrderedData(tracks);
// validate all the sequence dictionaries against the reference
validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), tracks);
validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), tracks, manager);
rodDataSources = getReferenceOrderedDataSources(tracks);
}
@ -505,7 +505,7 @@ public abstract class AbstractGenomeAnalysisEngine {
* @param reference Reference data source.
* @param tracks a collection of the reference ordered data tracks
*/
private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<RMDTrack> tracks) {
private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<RMDTrack> tracks, RMDTrackBuilder manager) {
if ((reads.isEmpty() && (tracks == null || tracks.isEmpty())) || reference == null )
return;
@ -531,21 +531,8 @@ public abstract class AbstractGenomeAnalysisEngine {
}
// compare the tracks to the reference, if they have a sequence dictionary
for (RMDTrack track : tracks) {
SAMSequenceDictionary trackDict = track.getSequenceDictionary();
// hack: if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
if (trackDict == null || trackDict.size() == 0) {
logger.info("Track " + track.getName() + " doesn't have a sequence dictionary built in, skipping dictionary validation");
continue;
}
Set<String> trackSequences = new TreeSet<String>();
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
trackSequences.add(dictionaryEntry.getSequenceName());
SequenceDictionaryUtils.validateDictionaries(logger, getArguments().unsafe, track.getName(), trackDict, "reference", referenceDictionary);
}
for (RMDTrack track : tracks)
manager.validateTrackSequenceDictionary(track.getName(),track.getSequenceDictionary(),referenceDictionary);
}
/**

View File

@ -503,7 +503,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
if (dict == null) return;
SAMSequenceDictionary currentDict = createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
SequenceDictionaryUtils.validateDictionaries(logger,validationExclusionType,"GATK",dict,inputFile.getAbsolutePath(),currentDict);
validateTrackSequenceDictionary(inputFile.getAbsolutePath(),currentDict,dict);
// check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set
for (SAMSequenceRecord seq : currentDict.getSequences()) {
@ -518,4 +518,17 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
}
}
/**
 * Validates a track's sequence dictionary against the reference sequence dictionary.
 *
 * A track whose dictionary is null or contains no sequences is treated as having no
 * built-in dictionary: an info message is logged and validation is skipped. Otherwise the
 * comparison is delegated to {@link SequenceDictionaryUtils#validateDictionaries}, using this
 * builder's {@code validationExclusionType}.
 *
 * @param trackName     name used to identify the track in log and validation messages
 * @param trackDict     the track's sequence dictionary; may be null or empty
 * @param referenceDict the reference sequence dictionary to validate against
 */
public void validateTrackSequenceDictionary(String trackName, SAMSequenceDictionary trackDict, SAMSequenceDictionary referenceDict) {
    // an empty dictionary is handled the same way as a missing (null) one: nothing to validate
    if (trackDict == null || trackDict.size() == 0) {
        logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
        return;
    }

    // the dictionaries are handed over as-is; no intermediate set of sequence names is needed
    SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
}
}

View File

@ -3,10 +3,13 @@ package org.broadinstitute.sting.playground.gatk.walkers.validation;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* The pile-up tests, that test any changes to the underlying ROD system
* tests for the ROD system in general; from rod system validation to empty VCF files
*/
public class RodSystemValidationIntegrationTest extends WalkerTest {
@ -31,6 +34,18 @@ public class RodSystemValidationIntegrationTest extends WalkerTest {
executeTest("testSimpleVCFPileup", spec);
}
/**
 * Runs the walker with a VCF track bound to {@code justHeader.vcf} and checks the output
 * against the expected MD5.
 *
 * Any existing {@code justHeader.vcf.idx} file is deleted first, presumably so that the
 * run also has to build the index for the header-only file rather than reuse an old one.
 */
@Test
public void testEmptyVCF() {
    // remove a leftover index file from a previous run, if there is one
    final File staleIndex = new File(validationDataLocation + "justHeader.vcf.idx");
    if (staleIndex.exists()) {
        staleIndex.delete();
    }

    final String commandLine = baseTestString1KG() + " -B:eval,VCF " + validationDataLocation + "justHeader.vcf";
    final List<String> expectedMd5s = Arrays.asList("b3e80d77f8c05edf2a385b0a87708670");

    WalkerTestSpec spec = new WalkerTestSpec(commandLine, 1, expectedMd5s);
    executeTest("testEmptyVCF", spec);
}
@Test
public void testComplexVCFPileup() {
WalkerTestSpec spec = new WalkerTestSpec(