Consolidate the check of the RMD sequence dictionary against the reference into a single function, and add an integration test verifying that empty VCFs pass both the indexing and the sequence dictionary validation.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4750 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-11-30 00:01:56 +00:00
parent abc13d0a90
commit b03ac61e9d
3 changed files with 34 additions and 19 deletions

View File

@ -334,7 +334,7 @@ public abstract class AbstractGenomeAnalysisEngine {
validateSuppliedReferenceOrderedData(tracks); validateSuppliedReferenceOrderedData(tracks);
// validate all the sequence dictionaries against the reference // validate all the sequence dictionaries against the reference
validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), tracks); validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), tracks, manager);
rodDataSources = getReferenceOrderedDataSources(tracks); rodDataSources = getReferenceOrderedDataSources(tracks);
} }
@ -505,7 +505,7 @@ public abstract class AbstractGenomeAnalysisEngine {
* @param reference Reference data source. * @param reference Reference data source.
* @param tracks a collection of the reference ordered data tracks * @param tracks a collection of the reference ordered data tracks
*/ */
private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<RMDTrack> tracks) { private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<RMDTrack> tracks, RMDTrackBuilder manager) {
if ((reads.isEmpty() && (tracks == null || tracks.isEmpty())) || reference == null ) if ((reads.isEmpty() && (tracks == null || tracks.isEmpty())) || reference == null )
return; return;
@ -531,21 +531,8 @@ public abstract class AbstractGenomeAnalysisEngine {
} }
// compare the tracks to the reference, if they have a sequence dictionary // compare the tracks to the reference, if they have a sequence dictionary
for (RMDTrack track : tracks) { for (RMDTrack track : tracks)
SAMSequenceDictionary trackDict = track.getSequenceDictionary(); manager.validateTrackSequenceDictionary(track.getName(),track.getSequenceDictionary(),referenceDictionary);
// hack: if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
if (trackDict == null || trackDict.size() == 0) {
logger.info("Track " + track.getName() + " doesn't have a sequence dictionary built in, skipping dictionary validation");
continue;
}
Set<String> trackSequences = new TreeSet<String>();
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
trackSequences.add(dictionaryEntry.getSequenceName());
SequenceDictionaryUtils.validateDictionaries(logger, getArguments().unsafe, track.getName(), trackDict, "reference", referenceDictionary);
}
} }
/** /**

View File

@ -503,7 +503,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
if (dict == null) return; if (dict == null) return;
SAMSequenceDictionary currentDict = createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary()); SAMSequenceDictionary currentDict = createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
SequenceDictionaryUtils.validateDictionaries(logger,validationExclusionType,"GATK",dict,inputFile.getAbsolutePath(),currentDict); validateTrackSequenceDictionary(inputFile.getAbsolutePath(),currentDict,dict);
// check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set // check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set
for (SAMSequenceRecord seq : currentDict.getSequences()) { for (SAMSequenceRecord seq : currentDict.getSequences()) {
@ -518,4 +518,17 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK"); logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
} }
} }
/**
 * Validate a track's sequence dictionary against the reference sequence dictionary.
 *
 * If the track dictionary is null or contains no sequences, validation is skipped and an
 * informational message is logged instead; otherwise the comparison is delegated to
 * {@link SequenceDictionaryUtils#validateDictionaries}, using this builder's
 * validationExclusionType setting.
 *
 * @param trackName     the name used to identify the track in log and validation messages
 * @param trackDict     the sequence dictionary of the track; may be null or empty
 * @param referenceDict the sequence dictionary to validate the track dictionary against
 */
public void validateTrackSequenceDictionary(String trackName, SAMSequenceDictionary trackDict, SAMSequenceDictionary referenceDict) {
    // if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
    if (trackDict == null || trackDict.size() == 0) {
        logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
        return;
    }

    SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
}
} }

View File

@ -3,10 +3,13 @@ package org.broadinstitute.sting.playground.gatk.walkers.validation;
import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List;
/** /**
* The pile-up tests, that test any changes to the underlying ROD system * tests for the ROD system in general; from rod system validation to empty VCF files
*/ */
public class RodSystemValidationIntegrationTest extends WalkerTest { public class RodSystemValidationIntegrationTest extends WalkerTest {
@ -31,6 +34,18 @@ public class RodSystemValidationIntegrationTest extends WalkerTest {
executeTest("testSimpleVCFPileup", spec); executeTest("testSimpleVCFPileup", spec);
} }
/**
 * Runs the walker against a VCF named justHeader.vcf (presumably a header-only file -- confirm
 * against the validation data) and checks the result against a fixed MD5.
 *
 * Any existing justHeader.vcf.idx index file is removed first, presumably so that the run has
 * to index the file again rather than reuse a previously written index.
 */
@Test
public void testEmptyVCF() {
    // this is the index that sits next to the VCF, not the VCF itself
    final File staleIndex = new File(validationDataLocation + "justHeader.vcf.idx");
    if (staleIndex.exists()) {
        staleIndex.delete();
    }

    final String commandLine = baseTestString1KG() + " -B:eval,VCF " + validationDataLocation + "justHeader.vcf";
    final WalkerTestSpec emptyVcfSpec = new WalkerTestSpec(
            commandLine,
            1,
            Arrays.asList("b3e80d77f8c05edf2a385b0a87708670"));

    executeTest("testEmptyVCF", emptyVcfSpec);
}
@Test @Test
public void testComplexVCFPileup() { public void testComplexVCFPileup() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(