Consolidate the checking of the RMD sequence dictionary against the reference into a single function, and add an integration test verifying that empty VCFs pass both the indexing step and the sequence dictionary validation.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4750 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
abc13d0a90
commit
b03ac61e9d
|
|
@ -334,7 +334,7 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
validateSuppliedReferenceOrderedData(tracks);
|
validateSuppliedReferenceOrderedData(tracks);
|
||||||
|
|
||||||
// validate all the sequence dictionaries against the reference
|
// validate all the sequence dictionaries against the reference
|
||||||
validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), tracks);
|
validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), tracks, manager);
|
||||||
|
|
||||||
rodDataSources = getReferenceOrderedDataSources(tracks);
|
rodDataSources = getReferenceOrderedDataSources(tracks);
|
||||||
}
|
}
|
||||||
|
|
@ -505,7 +505,7 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
* @param reference Reference data source.
|
* @param reference Reference data source.
|
||||||
* @param tracks a collection of the reference ordered data tracks
|
* @param tracks a collection of the reference ordered data tracks
|
||||||
*/
|
*/
|
||||||
private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<RMDTrack> tracks) {
|
private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<RMDTrack> tracks, RMDTrackBuilder manager) {
|
||||||
if ((reads.isEmpty() && (tracks == null || tracks.isEmpty())) || reference == null )
|
if ((reads.isEmpty() && (tracks == null || tracks.isEmpty())) || reference == null )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
|
@ -531,21 +531,8 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
// compare the tracks to the reference, if they have a sequence dictionary
|
// compare the tracks to the reference, if they have a sequence dictionary
|
||||||
for (RMDTrack track : tracks) {
|
for (RMDTrack track : tracks)
|
||||||
SAMSequenceDictionary trackDict = track.getSequenceDictionary();
|
manager.validateTrackSequenceDictionary(track.getName(),track.getSequenceDictionary(),referenceDictionary);
|
||||||
|
|
||||||
// hack: if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
|
|
||||||
if (trackDict == null || trackDict.size() == 0) {
|
|
||||||
logger.info("Track " + track.getName() + " doesn't have a sequence dictionary built in, skipping dictionary validation");
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
Set<String> trackSequences = new TreeSet<String>();
|
|
||||||
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
|
|
||||||
trackSequences.add(dictionaryEntry.getSequenceName());
|
|
||||||
SequenceDictionaryUtils.validateDictionaries(logger, getArguments().unsafe, track.getName(), trackDict, "reference", referenceDictionary);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -503,7 +503,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
||||||
if (dict == null) return;
|
if (dict == null) return;
|
||||||
|
|
||||||
SAMSequenceDictionary currentDict = createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
|
SAMSequenceDictionary currentDict = createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
|
||||||
SequenceDictionaryUtils.validateDictionaries(logger,validationExclusionType,"GATK",dict,inputFile.getAbsolutePath(),currentDict);
|
validateTrackSequenceDictionary(inputFile.getAbsolutePath(),currentDict,dict);
|
||||||
|
|
||||||
// check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set
|
// check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set
|
||||||
for (SAMSequenceRecord seq : currentDict.getSequences()) {
|
for (SAMSequenceRecord seq : currentDict.getSequences()) {
|
||||||
|
|
@ -518,4 +518,17 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
||||||
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
|
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void validateTrackSequenceDictionary(String trackName, SAMSequenceDictionary trackDict, SAMSequenceDictionary referenceDict) {
|
||||||
|
// if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
|
||||||
|
if (trackDict == null || trackDict.size() == 0)
|
||||||
|
logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
|
||||||
|
else {
|
||||||
|
Set<String> trackSequences = new TreeSet<String>();
|
||||||
|
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
|
||||||
|
trackSequences.add(dictionaryEntry.getSequenceName());
|
||||||
|
SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,10 +3,13 @@ package org.broadinstitute.sting.playground.gatk.walkers.validation;
|
||||||
import org.broadinstitute.sting.WalkerTest;
|
import org.broadinstitute.sting.WalkerTest;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The pile-up tests, that test any changes to the underlying ROD system
|
* tests for the ROD system in general; from rod system validation to empty VCF files
|
||||||
*/
|
*/
|
||||||
public class RodSystemValidationIntegrationTest extends WalkerTest {
|
public class RodSystemValidationIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
|
|
@ -31,6 +34,18 @@ public class RodSystemValidationIntegrationTest extends WalkerTest {
|
||||||
executeTest("testSimpleVCFPileup", spec);
|
executeTest("testSimpleVCFPileup", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEmptyVCF() {
|
||||||
|
File vcf = new File(validationDataLocation + "justHeader.vcf.idx");
|
||||||
|
if (vcf.exists()) vcf.delete();
|
||||||
|
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseTestString1KG() + " -B:eval,VCF " + validationDataLocation + "justHeader.vcf", 1,
|
||||||
|
Arrays.asList("b3e80d77f8c05edf2a385b0a87708670"));
|
||||||
|
executeTest("testEmptyVCF", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testComplexVCFPileup() {
|
public void testComplexVCFPileup() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue