- Fix DepthOfCoverage so that, when it abuses the ROD system by instantiating a track in onTraversalDone, it also supplies the correct sequence dictionary and parser.
- Changed RMDTrackBuilder to use SequenceDictionaryUtils.validateDictionaries for ref <-> ROD sequence dictionary validation. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4683 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f8e1ea7b64
commit
5b83942cee
|
|
@ -329,7 +329,7 @@ public abstract class AbstractGenomeAnalysisEngine {
|
|||
RMDTrackBuilder manager = new RMDTrackBuilder();
|
||||
|
||||
// set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference
|
||||
manager.setSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser);
|
||||
manager.setSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe);
|
||||
|
||||
List<RMDTrack> tracks = manager.getReferenceMetaDataSources(this,argCollection);
|
||||
validateSuppliedReferenceOrderedData(tracks);
|
||||
|
|
|
|||
|
|
@ -105,7 +105,7 @@ public class RMDIndexer extends CommandLineProgram {
|
|||
IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(referenceFile);
|
||||
|
||||
// add writing of the sequence dictionary, if supplied
|
||||
RMDTrackBuilder.setIndexSequenceDictionary(index, seq.getSequenceDictionary(), indexFile, false);
|
||||
builder.setIndexSequenceDictionary(inputFileSource, index, seq.getSequenceDictionary(), indexFile, false);
|
||||
}
|
||||
|
||||
// create the output stream, and write the index
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broad.tribble.index.IndexFactory;
|
|||
import org.broad.tribble.source.BasicFeatureSource;
|
||||
import org.broad.tribble.util.LittleEndianOutputStream;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
|
||||
|
|
@ -41,6 +42,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
|||
import org.broadinstitute.sting.gatk.AbstractGenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -85,6 +87,11 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
*/
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
/**
|
||||
* Validation exclusions, for validating the sequence dictionary.
|
||||
*/
|
||||
private ValidationExclusion.TYPE validationExclusionType;
|
||||
|
||||
/** Create a new plugin manager. */
|
||||
public RMDTrackBuilder() {
|
||||
super(FeatureCodec.class, "Codecs", "Codec");
|
||||
|
|
@ -92,21 +99,25 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
|
||||
/**
|
||||
* Create a new RMDTrackBuilder, with dictionary and genomeLocParser predefined.
|
||||
* @param dict
|
||||
* @param genomeLocParser
|
||||
* @param dict Sequence dictionary to use.
|
||||
* @param genomeLocParser Location parser to use.
|
||||
* @param validationExclusionType Types of validations to exclude, for sequence dictionary verification.
|
||||
*/
|
||||
public RMDTrackBuilder(SAMSequenceDictionary dict,GenomeLocParser genomeLocParser) {
|
||||
public RMDTrackBuilder(SAMSequenceDictionary dict,GenomeLocParser genomeLocParser, ValidationExclusion.TYPE validationExclusionType) {
|
||||
super(FeatureCodec.class, "Codecs", "Codec");
|
||||
setSequenceDictionary(dict,genomeLocParser);
|
||||
setSequenceDictionary(dict,genomeLocParser,validationExclusionType);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param dict the sequence dictionary to use as a reference for Tribble track contig length lookups
|
||||
* Establish location-aware parsing and services for relevant reference metadata.
|
||||
* @param dict Sequence dictionary to use.
|
||||
* @param genomeLocParser Location parser to use.
|
||||
* @param validationExclusionType Types of validations to exclude, for sequence dictionary verification.
|
||||
*/
|
||||
public void setSequenceDictionary(SAMSequenceDictionary dict,GenomeLocParser genomeLocParser) {
|
||||
public void setSequenceDictionary(SAMSequenceDictionary dict,GenomeLocParser genomeLocParser,ValidationExclusion.TYPE validationExclusionType) {
|
||||
this.dict = dict;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.validationExclusionType = validationExclusionType;
|
||||
}
|
||||
|
||||
/** @return a list of all available track types we currently have access to create */
|
||||
|
|
@ -226,7 +237,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
// if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match
|
||||
if (dictFromIndex.size() == 0 && dict != null) {
|
||||
File indexFile = Tribble.indexFile(inputFile);
|
||||
setIndexSequenceDictionary(index,dict,indexFile,true);
|
||||
setIndexSequenceDictionary(inputFile,index,dict,indexFile,true);
|
||||
dictFromIndex = getSequenceDictionaryFromProperties(index);
|
||||
}
|
||||
|
||||
|
|
@ -366,7 +377,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
// this can take a while, let them know what we're doing
|
||||
logger.info("Creating Tribble index in memory for file " + inputFile);
|
||||
Index idx = IndexFactory.createIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
|
||||
setIndexSequenceDictionary(idx, dict, null, false);
|
||||
setIndexSequenceDictionary(inputFile, idx, dict, null, false);
|
||||
return idx;
|
||||
}
|
||||
|
||||
|
|
@ -493,25 +504,23 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
/**
|
||||
* set the sequence dictionary of the track. This function checks that the contig listing of the underlying file is compatible.
|
||||
* (that each contig in the index is in the sequence dictionary).
|
||||
* @param inputFile for proper error message formatting.
|
||||
* @param dict the sequence dictionary
|
||||
* @param index the index file
|
||||
* @param indexFile the index file
|
||||
* @param rewriteIndex should we rewrite the index when we're done?
|
||||
*
|
||||
*/
|
||||
public static void setIndexSequenceDictionary(Index index, SAMSequenceDictionary dict, File indexFile, boolean rewriteIndex) {
|
||||
public void setIndexSequenceDictionary(File inputFile, Index index, SAMSequenceDictionary dict, File indexFile, boolean rewriteIndex) {
|
||||
if (dict == null) return;
|
||||
|
||||
SAMSequenceDictionary currentDict = createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
|
||||
SequenceDictionaryUtils.validateDictionaries(logger,validationExclusionType,"GATK",dict,inputFile.getAbsolutePath(),currentDict);
|
||||
|
||||
// check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set
|
||||
for (SAMSequenceRecord seq : currentDict.getSequences()) {
|
||||
if (dict.getSequence(seq.getSequenceName()) == null)
|
||||
throw new UserException.IncompatibleSequenceDictionaries("The sequence dictionary from the reference the GATK is running with is not compatible with the sequence " +
|
||||
"dictionary in the Tribble file " + indexFile + ". It doesn't contain the contig: " + seq.getSequenceName(),
|
||||
"RMD Sequence Dictionary",
|
||||
currentDict,
|
||||
"Reference Sequence Dictionary",
|
||||
dict);
|
||||
continue;
|
||||
index.addProperty(SequenceDictionaryPropertyPredicate + dict.getSequence(seq.getSequenceName()).getSequenceName(), String.valueOf(dict.getSequence(seq.getSequenceName()).getSequenceLength()));
|
||||
}
|
||||
// re-write the index
|
||||
|
|
|
|||
|
|
@ -403,7 +403,9 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partiti
|
|||
}
|
||||
|
||||
private LocationAwareSeekableRODIterator initializeRefSeq() {
|
||||
RMDTrackBuilder builder = new RMDTrackBuilder();
|
||||
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
getToolkit().getArguments().unsafe);
|
||||
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,refSeqGeneList).first;
|
||||
try {
|
||||
return new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
|||
logger.info("Loading SNP mask... ");
|
||||
ReferenceOrderedData snp_mask;
|
||||
if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) {
|
||||
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser());
|
||||
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),getToolkit().getArguments().unsafe);
|
||||
CloseableIterator<GATKFeature> iter = builder.createInstanceOfTrack(DbSNPCodec.class,"snp_mask",new java.io.File(SNP_MASK)).getIterator();
|
||||
snpMaskIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),iter);
|
||||
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@ package org.broadinstitute.sting.utils;
|
|||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -66,7 +65,7 @@ public class SequenceDictionaryUtils {
|
|||
private static SAMSequenceRecord CHR2_HG19 = new SAMSequenceRecord("chr2", 243199373);
|
||||
private static SAMSequenceRecord CHR10_HG19 = new SAMSequenceRecord("chr10", 135534747);
|
||||
|
||||
public enum SequenceDictionaryCompatability {
|
||||
public enum SequenceDictionaryCompatibility {
|
||||
IDENTICAL, // the dictionaries are identical
|
||||
COMMON_SUBSET, // there exists a common subset of equivalent contigs
|
||||
NO_COMMON_CONTIGS, // no overlap between dictionaries
|
||||
|
|
@ -96,7 +95,7 @@ public class SequenceDictionaryUtils {
|
|||
* @param dict2 the sequence dictionary dict2
|
||||
*/
|
||||
public static void validateDictionaries(Logger logger, ValidationExclusion.TYPE validationExclusion, String name1, SAMSequenceDictionary dict1, String name2, SAMSequenceDictionary dict2) {
|
||||
SequenceDictionaryCompatability type = compareDictionaries(dict1, dict2);
|
||||
SequenceDictionaryCompatibility type = compareDictionaries(dict1, dict2);
|
||||
switch ( type ) {
|
||||
case IDENTICAL:
|
||||
return;
|
||||
|
|
@ -155,23 +154,23 @@ public class SequenceDictionaryUtils {
|
|||
* @param dict2
|
||||
* @return
|
||||
*/
|
||||
public static SequenceDictionaryCompatability compareDictionaries(SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
|
||||
public static SequenceDictionaryCompatibility compareDictionaries(SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
|
||||
// If there's no overlap between reads and reference, data will be bogus. Throw an exception.
|
||||
if ( nonCanonicalHumanContigOrder( dict1 ) || nonCanonicalHumanContigOrder(dict2) )
|
||||
return SequenceDictionaryCompatability.NON_CANONICAL_HUMAN_ORDER;
|
||||
return SequenceDictionaryCompatibility.NON_CANONICAL_HUMAN_ORDER;
|
||||
|
||||
Set<String> commonContigs = getCommonContigsByName(dict1, dict2);
|
||||
|
||||
if (commonContigs.size() == 0)
|
||||
return SequenceDictionaryCompatability.NO_COMMON_CONTIGS;
|
||||
return SequenceDictionaryCompatibility.NO_COMMON_CONTIGS;
|
||||
else if ( ! commonContigsAreEquivalent( commonContigs, dict1, dict2 ) )
|
||||
return SequenceDictionaryCompatability.UNEQUAL_COMMON_CONTIGS;
|
||||
return SequenceDictionaryCompatibility.UNEQUAL_COMMON_CONTIGS;
|
||||
else if ( ! commonContigsAreInOrder( commonContigs, dict1, dict2 ) )
|
||||
return SequenceDictionaryCompatability.OUT_OF_ORDER;
|
||||
return SequenceDictionaryCompatibility.OUT_OF_ORDER;
|
||||
else if ( commonContigs.size() == dict1.size() && commonContigs.size() == dict2.size() )
|
||||
return SequenceDictionaryCompatability.IDENTICAL;
|
||||
return SequenceDictionaryCompatibility.IDENTICAL;
|
||||
else {
|
||||
return SequenceDictionaryCompatability.COMMON_SUBSET;
|
||||
return SequenceDictionaryCompatibility.COMMON_SUBSET;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
|||
seq = new IndexedFastaSequenceFile(new File(hg18Reference));
|
||||
genomeLocParser = new GenomeLocParser(seq);
|
||||
builder = new RMDTrackBuilder();
|
||||
builder.setSequenceDictionary(seq.getSequenceDictionary(),genomeLocParser);
|
||||
builder.setSequenceDictionary(seq.getSequenceDictionary(),genomeLocParser,null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
|||
public void setUp() {
|
||||
File file = new File(testDir + "TabularDataTest.dat");
|
||||
RMDTrackBuilder builder = new RMDTrackBuilder();
|
||||
builder.setSequenceDictionary(seq.getSequenceDictionary(),genomeLocParser);
|
||||
builder.setSequenceDictionary(seq.getSequenceDictionary(),genomeLocParser,null);
|
||||
rod = builder.createInstanceOfTrack(TableCodec.class, "tableTest", file);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import net.sf.samtools.SAMSequenceDictionary;
|
|||
import org.broad.tribble.Tribble;
|
||||
import org.broad.tribble.index.Index;
|
||||
import org.broad.tribble.vcf.VCFCodec;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -152,8 +153,8 @@ public class RMDTrackBuilderUnitTest extends BaseTest {
|
|||
File vcfFile = createCorrectDateIndexFile(new File(validationDataLocation + "/ROD_validation/newerTribbleTrack.vcf"));
|
||||
Long indexTimeStamp = Tribble.indexFile(vcfFile).lastModified();
|
||||
try {
|
||||
builder.setSequenceDictionary(seq.getSequenceDictionary(),genomeLocParser,null);
|
||||
Index idx = builder.loadIndex(vcfFile, new VCFCodec());
|
||||
RMDTrackBuilder.setIndexSequenceDictionary(idx,seq.getSequenceDictionary(),vcfFile,false);
|
||||
// catch any exception; this call should pass correctly
|
||||
SAMSequenceDictionary dict = RMDTrackBuilder.getSequenceDictionaryFromProperties(idx);
|
||||
} catch (IOException e) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue