Merge pull request #594 from broadinstitute/dr_vcf_sample_renaming
Extend on-the-fly sample renaming feature to vcfs
This commit is contained in:
commit
b07c0a6b4c
|
|
@ -758,13 +758,18 @@ public class GenomeAnalysisEngine {
|
|||
validateSuppliedReads();
|
||||
initializeReadTransformers(walker);
|
||||
|
||||
readsDataSource = createReadsDataSource(argCollection,genomeLocParser,referenceDataSource.getReference());
|
||||
final Map<String, String> sampleRenameMap = argCollection.sampleRenameMappingFile != null ?
|
||||
loadSampleRenameMap(argCollection.sampleRenameMappingFile) :
|
||||
null;
|
||||
|
||||
readsDataSource = createReadsDataSource(argCollection,genomeLocParser,referenceDataSource.getReference(), sampleRenameMap);
|
||||
|
||||
for (ReadFilter filter : filters)
|
||||
filter.initialize(this);
|
||||
|
||||
// set the sequence dictionary of all of the Tribble tracks to the sequence dictionary of our reference
|
||||
rodDataSources = getReferenceOrderedDataSources(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe);
|
||||
rodDataSources = getReferenceOrderedDataSources(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),
|
||||
genomeLocParser,argCollection.unsafe,sampleRenameMap);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -846,7 +851,8 @@ public class GenomeAnalysisEngine {
|
|||
* @param refReader reader
|
||||
* @return A data source for the given set of reads.
|
||||
*/
|
||||
private SAMDataSource createReadsDataSource(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser, IndexedFastaSequenceFile refReader) {
|
||||
private SAMDataSource createReadsDataSource(final GATKArgumentCollection argCollection, final GenomeLocParser genomeLocParser,
|
||||
final IndexedFastaSequenceFile refReader, final Map<String, String> sampleRenameMap) {
|
||||
DownsamplingMethod downsamplingMethod = getDownsamplingMethod();
|
||||
|
||||
// Synchronize the method back into the collection so that it shows up when
|
||||
|
|
@ -865,10 +871,6 @@ public class GenomeAnalysisEngine {
|
|||
|
||||
final boolean keepReadsInLIBS = walker instanceof ActiveRegionWalker;
|
||||
|
||||
final Map<SAMReaderID, String> sampleRenameMap = argCollection.sampleRenameMappingFile != null ?
|
||||
loadSampleRenameMap(argCollection.sampleRenameMappingFile) :
|
||||
null;
|
||||
|
||||
return new SAMDataSource(
|
||||
samReaderIDs,
|
||||
threadAllocation,
|
||||
|
|
@ -892,19 +894,19 @@ public class GenomeAnalysisEngine {
|
|||
* Loads a user-provided sample rename map file for use in on-the-fly sample renaming into an in-memory
|
||||
* HashMap. This file must consist of lines with two whitespace-separated fields:
|
||||
*
|
||||
* absolute_path_to_bam_file new_sample_name
|
||||
* absolute_path_to_file new_sample_name
|
||||
*
|
||||
* The engine will verify that each bam file contains reads from only one sample when the on-the-fly sample
|
||||
* renaming feature is being used.
|
||||
* The engine will verify that each file contains data from only one sample when the on-the-fly sample
|
||||
* renaming feature is being used. Note that this feature works only with bam and vcf files.
|
||||
*
|
||||
* @param sampleRenameMapFile sample rename map file from which to load data
|
||||
* @return a HashMap containing the contents of the map file, with the keys being the bam file paths and
|
||||
* @return a HashMap containing the contents of the map file, with the keys being the input file paths and
|
||||
* the values being the new sample names.
|
||||
*/
|
||||
protected Map<SAMReaderID, String> loadSampleRenameMap( final File sampleRenameMapFile ) {
|
||||
logger.info("Renaming samples from BAM files on-the-fly using mapping file " + sampleRenameMapFile.getAbsolutePath());
|
||||
protected Map<String, String> loadSampleRenameMap( final File sampleRenameMapFile ) {
|
||||
logger.info("Renaming samples from input files on-the-fly using mapping file " + sampleRenameMapFile.getAbsolutePath());
|
||||
|
||||
final Map<SAMReaderID, String> sampleRenameMap = new HashMap<>((int)sampleRenameMapFile.length() / 50);
|
||||
final Map<String, String> sampleRenameMap = new HashMap<>((int)sampleRenameMapFile.length() / 50);
|
||||
|
||||
try {
|
||||
for ( final String line : new XReadLines(sampleRenameMapFile) ) {
|
||||
|
|
@ -916,21 +918,21 @@ public class GenomeAnalysisEngine {
|
|||
tokens.length, line));
|
||||
}
|
||||
|
||||
final File bamFile = new File(tokens[0]);
|
||||
final File inputFile = new File(tokens[0]);
|
||||
final String newSampleName = tokens[1];
|
||||
|
||||
if ( ! bamFile.isAbsolute() ) {
|
||||
throw new UserException.MalformedFile(sampleRenameMapFile, "Bam file path not absolute at line: " + line);
|
||||
if ( ! inputFile.isAbsolute() ) {
|
||||
throw new UserException.MalformedFile(sampleRenameMapFile, "Input file path not absolute at line: " + line);
|
||||
}
|
||||
|
||||
final SAMReaderID bamID = new SAMReaderID(bamFile, new Tags());
|
||||
final String inputFilePath = inputFile.getAbsolutePath();
|
||||
|
||||
if ( sampleRenameMap.containsKey(bamID) ) {
|
||||
if ( sampleRenameMap.containsKey(inputFilePath) ) {
|
||||
throw new UserException.MalformedFile(sampleRenameMapFile,
|
||||
String.format("Bam file %s appears more than once", bamFile.getAbsolutePath()));
|
||||
String.format("Input file %s appears more than once", inputFilePath));
|
||||
}
|
||||
|
||||
sampleRenameMap.put(bamID, newSampleName);
|
||||
sampleRenameMap.put(inputFilePath, newSampleName);
|
||||
}
|
||||
}
|
||||
catch ( FileNotFoundException e ) {
|
||||
|
|
@ -958,15 +960,18 @@ public class GenomeAnalysisEngine {
|
|||
* @param sequenceDictionary GATK-wide sequence dictionary to use for validation.
|
||||
* @param genomeLocParser to use when creating and validating GenomeLocs.
|
||||
* @param validationExclusionType potentially indicate which validations to include / exclude.
|
||||
* @param sampleRenameMap map of file -> new sample name used when doing on-the-fly sample renaming
|
||||
*
|
||||
* @return A list of reference-ordered data sources.
|
||||
*/
|
||||
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(Collection<RMDTriplet> referenceMetaDataFiles,
|
||||
SAMSequenceDictionary sequenceDictionary,
|
||||
GenomeLocParser genomeLocParser,
|
||||
ValidationExclusion.TYPE validationExclusionType) {
|
||||
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(final Collection<RMDTriplet> referenceMetaDataFiles,
|
||||
final SAMSequenceDictionary sequenceDictionary,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
final ValidationExclusion.TYPE validationExclusionType,
|
||||
final Map<String, String> sampleRenameMap) {
|
||||
final RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, validationExclusionType,
|
||||
getArguments().disableAutoIndexCreationAndLockingWhenReadingRods);
|
||||
getArguments().disableAutoIndexCreationAndLockingWhenReadingRods,
|
||||
sampleRenameMap);
|
||||
|
||||
final List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
||||
for (RMDTriplet fileDescriptor : referenceMetaDataFiles)
|
||||
|
|
|
|||
|
|
@ -359,14 +359,18 @@ public class GATKArgumentCollection {
|
|||
*/
|
||||
@Argument(fullName = "keep_program_records", shortName = "kpr", doc = "Keep program records in the SAM header", required = false)
|
||||
public boolean keepProgramRecords = false;
|
||||
|
||||
/**
|
||||
* This option requires that each BAM file listed in the mapping file have only a single sample specified in its header
|
||||
* (though there may be multiple read groups for that sample). Each line of the mapping file must contain the absolute
|
||||
* path to a BAM file, followed by whitespace, followed by the new sample name for that BAM file.
|
||||
* On-the-fly sample renaming works only with single-sample BAM and VCF files. Each line of the mapping file must
|
||||
* contain the absolute path to a BAM or VCF file, followed by whitespace, followed by the new sample name for that
|
||||
* BAM or VCF file. The engine will verify at runtime that each BAM/VCF targeted for sample renaming has only
|
||||
* a single sample specified in its header (though, in the case of BAM files, there may be multiple read groups for
|
||||
* that sample).
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName = "sample_rename_mapping_file", shortName = "sample_rename_mapping_file", doc = "Rename sample IDs on-the-fly at runtime using the provided mapping file", required = false)
|
||||
public File sampleRenameMappingFile = null;
|
||||
|
||||
/**
|
||||
* For expert users only who know what they are doing. We do not support usage of this argument, so we may refuse to help you if you use it and something goes wrong.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -132,9 +132,9 @@ public class SAMDataSource {
|
|||
private final Map<SAMReaderID,ReadGroupMapping> originalToMergedReadGroupMappings = new HashMap<SAMReaderID,ReadGroupMapping>();
|
||||
|
||||
/**
|
||||
* Mapping from bam file ID to new sample name. Used only when doing on-the-fly sample renaming.
|
||||
* Mapping from input file path to new sample name. Used only when doing on-the-fly sample renaming.
|
||||
*/
|
||||
private Map<SAMReaderID, String> sampleRenameMap = null;
|
||||
private Map<String, String> sampleRenameMap = null;
|
||||
|
||||
/** our log, which we want to capture anything from this class */
|
||||
private static Logger logger = Logger.getLogger(SAMDataSource.class);
|
||||
|
|
@ -253,7 +253,7 @@ public class SAMDataSource {
|
|||
byte defaultBaseQualities,
|
||||
boolean removeProgramRecords,
|
||||
final boolean keepReadsInLIBS,
|
||||
final Map<SAMReaderID, String> sampleRenameMap) {
|
||||
final Map<String, String> sampleRenameMap) {
|
||||
|
||||
this.readMetrics = new ReadMetrics();
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
|
|
@ -879,7 +879,7 @@ public class SAMDataSource {
|
|||
|
||||
// The remappedSampleName will be null if either no on-the-fly sample renaming was requested,
|
||||
// or the user's sample rename map file didn't contain an entry for this bam file:
|
||||
final String remappedSampleName = sampleRenameMap != null ? sampleRenameMap.get(readerID) : null;
|
||||
final String remappedSampleName = sampleRenameMap != null ? sampleRenameMap.get(readerID.getSamFilePath()) : null;
|
||||
|
||||
// If we've been asked to rename the sample for this bam file, do so now. We'll check to
|
||||
// make sure this bam only contains reads from one sample before proceeding.
|
||||
|
|
|
|||
|
|
@ -252,18 +252,28 @@ public class FeatureManager {
|
|||
*
|
||||
* @param descriptor FeatureDescriptor of the Tribble FeatureCodec we want to create
|
||||
* @param name the name to assign this codec
|
||||
* @param genomeLocParser GenomeLocParser for ReferenceDependentFeatureCodecs
|
||||
* @param remappedSampleName replacement sample name for single-sample vcfs, or null if we're not performing
|
||||
* sample name remapping
|
||||
* @return the feature codec itself
|
||||
*/
|
||||
@Requires({"descriptor != null", "name != null", "genomeLocParser != null"})
|
||||
@Ensures("result != null")
|
||||
public FeatureCodec createCodec(FeatureDescriptor descriptor, String name, GenomeLocParser genomeLocParser) {
|
||||
public FeatureCodec createCodec(final FeatureDescriptor descriptor, final String name, final GenomeLocParser genomeLocParser,
|
||||
final String remappedSampleName) {
|
||||
FeatureCodec codex = pluginManager.createByType(descriptor.getCodecClass());
|
||||
if ( codex instanceof NameAwareCodec )
|
||||
((NameAwareCodec)codex).setName(name);
|
||||
if ( codex instanceof ReferenceDependentFeatureCodec )
|
||||
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
|
||||
if ( codex instanceof AbstractVCFCodec && lenientVCFProcessing )
|
||||
((AbstractVCFCodec)codex).disableOnTheFlyModifications();
|
||||
if ( codex instanceof AbstractVCFCodec ) {
|
||||
if ( lenientVCFProcessing ) {
|
||||
((AbstractVCFCodec)codex).disableOnTheFlyModifications();
|
||||
}
|
||||
if ( remappedSampleName != null ) {
|
||||
((AbstractVCFCodec)codex).setRemappedSampleName(remappedSampleName);
|
||||
}
|
||||
}
|
||||
|
||||
return codex;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ import org.broadinstitute.sting.utils.instrumentation.Sizeof;
|
|||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -86,6 +87,9 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
// make any file lock acquisition calls on the index files.
|
||||
private final boolean disableAutoIndexCreation;
|
||||
|
||||
// Map of file name -> new sample name used when performing on-the-fly sample renaming
|
||||
private final Map<String, String> sampleRenameMap;
|
||||
|
||||
/**
|
||||
* Construct an RMDTrackBuilder, allowing the user to define tracks to build after-the-fact. This is generally
|
||||
* used when walkers want to directly manage the ROD system for whatever reason. Before using this constructor,
|
||||
|
|
@ -96,16 +100,19 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
* @param disableAutoIndexCreation Do not auto-create index files, and do not use file locking when accessing index files.
|
||||
* UNSAFE in general (because it causes us not to lock index files before reading them) --
|
||||
* suitable only for test suite use.
|
||||
* @param sampleRenameMap Map of file name -> new sample name used when performing on-the-fly sample renaming
|
||||
*/
|
||||
public RMDTrackBuilder(final SAMSequenceDictionary dict,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
final ValidationExclusion.TYPE validationExclusionType,
|
||||
final boolean disableAutoIndexCreation) {
|
||||
final boolean disableAutoIndexCreation,
|
||||
final Map<String, String> sampleRenameMap) {
|
||||
this.dict = dict;
|
||||
this.validationExclusionType = validationExclusionType;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.featureManager = new FeatureManager(GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType));
|
||||
this.disableAutoIndexCreation = disableAutoIndexCreation;
|
||||
this.sampleRenameMap = sampleRenameMap;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -139,7 +146,7 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
else
|
||||
pair = getFeatureSource(descriptor, name, inputFile, fileDescriptor.getStorageType());
|
||||
if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file");
|
||||
return new RMDTrack(descriptor.getCodecClass(), name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(descriptor, name));
|
||||
return new RMDTrack(descriptor.getCodecClass(), name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(descriptor, name, inputFile));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -173,7 +180,7 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
try {
|
||||
final File indexFile = null;//new File(inputFile.getAbsoluteFile() + TabixUtils.STANDARD_INDEX_EXTENSION);
|
||||
final SAMSequenceDictionary dict = null; //TabixUtils.getSequenceDictionary(indexFile);
|
||||
return new Pair<>(AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), createCodec(descriptor, name)), dict);
|
||||
return new Pair<>(AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), createCodec(descriptor, name, inputFile)), dict);
|
||||
} catch (TribbleException e) {
|
||||
throw new UserException(e.getMessage(), e);
|
||||
}
|
||||
|
|
@ -183,10 +190,15 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
* add a name to the codec, if it takes one
|
||||
* @param descriptor the class to create a codec for
|
||||
* @param name the name to assign this codec
|
||||
* @param inputFile input file that we will be decoding
|
||||
* @return the feature codec itself
|
||||
*/
|
||||
private FeatureCodec createCodec(FeatureManager.FeatureDescriptor descriptor, String name) {
|
||||
return featureManager.createCodec(descriptor, name, genomeLocParser);
|
||||
private FeatureCodec createCodec(final FeatureManager.FeatureDescriptor descriptor, final String name, final File inputFile) {
|
||||
// The remappedSampleName will be null if either no on-the-fly sample renaming was requested,
|
||||
// or the user's sample rename map file didn't contain an entry for this file:
|
||||
final String remappedSampleName = sampleRenameMap != null ? sampleRenameMap.get(inputFile.getAbsolutePath()) : null;
|
||||
|
||||
return featureManager.createCodec(descriptor, name, genomeLocParser, remappedSampleName);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -210,7 +222,7 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
|
||||
if(canBeIndexed) {
|
||||
try {
|
||||
Index index = loadIndex(inputFile, createCodec(descriptor, name));
|
||||
Index index = loadIndex(inputFile, createCodec(descriptor, name, inputFile));
|
||||
try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
|
||||
catch (ReviewedStingException e) { }
|
||||
|
||||
|
|
@ -232,7 +244,7 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);
|
||||
}
|
||||
|
||||
featureSource = AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), createCodec(descriptor, name), index);
|
||||
featureSource = AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), createCodec(descriptor, name, inputFile), index);
|
||||
}
|
||||
catch (TribbleException e) {
|
||||
throw new UserException(e.getMessage());
|
||||
|
|
@ -242,7 +254,7 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
}
|
||||
}
|
||||
else {
|
||||
featureSource = AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), createCodec(descriptor, name), false);
|
||||
featureSource = AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), createCodec(descriptor, name, inputFile), false);
|
||||
}
|
||||
|
||||
return new Pair<AbstractFeatureReader,SAMSequenceDictionary>(featureSource,sequenceDictionary);
|
||||
|
|
|
|||
|
|
@ -593,7 +593,8 @@ public class DepthOfCoverage extends LocusWalker<Map<DoCOutputType.Partition,Map
|
|||
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
getToolkit().getArguments().unsafe,
|
||||
getToolkit().getArguments().disableAutoIndexCreationAndLockingWhenReadingRods);
|
||||
getToolkit().getArguments().disableAutoIndexCreationAndLockingWhenReadingRods,
|
||||
null);
|
||||
RMDTrack refseq = builder.createInstanceOfTrack(RefSeqCodec.class,refSeqGeneList);
|
||||
return new SeekableRODIterator(refseq.getHeader(),refseq.getSequenceDictionary(),getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
getToolkit().getGenomeLocParser(),refseq.getIterator());
|
||||
|
|
|
|||
|
|
@ -195,7 +195,8 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
getToolkit().getArguments().unsafe,
|
||||
getToolkit().getArguments().disableAutoIndexCreationAndLockingWhenReadingRods);
|
||||
getToolkit().getArguments().disableAutoIndexCreationAndLockingWhenReadingRods,
|
||||
null);
|
||||
dbsnpIterator = builder.createInstanceOfTrack(VCFCodec.class, new File(dbsnp.dbsnp.getSource())).getIterator();
|
||||
// Note that we should really use some sort of seekable iterator here so that the search doesn't take forever
|
||||
// (but it's complicated because the hapmap location doesn't match the dbsnp location, so we don't know where to seek to)
|
||||
|
|
|
|||
|
|
@ -28,20 +28,33 @@ package org.broadinstitute.sting.gatk;
|
|||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broad.tribble.readers.LineIterator;
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
|
||||
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.qc.ErrorThrowing;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLine;
|
||||
|
|
@ -504,6 +517,91 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
|
|||
executeTest("testOnTheFlySampleRenamingVerifyWalkerSeesNewSamplesInReads", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOnTheFlySampleRenamingSingleSampleVCF() throws IOException {
|
||||
final File sampleRenameMapFile = createTestSampleRenameMapFile(
|
||||
Arrays.asList(privateTestDir + "NA12878.WGS.b37.chr20.firstMB.vcf newSampleForNA12878"));
|
||||
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(" -T CombineVariants" +
|
||||
" -R " + b37KGReference +
|
||||
" -V " + privateTestDir + "NA12878.WGS.b37.chr20.firstMB.vcf" +
|
||||
" --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() +
|
||||
" -o %s",
|
||||
1,
|
||||
Arrays.asList("")); // No MD5s -- we will inspect the output file manually
|
||||
|
||||
final File outputVCF = executeTest("testOnTheFlySampleRenamingSingleSampleVCF", spec).first.get(0);
|
||||
verifySampleRenaming(outputVCF, "newSampleForNA12878");
|
||||
}
|
||||
|
||||
private void verifySampleRenaming( final File outputVCF, final String newSampleName ) throws IOException {
|
||||
final Pair<VCFHeader, GATKVCFUtils.VCIterable<LineIterator>> headerAndVCIter = GATKVCFUtils.readAllVCs(outputVCF, new VCFCodec());
|
||||
final VCFHeader header = headerAndVCIter.getFirst();
|
||||
final GATKVCFUtils.VCIterable<LineIterator> iter = headerAndVCIter.getSecond();
|
||||
|
||||
// Verify that sample renaming occurred at both the header and record levels (checking only the first 10 records):
|
||||
|
||||
Assert.assertEquals(header.getGenotypeSamples().size(), 1, "Wrong number of samples in output vcf header");
|
||||
Assert.assertEquals(header.getGenotypeSamples().get(0), newSampleName, "Wrong sample name in output vcf header");
|
||||
|
||||
int recordCount = 0;
|
||||
while ( iter.hasNext() && recordCount < 10 ) {
|
||||
final VariantContext vcfRecord = iter.next();
|
||||
Assert.assertEquals(vcfRecord.getSampleNames().size(), 1, "Wrong number of samples in output vcf record");
|
||||
Assert.assertEquals(vcfRecord.getSampleNames().iterator().next(), newSampleName, "Wrong sample name in output vcf record");
|
||||
recordCount++;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOnTheFlySampleRenamingVerifyWalkerSeesNewSamplesInVCFRecords() throws Exception {
|
||||
final File sampleRenameMapFile = createTestSampleRenameMapFile(
|
||||
Arrays.asList(privateTestDir + "samplerenametest_single_sample_gvcf.vcf FOOSAMPLE"));
|
||||
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(" -T OnTheFlySampleRenamingVerifyingRodWalker" +
|
||||
" -R " + hg19Reference +
|
||||
" -V " + privateTestDir + "samplerenametest_single_sample_gvcf.vcf" +
|
||||
" --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() +
|
||||
" --expectedSampleName FOOSAMPLE" +
|
||||
" -o %s",
|
||||
1,
|
||||
Arrays.asList("")); // No MD5s -- custom walker will throw an exception if there's a problem
|
||||
|
||||
executeTest("testOnTheFlySampleRenamingVerifyWalkerSeesNewSamplesInVCFRecords", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOnTheFlySampleRenamingMultiSampleVCF() throws Exception {
|
||||
final File sampleRenameMapFile = createTestSampleRenameMapFile(
|
||||
Arrays.asList(privateTestDir + "vcf/vcfWithGenotypes.vcf badSample"));
|
||||
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(" -T CombineVariants" +
|
||||
" -R " + b37KGReference +
|
||||
" -V " + privateTestDir + "vcf/vcfWithGenotypes.vcf" +
|
||||
" --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() +
|
||||
" -o %s",
|
||||
1,
|
||||
UserException.class); // expecting a UserException here
|
||||
|
||||
executeTest("testOnTheFlySampleRenamingMultiSampleVCF", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOnTheFlySampleRenamingSitesOnlyVCF() throws Exception {
|
||||
final File sampleRenameMapFile = createTestSampleRenameMapFile(
|
||||
Arrays.asList(privateTestDir + "vcf/vcfWithoutGenotypes.vcf badSample"));
|
||||
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(" -T CombineVariants" +
|
||||
" -R " + b37KGReference +
|
||||
" -V " + privateTestDir + "vcf/vcfWithoutGenotypes.vcf" +
|
||||
" --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() +
|
||||
" -o %s",
|
||||
1,
|
||||
UserException.class); // expecting a UserException here
|
||||
|
||||
executeTest("testOnTheFlySampleRenamingSitesOnlyVCF", spec);
|
||||
}
|
||||
|
||||
private File createTestSampleRenameMapFile( final List<String> contents ) throws IOException {
|
||||
final File mapFile = createTempFile("TestSampleRenameMapFile", ".tmp");
|
||||
final PrintWriter writer = new PrintWriter(mapFile);
|
||||
|
|
@ -532,4 +630,43 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
|
|||
public Integer reduceInit() { return 0; }
|
||||
public Integer reduce(Integer value, Integer sum) { return value + sum; }
|
||||
}
|
||||
|
||||
public static class OnTheFlySampleRenamingVerifyingRodWalker extends RodWalker<Integer, Integer> {
|
||||
@Argument(fullName = "expectedSampleName", shortName = "expectedSampleName", doc = "", required = true)
|
||||
String expectedSampleName = null;
|
||||
|
||||
@Output
|
||||
PrintStream out;
|
||||
|
||||
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
|
||||
public RodBinding<VariantContext> variants;
|
||||
|
||||
public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||
if ( tracker == null ) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
for ( final VariantContext vc : tracker.getValues(variants, context.getLocation()) ) {
|
||||
if ( vc.getSampleNames().size() != 1 ) {
|
||||
throw new IllegalStateException("Encountered a vcf record with num samples != 1");
|
||||
}
|
||||
|
||||
final String actualSampleName = vc.getSampleNames().iterator().next();
|
||||
if ( ! expectedSampleName.equals(actualSampleName)) {
|
||||
throw new IllegalStateException(String.format("Encountered vcf record with wrong sample name. Expected %s found %s",
|
||||
expectedSampleName, actualSampleName));
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
public Integer reduce(Integer counter, Integer sum) {
|
||||
return counter + sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -128,7 +128,7 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
|
|||
"/foo/bar/second.bam newSample2",
|
||||
"/foo/bar2/third.bam newSample3"));
|
||||
final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
||||
final Map<SAMReaderID, String> renameMap = engine.loadSampleRenameMap(mapFile);
|
||||
final Map<String, String> renameMap = engine.loadSampleRenameMap(mapFile);
|
||||
|
||||
Assert.assertEquals(renameMap.size(), 3, "Sample rename map was wrong size after loading from file");
|
||||
|
||||
|
|
@ -137,8 +137,8 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
|
|||
final String expectedKey = expectedResultsIterator.next();
|
||||
final String expectedValue = expectedResultsIterator.next();
|
||||
|
||||
Assert.assertNotNull(renameMap.get(new SAMReaderID(expectedKey, new Tags())), String.format("Entry for %s not found in sample rename map", expectedKey));
|
||||
Assert.assertEquals(renameMap.get(new SAMReaderID(expectedKey, new Tags())), expectedValue, "Wrong value in sample rename map for " + expectedKey);
|
||||
Assert.assertNotNull(renameMap.get(expectedKey), String.format("Entry for %s not found in sample rename map", expectedKey));
|
||||
Assert.assertEquals(renameMap.get(expectedKey), expectedValue, "Wrong value in sample rename map for " + expectedKey);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -166,7 +166,7 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
|
|||
logger.info("Executing test " + testName);
|
||||
|
||||
final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
||||
final Map<SAMReaderID, String> renameMap = engine.loadSampleRenameMap(mapFile);
|
||||
final Map<String, String> renameMap = engine.loadSampleRenameMap(mapFile);
|
||||
}
|
||||
|
||||
private File createTestSampleRenameMapFile( final List<String> contents ) throws IOException {
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
|||
seq = new CachingIndexedFastaSequenceFile(new File(hg18Reference));
|
||||
genomeLocParser = new GenomeLocParser(seq);
|
||||
// disable auto-index creation/locking in the RMDTrackBuilder for tests
|
||||
builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null,true);
|
||||
builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null,true,null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
|||
|
||||
triplet = new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE,new Tags());
|
||||
// disable auto-index creation/locking in the RMDTrackBuilder for tests
|
||||
builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null,true);
|
||||
builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null,true,null);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -153,7 +153,7 @@ public class FeatureManagerUnitTest extends BaseTest {
|
|||
FeatureManager.FeatureDescriptor descriptor = manager.getByName("vcf");
|
||||
Assert.assertNotNull(descriptor, "Couldn't find VCF feature descriptor!");
|
||||
|
||||
FeatureCodec c = manager.createCodec(descriptor, "foo", genomeLocParser);
|
||||
FeatureCodec c = manager.createCodec(descriptor, "foo", genomeLocParser, null);
|
||||
Assert.assertNotNull(c, "Couldn't create codec");
|
||||
Assert.assertEquals(c.getClass(), descriptor.getCodecClass());
|
||||
Assert.assertEquals(c.getFeatureType(), descriptor.getFeatureClass());
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ public class RMDTrackBuilderUnitTest extends BaseTest {
|
|||
// We have to disable auto-index creation/locking in the RMDTrackBuilder for tests,
|
||||
// as the lock acquisition calls were intermittently hanging on our farm. This unfortunately
|
||||
// means that we can't include tests for the auto-index creation feature.
|
||||
builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null,true);
|
||||
builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null,true,null);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ public class TestRMDTrackBuilder extends RMDTrackBuilder {
|
|||
|
||||
public TestRMDTrackBuilder(SAMSequenceDictionary dict, GenomeLocParser genomeLocParser) {
|
||||
// disable auto-index creation/locking in the RMDTrackBuilder for tests
|
||||
super(dict, genomeLocParser, null, true);
|
||||
super(dict, genomeLocParser, null, true, null);
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
}
|
||||
|
||||
|
|
@ -55,7 +55,7 @@ public class TestRMDTrackBuilder extends RMDTrackBuilder {
|
|||
String name = fileDescriptor.getName();
|
||||
File inputFile = new File(fileDescriptor.getFile());
|
||||
FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByTriplet(fileDescriptor);
|
||||
FeatureCodec codec = getFeatureManager().createCodec(descriptor, name, genomeLocParser);
|
||||
FeatureCodec codec = getFeatureManager().createCodec(descriptor, name, genomeLocParser, null);
|
||||
TestFeatureReader featureReader;
|
||||
Index index;
|
||||
try {
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -3,23 +3,23 @@
|
|||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>net.sf</groupId>
|
||||
<artifactId>picard</artifactId>
|
||||
<version>1.110.1773</version>
|
||||
<version>1.111.1902</version>
|
||||
<name>picard</name>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>net.sf</groupId>
|
||||
<artifactId>sam</artifactId>
|
||||
<version>1.110.1773</version>
|
||||
<version>1.111.1902</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.broadinstitute</groupId>
|
||||
<artifactId>variant</artifactId>
|
||||
<version>1.110.1773</version>
|
||||
<version>1.111.1902</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.broad</groupId>
|
||||
<artifactId>tribble</artifactId>
|
||||
<version>1.110.1773</version>
|
||||
<version>1.111.1902</version>
|
||||
</dependency>
|
||||
<!-- TODO: Picard is using a custom zip with just ant's BZip2 classes. See also: http://www.kohsuke.org/bzip2 -->
|
||||
<dependency>
|
||||
Binary file not shown.
|
|
@ -3,7 +3,7 @@
|
|||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>net.sf</groupId>
|
||||
<artifactId>sam</artifactId>
|
||||
<version>1.110.1773</version>
|
||||
<version>1.111.1902</version>
|
||||
<name>sam-jdk</name>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
Binary file not shown.
|
|
@ -3,13 +3,13 @@
|
|||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.broad</groupId>
|
||||
<artifactId>tribble</artifactId>
|
||||
<version>1.110.1773</version>
|
||||
<version>1.111.1902</version>
|
||||
<name>tribble</name>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>net.sf</groupId>
|
||||
<artifactId>sam</artifactId>
|
||||
<version>1.110.1773</version>
|
||||
<version>1.111.1902</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
Binary file not shown.
|
|
@ -3,18 +3,18 @@
|
|||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.broadinstitute</groupId>
|
||||
<artifactId>variant</artifactId>
|
||||
<version>1.110.1773</version>
|
||||
<version>1.111.1902</version>
|
||||
<name>variant</name>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.broad</groupId>
|
||||
<artifactId>tribble</artifactId>
|
||||
<version>1.110.1773</version>
|
||||
<version>1.111.1902</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>net.sf</groupId>
|
||||
<artifactId>sam</artifactId>
|
||||
<version>1.110.1773</version>
|
||||
<version>1.111.1902</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
|
|
@ -43,7 +43,7 @@
|
|||
<test.args>-Xmx${test.maxmemory} -XX:+UseParallelOldGC -XX:ParallelGCThreads=${java.gc.threads} -XX:GCTimeLimit=${java.gc.timeLimit} -XX:GCHeapFreeLimit=${java.gc.heapFreeLimit}</test.args>
|
||||
|
||||
<!-- Version numbers for picard sam-jdk. Usually kept in sync. -->
|
||||
<picard.public.version>1.110.1773</picard.public.version>
|
||||
<picard.public.version>1.111.1902</picard.public.version>
|
||||
<sam.version>${picard.public.version}</sam.version>
|
||||
<picard.version>${picard.public.version}</picard.version>
|
||||
<variant.version>${picard.public.version}</variant.version>
|
||||
|
|
|
|||
Loading…
Reference in New Issue