diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 8df294b21..7c807cf36 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -758,13 +758,18 @@ public class GenomeAnalysisEngine { validateSuppliedReads(); initializeReadTransformers(walker); - readsDataSource = createReadsDataSource(argCollection,genomeLocParser,referenceDataSource.getReference()); + final Map sampleRenameMap = argCollection.sampleRenameMappingFile != null ? + loadSampleRenameMap(argCollection.sampleRenameMappingFile) : + null; + + readsDataSource = createReadsDataSource(argCollection,genomeLocParser,referenceDataSource.getReference(), sampleRenameMap); for (ReadFilter filter : filters) filter.initialize(this); // set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference - rodDataSources = getReferenceOrderedDataSources(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe); + rodDataSources = getReferenceOrderedDataSources(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(), + genomeLocParser,argCollection.unsafe,sampleRenameMap); } /** @@ -846,7 +851,8 @@ public class GenomeAnalysisEngine { * @param refReader reader * @return A data source for the given set of reads. */ - private SAMDataSource createReadsDataSource(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser, IndexedFastaSequenceFile refReader) { + private SAMDataSource createReadsDataSource(final GATKArgumentCollection argCollection, final GenomeLocParser genomeLocParser, + final IndexedFastaSequenceFile refReader, final Map sampleRenameMap) { DownsamplingMethod downsamplingMethod = getDownsamplingMethod(); // Synchronize the method back into the collection so that it shows up when @@ -865,10 +871,6 @@ public class GenomeAnalysisEngine { final boolean keepReadsInLIBS = walker instanceof ActiveRegionWalker; - final Map sampleRenameMap = argCollection.sampleRenameMappingFile != null ? - loadSampleRenameMap(argCollection.sampleRenameMappingFile) : - null; - return new SAMDataSource( samReaderIDs, threadAllocation, @@ -892,19 +894,19 @@ public class GenomeAnalysisEngine { * Loads a user-provided sample rename map file for use in on-the-fly sample renaming into an in-memory * HashMap. This file must consist of lines with two whitespace-separated fields: * - * absolute_path_to_bam_file new_sample_name + * absolute_path_to_file new_sample_name * - * The engine will verify that each bam file contains reads from only one sample when the on-the-fly sample - * renaming feature is being used. + * The engine will verify that each file contains data from only one sample when the on-the-fly sample + * renaming feature is being used. Note that this feature works only with bam and vcf files. * * @param sampleRenameMapFile sample rename map file from which to load data - * @return a HashMap containing the contents of the map file, with the keys being the bam file paths and + * @return a HashMap containing the contents of the map file, with the keys being the input file paths and * the values being the new sample names. */ - protected Map loadSampleRenameMap( final File sampleRenameMapFile ) { - logger.info("Renaming samples from BAM files on-the-fly using mapping file " + sampleRenameMapFile.getAbsolutePath()); + protected Map loadSampleRenameMap( final File sampleRenameMapFile ) { + logger.info("Renaming samples from input files on-the-fly using mapping file " + sampleRenameMapFile.getAbsolutePath()); - final Map sampleRenameMap = new HashMap<>((int)sampleRenameMapFile.length() / 50); + final Map sampleRenameMap = new HashMap<>((int)sampleRenameMapFile.length() / 50); try { for ( final String line : new XReadLines(sampleRenameMapFile) ) { @@ -916,21 +918,21 @@ public class GenomeAnalysisEngine { tokens.length, line)); } - final File bamFile = new File(tokens[0]); + final File inputFile = new File(tokens[0]); final String newSampleName = tokens[1]; - if ( ! bamFile.isAbsolute() ) { - throw new UserException.MalformedFile(sampleRenameMapFile, "Bam file path not absolute at line: " + line); + if ( ! inputFile.isAbsolute() ) { + throw new UserException.MalformedFile(sampleRenameMapFile, "Input file path not absolute at line: " + line); } - final SAMReaderID bamID = new SAMReaderID(bamFile, new Tags()); + final String inputFilePath = inputFile.getAbsolutePath(); - if ( sampleRenameMap.containsKey(bamID) ) { + if ( sampleRenameMap.containsKey(inputFilePath) ) { throw new UserException.MalformedFile(sampleRenameMapFile, - String.format("Bam file %s appears more than once", bamFile.getAbsolutePath())); + String.format("Input file %s appears more than once", inputFilePath)); } - sampleRenameMap.put(bamID, newSampleName); + sampleRenameMap.put(inputFilePath, newSampleName); } } catch ( FileNotFoundException e ) { @@ -958,15 +960,18 @@ public class GenomeAnalysisEngine { * @param sequenceDictionary GATK-wide sequnce dictionary to use for validation. * @param genomeLocParser to use when creating and validating GenomeLocs. * @param validationExclusionType potentially indicate which validations to include / exclude. + * @param sampleRenameMap map of file -> new sample name used when doing on-the-fly sample renaming * * @return A list of reference-ordered data sources. */ - private List getReferenceOrderedDataSources(Collection referenceMetaDataFiles, - SAMSequenceDictionary sequenceDictionary, - GenomeLocParser genomeLocParser, - ValidationExclusion.TYPE validationExclusionType) { + private List getReferenceOrderedDataSources(final Collection referenceMetaDataFiles, + final SAMSequenceDictionary sequenceDictionary, + final GenomeLocParser genomeLocParser, + final ValidationExclusion.TYPE validationExclusionType, + final Map sampleRenameMap) { final RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, validationExclusionType, - getArguments().disableAutoIndexCreationAndLockingWhenReadingRods); + getArguments().disableAutoIndexCreationAndLockingWhenReadingRods, + sampleRenameMap); final List dataSources = new ArrayList(); for (RMDTriplet fileDescriptor : referenceMetaDataFiles) diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index bb4c2ee82..f333dcffa 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -359,14 +359,18 @@ public class GATKArgumentCollection { */ @Argument(fullName = "keep_program_records", shortName = "kpr", doc = "Keep program records in the SAM header", required = false) public boolean keepProgramRecords = false; + /** - * This option requires that each BAM file listed in the mapping file have only a single sample specified in its header - * (though there may be multiple read groups for that sample). Each line of the mapping file must contain the absolute - * path to a BAM file, followed by whitespace, followed by the new sample name for that BAM file. + * On-the-fly sample renaming works only with single-sample BAM and VCF files. Each line of the mapping file must + * contain the absolute path to a BAM or VCF file, followed by whitespace, followed by the new sample name for that + * BAM or VCF file. The engine will verify at runtime that each BAM/VCF targeted for sample renaming has only + * a single sample specified in its header (though, in the case of BAM files, there may be multiple read groups for + * that sample). */ @Advanced @Argument(fullName = "sample_rename_mapping_file", shortName = "sample_rename_mapping_file", doc = "Rename sample IDs on-the-fly at runtime using the provided mapping file", required = false) public File sampleRenameMappingFile = null; + /** * For expert users only who know what they are doing. We do not support usage of this argument, so we may refuse to help you if you use it and something goes wrong. */ diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 3f49d4759..e8a2455e4 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -132,9 +132,9 @@ public class SAMDataSource { private final Map originalToMergedReadGroupMappings = new HashMap(); /** - * Mapping from bam file ID to new sample name. Used only when doing on-the-fly sample renaming. + * Mapping from input file path to new sample name. Used only when doing on-the-fly sample renaming. */ - private Map sampleRenameMap = null; + private Map sampleRenameMap = null; /** our log, which we want to capture anything from this class */ private static Logger logger = Logger.getLogger(SAMDataSource.class); @@ -253,7 +253,7 @@ public class SAMDataSource { byte defaultBaseQualities, boolean removeProgramRecords, final boolean keepReadsInLIBS, - final Map sampleRenameMap) { + final Map sampleRenameMap) { this.readMetrics = new ReadMetrics(); this.genomeLocParser = genomeLocParser; @@ -879,7 +879,7 @@ public class SAMDataSource { // The remappedSampleName will be null if either no on-the-fly sample renaming was requested, // or the user's sample rename map file didn't contain an entry for this bam file: - final String remappedSampleName = sampleRenameMap != null ? sampleRenameMap.get(readerID) : null; + final String remappedSampleName = sampleRenameMap != null ? sampleRenameMap.get(readerID.getSamFilePath()) : null; // If we've been asked to rename the sample for this bam file, do so now. We'll check to // make sure this bam only contains reads from one sample before proceeding. diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java index 60b6f4683..20e1d4f1c 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java @@ -252,18 +252,28 @@ public class FeatureManager { * * @param descriptor FeatureDescriptor of the Tribble FeatureCodec we want to create * @param name the name to assign this codec + * @param genomeLocParser GenomeLocParser for ReferenceDependentFeatureCodecs + * @param remappedSampleName replacement sample name for single-sample vcfs, or null if we're not performing + * sample name remapping * @return the feature codec itself */ @Requires({"descriptor != null", "name != null", "genomeLocParser != null"}) @Ensures("result != null") - public FeatureCodec createCodec(FeatureDescriptor descriptor, String name, GenomeLocParser genomeLocParser) { + public FeatureCodec createCodec(final FeatureDescriptor descriptor, final String name, final GenomeLocParser genomeLocParser, + final String remappedSampleName) { FeatureCodec codex = pluginManager.createByType(descriptor.getCodecClass()); if ( codex instanceof NameAwareCodec ) ((NameAwareCodec)codex).setName(name); if ( codex instanceof ReferenceDependentFeatureCodec ) ((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser); - if ( codex instanceof AbstractVCFCodec && lenientVCFProcessing ) - ((AbstractVCFCodec)codex).disableOnTheFlyModifications(); + if ( codex instanceof AbstractVCFCodec ) { + if ( lenientVCFProcessing ) { + ((AbstractVCFCodec)codex).disableOnTheFlyModifications(); + } + if ( remappedSampleName != null ) { + ((AbstractVCFCodec)codex).setRemappedSampleName(remappedSampleName); + } + } return codex; } diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java index df5cf91ca..1fe0a8bed 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java @@ -49,6 +49,7 @@ import org.broadinstitute.sting.utils.instrumentation.Sizeof; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; +import java.util.Map; /** @@ -86,6 +87,9 @@ public class RMDTrackBuilder { // extends PluginManager { // make any file lock acquisition calls on the index files. private final boolean disableAutoIndexCreation; + // Map of file name -> new sample name used when performing on-the-fly sample renaming + private final Map sampleRenameMap; + /** * Construct an RMDTrackerBuilder, allowing the user to define tracks to build after-the-fact. This is generally * used when walkers want to directly manage the ROD system for whatever reason. Before using this constructor, @@ -96,16 +100,19 @@ public class RMDTrackBuilder { // extends PluginManager { * @param disableAutoIndexCreation Do not auto-create index files, and do not use file locking when accessing index files. * UNSAFE in general (because it causes us not to lock index files before reading them) -- * suitable only for test suite use. + * @param sampleRenameMap Map of file name -> new sample name used when performing on-the-fly sample renaming */ public RMDTrackBuilder(final SAMSequenceDictionary dict, final GenomeLocParser genomeLocParser, final ValidationExclusion.TYPE validationExclusionType, - final boolean disableAutoIndexCreation) { + final boolean disableAutoIndexCreation, + final Map sampleRenameMap) { this.dict = dict; this.validationExclusionType = validationExclusionType; this.genomeLocParser = genomeLocParser; this.featureManager = new FeatureManager(GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType)); this.disableAutoIndexCreation = disableAutoIndexCreation; + this.sampleRenameMap = sampleRenameMap; } /** @@ -139,7 +146,7 @@ public class RMDTrackBuilder { // extends PluginManager { else pair = getFeatureSource(descriptor, name, inputFile, fileDescriptor.getStorageType()); if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file"); - return new RMDTrack(descriptor.getCodecClass(), name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(descriptor, name)); + return new RMDTrack(descriptor.getCodecClass(), name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(descriptor, name, inputFile)); } /** @@ -173,7 +180,7 @@ public class RMDTrackBuilder { // extends PluginManager { try { final File indexFile = null;//new File(inputFile.getAbsoluteFile() + TabixUtils.STANDARD_INDEX_EXTENSION); final SAMSequenceDictionary dict = null; //TabixUtils.getSequenceDictionary(indexFile); - return new Pair<>(AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), createCodec(descriptor, name)), dict); + return new Pair<>(AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), createCodec(descriptor, name, inputFile)), dict); } catch (TribbleException e) { throw new UserException(e.getMessage(), e); } @@ -183,10 +190,15 @@ public class RMDTrackBuilder { // extends PluginManager { * add a name to the codec, if it takes one * @param descriptor the class to create a codec for * @param name the name to assign this codec + * @param inputFile input file that we will be decoding * @return the feature codec itself */ - private FeatureCodec createCodec(FeatureManager.FeatureDescriptor descriptor, String name) { - return featureManager.createCodec(descriptor, name, genomeLocParser); + private FeatureCodec createCodec(final FeatureManager.FeatureDescriptor descriptor, final String name, final File inputFile) { + // The remappedSampleName will be null if either no on-the-fly sample renaming was requested, + // or the user's sample rename map file didn't contain an entry for this file: + final String remappedSampleName = sampleRenameMap != null ? sampleRenameMap.get(inputFile.getAbsolutePath()) : null; + + return featureManager.createCodec(descriptor, name, genomeLocParser, remappedSampleName); } /** @@ -210,7 +222,7 @@ public class RMDTrackBuilder { // extends PluginManager { if(canBeIndexed) { try { - Index index = loadIndex(inputFile, createCodec(descriptor, name)); + Index index = loadIndex(inputFile, createCodec(descriptor, name, inputFile)); try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); } catch (ReviewedStingException e) { } @@ -232,7 +244,7 @@ public class RMDTrackBuilder { // extends PluginManager { sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index); } - featureSource = AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), createCodec(descriptor, name), index); + featureSource = AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), createCodec(descriptor, name, inputFile), index); } catch (TribbleException e) { throw new UserException(e.getMessage()); @@ -242,7 +254,7 @@ public class RMDTrackBuilder { // extends PluginManager { } } else { - featureSource = AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), createCodec(descriptor, name), false); + featureSource = AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), createCodec(descriptor, name, inputFile), false); } return new Pair(featureSource,sequenceDictionary); diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java index 3a51a9a6a..10a87adee 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java @@ -593,7 +593,8 @@ public class DepthOfCoverage extends LocusWalker { RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), getToolkit().getGenomeLocParser(), getToolkit().getArguments().unsafe, - getToolkit().getArguments().disableAutoIndexCreationAndLockingWhenReadingRods); + getToolkit().getArguments().disableAutoIndexCreationAndLockingWhenReadingRods, + null); dbsnpIterator = builder.createInstanceOfTrack(VCFCodec.class, new File(dbsnp.dbsnp.getSource())).getIterator(); // Note that we should really use some sort of seekable iterator here so that the search doesn't take forever // (but it's complicated because the hapmap location doesn't match the dbsnp location, so we don't know where to seek to) diff --git a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java index f1839e6ac..4012a07a9 100644 --- a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java +++ b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java @@ -28,20 +28,33 @@ package org.broadinstitute.sting.gatk; import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMSequenceDictionary; +import net.sf.samtools.util.CloseableIterator; +import org.broad.tribble.readers.LineIterator; import org.broadinstitute.sting.WalkerTest; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.qc.ErrorThrowing; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.variant.vcf.VCFCodec; import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.variant.vcf.VCFHeaderLine; @@ -504,6 +517,91 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { executeTest("testOnTheFlySampleRenamingVerifyWalkerSeesNewSamplesInReads", spec); } + @Test + public void testOnTheFlySampleRenamingSingleSampleVCF() throws IOException { + final File sampleRenameMapFile = createTestSampleRenameMapFile( + Arrays.asList(privateTestDir + "NA12878.WGS.b37.chr20.firstMB.vcf newSampleForNA12878")); + + final WalkerTestSpec spec = new WalkerTestSpec(" -T CombineVariants" + + " -R " + b37KGReference + + " -V " + privateTestDir + "NA12878.WGS.b37.chr20.firstMB.vcf" + + " --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() + + " -o %s", + 1, + Arrays.asList("")); // No MD5s -- we will inspect the output file manually + + final File outputVCF = executeTest("testOnTheFlySampleRenamingSingleSampleVCF", spec).first.get(0); + verifySampleRenaming(outputVCF, "newSampleForNA12878"); + } + + private void verifySampleRenaming( final File outputVCF, final String newSampleName ) throws IOException { + final Pair> headerAndVCIter = GATKVCFUtils.readAllVCs(outputVCF, new VCFCodec()); + final VCFHeader header = headerAndVCIter.getFirst(); + final GATKVCFUtils.VCIterable iter = headerAndVCIter.getSecond(); + + // Verify that sample renaming occurred at both the header and record levels (checking only the first 10 records): + + Assert.assertEquals(header.getGenotypeSamples().size(), 1, "Wrong number of samples in output vcf header"); + Assert.assertEquals(header.getGenotypeSamples().get(0), newSampleName, "Wrong sample name in output vcf header"); + + int recordCount = 0; + while ( iter.hasNext() && recordCount < 10 ) { + final VariantContext vcfRecord = iter.next(); + Assert.assertEquals(vcfRecord.getSampleNames().size(), 1, "Wrong number of samples in output vcf record"); + Assert.assertEquals(vcfRecord.getSampleNames().iterator().next(), newSampleName, "Wrong sample name in output vcf record"); + recordCount++; + } + } + + @Test + public void testOnTheFlySampleRenamingVerifyWalkerSeesNewSamplesInVCFRecords() throws Exception { + final File sampleRenameMapFile = createTestSampleRenameMapFile( + Arrays.asList(privateTestDir + "samplerenametest_single_sample_gvcf.vcf FOOSAMPLE")); + + final WalkerTestSpec spec = new WalkerTestSpec(" -T OnTheFlySampleRenamingVerifyingRodWalker" + + " -R " + hg19Reference + + " -V " + privateTestDir + "samplerenametest_single_sample_gvcf.vcf" + + " --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() + + " --expectedSampleName FOOSAMPLE" + + " -o %s", + 1, + Arrays.asList("")); // No MD5s -- custom walker will throw an exception if there's a problem + + executeTest("testOnTheFlySampleRenamingVerifyWalkerSeesNewSamplesInVCFRecords", spec); + } + + @Test + public void testOnTheFlySampleRenamingMultiSampleVCF() throws Exception { + final File sampleRenameMapFile = createTestSampleRenameMapFile( + Arrays.asList(privateTestDir + "vcf/vcfWithGenotypes.vcf badSample")); + + final WalkerTestSpec spec = new WalkerTestSpec(" -T CombineVariants" + + " -R " + b37KGReference + + " -V " + privateTestDir + "vcf/vcfWithGenotypes.vcf" + + " --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() + + " -o %s", + 1, + UserException.class); // expecting a UserException here + + executeTest("testOnTheFlySampleRenamingMultiSampleVCF", spec); + } + + @Test + public void testOnTheFlySampleRenamingSitesOnlyVCF() throws Exception { + final File sampleRenameMapFile = createTestSampleRenameMapFile( + Arrays.asList(privateTestDir + "vcf/vcfWithoutGenotypes.vcf badSample")); + + final WalkerTestSpec spec = new WalkerTestSpec(" -T CombineVariants" + + " -R " + b37KGReference + + " -V " + privateTestDir + "vcf/vcfWithoutGenotypes.vcf" + + " --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() + + " -o %s", + 1, + UserException.class); // expecting a UserException here + + executeTest("testOnTheFlySampleRenamingSitesOnlyVCF", spec); + } + private File createTestSampleRenameMapFile( final List contents ) throws IOException { final File mapFile = createTempFile("TestSampleRenameMapFile", ".tmp"); final PrintWriter writer = new PrintWriter(mapFile); @@ -532,4 +630,43 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { public Integer reduceInit() { return 0; } public Integer reduce(Integer value, Integer sum) { return value + sum; } } + + public static class OnTheFlySampleRenamingVerifyingRodWalker extends RodWalker { + @Argument(fullName = "expectedSampleName", shortName = "expectedSampleName", doc = "", required = true) + String expectedSampleName = null; + + @Output + PrintStream out; + + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + public RodBinding variants; + + public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { + if ( tracker == null ) { + return 0; + } + + for ( final VariantContext vc : tracker.getValues(variants, context.getLocation()) ) { + if ( vc.getSampleNames().size() != 1 ) { + throw new IllegalStateException("Encountered a vcf record with num samples != 1"); + } + + final String actualSampleName = vc.getSampleNames().iterator().next(); + if ( ! expectedSampleName.equals(actualSampleName)) { + throw new IllegalStateException(String.format("Encountered vcf record with wrong sample name. Expected %s found %s", + expectedSampleName, actualSampleName)); + } + } + + return 1; + } + + public Integer reduceInit() { + return 0; + } + + public Integer reduce(Integer counter, Integer sum) { + return counter + sum; + } + } } \ No newline at end of file diff --git a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java index b10043340..21a18f804 100644 --- a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java +++ b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java @@ -128,7 +128,7 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest { "/foo/bar/second.bam newSample2", "/foo/bar2/third.bam newSample3")); final GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); - final Map renameMap = engine.loadSampleRenameMap(mapFile); + final Map renameMap = engine.loadSampleRenameMap(mapFile); Assert.assertEquals(renameMap.size(), 3, "Sample rename map was wrong size after loading from file"); @@ -137,8 +137,8 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest { final String expectedKey = expectedResultsIterator.next(); final String expectedValue = expectedResultsIterator.next(); - Assert.assertNotNull(renameMap.get(new SAMReaderID(expectedKey, new Tags())), String.format("Entry for %s not found in sample rename map", expectedKey)); - Assert.assertEquals(renameMap.get(new SAMReaderID(expectedKey, new Tags())), expectedValue, "Wrong value in sample rename map for " + expectedKey); + Assert.assertNotNull(renameMap.get(expectedKey), String.format("Entry for %s not found in sample rename map", expectedKey)); + Assert.assertEquals(renameMap.get(expectedKey), expectedValue, "Wrong value in sample rename map for " + expectedKey); } } @@ -166,7 +166,7 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest { logger.info("Executing test " + testName); final GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); - final Map renameMap = engine.loadSampleRenameMap(mapFile); + final Map renameMap = engine.loadSampleRenameMap(mapFile); } private File createTestSampleRenameMapFile( final List contents ) throws IOException { diff --git a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java index 1d39f43c6..4d9283beb 100644 --- a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java +++ b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java @@ -85,7 +85,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { seq = new CachingIndexedFastaSequenceFile(new File(hg18Reference)); genomeLocParser = new GenomeLocParser(seq); // disable auto-index creation/locking in the RMDTrackBuilder for tests - builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null,true); + builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null,true,null); } /** diff --git a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPoolUnitTest.java b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPoolUnitTest.java index 4a6d14d32..2e72d1679 100644 --- a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPoolUnitTest.java +++ b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPoolUnitTest.java @@ -97,7 +97,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { triplet = new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE,new Tags()); // disable auto-index creation/locking in the RMDTrackBuilder for tests - builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null,true); + builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null,true,null); } @Test diff --git a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java index 97f9f5cde..579ecd681 100644 --- a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java +++ b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java @@ -153,7 +153,7 @@ public class FeatureManagerUnitTest extends BaseTest { FeatureManager.FeatureDescriptor descriptor = manager.getByName("vcf"); Assert.assertNotNull(descriptor, "Couldn't find VCF feature descriptor!"); - FeatureCodec c = manager.createCodec(descriptor, "foo", genomeLocParser); + FeatureCodec c = manager.createCodec(descriptor, "foo", genomeLocParser, null); Assert.assertNotNull(c, "Couldn't create codec"); Assert.assertEquals(c.getClass(), descriptor.getCodecClass()); Assert.assertEquals(c.getFeatureType(), descriptor.getFeatureClass()); diff --git a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java index 4904428d0..dc31d55e1 100644 --- a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java +++ b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java @@ -71,7 +71,7 @@ public class RMDTrackBuilderUnitTest extends BaseTest { // We have to disable auto-index creation/locking in the RMDTrackBuilder for tests, // as the lock acquisition calls were intermittently hanging on our farm. This unfortunately // means that we can't include tests for the auto-index creation feature. - builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null,true); + builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null,true,null); } @Test diff --git a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/refdata/utils/TestRMDTrackBuilder.java b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/refdata/utils/TestRMDTrackBuilder.java index 48e3bbd8c..02b1fa2f7 100644 --- a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/refdata/utils/TestRMDTrackBuilder.java +++ b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/refdata/utils/TestRMDTrackBuilder.java @@ -46,7 +46,7 @@ public class TestRMDTrackBuilder extends RMDTrackBuilder { public TestRMDTrackBuilder(SAMSequenceDictionary dict, GenomeLocParser genomeLocParser) { // disable auto-index creation/locking in the RMDTrackBuilder for tests - super(dict, genomeLocParser, null, true); + super(dict, genomeLocParser, null, true, null); this.genomeLocParser = genomeLocParser; } @@ -55,7 +55,7 @@ public class TestRMDTrackBuilder extends RMDTrackBuilder { String name = fileDescriptor.getName(); File inputFile = new File(fileDescriptor.getFile()); FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByTriplet(fileDescriptor); - FeatureCodec codec = getFeatureManager().createCodec(descriptor, name, genomeLocParser); + FeatureCodec codec = getFeatureManager().createCodec(descriptor, name, genomeLocParser, null); TestFeatureReader featureReader; Index index; try { diff --git a/public/repo/net/sf/picard/1.110.1773/picard-1.110.1773.jar b/public/repo/net/sf/picard/1.111.1902/picard-1.111.1902.jar similarity index 93% rename from public/repo/net/sf/picard/1.110.1773/picard-1.110.1773.jar rename to public/repo/net/sf/picard/1.111.1902/picard-1.111.1902.jar index 2b83e8e13..94c96b136 100644 Binary files a/public/repo/net/sf/picard/1.110.1773/picard-1.110.1773.jar and b/public/repo/net/sf/picard/1.111.1902/picard-1.111.1902.jar differ diff --git a/public/repo/net/sf/picard/1.110.1773/picard-1.110.1773.pom b/public/repo/net/sf/picard/1.111.1902/picard-1.111.1902.pom similarity index 90% rename from public/repo/net/sf/picard/1.110.1773/picard-1.110.1773.pom rename to public/repo/net/sf/picard/1.111.1902/picard-1.111.1902.pom index a95cdf671..00b43094e 100644 --- a/public/repo/net/sf/picard/1.110.1773/picard-1.110.1773.pom +++ b/public/repo/net/sf/picard/1.111.1902/picard-1.111.1902.pom @@ -3,23 +3,23 @@ 4.0.0 net.sf picard - 1.110.1773 + 1.111.1902 picard net.sf sam - 1.110.1773 + 1.111.1902 org.broadinstitute variant - 1.110.1773 + 1.111.1902 org.broad tribble - 1.110.1773 + 1.111.1902 diff --git a/public/repo/net/sf/sam/1.110.1773/sam-1.110.1773.jar b/public/repo/net/sf/sam/1.111.1902/sam-1.111.1902.jar similarity index 87% rename from public/repo/net/sf/sam/1.110.1773/sam-1.110.1773.jar rename to public/repo/net/sf/sam/1.111.1902/sam-1.111.1902.jar index 686252e01..fa4fd4c24 100644 Binary files a/public/repo/net/sf/sam/1.110.1773/sam-1.110.1773.jar and b/public/repo/net/sf/sam/1.111.1902/sam-1.111.1902.jar differ diff --git a/public/repo/net/sf/sam/1.110.1773/sam-1.110.1773.pom b/public/repo/net/sf/sam/1.111.1902/sam-1.111.1902.pom similarity index 95% rename from public/repo/net/sf/sam/1.110.1773/sam-1.110.1773.pom rename to public/repo/net/sf/sam/1.111.1902/sam-1.111.1902.pom index 3c951c14f..b2a77fb13 100644 --- a/public/repo/net/sf/sam/1.110.1773/sam-1.110.1773.pom +++ b/public/repo/net/sf/sam/1.111.1902/sam-1.111.1902.pom @@ -3,7 +3,7 @@ 4.0.0 net.sf sam - 1.110.1773 + 1.111.1902 sam-jdk diff --git a/public/repo/org/broad/tribble/1.110.1773/tribble-1.110.1773.jar b/public/repo/org/broad/tribble/1.111.1902/tribble-1.111.1902.jar similarity index 95% rename from public/repo/org/broad/tribble/1.110.1773/tribble-1.110.1773.jar rename to public/repo/org/broad/tribble/1.111.1902/tribble-1.111.1902.jar index 1cf68d647..1f4c9c2ac 100644 Binary files a/public/repo/org/broad/tribble/1.110.1773/tribble-1.110.1773.jar and b/public/repo/org/broad/tribble/1.111.1902/tribble-1.111.1902.jar differ diff --git a/public/repo/org/broad/tribble/1.110.1773/tribble-1.110.1773.pom b/public/repo/org/broad/tribble/1.111.1902/tribble-1.111.1902.pom similarity index 87% rename from public/repo/org/broad/tribble/1.110.1773/tribble-1.110.1773.pom rename to public/repo/org/broad/tribble/1.111.1902/tribble-1.111.1902.pom index 7d1233599..2c84f11a5 100644 --- a/public/repo/org/broad/tribble/1.110.1773/tribble-1.110.1773.pom +++ b/public/repo/org/broad/tribble/1.111.1902/tribble-1.111.1902.pom @@ -3,13 +3,13 @@ 4.0.0 org.broad tribble - 1.110.1773 + 1.111.1902 tribble net.sf sam - 1.110.1773 + 1.111.1902 diff --git a/public/repo/org/broadinstitute/variant/1.110.1773/variant-1.110.1773.jar b/public/repo/org/broadinstitute/variant/1.111.1902/variant-1.111.1902.jar similarity index 90% rename from public/repo/org/broadinstitute/variant/1.110.1773/variant-1.110.1773.jar rename to public/repo/org/broadinstitute/variant/1.111.1902/variant-1.111.1902.jar index 5ece61a1b..4e1779cdd 100644 Binary files a/public/repo/org/broadinstitute/variant/1.110.1773/variant-1.110.1773.jar and b/public/repo/org/broadinstitute/variant/1.111.1902/variant-1.111.1902.jar differ diff --git a/public/repo/org/broadinstitute/variant/1.110.1773/variant-1.110.1773.pom b/public/repo/org/broadinstitute/variant/1.111.1902/variant-1.111.1902.pom similarity index 90% rename from public/repo/org/broadinstitute/variant/1.110.1773/variant-1.110.1773.pom rename to public/repo/org/broadinstitute/variant/1.111.1902/variant-1.111.1902.pom index f37c23ede..a8c37196f 100644 --- a/public/repo/org/broadinstitute/variant/1.110.1773/variant-1.110.1773.pom +++ b/public/repo/org/broadinstitute/variant/1.111.1902/variant-1.111.1902.pom @@ -3,18 +3,18 @@ 4.0.0 org.broadinstitute variant - 1.110.1773 + 1.111.1902 variant org.broad tribble - 1.110.1773 + 1.111.1902 net.sf sam - 1.110.1773 + 1.111.1902 org.apache.commons diff --git a/public/sting-root/pom.xml b/public/sting-root/pom.xml index d59b6d415..946147f54 100644 --- a/public/sting-root/pom.xml +++ b/public/sting-root/pom.xml @@ -43,7 +43,7 @@ -Xmx${test.maxmemory} -XX:+UseParallelOldGC -XX:ParallelGCThreads=${java.gc.threads} -XX:GCTimeLimit=${java.gc.timeLimit} -XX:GCHeapFreeLimit=${java.gc.heapFreeLimit} - 1.110.1773 + 1.111.1902 ${picard.public.version} ${picard.public.version} ${picard.public.version}