Merge pull request #611 from broadinstitute/mm_otf_sample_rename_support_whitespacing_sample_names
Allow for whitespace in sample names when performing on-the-fly sample-renaming.
This commit is contained in:
commit
58c8b2dd84
|
|
@ -64,6 +64,7 @@ import org.broadinstitute.sting.utils.progressmeter.ProgressMeter;
|
|||
import org.broadinstitute.sting.utils.recalibration.BQSRArgumentSet;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
|
@ -892,7 +893,8 @@ public class GenomeAnalysisEngine {
|
|||
|
||||
/**
|
||||
* Loads a user-provided sample rename map file for use in on-the-fly sample renaming into an in-memory
|
||||
* HashMap. This file must consist of lines with two whitespace-separated fields:
|
||||
* HashMap. This file must consist of lines with two whitespace-separated fields, the second of which
|
||||
* may contain whitespace:
|
||||
*
|
||||
* absolute_path_to_file new_sample_name
|
||||
*
|
||||
|
|
@ -910,7 +912,7 @@ public class GenomeAnalysisEngine {
|
|||
|
||||
try {
|
||||
for ( final String line : new XReadLines(sampleRenameMapFile) ) {
|
||||
final String[] tokens = line.split("\\s+");
|
||||
final String[] tokens = line.split("\\s+", 2);
|
||||
|
||||
if ( tokens.length != 2 ) {
|
||||
throw new UserException.MalformedFile(sampleRenameMapFile,
|
||||
|
|
@ -919,7 +921,16 @@ public class GenomeAnalysisEngine {
|
|||
}
|
||||
|
||||
final File inputFile = new File(tokens[0]);
|
||||
final String newSampleName = tokens[1];
|
||||
final String newSampleName = tokens[1].trim();
|
||||
|
||||
if (newSampleName.contains(VCFConstants.FIELD_SEPARATOR)) {
|
||||
throw new UserException.MalformedFile(sampleRenameMapFile, String.format(
|
||||
"Encountered illegal sample name; sample names may not include the VCF field delimiter (%s). Sample name: %s; line: %s",
|
||||
VCFConstants.FIELD_SEPARATOR,
|
||||
newSampleName,
|
||||
line
|
||||
));
|
||||
}
|
||||
|
||||
if ( ! inputFile.isAbsolute() ) {
|
||||
throw new UserException.MalformedFile(sampleRenameMapFile, "Input file path not absolute at line: " + line);
|
||||
|
|
|
|||
|
|
@ -363,9 +363,10 @@ public class GATKArgumentCollection {
|
|||
/**
|
||||
* On-the-fly sample renaming works only with single-sample BAM and VCF files. Each line of the mapping file must
|
||||
* contain the absolute path to a BAM or VCF file, followed by whitespace, followed by the new sample name for that
|
||||
* BAM or VCF file. The engine will verify at runtime that each BAM/VCF targeted for sample renaming has only
|
||||
* a single sample specified in its header (though, in the case of BAM files, there may be multiple read groups for
|
||||
* that sample).
|
||||
* BAM or VCF file. The sample name may contain non-tab whitespace, but leading or trailing whitespace will be
|
||||
* ignored. The engine will verify at runtime that each BAM/VCF targeted for sample renaming has only a single
|
||||
* sample specified in its header (though, in the case of BAM files, there may be multiple read groups for that
|
||||
* sample).
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName = "sample_rename_mapping_file", shortName = "sample_rename_mapping_file", doc = "Rename sample IDs on-the-fly at runtime using the provided mapping file", required = false)
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@
|
|||
package org.broadinstitute.sting.gatk;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.commandline.ArgumentException;
|
||||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
|
|
@ -126,13 +125,21 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
|
|||
public void testLoadWellFormedSampleRenameMapFile() throws IOException {
|
||||
final File mapFile = createTestSampleRenameMapFile(Arrays.asList("/foo/bar/first.bam newSample1",
|
||||
"/foo/bar/second.bam newSample2",
|
||||
"/foo/bar2/third.bam newSample3"));
|
||||
"/foo/bar2/third.bam newSample3",
|
||||
"/foo/bar2/fourth.bam new sample 4",
|
||||
"/foo/bar2/fifth.bam new sample 5 "));
|
||||
final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
||||
final Map<String, String> renameMap = engine.loadSampleRenameMap(mapFile);
|
||||
|
||||
Assert.assertEquals(renameMap.size(), 3, "Sample rename map was wrong size after loading from file");
|
||||
Assert.assertEquals(renameMap.size(), 5, "Sample rename map was wrong size after loading from file");
|
||||
|
||||
final Iterator<String> expectedResultsIterator = Arrays.asList("/foo/bar/first.bam", "newSample1", "/foo/bar/second.bam", "newSample2", "/foo/bar2/third.bam", "newSample3").iterator();
|
||||
final Iterator<String> expectedResultsIterator = Arrays.asList(
|
||||
"/foo/bar/first.bam", "newSample1",
|
||||
"/foo/bar/second.bam", "newSample2",
|
||||
"/foo/bar2/third.bam", "newSample3",
|
||||
"/foo/bar2/fourth.bam", "new sample 4",
|
||||
"/foo/bar2/fifth.bam", "new sample 5"
|
||||
).iterator();
|
||||
while ( expectedResultsIterator.hasNext() ) {
|
||||
final String expectedKey = expectedResultsIterator.next();
|
||||
final String expectedValue = expectedResultsIterator.next();
|
||||
|
|
@ -148,16 +155,15 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
|
|||
|
||||
tests.add(new Object[]{"testLoadSampleRenameMapFileNonExistentFile",
|
||||
new File("/foo/bar/nonexistent")});
|
||||
tests.add(new Object[]{"testLoadSampleRenameMapFileMalformedLine1",
|
||||
tests.add(new Object[]{"testLoadSampleRenameMapFileMalformedLine",
|
||||
createTestSampleRenameMapFile(Arrays.asList("/path/to/foo.bam"))});
|
||||
tests.add(new Object[]{"testLoadSampleRenameMapFileMalformedLine2",
|
||||
createTestSampleRenameMapFile(Arrays.asList("/path/to/foo.bam newSample extraField"))});
|
||||
tests.add(new Object[]{"testLoadSampleRenameMapFileNonAbsoluteBamPath",
|
||||
createTestSampleRenameMapFile(Arrays.asList("relative/path/to/foo.bam newSample"))});
|
||||
tests.add(new Object[]{"testLoadSampleRenameMapFileDuplicateBamPath",
|
||||
createTestSampleRenameMapFile(Arrays.asList("/path/to/dupe.bam newSample1",
|
||||
"/path/to/dupe.bam newSample2"))});
|
||||
|
||||
tests.add(new Object[]{"testLoadSampleRenameMapFileTabInSampleName",
|
||||
createTestSampleRenameMapFile(Arrays.asList("/path/to/stuff.bam some wonky\tsample "))});
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue