Fix sample sort order bug in DepthOfCoverage

Rare bug triggered by hash collision between sample names
 PT 66183936
This commit is contained in:
Phillip Dexheimer 2014-08-05 21:55:34 -04:00
parent 03e7ee6e9c
commit b0c026e671
3 changed files with 31 additions and 3 deletions

View File

@ -1054,7 +1054,7 @@ class CoveragePartitioner {
private Map<DoCOutputType.Partition,List<String>> identifiersByType;
private Set<String> allIdentifiers;
public CoveragePartitioner(Collection<DoCOutputType.Partition> typesToUse, int start, int stop, int nBins) {
coverageProfiles = new HashMap<DoCOutputType.Partition,DepthOfCoverageStats>();
coverageProfiles = new TreeMap<DoCOutputType.Partition,DepthOfCoverageStats>();
identifiersByType = new HashMap<DoCOutputType.Partition,List<String>>();
types = typesToUse;
for ( DoCOutputType.Partition type : types ) {

View File

@ -31,6 +31,7 @@ import org.broadinstitute.gatk.utils.exceptions.UserException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
/**
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
@ -111,8 +112,8 @@ public class DepthOfCoverageStats {
public DepthOfCoverageStats(DepthOfCoverageStats cloneMe) {
this.binLeftEndpoints = cloneMe.binLeftEndpoints;
granularHistogramBySample = new HashMap<String,long[]>();
totalCoverages = new HashMap<String,Long>();
granularHistogramBySample = new TreeMap<String,long[]>();
totalCoverages = new TreeMap<String,Long>();
for ( String s : cloneMe.getAllSamples() ) {
granularHistogramBySample.put(s,new long[cloneMe.getHistograms().get(s).length]);
for ( int i = 0; i < granularHistogramBySample.get(s).length; i++ ) {

View File

@ -142,6 +142,33 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
execute("testAdjacentIntervals", spec);
}
/**
 * Regression test for sample ordering in the DepthOfCoverage outputs.
 *
 * Runs the walker over two BAMs on a single interval with a gene list, then registers the
 * expected digest for the base output file and for each per-sample auxiliary output.
 */
@Test
public void testSortOrder() {
    // Reported by a user: with the specific sample names found in these BAM files, the column
    // headers and the data in the gene_summary file did not line up.
    final String[] bamPaths = {privateTestDir+"badHashName1.bam", privateTestDir+"badHashName2.bam"};
    final String[] intervalList = {"1:1600000-1700000"};

    final ArrayList<String> bamArgs = new ArrayList<String>(Arrays.asList(bamPaths));
    final ArrayList<String> intervalArgs = new ArrayList<String>(Arrays.asList(intervalList));
    final String command = buildRootCmd(b37KGReference, bamArgs, intervalArgs) +
            " -geneList "+privateTestDir+"refGene_CDK11B.txt";

    final WalkerTestSpec spec = new WalkerTestSpec(command, 0, new ArrayList<String>());
    final File primaryOutput = WalkerTest.createTempFile("depthofcoveragesortorder", ".tmp");
    spec.setOutputFileLocation(primaryOutput);
    spec.addAuxFile("a148e50f9db207adfd5d5f0f29eb54d8", primaryOutput);

    // Each row is {file-name suffix appended to the base output path, expected digest}.
    // Rows are registered in the order listed here.
    final String[][] expectedAuxOutputs = {
            {".sample_cumulative_coverage_counts",      "7ccd5193a3c035d1cc856cbc89e3daf4"},
            {".sample_cumulative_coverage_proportions", "2efe59c20721ce61bc5b334a26d11720"},
            {".sample_gene_summary",                    "9194cec953e0fe0b84a681f9bb63ffbe"},
            {".sample_interval_statistics",             "cf62d95ec1f459fbbe35370c3f0ca481"},
            {".sample_interval_summary",                "b4fcb739b7f9e309e38a7d5e7e4ebb9f"},
            {".sample_statistics",                      "6bf63f9c62071e850c6f0b6356fb63eb"},
            {".sample_summary",                         "e53e6a494bf1cf817762b74917c6f0c9"},
    };
    final String outputPrefix = primaryOutput.getAbsolutePath();
    for ( final String[] expected : expectedAuxOutputs ) {
        spec.addAuxFile(expected[1], createTempFileFromBase(outputPrefix+expected[0]));
    }

    execute("testSortOrder", spec);
}
public void testRefNHandling(boolean includeNs, final String md5) {
String command = "-R " + b37KGReference + " -L 20:26,319,565-26,319,575 -I " + validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -T DepthOfCoverage -baseCounts --omitIntervalStatistics --omitLocusTable --omitPerSampleStats -o %s";
if ( includeNs ) command += " --includeRefNSites";