From c9c3cf477ae57ab28b9dfe6da093c7f0038817db Mon Sep 17 00:00:00 2001 From: ebanks Date: Tue, 10 Nov 2009 02:41:37 +0000 Subject: [PATCH] Based on feedback from Kiran, we now uniquify sample names as sample.rodName (instead of sample.1, sample.2, ...) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2005 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/utils/genotype/vcf/VCFUtils.java | 43 +++++++++++-------- .../CallsetConcordanceIntegrationTest.java | 6 +-- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java index 2858b9b30..c910e36f1 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java @@ -9,6 +9,7 @@ import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.genotype.Genotype; import java.util.*; +import java.util.Map.Entry; /** * A set of static utility methods for common operations on VCF files/records. @@ -32,7 +33,7 @@ public class VCFUtils { */ public static void getUniquifiedSamplesFromRods(GenomeAnalysisEngine toolkit, Set samples, Map, String> rodNamesToSampleNames) { - // keep a map of sample name to next available uniquified index + // keep a map of sample name to occurrences encountered HashMap sampleOverlapMap = new HashMap(); // iterate to get all of the sample names @@ -51,39 +52,47 @@ public class VCFUtils { private static void addUniqueSample(Set samples, Map sampleOverlapMap, Map, String> rodNamesToSampleNames, String newSample, String rodName) { - // if it's already a non-unique sample name, give it a unique suffix and increment the value - Integer uniqueIndex = sampleOverlapMap.get(newSample); - if ( uniqueIndex != null ) { - String uniqueName = newSample + "." + uniqueIndex; + // how many occurrences have we seen so far? 
+ Integer occurrences = sampleOverlapMap.get(newSample); + + // if this is the first one, just add it to the list of samples + if ( occurrences == null ) { + samples.add(newSample); + rodNamesToSampleNames.put(new Pair(rodName, newSample), newSample); + sampleOverlapMap.put(newSample, 1); + } + + // if it's already been seen multiple times, give it a unique suffix and increment the value + else if ( occurrences >= 2 ) { + String uniqueName = newSample + "." + rodName; samples.add(uniqueName); rodNamesToSampleNames.put(new Pair(rodName, newSample), uniqueName); - sampleOverlapMap.put(newSample, uniqueIndex + 1); + sampleOverlapMap.put(newSample, occurrences + 1); } // if this is the second occurrence of the sample name, uniquify both of them - else if ( samples.contains(newSample) ) { + else { // occurrences == 1 + + // remove the 1st occurrence, uniquify it, and add it back samples.remove(newSample); - String uniqueName1 = newSample + "." + 1; - samples.add(uniqueName1); - for ( java.util.Map.Entry, String> entry : rodNamesToSampleNames.entrySet() ) { + String uniqueName1 = null; + for ( Entry, String> entry : rodNamesToSampleNames.entrySet() ) { if ( entry.getValue().equals(newSample) ) { + uniqueName1 = newSample + "." + entry.getKey().first; entry.setValue(uniqueName1); break; } } + samples.add(uniqueName1); - String uniqueName2 = newSample + "." 
+ rodName; samples.add(uniqueName2); rodNamesToSampleNames.put(new Pair(rodName, newSample), uniqueName2); - sampleOverlapMap.put(newSample, 3); + sampleOverlapMap.put(newSample, 2); } - // otherwise, just add it to the list of samples - else { - samples.add(newSample); - rodNamesToSampleNames.put(new Pair(rodName, newSample), newSample); - } } /** diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java index ca0db4d25..480219bcc 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java @@ -14,7 +14,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest { public void testSimpleVenn() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -CT SimpleVenn", 1, - Arrays.asList("0a71c8f06b4179ba59cefad962cd034c")); + Arrays.asList("1b8e26cd30e993da9318abd6475f38d0")); executeTest("testSimpleVenn", spec); } @@ -22,7 +22,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest { public void testSNPConcordance() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -CT SNPGenotypeConcordance:qscore=5", 1, - Arrays.asList("5da8bf664813f0ab8b22070097f6900e")); + Arrays.asList("5a89b8edcdf2e3f469ac354cb1524033")); executeTest("testSNPConcordance", spec); } @@ -30,7 +30,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest { public void testNWayVenn() { WalkerTestSpec spec = new WalkerTestSpec( 
baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -B set3,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/CEU.sample.vcf -CT NWayVenn", 1, - Arrays.asList("9da88442eea094da8b6110d8f5ed4408")); + Arrays.asList("1dec083580b75a9c59fcb61426117134")); executeTest("testNWayVenn", spec); } } \ No newline at end of file