Based on feedback from Kiran, we now uniquify sample names as sample.rodName (instead of sample.1, sample.2, ...)

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2005 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-11-10 02:41:37 +00:00
parent 3990c6d950
commit c9c3cf477a
2 changed files with 29 additions and 20 deletions

View File

@ -9,6 +9,7 @@ import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.Genotype;
import java.util.*;
import java.util.Map.Entry;
/**
* A set of static utility methods for common operations on VCF files/records.
@ -32,7 +33,7 @@ public class VCFUtils {
*/
public static void getUniquifiedSamplesFromRods(GenomeAnalysisEngine toolkit, Set<String> samples, Map<Pair<String, String>, String> rodNamesToSampleNames) {
// keep a map of sample name to next available uniquified index
// keep a map of sample name to occurrences encountered
HashMap<String, Integer> sampleOverlapMap = new HashMap<String, Integer>();
// iterate to get all of the sample names
@ -51,39 +52,47 @@ public class VCFUtils {
private static void addUniqueSample(Set<String> samples, Map<String, Integer> sampleOverlapMap, Map<Pair<String, String>, String> rodNamesToSampleNames, String newSample, String rodName) {
// if it's already a non-unique sample name, give it a unique suffix and increment the value
Integer uniqueIndex = sampleOverlapMap.get(newSample);
if ( uniqueIndex != null ) {
String uniqueName = newSample + "." + uniqueIndex;
// how many occurrences have we seen so far?
Integer occurrences = sampleOverlapMap.get(newSample);
// if this is the first one, just add it to the list of samples
if ( occurrences == null ) {
samples.add(newSample);
rodNamesToSampleNames.put(new Pair<String, String>(rodName, newSample), newSample);
sampleOverlapMap.put(newSample, 1);
}
// if it's already been seen multiple times, give it a unique suffix and increment the value
else if ( occurrences >= 2 ) {
String uniqueName = newSample + "." + rodName;
samples.add(uniqueName);
rodNamesToSampleNames.put(new Pair<String, String>(rodName, newSample), uniqueName);
sampleOverlapMap.put(newSample, uniqueIndex + 1);
sampleOverlapMap.put(newSample, occurrences + 1);
}
// if this is the second occurrence of the sample name, uniquify both of them
else if ( samples.contains(newSample) ) {
else { // occurrences == 1, i.e. this is the second time we have seen this sample name
// remove the 1st occurrence, uniquify it, and add it back
samples.remove(newSample);
String uniqueName1 = newSample + "." + 1;
samples.add(uniqueName1);
for ( java.util.Map.Entry<Pair<String, String>, String> entry : rodNamesToSampleNames.entrySet() ) {
String uniqueName1 = null;
for ( Entry<Pair<String, String>, String> entry : rodNamesToSampleNames.entrySet() ) {
if ( entry.getValue().equals(newSample) ) {
uniqueName1 = newSample + "." + entry.getKey().first;
entry.setValue(uniqueName1);
break;
}
}
samples.add(uniqueName1);
String uniqueName2 = newSample + "." + 2;
// add the second one
String uniqueName2 = newSample + "." + rodName;
samples.add(uniqueName2);
rodNamesToSampleNames.put(new Pair<String, String>(rodName, newSample), uniqueName2);
sampleOverlapMap.put(newSample, 3);
sampleOverlapMap.put(newSample, 2);
}
// otherwise, just add it to the list of samples
else {
samples.add(newSample);
rodNamesToSampleNames.put(new Pair<String, String>(rodName, newSample), newSample);
}
}
/**

View File

@ -14,7 +14,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
public void testSimpleVenn() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -CT SimpleVenn", 1,
Arrays.asList("0a71c8f06b4179ba59cefad962cd034c"));
Arrays.asList("1b8e26cd30e993da9318abd6475f38d0"));
executeTest("testSimpleVenn", spec);
}
@ -22,7 +22,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
public void testSNPConcordance() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -CT SNPGenotypeConcordance:qscore=5", 1,
Arrays.asList("5da8bf664813f0ab8b22070097f6900e"));
Arrays.asList("5a89b8edcdf2e3f469ac354cb1524033"));
executeTest("testSNPConcordance", spec);
}
@ -30,7 +30,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
public void testNWayVenn() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -B set3,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/CEU.sample.vcf -CT NWayVenn", 1,
Arrays.asList("9da88442eea094da8b6110d8f5ed4408"));
Arrays.asList("1dec083580b75a9c59fcb61426117134"));
executeTest("testNWayVenn", spec);
}
}