From ed2fff13aa9a098ddbbe700a21ad2adf9a0642a9 Mon Sep 17 00:00:00 2001 From: ebanks Date: Mon, 4 Jan 2010 02:28:47 +0000 Subject: [PATCH] -Misc improvements to VCF code -Small fix to callset concordance git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2497 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/concordance/NWayVenn.java | 3 ++- .../concordance/SNPGenotypeConcordance.java | 4 ++++ .../gatk/walkers/concordance/SimpleVenn.java | 4 ++++ .../utils/genotype/vcf/VCFGenotypeRecord.java | 22 +++++++++++-------- .../sting/utils/genotype/vcf/VCFUtils.java | 18 ++++++++++----- .../CallsetConcordanceIntegrationTest.java | 10 ++++++++- 6 files changed, 44 insertions(+), 17 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java index c77619207..7bde65b7a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java @@ -25,7 +25,8 @@ public class NWayVenn implements ConcordanceType { TreeSet concordantSamples = new TreeSet(); for ( Entry entry : samplesToRecords.entrySet() ) { - concordantSamples.add(entry.getKey()); + if ( !entry.getValue().isNoCall() ) + concordantSamples.add(entry.getKey()); } StringBuffer tag = new StringBuffer(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SNPGenotypeConcordance.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SNPGenotypeConcordance.java index d4311c485..1a2a1a6bd 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SNPGenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SNPGenotypeConcordance.java @@ -34,7 +34,11 @@ public class SNPGenotypeConcordance implements ConcordanceType { char refBase = ref.getBase(); Genotype call1 = samplesToRecords.get(sample1); + if ( call1 != null && call1.isNoCall() ) + call1 = null; Genotype call2 = samplesToRecords.get(sample2); + if ( call2 != null && call2.isNoCall() ) + call2 = null; if ( call1 == null || call2 == null ) { if ( call1 != null && call1.isPointGenotype() && call1.isVariant(refBase) ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java index a659712bb..d27ca4a35 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java @@ -29,7 +29,11 @@ public class SimpleVenn implements ConcordanceType { public String computeConcordance(Map samplesToRecords, ReferenceContext ref) { Genotype call1 = samplesToRecords.get(sample1); + if ( call1 != null && call1.isNoCall() ) + call1 = null; Genotype call2 = samplesToRecords.get(sample2); + if ( call2 != null && call2.isNoCall() ) + call2 = null; if ( call1 == null && call2 == null ) return null; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java index 74187739f..781e870e0 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java @@ -247,19 +247,23 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked { public String toStringEncoding(List altAlleles, String[] genotypeFormatStrings) { StringBuilder builder = new StringBuilder(); builder.append(toGenotypeString(altAlleles)); - for ( String field : genotypeFormatStrings ) { - String value = mFields.get(field); - if ( value == null && field.equals(OLD_DEPTH_KEY) ) + + if ( !isEmptyGenotype() ) { + for ( String field : genotypeFormatStrings ) { + String value = mFields.get(field); + if ( value == null && field.equals(OLD_DEPTH_KEY) ) value = mFields.get(DEPTH_KEY); - if ( value == null ) + if ( value == null ) continue; - builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR); - if (value.equals("")) - builder.append(getMissingFieldValue(field)); - else - builder.append(value); + builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR); + if (value.equals("")) + builder.append(getMissingFieldValue(field)); + else + builder.append(value); + } } + return builder.toString(); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java index 905adcfe4..25ee85e87 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java @@ -139,6 +139,8 @@ public class VCFUtils { int SLODsSeen = 0; double totalFreq = 0.0; int freqsSeen = 0; + String id = null; + String filter = null; for ( RodVCF rod : rods ) { List myGenotypes = rod.getVCFGenotypeRecords(); @@ -158,7 +160,7 @@ public class VCFUtils { if ( confidence > maxConfidence ) maxConfidence = confidence; - if ( rod.hasNonRefAlleleFrequency() ) { + if ( !rod.isReference() && rod.hasNonRefAlleleFrequency() ) { totalFreq += rod.getNonRefAlleleFrequency(); freqsSeen++; } @@ -167,6 +169,12 @@ public class VCFUtils { totalSLOD += rod.getStrandBias(); SLODsSeen++; } + + if ( rod.getID() != null ) + id = rod.getID(); + + if ( rod.hasFilteringCodes() ) + filter = rod.getFilterString(); } Map infoFields = new HashMap(); @@ -178,16 +186,14 @@ public class VCFUtils { infoFields.put(VCFRecord.STRAND_BIAS_KEY, String.format("%.2f", (totalSLOD/(double)SLODsSeen))); if ( freqsSeen > 0 ) infoFields.put(VCFRecord.ALLELE_FREQUENCY_KEY, String.format("%.2f", (totalFreq/(double)freqsSeen))); - - // TODO -- "." and "0" are wrong -- need to use values from the records - + return new VCFRecord(params.getReferenceBase(), params.getContig(), params.getPosition(), - ".", + (id != null ? id : "."), params.getAlternateBases(), maxConfidence, - "0", + (filter != null ? filter : "."), infoFields, params.getFormatString(), params.getGenotypesRecords()); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java index 072c0cee2..120027cc3 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java @@ -30,7 +30,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest { public void testNWayVenn() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -B set3,VCF," + validationDataLocation + "CEU.sample.vcf -CT NWayVenn", 1, - Arrays.asList("0527ea8ec7de3a144bd0a56db80d62ba")); + Arrays.asList("86d2342fabc8c0916a6d42a29f750ea2")); executeTest("testNWayVenn", spec); } @@ -41,4 +41,12 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest { Arrays.asList("6fbe00cb68d2cdc59dfcb79024fd9893")); executeTest("testMulti", spec); } + + @Test + public void testComplex() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString() + " -B set1,VCF," + validationDataLocation + "complexExample.vcf -B set2,VCF," + validationDataLocation + "complexExample.vcf -CT NWayVenn", 1, + Arrays.asList("8b72e557c0dd111738eaa69e9003fb3f")); + executeTest("testComplex", spec); + } } \ No newline at end of file