From 5fdf17fccb3d0d26f26b4e3edba0d2ea042ad9ea Mon Sep 17 00:00:00 2001 From: ebanks Date: Tue, 29 Dec 2009 17:30:47 +0000 Subject: [PATCH] Removed the VCF "NS" annotation (which wasn't working for pooled calls anyways) since it's ambiguous and not useful. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2465 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/genotyper/UnifiedGenotyper.java | 2 +- .../vcf/VCFGenotypeWriterAdapter.java | 7 ++-- .../sting/utils/genotype/vcf/VCFUtils.java | 2 +- .../CallsetConcordanceIntegrationTest.java | 8 ++-- .../UnifiedGenotyperIntegrationTest.java | 37 +++++++++++++------ 5 files changed, 34 insertions(+), 22 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 5525f50db..e65375685 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -180,7 +180,7 @@ public class UnifiedGenotyper extends LocusWalker infoFields = getInfoFields((VCFVariationCall)locusdata, params); + Map infoFields = getInfoFields((VCFVariationCall)locusdata); // q-score double qual = (locusdata == null) ? 
0 : ((VCFVariationCall)locusdata).getConfidence(); @@ -165,11 +165,10 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter { * get the information fields of the VCF record, given the meta data and parameters * * @param locusdata the metadata associated with this multi sample call - * @param params the parameters * * @return a mapping of info field to value */ - private static Map getInfoFields(VCFVariationCall locusdata, VCFParameters params) { + private static Map getInfoFields(VCFVariationCall locusdata) { Map infoFields = new HashMap(); if ( locusdata != null ) { if ( locusdata.getSLOD() != null ) @@ -181,7 +180,7 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter { infoFields.putAll(otherFields); } } - infoFields.put(VCFRecord.SAMPLE_NUMBER_KEY, String.valueOf(params.getGenotypesRecords().size())); + // no longer used: infoFields.put(VCFRecord.SAMPLE_NUMBER_KEY, String.valueOf(params.getGenotypesRecords().size())); return infoFields; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java index 1ea423556..905adcfe4 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java @@ -171,7 +171,7 @@ public class VCFUtils { Map infoFields = new HashMap(); infoFields.put(VCFRecord.DEPTH_KEY, String.format("%d", totalReadDepth)); - infoFields.put(VCFRecord.SAMPLE_NUMBER_KEY, String.valueOf(params.getGenotypesRecords().size())); + // no longer used: infoFields.put(VCFRecord.SAMPLE_NUMBER_KEY, String.valueOf(params.getGenotypesRecords().size())); // set the overall strand bias and allele frequency to be the average of all entries we've seen if ( SLODsSeen > 0 ) diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java 
b/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java index a8ab8b157..072c0cee2 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java @@ -14,7 +14,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest { public void testSimpleVenn() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SimpleVenn", 1, - Arrays.asList("5c8e4757d2ce46bc50991a171f988327")); + Arrays.asList("f4c4b7430f0e293c27ce38cb89b9338b")); executeTest("testSimpleVenn", spec); } @@ -22,7 +22,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest { public void testSNPConcordance() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SNPGenotypeConcordance:qscore=5", 1, - Arrays.asList("b4904813cdfd37f0a092aa6cadcd3f71")); + Arrays.asList("f754e046bc0fa3a4b3430061e412ef0d")); executeTest("testSNPConcordance", spec); } @@ -30,7 +30,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest { public void testNWayVenn() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -B set3,VCF," + validationDataLocation + "CEU.sample.vcf -CT NWayVenn", 1, - Arrays.asList("cf915dc9762d6a44a7bdadc0d7eae9b8")); + Arrays.asList("0527ea8ec7de3a144bd0a56db80d62ba")); executeTest("testNWayVenn", spec); } @@ -38,7 +38,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest { public void testMulti() { WalkerTestSpec spec = new WalkerTestSpec( 
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SimpleVenn -CT NWayVenn -CT SNPGenotypeConcordance:qscore=5", 1, - Arrays.asList("d046599a2fef386fa0ad5dfc9671c3a9")); + Arrays.asList("6fbe00cb68d2cdc59dfcb79024fd9893")); executeTest("testMulti", spec); } } \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index e51a535e1..2c5434397 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -22,7 +22,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("caeb030b47503e9d79cf1e18b86e8bc9")); + Arrays.asList("94c6c400cbeae33fcd6fea3388fcf73a")); executeTest("testMultiSamplePilot1 - Point Estimate EM", spec); } @@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("f87c182b694a7baeab886d8f75c91e28")); + Arrays.asList("ee14f4328fde95b35e3b1cb919c3712b")); 
executeTest("testMultiSamplePilot2 - Point Estimate EM", spec); } @@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testPooled1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1, - Arrays.asList("acf8006174a460247fabbc650802c29b")); + Arrays.asList("68a4120d7dc9f1880f41311f095978ea")); executeTest("testPooled1", spec); } @@ -56,7 +56,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("92b32599938bc60d6d636b425c5e0a6c")); + Arrays.asList("4504dd9c77dc502e9acbe687063a82c7")); executeTest("testMultiSamplePilot1 - Joint Estimate", spec); } @@ -64,7 +64,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("cc90c3dd5d30dd4ac0fceb748283ddb9")); + Arrays.asList("15fefcebedae65c1f0c94b8498bc647a")); executeTest("testMultiSamplePilot2 - Joint Estimate", spec); } @@ -72,10 +72,23 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new 
WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("d04c02cdbf1e1adbdf84540c861a64f7")); + Arrays.asList("d87b46694da0cc8b0ff82c1c69ee073f")); executeTest("testSingleSamplePilot2 - Joint Estimate", spec); } + // -------------------------------------------------------------------------------------------------------------- + // + // testing parallelization + // + // -------------------------------------------------------------------------------------------------------------- + @Test + public void testParallelization() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,400,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30 -nt 4", 1, + Arrays.asList("bcbdd0369a0621d40bbdd6ef4c13f057")); + executeTest("test parallelization", spec); + } + // -------------------------------------------------------------------------------------------------------------- // // testing parameters @@ -85,11 +98,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testParameter() { HashMap e = new HashMap(); - e.put( "-genotype", "ce888ec4bdd85e24b1129198327ff315" ); - e.put( "-all_bases", "95be4777a8f89f98d0a033fc3d75a3c1" ); - e.put( "--min_base_quality_score 10", 
"2a53a3889fe1c32b066228f749ab4790" ); + e.put( "--min_mapping_quality_score 10", "224c962fc6178059ae36ed9a4d614d26" ); + e.put( "--max_mismatches_in_40bp_window 5", "fa8dd3c00d36ca62a88b5ceeb50ee33b" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -103,7 +116,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 10 ", 1, - Arrays.asList("c8c4a463ab23585d8373f3e8a7fbec22")); + Arrays.asList("13aad04333ef26eca6179221acf8abc0")); executeTest("testConfidence", spec); }