From 94f5edb68a2274f4752841acef60b48ffbcfb09d Mon Sep 17 00:00:00 2001 From: ebanks Date: Fri, 18 Dec 2009 04:14:14 +0000 Subject: [PATCH] 1. Fixed VCFGenotypeRecord bug (it needs to emit fields in the order specified by the GenotypeFormatString) 2. isNoCall() added to Genotype interface so that we can distinguish between ref and no calls (all we had before was isVariant()) 3. Added Hardy-Weinberg annotation; still experimental - not working yet so don't use it. 4. Move 'output type' argument out of the UnifiedArgumentCollection and into the UnifiedGenotyper, in preparation for parallelization. 5. Improved some of the UG integration tests. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2398 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/annotator/HardyWeinberg.java | 52 +++++++++++++++++++ .../genotyper/GenotypeCalculationModel.java | 6 ++- .../GenotypeCalculationModelFactory.java | 9 ++-- .../genotyper/UnifiedArgumentCollection.java | 4 -- .../walkers/genotyper/UnifiedGenotyper.java | 11 ++-- .../sting/utils/genotype/BasicGenotype.java | 2 + .../sting/utils/genotype/Genotype.java | 7 +++ .../utils/genotype/geli/GeliGenotypeCall.java | 2 + .../utils/genotype/glf/GLFGenotypeCall.java | 2 + .../utils/genotype/vcf/VCFGenotypeCall.java | 3 ++ .../utils/genotype/vcf/VCFGenotypeRecord.java | 23 ++++++-- .../sting/utils/genotype/vcf/VCFRecord.java | 7 +-- .../VariantAnnotatorIntegrationTest.java | 16 +++--- .../UnifiedGenotyperIntegrationTest.java | 41 ++++++++------- .../utils/genotype/vcf/VCFReaderTest.java | 4 +- 15 files changed, 140 insertions(+), 49 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java new file mode 100755 index 000000000..7fedcdc08 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java @@ -0,0 +1,52 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; +import org.broadinstitute.sting.utils.genotype.*; +import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.QualityUtils; + +import java.util.List; +import java.util.Map; + + +public class HardyWeinberg implements VariantAnnotation { + + private static final int MIN_SAMPLES = 10; + private static final int MIN_GENOTYPE_QUALITY = 10; + private static final int MIN_NEG_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10; + + public String annotate(ReferenceContext ref, Map stratifiedContexts, Variation variation) { + + if ( !(variation instanceof VariantBackedByGenotype) ) + return null; + final List genotypes = ((VariantBackedByGenotype)variation).getGenotypes(); + if ( genotypes == null || genotypes.size() < MIN_SAMPLES ) + return null; + + int refCount = 0; + int hetCount = 0; + int homCount = 0; + for ( Genotype genotype : genotypes ) { + if ( genotype.isNoCall() ) + continue; + if ( genotype.getNegLog10PError() < MIN_NEG_LOG10_PERROR ) + continue; + + if ( !genotype.isVariant(ref.getBase()) ) + refCount++; + else if ( genotype.isHet() ) + hetCount++; + else + homCount++; + } + + double pvalue = HardyWeinbergCalculation.hwCalculate(refCount, hetCount, homCount); + System.out.println(refCount + " " + hetCount + " " + homCount + " " + pvalue); + return String.format("%.1f", QualityUtils.phredScaleErrorRate(pvalue)); + } + + public String getKeyName() { return "HW"; } + + public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("HW", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Phred-scaled p-value for Hardy-Weinberg violation"); } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java index 902b79f72..fe8b9d471 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java @@ -49,16 +49,18 @@ public abstract class GenotypeCalculationModel implements Cloneable { * @param samples samples in input bam * @param logger logger * @param UAC unified arg collection + * @param outputFormat output format */ protected void initialize(Set samples, Logger logger, - UnifiedArgumentCollection UAC) { + UnifiedArgumentCollection UAC, + GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat) { this.samples = new TreeSet(samples); this.logger = logger; baseModel = UAC.baseModel; heterozygosity = UAC.heterozygosity; defaultPlatform = UAC.defaultPlatform; - OUTPUT_FORMAT = UAC.VAR_FORMAT; + OUTPUT_FORMAT = outputFormat; ALL_BASE_MODE = UAC.ALL_BASES; GENOTYPE_MODE = UAC.GENOTYPE; POOL_SIZE = UAC.POOLSIZE; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java index 68ac7ddc3..2d0eaec44 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import static org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCalculationModel.Model.*; +import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; import org.apache.log4j.Logger; import java.util.Set; @@ -45,14 +46,16 @@ public class GenotypeCalculationModelFactory { /** * General and correct way to create GenotypeCalculationModel objects for arbitrary models * - * @param UAC The unified argument collection * @param samples samples in bam * @param logger logger + * @param UAC the unified argument collection + * @param outputFormat the output format * @return model */ public static GenotypeCalculationModel makeGenotypeCalculation(Set samples, Logger logger, - UnifiedArgumentCollection UAC) { + UnifiedArgumentCollection UAC, + GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat) { GenotypeCalculationModel gcm; switch ( UAC.genotypeModel ) { case EM_POINT_ESTIMATE: @@ -67,7 +70,7 @@ public class GenotypeCalculationModelFactory { default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel); } - gcm.initialize(samples, logger, UAC); + gcm.initialize(samples, logger, UAC, outputFormat); return gcm; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 0c9dbb7e9..175bce691 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; public class UnifiedArgumentCollection { @@ -45,9 +44,6 @@ public class UnifiedArgumentCollection { public int POOLSIZE = 0; // control the output - @Argument(fullName = "variant_output_format", shortName = "vf", doc = "Format to be used to represent variants; default is VCF", required = false) - public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF; - @Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false) public boolean GENOTYPE = false; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index b45430593..c7f7653b5 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -57,6 +57,9 @@ public class UnifiedGenotyper extends LocusWalker headerInfo = new HashSet(); // this is only applicable to VCF - if ( UAC.VAR_FORMAT != GenotypeWriterFactory.GENOTYPE_FORMAT.VCF ) + if ( VAR_FORMAT != GenotypeWriterFactory.GENOTYPE_FORMAT.VCF ) return headerInfo; // first, the basic info diff --git a/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java b/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java index c51fc2498..df6025729 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java @@ -91,6 +91,8 @@ public class BasicGenotype implements Genotype { return true; } + public boolean isNoCall() { return false; } + /** * Returns true if observed allele bases differ (regardless of whether they are ref or alt) * diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java b/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java index 977d14df3..d48a8e31f 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java @@ -45,6 +45,13 @@ public interface Genotype { */ public boolean isHet(); + /** + * Returns true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF) + * + * @return true if we're het, false otherwise + */ + public boolean isNoCall(); + /** * get the genotype's location * diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java index cca345c32..1851e73c5 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java @@ -205,6 +205,8 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements Genot return !isHom(); } + public boolean isNoCall() { return false; } + /** * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base * is located right after the specified location diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java index 29f5f1812..043160302 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java @@ -126,6 +126,8 @@ public class GLFGenotypeCall implements GenotypeCall, ReadBacked, LikelihoodsBac return !isHom(); } + public boolean isNoCall() { return false; } + /** * * @return return this genotype as a variant diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java index 4dbff559a..833deee69 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java @@ -129,6 +129,9 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty return !isHom(); } + // You can't make a 'no call' genotype call + public boolean isNoCall() { return false; } + /** * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base * is located right after the specified location diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java index 19b00a8db..74187739f 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java @@ -176,6 +176,14 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked { return !isHom(); } + public boolean isNoCall() { + for ( VCFGenotypeEncoding encoding : mGenotypeAlleles ) { + if ( encoding.getType() != VCFGenotypeEncoding.TYPE.UNCALLED ) + return false; + } + return true; + } + public int getPloidy() { return 2; } @@ -232,18 +240,25 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked { * output a string representation of the VCFGenotypeRecord, given the alternate alleles * * @param altAlleles the alternate alleles, needed for toGenotypeString() + * @param genotypeFormatStrings genotype format strings * * @return a string */ - public String toStringEncoding(List altAlleles) { + public String toStringEncoding(List altAlleles, String[] genotypeFormatStrings) { StringBuilder builder = new StringBuilder(); builder.append(toGenotypeString(altAlleles)); - for (String field : mFields.keySet()) { + for ( String field : genotypeFormatStrings ) { + String value = mFields.get(field); + if ( value == null && field.equals(OLD_DEPTH_KEY) ) + value = mFields.get(DEPTH_KEY); + if ( value == null ) + continue; + builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR); - if (mFields.get(field).equals("")) + if (value.equals("")) builder.append(getMissingFieldValue(field)); else - builder.append(mFields.get(field)); + builder.append(value); } return builder.toString(); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java index 5072626f1..b28a59dfe 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java @@ -114,7 +114,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { */ public VCFRecord(Map columnValues) { extractFields(columnValues); - mGenotypeFormatString = null; + mGenotypeFormatString = ""; } /** @@ -553,17 +553,18 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { * @param header the header object */ private void addGenotypeData(StringBuilder builder, VCFHeader header) { - builder.append(FIELD_SEPERATOR + this.getGenotypeFormatString()); + builder.append(FIELD_SEPERATOR + mGenotypeFormatString); if (header.getGenotypeSamples().size() < getGenotypes().size()) throw new RuntimeException("We have more genotype samples than the header specified"); Map gMap = genotypeListToMap(getGenotypes()); + String[] genotypeFormatStrings = mGenotypeFormatString.split(":"); for (String genotype : header.getGenotypeSamples()) { builder.append(FIELD_SEPERATOR); if (gMap.containsKey(genotype)) { VCFGenotypeRecord rec = gMap.get(genotype); - builder.append(rec.toStringEncoding(this.mAlts)); + builder.append(rec.toStringEncoding(this.mAlts, genotypeFormatStrings)); gMap.remove(genotype); } else { builder.append(VCFGenotypeRecord.EMPTY_GENOTYPE); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 44057e592..f50629949 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -50,7 +50,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsNotAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("5c73e85a91bf8f8e9c47c445ec77233c")); + Arrays.asList("fecfec68226bbd9b458ede55d48e0762")); executeTest("test file has annotations, not asking for annotations, #1", spec); } @@ -58,7 +58,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsNotAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("5298c24e956361d209f14ac6138a3bbd")); + Arrays.asList("59845ada9dc5bc66e0042cfefdf8f16f")); executeTest("test file has annotations, not asking for annotations, #2", spec); } @@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("2d3e220e96eb00c4c7bb2dbc8353d9bd")); + Arrays.asList("1b9815b89dde8a705b3744527fe1200a")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("d0e70ee36ed1a59b5f086bc30c9b2673")); + Arrays.asList("618056046f1ce83afad9effdebe1a74e")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -82,7 +82,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsNotAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("acef97201f2d17007dbdee6c639536ea")); + Arrays.asList("c70cc6bb6b83748ec8d968dc3bf879c4")); executeTest("test file doesn't have annotations, not asking for annotations, #1", spec); } @@ -90,7 +90,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsNotAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("aaae89c48fcab615fe4204220ec62859")); + Arrays.asList("e7c8900ff9a18f2c8a033ae741e7143b")); executeTest("test file doesn't have annotations, not asking for annotations, #2", spec); } @@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("2031599f8554d16342f53cb80fae296a")); + Arrays.asList("85ec66095c67865be9777c7c93ceb263")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("2b47c7a6a7f0ce15fd1d1dd02ecab73b")); + Arrays.asList("97da2e0699faaa17ea85e03ce3cd61fa")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index cf26ce48d..0fb643c29 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -22,7 +22,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("b19f85fddd08485c668e7192df79d944")); + Arrays.asList("44942d4e56aec62520903d297cee5fd8")); executeTest("testMultiSamplePilot1 - Point Estimate EM", spec); } @@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("a6c8e77d1741f3f6d958a0634fa59e14")); + Arrays.asList("aabea51b271df324eb543bc3417c0a42")); executeTest("testMultiSamplePilot2 - Point Estimate EM", spec); } @@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testPooled1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1, - Arrays.asList("459b8f6a6265b67718495e9bffe96fa8")); + Arrays.asList("a733878ce9c257148a224aa00c0e63e9")); executeTest("testPooled1", spec); } @@ -56,7 +56,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("c132c93cec5fdf02c3235180a7aa7dcc")); + Arrays.asList("b68ad77195f6e926c4fce5f1a91f3bc9")); executeTest("testMultiSamplePilot1 - Joint Estimate", spec); } @@ -64,7 +64,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("09d48c56eae25fb6418e10feeb5b3fc5")); + Arrays.asList("b63c43ba827cfc74bb8ef679ce3d847d")); executeTest("testMultiSamplePilot2 - Joint Estimate", spec); } @@ -72,7 +72,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("3544b1eb97834e2050239c101eaddb2d")); + Arrays.asList("8ae0be3b65c83d973519abad858c0073")); executeTest("testSingleSamplePilot2 - Joint Estimate", spec); } @@ -85,11 +85,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testParameter() { HashMap e = new HashMap(); - e.put( "-genotype", "dc908834d97bde696b6bdccf756c9ab6" ); - e.put( "-all_bases", "484d424cbcd646b3c5d823cad43f4f5f" ); - e.put( "--min_base_quality_score 10", "45e336766e94f05e6ac44d1e50473e18" ); - e.put( "--min_mapping_quality_score 10", "de417c1156483ab87b0f8a25f10d4d87" ); - e.put( "--max_mismatches_in_40bp_window 5", "05132003c73e10fbdab2af14af677764" ); + e.put( "-genotype", "c984952b91193d9550066f9514434183" ); + e.put( "-all_bases", "0062942f2323ea0f2c9d3a3739a28d20" ); + e.put( "--min_base_quality_score 10", "8a645a76d85e8a6edda6acdf8071ee6b" ); + e.put( "--min_mapping_quality_score 10", "cf6c38de9872bdbf5da1a22d8c962ebe" ); + e.put( "--max_mismatches_in_40bp_window 5", "e21a319eb0efbe11c3c1eeea7c9c0144" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -103,7 +103,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 10 ", 1, - Arrays.asList("ea154d6ab6bbaf40da0dac5162b7e9fd")); + Arrays.asList("6823ce46547cf9693cda040dcbd4c9c1")); executeTest("testConfidence", spec); } @@ -117,7 +117,6 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testOtherFormat() { HashMap e = new HashMap(); e.put( "GLF", "8c72131dfb2b830efb9938a582672a3e" ); - e.put( "GELI", "e9e00bdb32ce63420988956c1a9b805f" ); e.put( "GELI_BINARY", "46162567eac3a5004f5f9b4c93d1b8d3" ); for ( Map.Entry entry : e.entrySet() ) { @@ -128,6 +127,10 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { } } + // --------------------------------------------- // + // ALL REMAINING TESTS ARE OUTPUT IN GELI FORMAT // + // --------------------------------------------- // + // -------------------------------------------------------------------------------------------------------------- // // testing heterozygosity @@ -136,12 +139,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testHeterozyosity() { HashMap e = new HashMap(); - e.put( 0.01, "ae0134840e0c9fa295b23ecf4ba3e768" ); - e.put( 1.0 / 1850, "71ddcf22f4c0538d03508b720d01d3fe" ); + e.put( 0.01, "700e6426c4142c823f7ac1dde2aa19ea" ); + e.put( 1.0 / 1850, "e9e00bdb32ce63420988956c1a9b805f" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30 --heterozygosity " + entry.getKey(), 1, + "-T UnifiedGenotyper -vf GELI -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30 --heterozygosity " + entry.getKey(), 1, Arrays.asList(entry.getValue())); executeTest(String.format("testHeterozyosity[%s]", entry.getKey()), spec); } @@ -156,12 +159,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testOtherBaseCallModel() { HashMap e = new HashMap(); - e.put( "one_state", "ab71d814e897d7f1440af8f02365b4fa" ); - e.put( "three_state", "ce55849c55c58c59773200b2a88db0ee" ); + e.put( "one_state", "d69abadc3bf861d621017c0e41b87b0a" ); + e.put( "three_state", "ebcc76cc4579393f98aecb59bdc56507" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -gm JOINT_ESTIMATE -confidence 30 -bm " + entry.getKey(), 1, + "-T UnifiedGenotyper -vf GELI -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -gm JOINT_ESTIMATE -confidence 30 -bm " + entry.getKey(), 1, Arrays.asList(entry.getValue())); executeTest(String.format("testOtherBaseCallModel[%s]", entry.getKey()), spec); } diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java index 3d166c727..32137c33f 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java @@ -195,8 +195,8 @@ public class VCFReaderTest extends BaseTest { if (!header.contains(record.getSampleName())) { Assert.fail("Parsed header doesn't contain field " + record.getSampleName()); } - if (!variant.get(header.indexOf(record.getSampleName())).equals(record.toStringEncoding(newRod.mCurrentRecord.getAlternateAlleles()))) { - Assert.fail("Parsed value for " + record.getSampleName() + " doesn't contain the same value ( " + record.toStringEncoding(newRod.mCurrentRecord.getAlternateAlleles()) + if (!variant.get(header.indexOf(record.getSampleName())).equals(record.toStringEncoding(newRod.mCurrentRecord.getAlternateAlleles(), newRod.mCurrentRecord.getGenotypeFormatString().split(":")))) { + Assert.fail("Parsed value for " + record.getSampleName() + " doesn't contain the same value ( " + record.toStringEncoding(newRod.mCurrentRecord.getAlternateAlleles(), newRod.mCurrentRecord.getGenotypeFormatString().split(":")) + " != " + variant.get(header.indexOf(record.getSampleName()))); } }