diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java new file mode 100755 index 000000000..7fedcdc08 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java @@ -0,0 +1,52 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; +import org.broadinstitute.sting.utils.genotype.*; +import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.QualityUtils; + +import java.util.List; +import java.util.Map; + + +public class HardyWeinberg implements VariantAnnotation { + + private static final int MIN_SAMPLES = 10; + private static final int MIN_GENOTYPE_QUALITY = 10; + private static final int MIN_NEG_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10; + + public String annotate(ReferenceContext ref, Map stratifiedContexts, Variation variation) { + + if ( !(variation instanceof VariantBackedByGenotype) ) + return null; + final List genotypes = ((VariantBackedByGenotype)variation).getGenotypes(); + if ( genotypes == null || genotypes.size() < MIN_SAMPLES ) + return null; + + int refCount = 0; + int hetCount = 0; + int homCount = 0; + for ( Genotype genotype : genotypes ) { + if ( genotype.isNoCall() ) + continue; + if ( genotype.getNegLog10PError() < MIN_NEG_LOG10_PERROR ) + continue; + + if ( !genotype.isVariant(ref.getBase()) ) + refCount++; + else if ( genotype.isHet() ) + hetCount++; + else + homCount++; + } + + double pvalue = HardyWeinbergCalculation.hwCalculate(refCount, hetCount, homCount); + System.out.println(refCount + " " + hetCount + " " + homCount + " " + pvalue); + return String.format("%.1f", QualityUtils.phredScaleErrorRate(pvalue)); + } + + public String getKeyName() { return "HW"; } + + public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("HW", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Phred-scaled p-value for Hardy-Weinberg violation"); } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java index 902b79f72..fe8b9d471 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java @@ -49,16 +49,18 @@ public abstract class GenotypeCalculationModel implements Cloneable { * @param samples samples in input bam * @param logger logger * @param UAC unified arg collection + * @param outputFormat output format */ protected void initialize(Set samples, Logger logger, - UnifiedArgumentCollection UAC) { + UnifiedArgumentCollection UAC, + GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat) { this.samples = new TreeSet(samples); this.logger = logger; baseModel = UAC.baseModel; heterozygosity = UAC.heterozygosity; defaultPlatform = UAC.defaultPlatform; - OUTPUT_FORMAT = UAC.VAR_FORMAT; + OUTPUT_FORMAT = outputFormat; ALL_BASE_MODE = UAC.ALL_BASES; GENOTYPE_MODE = UAC.GENOTYPE; POOL_SIZE = UAC.POOLSIZE; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java index 68ac7ddc3..2d0eaec44 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import static org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCalculationModel.Model.*; +import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; import org.apache.log4j.Logger; import java.util.Set; @@ -45,14 +46,16 @@ public class GenotypeCalculationModelFactory { /** * General and correct way to create GenotypeCalculationModel objects for arbitrary models * - * @param UAC The unified argument collection * @param samples samples in bam * @param logger logger + * @param UAC the unified argument collection + * @param outputFormat the output format * @return model */ public static GenotypeCalculationModel makeGenotypeCalculation(Set samples, Logger logger, - UnifiedArgumentCollection UAC) { + UnifiedArgumentCollection UAC, + GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat) { GenotypeCalculationModel gcm; switch ( UAC.genotypeModel ) { case EM_POINT_ESTIMATE: @@ -67,7 +70,7 @@ public class GenotypeCalculationModelFactory { default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel); } - gcm.initialize(samples, logger, UAC); + gcm.initialize(samples, logger, UAC, outputFormat); return gcm; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 0c9dbb7e9..175bce691 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; public class UnifiedArgumentCollection { @@ -45,9 +44,6 @@ public class UnifiedArgumentCollection { public int POOLSIZE = 0; // control the output - @Argument(fullName = "variant_output_format", shortName = "vf", doc = "Format to be used to represent variants; default is VCF", required = false) - public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF; - @Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false) public boolean GENOTYPE = false; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index b45430593..c7f7653b5 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -57,6 +57,9 @@ public class UnifiedGenotyper extends LocusWalker headerInfo = new HashSet(); // this is only applicable to VCF - if ( UAC.VAR_FORMAT != GenotypeWriterFactory.GENOTYPE_FORMAT.VCF ) + if ( VAR_FORMAT != GenotypeWriterFactory.GENOTYPE_FORMAT.VCF ) return headerInfo; // first, the basic info diff --git a/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java b/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java index c51fc2498..df6025729 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java @@ -91,6 +91,8 @@ public class BasicGenotype implements Genotype { return true; } + public boolean isNoCall() { return false; } + /** * Returns true if observed allele bases differ (regardless of whether they are ref or alt) * diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java b/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java index 977d14df3..d48a8e31f 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java @@ -45,6 +45,13 @@ public interface Genotype { */ public boolean isHet(); + /** + * Returns true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF) + * + * @return true if we're het, false otherwise + */ + public boolean isNoCall(); + /** * get the genotype's location * diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java index cca345c32..1851e73c5 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java @@ -205,6 +205,8 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements Genot return !isHom(); } + public boolean isNoCall() { return false; } + /** * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base * is located right after the specified location diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java index 29f5f1812..043160302 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java @@ -126,6 +126,8 @@ public class GLFGenotypeCall implements GenotypeCall, ReadBacked, LikelihoodsBac return !isHom(); } + public boolean isNoCall() { return false; } + /** * * @return return this genotype as a variant diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java index 4dbff559a..833deee69 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java @@ -129,6 +129,9 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty return !isHom(); } + // You can't make a 'no call' genotype call + public boolean isNoCall() { return false; } + /** * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base * is located right after the specified location diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java index 19b00a8db..74187739f 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java @@ -176,6 +176,14 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked { return !isHom(); } + public boolean isNoCall() { + for ( VCFGenotypeEncoding encoding : mGenotypeAlleles ) { + if ( encoding.getType() != VCFGenotypeEncoding.TYPE.UNCALLED ) + return false; + } + return true; + } + public int getPloidy() { return 2; } @@ -232,18 +240,25 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked { * output a string representation of the VCFGenotypeRecord, given the alternate alleles * * @param altAlleles the alternate alleles, needed for toGenotypeString() + * @param genotypeFormatStrings genotype format strings * * @return a string */ - public String toStringEncoding(List altAlleles) { + public String toStringEncoding(List altAlleles, String[] genotypeFormatStrings) { StringBuilder builder = new StringBuilder(); builder.append(toGenotypeString(altAlleles)); - for (String field : mFields.keySet()) { + for ( String field : genotypeFormatStrings ) { + String value = mFields.get(field); + if ( value == null && field.equals(OLD_DEPTH_KEY) ) + value = mFields.get(DEPTH_KEY); + if ( value == null ) + continue; + builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR); - if (mFields.get(field).equals("")) + if (value.equals("")) builder.append(getMissingFieldValue(field)); else - builder.append(mFields.get(field)); + builder.append(value); } return builder.toString(); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java index 5072626f1..b28a59dfe 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java @@ -114,7 +114,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { */ public VCFRecord(Map columnValues) { extractFields(columnValues); - mGenotypeFormatString = null; + mGenotypeFormatString = ""; } /** @@ -553,17 +553,18 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { * @param header the header object */ private void addGenotypeData(StringBuilder builder, VCFHeader header) { - builder.append(FIELD_SEPERATOR + this.getGenotypeFormatString()); + builder.append(FIELD_SEPERATOR + mGenotypeFormatString); if (header.getGenotypeSamples().size() < getGenotypes().size()) throw new RuntimeException("We have more genotype samples than the header specified"); Map gMap = genotypeListToMap(getGenotypes()); + String[] genotypeFormatStrings = mGenotypeFormatString.split(":"); for (String genotype : header.getGenotypeSamples()) { builder.append(FIELD_SEPERATOR); if (gMap.containsKey(genotype)) { VCFGenotypeRecord rec = gMap.get(genotype); - builder.append(rec.toStringEncoding(this.mAlts)); + builder.append(rec.toStringEncoding(this.mAlts, genotypeFormatStrings)); gMap.remove(genotype); } else { builder.append(VCFGenotypeRecord.EMPTY_GENOTYPE); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 44057e592..f50629949 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -50,7 +50,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsNotAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("5c73e85a91bf8f8e9c47c445ec77233c")); + Arrays.asList("fecfec68226bbd9b458ede55d48e0762")); executeTest("test file has annotations, not asking for annotations, #1", spec); } @@ -58,7 +58,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsNotAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("5298c24e956361d209f14ac6138a3bbd")); + Arrays.asList("59845ada9dc5bc66e0042cfefdf8f16f")); executeTest("test file has annotations, not asking for annotations, #2", spec); } @@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("2d3e220e96eb00c4c7bb2dbc8353d9bd")); + Arrays.asList("1b9815b89dde8a705b3744527fe1200a")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("d0e70ee36ed1a59b5f086bc30c9b2673")); + Arrays.asList("618056046f1ce83afad9effdebe1a74e")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -82,7 +82,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsNotAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("acef97201f2d17007dbdee6c639536ea")); + Arrays.asList("c70cc6bb6b83748ec8d968dc3bf879c4")); executeTest("test file doesn't have annotations, not asking for annotations, #1", spec); } @@ -90,7 +90,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsNotAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("aaae89c48fcab615fe4204220ec62859")); + Arrays.asList("e7c8900ff9a18f2c8a033ae741e7143b")); executeTest("test file doesn't have annotations, not asking for annotations, #2", spec); } @@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("2031599f8554d16342f53cb80fae296a")); + Arrays.asList("85ec66095c67865be9777c7c93ceb263")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("2b47c7a6a7f0ce15fd1d1dd02ecab73b")); + Arrays.asList("97da2e0699faaa17ea85e03ce3cd61fa")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index cf26ce48d..0fb643c29 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -22,7 +22,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("b19f85fddd08485c668e7192df79d944")); + Arrays.asList("44942d4e56aec62520903d297cee5fd8")); executeTest("testMultiSamplePilot1 - Point Estimate EM", spec); } @@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("a6c8e77d1741f3f6d958a0634fa59e14")); + Arrays.asList("aabea51b271df324eb543bc3417c0a42")); executeTest("testMultiSamplePilot2 - Point Estimate EM", spec); } @@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testPooled1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1, - Arrays.asList("459b8f6a6265b67718495e9bffe96fa8")); + Arrays.asList("a733878ce9c257148a224aa00c0e63e9")); executeTest("testPooled1", spec); } @@ -56,7 +56,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("c132c93cec5fdf02c3235180a7aa7dcc")); + Arrays.asList("b68ad77195f6e926c4fce5f1a91f3bc9")); executeTest("testMultiSamplePilot1 - Joint Estimate", spec); } @@ -64,7 +64,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("09d48c56eae25fb6418e10feeb5b3fc5")); + Arrays.asList("b63c43ba827cfc74bb8ef679ce3d847d")); executeTest("testMultiSamplePilot2 - Joint Estimate", spec); } @@ -72,7 +72,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("3544b1eb97834e2050239c101eaddb2d")); + Arrays.asList("8ae0be3b65c83d973519abad858c0073")); executeTest("testSingleSamplePilot2 - Joint Estimate", spec); } @@ -85,11 +85,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testParameter() { HashMap e = new HashMap(); - e.put( "-genotype", "dc908834d97bde696b6bdccf756c9ab6" ); - e.put( "-all_bases", "484d424cbcd646b3c5d823cad43f4f5f" ); - e.put( "--min_base_quality_score 10", "45e336766e94f05e6ac44d1e50473e18" ); - e.put( "--min_mapping_quality_score 10", "de417c1156483ab87b0f8a25f10d4d87" ); - e.put( "--max_mismatches_in_40bp_window 5", "05132003c73e10fbdab2af14af677764" ); + e.put( "-genotype", "c984952b91193d9550066f9514434183" ); + e.put( "-all_bases", "0062942f2323ea0f2c9d3a3739a28d20" ); + e.put( "--min_base_quality_score 10", "8a645a76d85e8a6edda6acdf8071ee6b" ); + e.put( "--min_mapping_quality_score 10", "cf6c38de9872bdbf5da1a22d8c962ebe" ); + e.put( "--max_mismatches_in_40bp_window 5", "e21a319eb0efbe11c3c1eeea7c9c0144" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -103,7 +103,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 10 ", 1, - Arrays.asList("ea154d6ab6bbaf40da0dac5162b7e9fd")); + Arrays.asList("6823ce46547cf9693cda040dcbd4c9c1")); executeTest("testConfidence", spec); } @@ -117,7 +117,6 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testOtherFormat() { HashMap e = new HashMap(); e.put( "GLF", "8c72131dfb2b830efb9938a582672a3e" ); - e.put( "GELI", "e9e00bdb32ce63420988956c1a9b805f" ); e.put( "GELI_BINARY", "46162567eac3a5004f5f9b4c93d1b8d3" ); for ( Map.Entry entry : e.entrySet() ) { @@ -128,6 +127,10 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { } } + // --------------------------------------------- // + // ALL REMAINING TESTS ARE OUTPUT IN GELI FORMAT // + // --------------------------------------------- // + // -------------------------------------------------------------------------------------------------------------- // // testing heterozygosity @@ -136,12 +139,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testHeterozyosity() { HashMap e = new HashMap(); - e.put( 0.01, "ae0134840e0c9fa295b23ecf4ba3e768" ); - e.put( 1.0 / 1850, "71ddcf22f4c0538d03508b720d01d3fe" ); + e.put( 0.01, "700e6426c4142c823f7ac1dde2aa19ea" ); + e.put( 1.0 / 1850, "e9e00bdb32ce63420988956c1a9b805f" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30 --heterozygosity " + entry.getKey(), 1, + "-T UnifiedGenotyper -vf GELI -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30 --heterozygosity " + entry.getKey(), 1, Arrays.asList(entry.getValue())); executeTest(String.format("testHeterozyosity[%s]", entry.getKey()), spec); } @@ -156,12 +159,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testOtherBaseCallModel() { HashMap e = new HashMap(); - e.put( "one_state", "ab71d814e897d7f1440af8f02365b4fa" ); - e.put( "three_state", "ce55849c55c58c59773200b2a88db0ee" ); + e.put( "one_state", "d69abadc3bf861d621017c0e41b87b0a" ); + e.put( "three_state", "ebcc76cc4579393f98aecb59bdc56507" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -gm JOINT_ESTIMATE -confidence 30 -bm " + entry.getKey(), 1, + "-T UnifiedGenotyper -vf GELI -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -gm JOINT_ESTIMATE -confidence 30 -bm " + entry.getKey(), 1, Arrays.asList(entry.getValue())); executeTest(String.format("testOtherBaseCallModel[%s]", entry.getKey()), spec); } diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java index 3d166c727..32137c33f 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java @@ -195,8 +195,8 @@ public class VCFReaderTest extends BaseTest { if (!header.contains(record.getSampleName())) { Assert.fail("Parsed header doesn't contain field " + record.getSampleName()); } - if (!variant.get(header.indexOf(record.getSampleName())).equals(record.toStringEncoding(newRod.mCurrentRecord.getAlternateAlleles()))) { - Assert.fail("Parsed value for " + record.getSampleName() + " doesn't contain the same value ( " + record.toStringEncoding(newRod.mCurrentRecord.getAlternateAlleles()) + if (!variant.get(header.indexOf(record.getSampleName())).equals(record.toStringEncoding(newRod.mCurrentRecord.getAlternateAlleles(), newRod.mCurrentRecord.getGenotypeFormatString().split(":")))) { + Assert.fail("Parsed value for " + record.getSampleName() + " doesn't contain the same value ( " + record.toStringEncoding(newRod.mCurrentRecord.getAlternateAlleles(), newRod.mCurrentRecord.getGenotypeFormatString().split(":")) + " != " + variant.get(header.indexOf(record.getSampleName()))); } }