From 7d09c0064b29ae0b502fbc4b76f505e0f5a3c4a3 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sat, 19 Nov 2011 18:40:15 -0500 Subject: [PATCH] Bug fixes and code cleanup throughout -- chromosomeCounts now takes builder as well, cleaning up a lot of code throughout the codebase. --- .../walkers/annotator/ChromosomeCounts.java | 6 +- .../beagle/BeagleOutputToVCFWalker.java | 14 ++-- .../walkers/genotyper/UGCallVariants.java | 6 +- .../gatk/walkers/phasing/PhasingUtils.java | 10 +-- .../varianteval/util/VariantEvalUtils.java | 17 ++--- .../walkers/variantutils/CombineVariants.java | 13 ++-- .../walkers/variantutils/SelectVariants.java | 7 +- .../variantcontext/VariantContextBuilder.java | 26 ++++++- .../variantcontext/VariantContextUtils.java | 74 +++++++++++++++++-- .../VariantAnnotatorIntegrationTest.java | 10 ++- 10 files changed, 125 insertions(+), 58 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index 5ed2a6761..0acd3e841 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -59,10 +59,8 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( ! vc.hasGenotypes() ) return null; - - Map map = new HashMap(); - VariantContextUtils.calculateChromosomeCounts(vc, map, true); - return map; + + return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap(), true); } public List getKeyNames() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index 64c4948ff..8c6038e7e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -338,23 +338,21 @@ public class BeagleOutputToVCFWalker extends RodWalker { builder.alleles(new HashSet(Arrays.asList(vc_input.getReference()))).filters(removedFilters); } - HashMap attributes = new HashMap(vc_input.getAttributes()); // re-compute chromosome counts - VariantContextUtils.calculateChromosomeCounts(vc_input, attributes, false); + VariantContextUtils.calculateChromosomeCounts(builder, false); // Get Hapmap AC and AF if (vc_comp != null) { - attributes.put("ACH", alleleCountH.toString() ); - attributes.put("ANH", chrCountH.toString() ); - attributes.put("AFH", String.format("%4.2f", (double)alleleCountH/chrCountH) ); + builder.attribute("ACH", alleleCountH.toString() ); + builder.attribute("ANH", chrCountH.toString() ); + builder.attribute("AFH", String.format("%4.2f", (double)alleleCountH/chrCountH) ); } - attributes.put("NumGenotypesChanged", numGenotypesChangedByBeagle ); + builder.attribute("NumGenotypesChanged", numGenotypesChangedByBeagle ); if( !beagleR2Feature.getR2value().equals(Double.NaN) ) { - attributes.put("R2", beagleR2Feature.getR2value().toString() ); + builder.attribute("R2", beagleR2Feature.getR2value().toString() ); } - builder.attributes(attributes); vcfWriter.add(builder.make()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java index 83da319ea..97f7b21eb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java @@ -106,9 +106,9 @@ public class UGCallVariants extends RodWalker { return sum; try { - Map attrs = new HashMap(value.getAttributes()); - VariantContextUtils.calculateChromosomeCounts(value, attrs, true); - writer.add(new VariantContextBuilder(value).attributes(attrs).make()); + VariantContextBuilder builder = new VariantContextBuilder(value); + VariantContextUtils.calculateChromosomeCounts(builder, true); + writer.add(builder.make()); } catch (IllegalArgumentException e) { throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name"); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java index 9aa23fdfb..75d0773f1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java @@ -131,13 +131,9 @@ class PhasingUtils { if ( vc2.hasID() ) mergedIDs.add(vc2.getID()); String mergedID = mergedIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : Utils.join(VCFConstants.ID_FIELD_SEPARATOR, mergedIDs); - VariantContext mergedVc = new VariantContextBuilder(mergedName, vc1.getChr(), vc1.getStart(), vc2.getEnd(), mergeData.getAllMergedAlleles()).id(mergedID).genotypes(mergedGenotypes).log10PError(mergedLog10PError).filters(mergedFilters).attributes(mergedAttribs).make(); - - mergedAttribs = new HashMap(mergedVc.getAttributes()); - VariantContextUtils.calculateChromosomeCounts(mergedVc, mergedAttribs, true); - mergedVc = new VariantContextBuilder(mergedVc).attributes(mergedAttribs).make(); - - return mergedVc; + VariantContextBuilder mergedBuilder = new VariantContextBuilder(mergedName, vc1.getChr(), vc1.getStart(), vc2.getEnd(), mergeData.getAllMergedAlleles()).id(mergedID).genotypes(mergedGenotypes).log10PError(mergedLog10PError).filters(mergedFilters).attributes(mergedAttribs); + VariantContextUtils.calculateChromosomeCounts(mergedBuilder, true); + return mergedBuilder.make(); } static String mergeVariantContextNames(String name1, String name2) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index 6da693c7a..aa246b58d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -279,22 +279,17 @@ public class VariantEvalUtils { */ public VariantContext getSubsetOfVariantContext(VariantContext vc, Set sampleNames) { VariantContext vcsub = vc.subContextFromSamples(sampleNames, vc.getAlleles()); + VariantContextBuilder builder = new VariantContextBuilder(vcsub); - HashMap newAts = new HashMap(vcsub.getAttributes()); - - int originalAlleleCount = vc.getHetCount() + 2 * vc.getHomVarCount(); - int newAlleleCount = vcsub.getHetCount() + 2 * vcsub.getHomVarCount(); + final int originalAlleleCount = vc.getHetCount() + 2 * vc.getHomVarCount(); + final int newAlleleCount = vcsub.getHetCount() + 2 * vcsub.getHomVarCount(); if (originalAlleleCount == newAlleleCount && newAlleleCount == 1) { - newAts.put("ISSINGLETON", true); + builder.attribute("ISSINGLETON", true); } - VariantContextUtils.calculateChromosomeCounts(vcsub, newAts, true); - vcsub = new VariantContextBuilder(vcsub).attributes(newAts).make(); - - //VariantEvalWalker.logger.debug(String.format("VC %s subset to %s AC%n", vc.getSource(), vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY))); - - return vcsub; + VariantContextUtils.calculateChromosomeCounts(builder, true); + return builder.make(); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index d74f5a269..096085330 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -222,7 +222,7 @@ public class CombineVariants extends RodWalker { for ( final VariantContext vc : vcs ) { vcfWriter.add(vc); } - + return vcs.isEmpty() ? 0 : 1; } @@ -245,18 +245,17 @@ public class CombineVariants extends RodWalker { SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); } - for ( VariantContext mergedVC : mergedVCs ) { + for ( VariantContext mergedVC : mergedVCs ) { // only operate at the start of events if ( mergedVC == null ) continue; - HashMap attributes = new HashMap(mergedVC.getAttributes()); + final VariantContextBuilder builder = new VariantContextBuilder(mergedVC); // re-compute chromosome counts - VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false); - VariantContext annotatedMergedVC = new VariantContextBuilder(mergedVC).attributes(attributes).make(); + VariantContextUtils.calculateChromosomeCounts(builder, false); if ( minimalVCF ) - annotatedMergedVC = VariantContextUtils.pruneVariantContext(annotatedMergedVC, Arrays.asList(SET_KEY)); - vcfWriter.add(annotatedMergedVC); + VariantContextUtils.pruneVariantContext(builder, Arrays.asList(SET_KEY)); + vcfWriter.add(builder.make()); } return vcs.isEmpty() ? 0 : 1; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index babc88966..b0016ff4b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -684,11 +684,10 @@ public class SelectVariants extends RodWalker { builder.attribute("AN_Orig", sub.getAttribute(VCFConstants.ALLELE_NUMBER_KEY)); } - Map attributes = new HashMap(builder.make().getAttributes()); - VariantContextUtils.calculateChromosomeCounts(sub, attributes, false); - attributes.put("DP", depth); + VariantContextUtils.calculateChromosomeCounts(builder, false); + builder.attribute("DP", depth); - return new VariantContextBuilder(builder.make()).attributes(attributes).make(); + return builder.make(); } private void randomlyAddVariant(int rank, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java index cb2ef4678..379a01bb4 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java @@ -158,12 +158,34 @@ public class VariantContextBuilder { @Requires({"key != null"}) @Ensures({"this.attributes.size() == old(this.attributes.size()) || this.attributes.size() == old(this.attributes.size()+1)"}) public VariantContextBuilder attribute(final String key, final Object value) { + makeAttributesModifiable(); + attributes.put(key, value); + return this; + } + + /** + * Removes key if present in the attributes + * + * @param key + * @return + */ + @Requires({"key != null"}) + @Ensures({"this.attributes.size() == old(this.attributes.size()) || this.attributes.size() == old(this.attributes.size()-1)"}) + public VariantContextBuilder rmAttribute(final String key) { + makeAttributesModifiable(); + attributes.remove(key); + return this; + } + + /** + * Makes the attributes field modifiable. In many cases attributes is just a pointer to an immutable + * collection, so methods that want to add / remove records require the attributes to be copied first + */ + private void makeAttributesModifiable() { if ( ! attributesCanBeModified ) { this.attributesCanBeModified = true; this.attributes = new HashMap(attributes); } - attributes.put(key, value); - return this; } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index b6e0d2e4d..07c0f7c32 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -55,13 +55,15 @@ public class VariantContextUtils { } /** - * Update the attributes of the attributes map given the VariantContext to reflect the proper chromosome-based VCF tags + * Update the attributes of the attributes map given the VariantContext to reflect the + * proper chromosome-based VCF tags * * @param vc the VariantContext * @param attributes the attributes map to populate; must not be null; may contain old values * @param removeStaleValues should we remove stale values from the mapping? + * @return the attributes map provided as input, returned for programming convenience */ - public static void calculateChromosomeCounts(VariantContext vc, Map attributes, boolean removeStaleValues) { + public static Map calculateChromosomeCounts(VariantContext vc, Map attributes, boolean removeStaleValues) { // if everyone is a no-call, remove the old attributes if requested if ( vc.getCalledChrCount() == 0 && removeStaleValues ) { if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) ) @@ -70,7 +72,7 @@ public class VariantContextUtils { attributes.remove(VCFConstants.ALLELE_FREQUENCY_KEY); if ( attributes.containsKey(VCFConstants.ALLELE_NUMBER_KEY) ) attributes.remove(VCFConstants.ALLELE_NUMBER_KEY); - return; + return attributes; } if ( vc.hasGenotypes() ) { @@ -96,6 +98,54 @@ public class VariantContextUtils { attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, 0.0); } } + + return attributes; + } + + /** + * Update the attributes of the attributes map in the VariantContextBuilder to reflect the proper + * chromosome-based VCF tags based on the current VC produced by builder.make() + * + * @param builder the VariantContextBuilder we are updating + * @param removeStaleValues should we remove stale values from the mapping? + */ + public static void calculateChromosomeCounts(VariantContextBuilder builder, boolean removeStaleValues) { + final VariantContext vc = builder.make(); + + // if everyone is a no-call, remove the old attributes if requested + if ( vc.getCalledChrCount() == 0 && removeStaleValues ) { + if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) + builder.rmAttribute(VCFConstants.ALLELE_COUNT_KEY); + if ( vc.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) + builder.rmAttribute(VCFConstants.ALLELE_FREQUENCY_KEY); + if ( vc.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) ) + builder.rmAttribute(VCFConstants.ALLELE_NUMBER_KEY); + return; + } + + if ( vc.hasGenotypes() ) { + builder.attribute(VCFConstants.ALLELE_NUMBER_KEY, vc.getCalledChrCount()); + + // if there are alternate alleles, record the relevant tags + if ( vc.getAlternateAlleles().size() > 0 ) { + ArrayList alleleFreqs = new ArrayList(); + ArrayList alleleCounts = new ArrayList(); + double totalChromosomes = (double)vc.getCalledChrCount(); + for ( Allele allele : vc.getAlternateAlleles() ) { + int altChromosomes = vc.getCalledChrCount(allele); + alleleCounts.add(altChromosomes); + String freq = String.format(makePrecisionFormatStringFromDenominatorValue(totalChromosomes), ((double)altChromosomes / totalChromosomes)); + alleleFreqs.add(freq); + } + + builder.attribute(VCFConstants.ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts); + builder.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs); + } + else { + builder.attribute(VCFConstants.ALLELE_COUNT_KEY, 0); + builder.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, 0.0); + } + } } private static String makePrecisionFormatStringFromDenominatorValue(double maxValue) { @@ -348,10 +398,6 @@ public class VariantContextUtils { return r; } - public static VariantContext pruneVariantContext(VariantContext vc) { - return pruneVariantContext(vc, null); - } - private final static Map subsetAttributes(final CommonInfo igc, final Collection keysToPreserve) { Map attributes = new HashMap(keysToPreserve.size()); for ( final String key : keysToPreserve ) { @@ -361,7 +407,19 @@ public class VariantContextUtils { return attributes; } + /** + * @deprecated use variant context builder version instead + * @param vc + * @param keysToPreserve + * @return + */ + @Deprecated public static VariantContext pruneVariantContext(final VariantContext vc, Collection keysToPreserve ) { + return pruneVariantContext(new VariantContextBuilder(vc), keysToPreserve).make(); + } + + public static VariantContextBuilder pruneVariantContext(final VariantContextBuilder builder, Collection keysToPreserve ) { + final VariantContext vc = builder.make(); if ( keysToPreserve == null ) keysToPreserve = Collections.emptyList(); // VC info @@ -375,7 +433,7 @@ public class VariantContextUtils { genotypeAttributes, g.isPhased())); } - return new VariantContextBuilder(vc).genotypes(genotypes).attributes(attributes).make(); + return builder.genotypes(genotypes).attributes(attributes); } public enum GenotypeMergeType { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 3bfb81dd0..75c27e429 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -32,7 +32,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("9beb795536e95954f810835c6058f2ad")); + Arrays.asList("fbb656369eaa48153d127bd12db59d8f")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -54,9 +54,11 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testNoAnnotsNotAsking2() { + // this genotype annotations in this file are actually out of order. If you don't parse the genotypes + // they don't get reordered. It's a good test of the genotype ordering system. WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("f2ddfa8105c290b1f34b7a261a02a1ac")); + Arrays.asList("0cc0ec59f0328792e6413b6ff3f71780")); executeTest("test file doesn't have annotations, not asking for annotations, #2", spec); } @@ -64,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("49d989f467b8d6d8f98f7c1b67cd4a05")); + Arrays.asList("42dd979a0a931c18dc9be40308bac321")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -80,7 +82,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testExcludeAnnotations() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("33062eccd6eb73bc49440365430454c4")); + Arrays.asList("477eac07989593b58bb361f3429c085a")); executeTest("test exclude annotations", spec); }