From 66cca7de0fb52015fbc82a545664b077caca7d4a Mon Sep 17 00:00:00 2001 From: depristo Date: Wed, 22 Dec 2010 17:42:05 +0000 Subject: [PATCH] renamed genotypesArePhased to isPhased, as the previous name was incorrect for several reasons. Added setPhase() to MutableGenotype. Other classes changed to reflect renaming to isPhased(). CombineVariants now supports an experimental MASTER mode where it consumes -B:master,vcf and -B:xi,vcf for any number i and updates the master with phasing information in xi. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4896 348d0f76-0448-11de-a6fe-93d51630548a --- .../variantcontext/VariantContextUtils.java | 70 +++++++++++++++++-- .../annotator/VariantAnnotatorEngine.java | 2 +- .../filters/VariantFiltrationWalker.java | 2 +- .../varianteval/GenotypePhasingEvaluator.java | 2 +- .../walkers/variantutils/CombineVariants.java | 9 ++- .../walkers/vcftools/FixRefBases.java | 4 +- 6 files changed, 78 insertions(+), 11 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java index f901a8f4e..bc7721fe8 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java @@ -316,9 +316,71 @@ public class VariantContextUtils { } public enum VariantMergeType { - UNION, INTERSECT + UNION, INTERSECT, MASTER } + /** + * Performs a master merge on the VCs. Here there is a master input [contains all of the information] and many + * VCs containing partial, extra genotype information which should be added to the master. For example, + * we scatter out the phasing algorithm over some samples in the master, producing a minimal VCF with phasing + * information per genotype. The master merge will add the PQ information from each genotype record, where + * appropriate, to the master VC. + * + * @param genomeLocParser + * @param unsortedVCs + * @param masterName + * @param refBase + * @return + */ + public static VariantContext masterMerge(Collection unsortedVCs, String masterName) { + VariantContext master = findMaster(unsortedVCs, masterName); + Map genotypes = master.getGenotypes(); + for ( Genotype g : genotypes.values() ) { + genotypes.put(g.getSampleName(), new MutableGenotype(g)); + } + + for ( VariantContext vc : unsortedVCs ) { + if ( ! vc.getSource() .equals(masterName) ) { + for ( Genotype g : vc.getGenotypes().values() ) { + MutableGenotype masterG = (MutableGenotype)genotypes.get(g.getSampleName()); + for ( Map.Entry attr : g.getAttributes().entrySet() ) { + if ( ! masterG.hasAttribute(attr.getKey()) ) { + //System.out.printf("Adding attribute %s to masterG %s, new %s%n", attr, masterG, g); + masterG.putAttribute(attr.getKey(), attr.getValue()); + } + } + + if ( masterG.isPhased() != g.isPhased() ) { + // System.out.printf("Updating phasing %s to masterG %s, new %s%n", g.isPhased(), masterG, g); + masterG.setPhase(g.isPhased()); + } + +// if ( MathUtils.compareDoubles(masterG.getNegLog10PError(), g.getNegLog10PError()) != 0 ) { +// System.out.printf("Updating GQ %s to masterG %s, new %s%n", g.getNegLog10PError(), masterG, g); +// masterG.setNegLog10PError(g.getNegLog10PError()); +// } + + // TODO -- WARNING -- THIS CODE DOES NOT ACTUALLY LOOK AT ALL OF THE GENOTYPE ATTRIBUTES, ONLY THOSE + // TODO -- WARNING -- IMMEDIATELY USEFUL TO PHASING. A COMPLETE IMPLEMENTATION WILL NEED TO HANDLE + // TODO -- WARNING -- ALL OF THE FIELDS LIKE ALLELES, ETC + } + } + } + + return new VariantContext(master.getSource(), master.getChr(), master.getStart(), master.getEnd(), master.getAlleles(), genotypes, master.getNegLog10PError(), master.getFilters(), master.getAttributes()); + } + + private static final VariantContext findMaster(Collection unsortedVCs, String masterName) { + for ( VariantContext vc : unsortedVCs ) { + if ( vc.getSource() .equals(masterName) ) { + return vc; + } + } + + throw new ReviewedStingException(String.format("Couldn't find master VCF %s at %s", masterName, unsortedVCs.iterator().next())); + } + + public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, byte refBase) { return simpleMerge(genomeLocParser, unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false, refBase); } @@ -952,7 +1014,7 @@ public class VariantContextUtils { HOWEVER, EVEN if this is not the case, but gt1.isHom(), it is trivially known that each of gt2's alleles segregate with the single allele type present in gt1. */ - return (gt2.genotypesArePhased() || gt2.isHom() || gt1.isHom()); + return (gt2.isPhased() || gt2.isHom() || gt1.isHom()); } private static class PhaseAndQuality { @@ -960,7 +1022,7 @@ public class VariantContextUtils { public Double PQ = null; public PhaseAndQuality(Genotype gt) { - this.isPhased = gt.genotypesArePhased(); + this.isPhased = gt.isPhased(); if (this.isPhased) this.PQ = gt.getAttributeAsDoubleNoException(ReadBackedPhasingWalker.PQ_KEY); } @@ -969,7 +1031,7 @@ public class VariantContextUtils { // Assumes that alleleSegregationIsKnown(gt1, gt2): private static PhaseAndQuality calcPhaseForMergedGenotypes(Genotype gt1, Genotype gt2) { - if (gt2.genotypesArePhased() || gt2.isHom()) + if (gt2.isPhased() || gt2.isHom()) return new PhaseAndQuality(gt1); // maintain the phase of gt1 if (!gt1.isHom()) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 4e57912e3..537be0b44 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -260,7 +260,7 @@ public class VariantAnnotatorEngine { if ( result != null ) genotypeAnnotations.putAll(result); } - genotypes.put(g.getKey(), new Genotype(g.getKey(), genotype.getAlleles(), genotype.getNegLog10PError(), genotype.getFilters(), genotypeAnnotations, genotype.genotypesArePhased())); + genotypes.put(g.getKey(), new Genotype(g.getKey(), genotype.getAlleles(), genotype.getNegLog10PError(), genotype.getFilters(), genotypeAnnotations, genotype.isPhased())); } return genotypes; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 0d8b99d48..b97a92b8a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -201,7 +201,7 @@ public class VariantFiltrationWalker extends RodWalker { if ( VariantContextUtils.match(vc, g, exp) ) filters.add(exp.name); } - genotypes.put(genotype.getKey(), new Genotype(genotype.getKey(), g.getAlleles(), g.getNegLog10PError(), filters, g.getAttributes(), g.genotypesArePhased())); + genotypes.put(genotype.getKey(), new Genotype(genotype.getKey(), g.getAlleles(), g.getNegLog10PError(), filters, g.getAttributes(), g.isPhased())); } else { genotypes.put(genotype.getKey(), g); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypePhasingEvaluator.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypePhasingEvaluator.java index 65ce08c51..4e30ba6d3 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypePhasingEvaluator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypePhasingEvaluator.java @@ -199,7 +199,7 @@ public class GenotypePhasingEvaluator extends VariantEvaluator { } public boolean genotypesArePhasedAboveThreshold(Genotype gt) { - if (!gt.genotypesArePhased()) + if (!gt.isPhased()) return false; Double pq = getPQ(gt); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index efff0c750..1a21a92e0 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -126,9 +126,14 @@ public class CombineVariants extends RodWalker { // get all of the vcf rods at this locus // Need to provide reference bases to simpleMerge starting at current locus Collection vcs = tracker.getAllVariantContexts(ref, context.getLocation()); - VariantContext mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, variantMergeOption, - genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled); + VariantContext mergedVC = null; + if ( variantMergeOption == VariantContextUtils.VariantMergeType.MASTER ) { + mergedVC = VariantContextUtils.masterMerge(vcs, "master"); + } else { + mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, variantMergeOption, + genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled); + } //out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/FixRefBases.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/FixRefBases.java index 1fddcfc0e..f0c4bbf10 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/FixRefBases.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/FixRefBases.java @@ -153,7 +153,7 @@ public class FixRefBases extends RodWalker { } return new Genotype(g.getSampleName(), newAlleles,g.hasNegLog10PError() ? g.getNegLog10PError() : VCFConstants.MISSING_QUALITY_v3_DOUBLE, - g.getAttributes().keySet(), g.getAttributes(),g.genotypesArePhased()); + g.getAttributes().keySet(), g.getAttributes(),g.isPhased()); } private Map flipGenotypes(Map old, Set newAlleles) { @@ -192,7 +192,7 @@ public class FixRefBases extends RodWalker { return new Genotype(old.getSampleName(), newGTAlleles,old.hasNegLog10PError() ? old.getNegLog10PError() : VCFConstants.MISSING_QUALITY_v3_DOUBLE, - old.getAttributes().keySet(), old.getAttributes(),old.genotypesArePhased()); + old.getAttributes().keySet(), old.getAttributes(),old.isPhased()); } }