diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java index f901a8f4e..bc7721fe8 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java @@ -316,9 +316,71 @@ public class VariantContextUtils { } public enum VariantMergeType { - UNION, INTERSECT + UNION, INTERSECT, MASTER } + /** + * Performs a master merge on the VCs. Here there is a master input [contains all of the information] and many + * VCs containing partial, extra genotype information which should be added to the master. For example, + * we scatter out the phasing algorithm over some samples in the master, producing a minimal VCF with phasing + * information per genotype. The master merge will add the PQ information from each genotype record, where + * appropriate, to the master VC. + * + * @param genomeLocParser + * @param unsortedVCs + * @param masterName + * @param refBase + * @return + */ + public static VariantContext masterMerge(Collection unsortedVCs, String masterName) { + VariantContext master = findMaster(unsortedVCs, masterName); + Map genotypes = master.getGenotypes(); + for ( Genotype g : genotypes.values() ) { + genotypes.put(g.getSampleName(), new MutableGenotype(g)); + } + + for ( VariantContext vc : unsortedVCs ) { + if ( ! vc.getSource() .equals(masterName) ) { + for ( Genotype g : vc.getGenotypes().values() ) { + MutableGenotype masterG = (MutableGenotype)genotypes.get(g.getSampleName()); + for ( Map.Entry attr : g.getAttributes().entrySet() ) { + if ( ! masterG.hasAttribute(attr.getKey()) ) { + //System.out.printf("Adding attribute %s to masterG %s, new %s%n", attr, masterG, g); + masterG.putAttribute(attr.getKey(), attr.getValue()); + } + } + + if ( masterG.isPhased() != g.isPhased() ) { + // System.out.printf("Updating phasing %s to masterG %s, new %s%n", g.isPhased(), masterG, g); + masterG.setPhase(g.isPhased()); + } + +// if ( MathUtils.compareDoubles(masterG.getNegLog10PError(), g.getNegLog10PError()) != 0 ) { +// System.out.printf("Updating GQ %s to masterG %s, new %s%n", g.getNegLog10PError(), masterG, g); +// masterG.setNegLog10PError(g.getNegLog10PError()); +// } + + // TODO -- WARNING -- THIS CODE DOES NOT ACTUALLY LOOK AT ALL OF THE GENOTYPE ATTRIBUTES, ONLY THOSE + // TODO -- WARNING -- IMMEDIATELY USEFUL TO PHASING. A COMPLETE IMPLEMENTATION WILL NEED TO HANDLE + // TODO -- WARNING -- ALL OF THE FIELDS LIKE ALLELES, ETC + } + } + } + + return new VariantContext(master.getSource(), master.getChr(), master.getStart(), master.getEnd(), master.getAlleles(), genotypes, master.getNegLog10PError(), master.getFilters(), master.getAttributes()); + } + + private static final VariantContext findMaster(Collection unsortedVCs, String masterName) { + for ( VariantContext vc : unsortedVCs ) { + if ( vc.getSource() .equals(masterName) ) { + return vc; + } + } + + throw new ReviewedStingException(String.format("Couldn't find master VCF %s at %s", masterName, unsortedVCs.iterator().next())); + } + + public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, byte refBase) { return simpleMerge(genomeLocParser, unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false, refBase); } @@ -952,7 +1014,7 @@ public class VariantContextUtils { HOWEVER, EVEN if this is not the case, but gt1.isHom(), it is trivially known that each of gt2's alleles segregate with the single allele type present in gt1. */ - return (gt2.genotypesArePhased() || gt2.isHom() || gt1.isHom()); + return (gt2.isPhased() || gt2.isHom() || gt1.isHom()); } private static class PhaseAndQuality { @@ -960,7 +1022,7 @@ public class VariantContextUtils { public Double PQ = null; public PhaseAndQuality(Genotype gt) { - this.isPhased = gt.genotypesArePhased(); + this.isPhased = gt.isPhased(); if (this.isPhased) this.PQ = gt.getAttributeAsDoubleNoException(ReadBackedPhasingWalker.PQ_KEY); } @@ -969,7 +1031,7 @@ public class VariantContextUtils { // Assumes that alleleSegregationIsKnown(gt1, gt2): private static PhaseAndQuality calcPhaseForMergedGenotypes(Genotype gt1, Genotype gt2) { - if (gt2.genotypesArePhased() || gt2.isHom()) + if (gt2.isPhased() || gt2.isHom()) return new PhaseAndQuality(gt1); // maintain the phase of gt1 if (!gt1.isHom()) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 4e57912e3..537be0b44 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -260,7 +260,7 @@ public class VariantAnnotatorEngine { if ( result != null ) genotypeAnnotations.putAll(result); } - genotypes.put(g.getKey(), new Genotype(g.getKey(), genotype.getAlleles(), genotype.getNegLog10PError(), genotype.getFilters(), genotypeAnnotations, genotype.genotypesArePhased())); + genotypes.put(g.getKey(), new Genotype(g.getKey(), genotype.getAlleles(), genotype.getNegLog10PError(), genotype.getFilters(), genotypeAnnotations, genotype.isPhased())); } return genotypes; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 0d8b99d48..b97a92b8a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -201,7 +201,7 @@ public class VariantFiltrationWalker extends RodWalker { if ( VariantContextUtils.match(vc, g, exp) ) filters.add(exp.name); } - genotypes.put(genotype.getKey(), new Genotype(genotype.getKey(), g.getAlleles(), g.getNegLog10PError(), filters, g.getAttributes(), g.genotypesArePhased())); + genotypes.put(genotype.getKey(), new Genotype(genotype.getKey(), g.getAlleles(), g.getNegLog10PError(), filters, g.getAttributes(), g.isPhased())); } else { genotypes.put(genotype.getKey(), g); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypePhasingEvaluator.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypePhasingEvaluator.java index 65ce08c51..4e30ba6d3 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypePhasingEvaluator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypePhasingEvaluator.java @@ -199,7 +199,7 @@ public class GenotypePhasingEvaluator extends VariantEvaluator { } public boolean genotypesArePhasedAboveThreshold(Genotype gt) { - if (!gt.genotypesArePhased()) + if (!gt.isPhased()) return false; Double pq = getPQ(gt); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index efff0c750..1a21a92e0 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -126,9 +126,14 @@ public class CombineVariants extends RodWalker { // get all of the vcf rods at this locus // Need to provide reference bases to simpleMerge starting at current locus Collection vcs = tracker.getAllVariantContexts(ref, context.getLocation()); - VariantContext mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, variantMergeOption, - genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled); + VariantContext mergedVC = null; + if ( variantMergeOption == VariantContextUtils.VariantMergeType.MASTER ) { + mergedVC = VariantContextUtils.masterMerge(vcs, "master"); + } else { + mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, variantMergeOption, + genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled); + } //out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/FixRefBases.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/FixRefBases.java index 1fddcfc0e..f0c4bbf10 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/FixRefBases.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/FixRefBases.java @@ -153,7 +153,7 @@ public class FixRefBases extends RodWalker { } return new Genotype(g.getSampleName(), newAlleles,g.hasNegLog10PError() ? g.getNegLog10PError() : VCFConstants.MISSING_QUALITY_v3_DOUBLE, - g.getAttributes().keySet(), g.getAttributes(),g.genotypesArePhased()); + g.getAttributes().keySet(), g.getAttributes(),g.isPhased()); } private Map flipGenotypes(Map old, Set newAlleles) { @@ -192,7 +192,7 @@ public class FixRefBases extends RodWalker { return new Genotype(old.getSampleName(), newGTAlleles,old.hasNegLog10PError() ? old.getNegLog10PError() : VCFConstants.MISSING_QUALITY_v3_DOUBLE, - old.getAttributes().keySet(), old.getAttributes(),old.genotypesArePhased()); + old.getAttributes().keySet(), old.getAttributes(),old.isPhased()); } }