diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java index 4e70428f7..24e2a2cef 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java @@ -316,9 +316,79 @@ public class VariantContextUtils { } public enum VariantMergeType { - UNION, INTERSECT + UNION, INTERSECT, MASTER } + /** + * Performs a master merge on the VCs. Here there is a master input [contains all of the information] and many + * VCs containing partial, extra genotype information which should be added to the master. For example, + * we scatter out the phasing algorithm over some samples in the master, producing a minimal VCF with phasing + * information per genotype. The master merge will add the PQ information from each genotype record, where + * appropriate, to the master VC. + * + * @param unsortedVCs + * @param masterName + * @return + */ + public static VariantContext masterMerge(Collection unsortedVCs, String masterName) { + VariantContext master = findMaster(unsortedVCs, masterName); + Map genotypes = master.getGenotypes(); + for (Genotype g : genotypes.values()) { + genotypes.put(g.getSampleName(), new MutableGenotype(g)); + } + + Map masterAttributes = new HashMap(master.getAttributes()); + + for (VariantContext vc : unsortedVCs) { + if (!vc.getSource().equals(masterName)) { + for (Genotype g : vc.getGenotypes().values()) { + MutableGenotype masterG = (MutableGenotype) genotypes.get(g.getSampleName()); + for (Map.Entry attr : g.getAttributes().entrySet()) { + if (!masterG.hasAttribute(attr.getKey())) { + //System.out.printf("Adding GT attribute %s to masterG %s, new %s%n", attr, masterG, g); + masterG.putAttribute(attr.getKey(), attr.getValue()); + } + } + + if (masterG.isPhased() != g.isPhased()) { + if (masterG.sameGenotype(g)) { + // System.out.printf("Updating phasing %s to masterG %s, new %s%n", g.isPhased(), masterG, g); + masterG.setAlleles(g.getAlleles()); + masterG.setPhase(g.isPhased()); + } + //else System.out.println("WARNING: Not updating phase, since genotypes differ between master file and auxiliary info file!"); + } + +// if ( MathUtils.compareDoubles(masterG.getNegLog10PError(), g.getNegLog10PError()) != 0 ) { +// System.out.printf("Updating GQ %s to masterG %s, new %s%n", g.getNegLog10PError(), masterG, g); +// masterG.setNegLog10PError(g.getNegLog10PError()); +// } + + } + + for (Map.Entry attr : vc.getAttributes().entrySet()) { + if (!masterAttributes.containsKey(attr.getKey())) { + //System.out.printf("Adding VC attribute %s to master %s, new %s%n", attr, master, vc); + masterAttributes.put(attr.getKey(), attr.getValue()); + } + } + } + } + + return new VariantContext(master.getSource(), master.getChr(), master.getStart(), master.getEnd(), master.getAlleles(), genotypes, master.getNegLog10PError(), master.getFilters(), masterAttributes); + } + + private static final VariantContext findMaster(Collection unsortedVCs, String masterName) { + for (VariantContext vc : unsortedVCs) { + if (vc.getSource().equals(masterName)) { + return vc; + } + } + + throw new ReviewedStingException(String.format("Couldn't find master VCF %s at %s", masterName, unsortedVCs.iterator().next())); + } + + public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, byte refBase) { return simpleMerge(genomeLocParser, unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false, refBase); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java index 1eee21fe5..bf162d88d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java @@ -172,7 +172,7 @@ public class MendelianViolationEvaluator extends VariantEvaluator { for (Allele dadAllele : dadA) { if (momAllele.isCalled() && dadAllele.isCalled()) { Genotype possibleChild = new Genotype("possibleGenotype" + i, Arrays.asList(momAllele, dadAllele)); - if (childG.sameGenotype(possibleChild, false)) { + if (childG.sameGenotype(possibleChild)) { return false; } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index b306b915e..1a21a92e0 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -120,24 +120,29 @@ public class CombineVariants extends RodWalker { } public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if (tracker == null) // RodWalkers can make funky map calls + if ( tracker == null ) // RodWalkers can make funky map calls return 0; // get all of the vcf rods at this locus // Need to provide reference bases to simpleMerge starting at current locus Collection vcs = tracker.getAllVariantContexts(ref, context.getLocation()); - VariantContext mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), vcs, priority, variantMergeOption, - genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled); + VariantContext mergedVC = null; + if ( variantMergeOption == VariantContextUtils.VariantMergeType.MASTER ) { + mergedVC = VariantContextUtils.masterMerge(vcs, "master"); + } else { + mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, variantMergeOption, + genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled); + } //out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC); - if (mergedVC != null) { // only operate at the start of events + if ( mergedVC != null ) { // only operate at the start of events HashMap attributes = new HashMap(mergedVC.getAttributes()); // re-compute chromosome counts VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false); VariantContext annotatedMergedVC = VariantContext.modifyAttributes(mergedVC, attributes); - if (minimalVCF) + if ( minimalVCF ) annotatedMergedVC = VariantContextUtils.pruneVariantContext(annotatedMergedVC, new HashSet(Arrays.asList(SET_KEY))); vcfWriter.add(annotatedMergedVC, ref.getBase()); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/ComparePhasingToTrioPhasingNoRecombinationWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/ComparePhasingToTrioPhasingNoRecombinationWalker.java index fc1cac8e2..f10062185 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/ComparePhasingToTrioPhasingNoRecombinationWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/ComparePhasingToTrioPhasingNoRecombinationWalker.java @@ -168,7 +168,7 @@ public class ComparePhasingToTrioPhasingNoRecombinationWalker extends RodWalker< if (curTrioVc.getNSamples() > NUM_IN_TRIO || sampleCurGtInTrio == null) throw new UserException("Must provide trio data for sample: " + phasingSample); - if (!new TreeSet(curPhasingGt.getAlleles()).equals(new TreeSet(sampleCurGtInTrio.getAlleles()))) { + if (!curPhasingGt.sameGenotype(sampleCurGtInTrio)) { logger.warn("Locus " + curLoc + " breaks phase, since " + PHASING_ROD_NAME + " and " + TRIO_ROD_NAME + " tracks have different genotypes for " + phasingSample + "!"); prevLoc = null; return result; @@ -323,7 +323,8 @@ public class ComparePhasingToTrioPhasingNoRecombinationWalker extends RodWalker< stats.bothCanPhase++; useTrioPhase = false; - if (!phasedCurAlleles.equals(curPhasingGt.getAlleles())) { + boolean ignorePhase = false; + if (!phasedGt.sameGenotype(curPhasingGt, ignorePhase)) { String contradictMessage = "Phase from " + PHASING_ROD_NAME + " track at " + curLoc + " contradicts the trio-based phasing."; stats.contradictoryPhaseSites++; addToOutput += "\tcontradictory"; diff --git a/scala/qscript/oneoffs/fromer/phaseSamples.scala b/scala/qscript/oneoffs/fromer/PhaseSamples.scala similarity index 97% rename from scala/qscript/oneoffs/fromer/phaseSamples.scala rename to scala/qscript/oneoffs/fromer/PhaseSamples.scala index 4bad047d9..b06673819 100644 --- a/scala/qscript/oneoffs/fromer/phaseSamples.scala +++ b/scala/qscript/oneoffs/fromer/PhaseSamples.scala @@ -136,7 +136,9 @@ class CombineVariants(vcfsToCombine: List[File]) extends org.broadinstitute.stin this.rodBind :+= RodBind(vcf.getName, "VCF", vcf) } - this.variantMergeOptions = Some(org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.VariantMergeType.UNION) + // add the master call: + this.rodBind :+= RodBind("master", "VCF", masterCalls) + this.variantMergeOptions = Some(org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.VariantMergeType.MASTER) this.out = outputPhased }