After discussing with Mark, revert to "Master merging" of phase information from VCFs. This has the advantage of creating minimal phased VCFs from RBP, from which phase info is merged into the original "master VCF". Also, updated Genotype.sameGenotype() to be simpler and NOT REVERSE the ignorePhase flag in comparing Allele lists/sets

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5167 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
fromer 2011-02-01 19:50:15 +00:00
parent dac83d21bc
commit 798955b006
5 changed files with 88 additions and 10 deletions

View File

@ -316,9 +316,79 @@ public class VariantContextUtils {
}
public enum VariantMergeType {
UNION, INTERSECT
UNION, INTERSECT, MASTER
}
/**
* Performs a master merge on the VCs. Here there is a master input [contains all of the information] and many
* VCs containing partial, extra genotype information which should be added to the master. For example,
* we scatter out the phasing algorithm over some samples in the master, producing a minimal VCF with phasing
* information per genotype. The master merge will add the PQ information from each genotype record, where
* appropriate, to the master VC.
*
* @param unsortedVCs
* @param masterName
* @return
*/
public static VariantContext masterMerge(Collection<VariantContext> unsortedVCs, String masterName) {
VariantContext master = findMaster(unsortedVCs, masterName);
Map<String, Genotype> genotypes = master.getGenotypes();
for (Genotype g : genotypes.values()) {
genotypes.put(g.getSampleName(), new MutableGenotype(g));
}
Map<String, Object> masterAttributes = new HashMap<String, Object>(master.getAttributes());
for (VariantContext vc : unsortedVCs) {
if (!vc.getSource().equals(masterName)) {
for (Genotype g : vc.getGenotypes().values()) {
MutableGenotype masterG = (MutableGenotype) genotypes.get(g.getSampleName());
for (Map.Entry<String, Object> attr : g.getAttributes().entrySet()) {
if (!masterG.hasAttribute(attr.getKey())) {
//System.out.printf("Adding GT attribute %s to masterG %s, new %s%n", attr, masterG, g);
masterG.putAttribute(attr.getKey(), attr.getValue());
}
}
if (masterG.isPhased() != g.isPhased()) {
if (masterG.sameGenotype(g)) {
// System.out.printf("Updating phasing %s to masterG %s, new %s%n", g.isPhased(), masterG, g);
masterG.setAlleles(g.getAlleles());
masterG.setPhase(g.isPhased());
}
//else System.out.println("WARNING: Not updating phase, since genotypes differ between master file and auxiliary info file!");
}
// if ( MathUtils.compareDoubles(masterG.getNegLog10PError(), g.getNegLog10PError()) != 0 ) {
// System.out.printf("Updating GQ %s to masterG %s, new %s%n", g.getNegLog10PError(), masterG, g);
// masterG.setNegLog10PError(g.getNegLog10PError());
// }
}
for (Map.Entry<String, Object> attr : vc.getAttributes().entrySet()) {
if (!masterAttributes.containsKey(attr.getKey())) {
//System.out.printf("Adding VC attribute %s to master %s, new %s%n", attr, master, vc);
masterAttributes.put(attr.getKey(), attr.getValue());
}
}
}
}
return new VariantContext(master.getSource(), master.getChr(), master.getStart(), master.getEnd(), master.getAlleles(), genotypes, master.getNegLog10PError(), master.getFilters(), masterAttributes);
}
private static final VariantContext findMaster(Collection<VariantContext> unsortedVCs, String masterName) {
for (VariantContext vc : unsortedVCs) {
if (vc.getSource().equals(masterName)) {
return vc;
}
}
throw new ReviewedStingException(String.format("Couldn't find master VCF %s at %s", masterName, unsortedVCs.iterator().next()));
}
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, byte refBase) {
return simpleMerge(genomeLocParser, unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false, refBase);
}

View File

@ -172,7 +172,7 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
for (Allele dadAllele : dadA) {
if (momAllele.isCalled() && dadAllele.isCalled()) {
Genotype possibleChild = new Genotype("possibleGenotype" + i, Arrays.asList(momAllele, dadAllele));
if (childG.sameGenotype(possibleChild, false)) {
if (childG.sameGenotype(possibleChild)) {
return false;
}
}

View File

@ -120,24 +120,29 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (tracker == null) // RodWalkers can make funky map calls
if ( tracker == null ) // RodWalkers can make funky map calls
return 0;
// get all of the vcf rods at this locus
// Need to provide reference bases to simpleMerge starting at current locus
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
VariantContext mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), vcs, priority, variantMergeOption,
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled);
VariantContext mergedVC = null;
if ( variantMergeOption == VariantContextUtils.VariantMergeType.MASTER ) {
mergedVC = VariantContextUtils.masterMerge(vcs, "master");
} else {
mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, variantMergeOption,
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled);
}
//out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC);
if (mergedVC != null) { // only operate at the start of events
if ( mergedVC != null ) { // only operate at the start of events
HashMap<String, Object> attributes = new HashMap<String, Object>(mergedVC.getAttributes());
// re-compute chromosome counts
VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false);
VariantContext annotatedMergedVC = VariantContext.modifyAttributes(mergedVC, attributes);
if (minimalVCF)
if ( minimalVCF )
annotatedMergedVC = VariantContextUtils.pruneVariantContext(annotatedMergedVC, new HashSet(Arrays.asList(SET_KEY)));
vcfWriter.add(annotatedMergedVC, ref.getBase());
}

View File

@ -168,7 +168,7 @@ public class ComparePhasingToTrioPhasingNoRecombinationWalker extends RodWalker<
if (curTrioVc.getNSamples() > NUM_IN_TRIO || sampleCurGtInTrio == null)
throw new UserException("Must provide trio data for sample: " + phasingSample);
if (!new TreeSet<Allele>(curPhasingGt.getAlleles()).equals(new TreeSet<Allele>(sampleCurGtInTrio.getAlleles()))) {
if (!curPhasingGt.sameGenotype(sampleCurGtInTrio)) {
logger.warn("Locus " + curLoc + " breaks phase, since " + PHASING_ROD_NAME + " and " + TRIO_ROD_NAME + " tracks have different genotypes for " + phasingSample + "!");
prevLoc = null;
return result;
@ -323,7 +323,8 @@ public class ComparePhasingToTrioPhasingNoRecombinationWalker extends RodWalker<
stats.bothCanPhase++;
useTrioPhase = false;
if (!phasedCurAlleles.equals(curPhasingGt.getAlleles())) {
boolean ignorePhase = false;
if (!phasedGt.sameGenotype(curPhasingGt, ignorePhase)) {
String contradictMessage = "Phase from " + PHASING_ROD_NAME + " track at " + curLoc + " contradicts the trio-based phasing.";
stats.contradictoryPhaseSites++;
addToOutput += "\tcontradictory";

View File

@ -136,7 +136,9 @@ class CombineVariants(vcfsToCombine: List[File]) extends org.broadinstitute.stin
this.rodBind :+= RodBind(vcf.getName, "VCF", vcf)
}
this.variantMergeOptions = Some(org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.VariantMergeType.UNION)
// add the master call:
this.rodBind :+= RodBind("master", "VCF", masterCalls)
this.variantMergeOptions = Some(org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.VariantMergeType.MASTER)
this.out = outputPhased
}