After discussing with Mark, revert to "Master merging" of phase information from VCFs. This has the advantage of creating minimal phased VCFs from RBP, from which phase info is merged into the original "master VCF". Also, updated Genotype.sameGenotype() to be simpler and NOT REVERSE the ignorePhase flag in comparing Allele lists/sets
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5167 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
dac83d21bc
commit
798955b006
|
|
@ -316,9 +316,79 @@ public class VariantContextUtils {
|
|||
}
|
||||
|
||||
public enum VariantMergeType {
|
||||
UNION, INTERSECT
|
||||
UNION, INTERSECT, MASTER
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs a master merge on the VCs. Here there is a master input [contains all of the information] and many
|
||||
* VCs containing partial, extra genotype information which should be added to the master. For example,
|
||||
* we scatter out the phasing algorithm over some samples in the master, producing a minimal VCF with phasing
|
||||
* information per genotype. The master merge will add the PQ information from each genotype record, where
|
||||
* appropriate, to the master VC.
|
||||
*
|
||||
* @param unsortedVCs
|
||||
* @param masterName
|
||||
* @return
|
||||
*/
|
||||
public static VariantContext masterMerge(Collection<VariantContext> unsortedVCs, String masterName) {
|
||||
VariantContext master = findMaster(unsortedVCs, masterName);
|
||||
Map<String, Genotype> genotypes = master.getGenotypes();
|
||||
for (Genotype g : genotypes.values()) {
|
||||
genotypes.put(g.getSampleName(), new MutableGenotype(g));
|
||||
}
|
||||
|
||||
Map<String, Object> masterAttributes = new HashMap<String, Object>(master.getAttributes());
|
||||
|
||||
for (VariantContext vc : unsortedVCs) {
|
||||
if (!vc.getSource().equals(masterName)) {
|
||||
for (Genotype g : vc.getGenotypes().values()) {
|
||||
MutableGenotype masterG = (MutableGenotype) genotypes.get(g.getSampleName());
|
||||
for (Map.Entry<String, Object> attr : g.getAttributes().entrySet()) {
|
||||
if (!masterG.hasAttribute(attr.getKey())) {
|
||||
//System.out.printf("Adding GT attribute %s to masterG %s, new %s%n", attr, masterG, g);
|
||||
masterG.putAttribute(attr.getKey(), attr.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
if (masterG.isPhased() != g.isPhased()) {
|
||||
if (masterG.sameGenotype(g)) {
|
||||
// System.out.printf("Updating phasing %s to masterG %s, new %s%n", g.isPhased(), masterG, g);
|
||||
masterG.setAlleles(g.getAlleles());
|
||||
masterG.setPhase(g.isPhased());
|
||||
}
|
||||
//else System.out.println("WARNING: Not updating phase, since genotypes differ between master file and auxiliary info file!");
|
||||
}
|
||||
|
||||
// if ( MathUtils.compareDoubles(masterG.getNegLog10PError(), g.getNegLog10PError()) != 0 ) {
|
||||
// System.out.printf("Updating GQ %s to masterG %s, new %s%n", g.getNegLog10PError(), masterG, g);
|
||||
// masterG.setNegLog10PError(g.getNegLog10PError());
|
||||
// }
|
||||
|
||||
}
|
||||
|
||||
for (Map.Entry<String, Object> attr : vc.getAttributes().entrySet()) {
|
||||
if (!masterAttributes.containsKey(attr.getKey())) {
|
||||
//System.out.printf("Adding VC attribute %s to master %s, new %s%n", attr, master, vc);
|
||||
masterAttributes.put(attr.getKey(), attr.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new VariantContext(master.getSource(), master.getChr(), master.getStart(), master.getEnd(), master.getAlleles(), genotypes, master.getNegLog10PError(), master.getFilters(), masterAttributes);
|
||||
}
|
||||
|
||||
private static final VariantContext findMaster(Collection<VariantContext> unsortedVCs, String masterName) {
|
||||
for (VariantContext vc : unsortedVCs) {
|
||||
if (vc.getSource().equals(masterName)) {
|
||||
return vc;
|
||||
}
|
||||
}
|
||||
|
||||
throw new ReviewedStingException(String.format("Couldn't find master VCF %s at %s", masterName, unsortedVCs.iterator().next()));
|
||||
}
|
||||
|
||||
|
||||
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, byte refBase) {
|
||||
return simpleMerge(genomeLocParser, unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false, refBase);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -172,7 +172,7 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
for (Allele dadAllele : dadA) {
|
||||
if (momAllele.isCalled() && dadAllele.isCalled()) {
|
||||
Genotype possibleChild = new Genotype("possibleGenotype" + i, Arrays.asList(momAllele, dadAllele));
|
||||
if (childG.sameGenotype(possibleChild, false)) {
|
||||
if (childG.sameGenotype(possibleChild)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -120,24 +120,29 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (tracker == null) // RodWalkers can make funky map calls
|
||||
if ( tracker == null ) // RodWalkers can make funky map calls
|
||||
return 0;
|
||||
|
||||
// get all of the vcf rods at this locus
|
||||
// Need to provide reference bases to simpleMerge starting at current locus
|
||||
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
|
||||
|
||||
VariantContext mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), vcs, priority, variantMergeOption,
|
||||
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled);
|
||||
VariantContext mergedVC = null;
|
||||
if ( variantMergeOption == VariantContextUtils.VariantMergeType.MASTER ) {
|
||||
mergedVC = VariantContextUtils.masterMerge(vcs, "master");
|
||||
} else {
|
||||
mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, variantMergeOption,
|
||||
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled);
|
||||
}
|
||||
|
||||
//out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC);
|
||||
|
||||
if (mergedVC != null) { // only operate at the start of events
|
||||
if ( mergedVC != null ) { // only operate at the start of events
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>(mergedVC.getAttributes());
|
||||
// re-compute chromosome counts
|
||||
VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false);
|
||||
VariantContext annotatedMergedVC = VariantContext.modifyAttributes(mergedVC, attributes);
|
||||
if (minimalVCF)
|
||||
if ( minimalVCF )
|
||||
annotatedMergedVC = VariantContextUtils.pruneVariantContext(annotatedMergedVC, new HashSet(Arrays.asList(SET_KEY)));
|
||||
vcfWriter.add(annotatedMergedVC, ref.getBase());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -168,7 +168,7 @@ public class ComparePhasingToTrioPhasingNoRecombinationWalker extends RodWalker<
|
|||
if (curTrioVc.getNSamples() > NUM_IN_TRIO || sampleCurGtInTrio == null)
|
||||
throw new UserException("Must provide trio data for sample: " + phasingSample);
|
||||
|
||||
if (!new TreeSet<Allele>(curPhasingGt.getAlleles()).equals(new TreeSet<Allele>(sampleCurGtInTrio.getAlleles()))) {
|
||||
if (!curPhasingGt.sameGenotype(sampleCurGtInTrio)) {
|
||||
logger.warn("Locus " + curLoc + " breaks phase, since " + PHASING_ROD_NAME + " and " + TRIO_ROD_NAME + " tracks have different genotypes for " + phasingSample + "!");
|
||||
prevLoc = null;
|
||||
return result;
|
||||
|
|
@ -323,7 +323,8 @@ public class ComparePhasingToTrioPhasingNoRecombinationWalker extends RodWalker<
|
|||
stats.bothCanPhase++;
|
||||
useTrioPhase = false;
|
||||
|
||||
if (!phasedCurAlleles.equals(curPhasingGt.getAlleles())) {
|
||||
boolean ignorePhase = false;
|
||||
if (!phasedGt.sameGenotype(curPhasingGt, ignorePhase)) {
|
||||
String contradictMessage = "Phase from " + PHASING_ROD_NAME + " track at " + curLoc + " contradicts the trio-based phasing.";
|
||||
stats.contradictoryPhaseSites++;
|
||||
addToOutput += "\tcontradictory";
|
||||
|
|
|
|||
|
|
@ -136,7 +136,9 @@ class CombineVariants(vcfsToCombine: List[File]) extends org.broadinstitute.stin
|
|||
this.rodBind :+= RodBind(vcf.getName, "VCF", vcf)
|
||||
}
|
||||
|
||||
this.variantMergeOptions = Some(org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.VariantMergeType.UNION)
|
||||
// add the master call:
|
||||
this.rodBind :+= RodBind("master", "VCF", masterCalls)
|
||||
this.variantMergeOptions = Some(org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.VariantMergeType.MASTER)
|
||||
|
||||
this.out = outputPhased
|
||||
}
|
||||
Loading…
Reference in New Issue