renamed genotypesArePhased to isPhased, as the previous name was incorrect for several reasons. Added setPhase() to MutableGenotype. Other classes changed to reflect renaming to isPhased(). CombineVariants now supports an experimental MASTER mode where it consumes -B:master,vcf and -B:xi,vcf for any number i and updates the master with phasing information in xi.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4896 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e7569cfe6f
commit
66cca7de0f
|
|
@ -316,9 +316,71 @@ public class VariantContextUtils {
|
|||
}
|
||||
|
||||
public enum VariantMergeType {
|
||||
UNION, INTERSECT
|
||||
UNION, INTERSECT, MASTER
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs a master merge on the VCs. Here there is a master input [contains all of the information] and many
|
||||
* VCs containing partial, extra genotype information which should be added to the master. For example,
|
||||
* we scatter out the phasing algorithm over some samples in the master, producing a minimal VCF with phasing
|
||||
* information per genotype. The master merge will add the PQ information from each genotype record, where
|
||||
* appropriate, to the master VC.
|
||||
*
|
||||
* @param genomeLocParser
|
||||
* @param unsortedVCs
|
||||
* @param masterName
|
||||
* @param refBase
|
||||
* @return
|
||||
*/
|
||||
public static VariantContext masterMerge(Collection<VariantContext> unsortedVCs, String masterName) {
|
||||
VariantContext master = findMaster(unsortedVCs, masterName);
|
||||
Map<String, Genotype> genotypes = master.getGenotypes();
|
||||
for ( Genotype g : genotypes.values() ) {
|
||||
genotypes.put(g.getSampleName(), new MutableGenotype(g));
|
||||
}
|
||||
|
||||
for ( VariantContext vc : unsortedVCs ) {
|
||||
if ( ! vc.getSource() .equals(masterName) ) {
|
||||
for ( Genotype g : vc.getGenotypes().values() ) {
|
||||
MutableGenotype masterG = (MutableGenotype)genotypes.get(g.getSampleName());
|
||||
for ( Map.Entry<String, Object> attr : g.getAttributes().entrySet() ) {
|
||||
if ( ! masterG.hasAttribute(attr.getKey()) ) {
|
||||
//System.out.printf("Adding attribute %s to masterG %s, new %s%n", attr, masterG, g);
|
||||
masterG.putAttribute(attr.getKey(), attr.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
if ( masterG.isPhased() != g.isPhased() ) {
|
||||
// System.out.printf("Updating phasing %s to masterG %s, new %s%n", g.isPhased(), masterG, g);
|
||||
masterG.setPhase(g.isPhased());
|
||||
}
|
||||
|
||||
// if ( MathUtils.compareDoubles(masterG.getNegLog10PError(), g.getNegLog10PError()) != 0 ) {
|
||||
// System.out.printf("Updating GQ %s to masterG %s, new %s%n", g.getNegLog10PError(), masterG, g);
|
||||
// masterG.setNegLog10PError(g.getNegLog10PError());
|
||||
// }
|
||||
|
||||
// TODO -- WARNING -- THIS CODE DOES NOT ACTUALLY LOOK AT ALL OF THE GENOTYPE ATTRIBUTES, ONLY THOSE
|
||||
// TODO -- WARNING -- IMMEDIATELY USEFUL TO PHASING. A COMPLETE IMPLEMENTATION WILL NEED TO HANDLE
|
||||
// TODO -- WARNING -- ALL OF THE FIELDS LIKE ALLELES, ETC
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new VariantContext(master.getSource(), master.getChr(), master.getStart(), master.getEnd(), master.getAlleles(), genotypes, master.getNegLog10PError(), master.getFilters(), master.getAttributes());
|
||||
}
|
||||
|
||||
private static final VariantContext findMaster(Collection<VariantContext> unsortedVCs, String masterName) {
|
||||
for ( VariantContext vc : unsortedVCs ) {
|
||||
if ( vc.getSource() .equals(masterName) ) {
|
||||
return vc;
|
||||
}
|
||||
}
|
||||
|
||||
throw new ReviewedStingException(String.format("Couldn't find master VCF %s at %s", masterName, unsortedVCs.iterator().next()));
|
||||
}
|
||||
|
||||
|
||||
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, byte refBase) {
|
||||
return simpleMerge(genomeLocParser, unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false, refBase);
|
||||
}
|
||||
|
|
@ -952,7 +1014,7 @@ public class VariantContextUtils {
|
|||
HOWEVER, EVEN if this is not the case, but gt1.isHom(),
|
||||
it is trivially known that each of gt2's alleles segregate with the single allele type present in gt1.
|
||||
*/
|
||||
return (gt2.genotypesArePhased() || gt2.isHom() || gt1.isHom());
|
||||
return (gt2.isPhased() || gt2.isHom() || gt1.isHom());
|
||||
}
|
||||
|
||||
private static class PhaseAndQuality {
|
||||
|
|
@ -960,7 +1022,7 @@ public class VariantContextUtils {
|
|||
public Double PQ = null;
|
||||
|
||||
public PhaseAndQuality(Genotype gt) {
|
||||
this.isPhased = gt.genotypesArePhased();
|
||||
this.isPhased = gt.isPhased();
|
||||
if (this.isPhased)
|
||||
this.PQ = gt.getAttributeAsDoubleNoException(ReadBackedPhasingWalker.PQ_KEY);
|
||||
}
|
||||
|
|
@ -969,7 +1031,7 @@ public class VariantContextUtils {
|
|||
// Assumes that alleleSegregationIsKnown(gt1, gt2):
|
||||
|
||||
private static PhaseAndQuality calcPhaseForMergedGenotypes(Genotype gt1, Genotype gt2) {
|
||||
if (gt2.genotypesArePhased() || gt2.isHom())
|
||||
if (gt2.isPhased() || gt2.isHom())
|
||||
return new PhaseAndQuality(gt1); // maintain the phase of gt1
|
||||
|
||||
if (!gt1.isHom())
|
||||
|
|
|
|||
|
|
@ -260,7 +260,7 @@ public class VariantAnnotatorEngine {
|
|||
if ( result != null )
|
||||
genotypeAnnotations.putAll(result);
|
||||
}
|
||||
genotypes.put(g.getKey(), new Genotype(g.getKey(), genotype.getAlleles(), genotype.getNegLog10PError(), genotype.getFilters(), genotypeAnnotations, genotype.genotypesArePhased()));
|
||||
genotypes.put(g.getKey(), new Genotype(g.getKey(), genotype.getAlleles(), genotype.getNegLog10PError(), genotype.getFilters(), genotypeAnnotations, genotype.isPhased()));
|
||||
}
|
||||
|
||||
return genotypes;
|
||||
|
|
|
|||
|
|
@ -201,7 +201,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
if ( VariantContextUtils.match(vc, g, exp) )
|
||||
filters.add(exp.name);
|
||||
}
|
||||
genotypes.put(genotype.getKey(), new Genotype(genotype.getKey(), g.getAlleles(), g.getNegLog10PError(), filters, g.getAttributes(), g.genotypesArePhased()));
|
||||
genotypes.put(genotype.getKey(), new Genotype(genotype.getKey(), g.getAlleles(), g.getNegLog10PError(), filters, g.getAttributes(), g.isPhased()));
|
||||
} else {
|
||||
genotypes.put(genotype.getKey(), g);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -199,7 +199,7 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
|
|||
}
|
||||
|
||||
public boolean genotypesArePhasedAboveThreshold(Genotype gt) {
|
||||
if (!gt.genotypesArePhased())
|
||||
if (!gt.isPhased())
|
||||
return false;
|
||||
|
||||
Double pq = getPQ(gt);
|
||||
|
|
|
|||
|
|
@ -126,9 +126,14 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
// get all of the vcf rods at this locus
|
||||
// Need to provide reference bases to simpleMerge starting at current locus
|
||||
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
|
||||
VariantContext mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, variantMergeOption,
|
||||
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled);
|
||||
|
||||
VariantContext mergedVC = null;
|
||||
if ( variantMergeOption == VariantContextUtils.VariantMergeType.MASTER ) {
|
||||
mergedVC = VariantContextUtils.masterMerge(vcs, "master");
|
||||
} else {
|
||||
mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, variantMergeOption,
|
||||
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled);
|
||||
}
|
||||
|
||||
//out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC);
|
||||
|
||||
|
|
|
|||
|
|
@ -153,7 +153,7 @@ public class FixRefBases extends RodWalker<Integer,Integer> {
|
|||
}
|
||||
return new Genotype(g.getSampleName(),
|
||||
newAlleles,g.hasNegLog10PError() ? g.getNegLog10PError() : VCFConstants.MISSING_QUALITY_v3_DOUBLE,
|
||||
g.getAttributes().keySet(), g.getAttributes(),g.genotypesArePhased());
|
||||
g.getAttributes().keySet(), g.getAttributes(),g.isPhased());
|
||||
}
|
||||
|
||||
private Map<String,Genotype> flipGenotypes(Map<String,Genotype> old, Set<Allele> newAlleles) {
|
||||
|
|
@ -192,7 +192,7 @@ public class FixRefBases extends RodWalker<Integer,Integer> {
|
|||
|
||||
return new Genotype(old.getSampleName(),
|
||||
newGTAlleles,old.hasNegLog10PError() ? old.getNegLog10PError() : VCFConstants.MISSING_QUALITY_v3_DOUBLE,
|
||||
old.getAttributes().keySet(), old.getAttributes(),old.genotypesArePhased());
|
||||
old.getAttributes().keySet(), old.getAttributes(),old.isPhased());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue