From ffd5f407a5ab43102df79349b3aabf40dc430d79 Mon Sep 17 00:00:00 2001 From: fromer Date: Fri, 28 Jan 2011 18:33:32 +0000 Subject: [PATCH] Retain only a single walker to perform calculation of haplotype extents git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5110 348d0f76-0448-11de-a6fe-93d51630548a --- .../phasing/CalcFullHaplotypesWalker.java | 16 ++++- .../walkers/phasing/PhasingEval.java | 71 ++++--------------- 2 files changed, 29 insertions(+), 58 deletions(-) diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/CalcFullHaplotypesWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/CalcFullHaplotypesWalker.java index be136b7b5..6cd3d2bb4 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/CalcFullHaplotypesWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/CalcFullHaplotypesWalker.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers.phasing; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeader; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -34,6 +35,7 @@ import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -51,9 +53,15 @@ import static org.broadinstitute.sting.utils.vcf.VCFUtils.getVCFHeadersFromRods; public class CalcFullHaplotypesWalker extends RodWalker { private Map waitingHaplotypes = null; - @Output + @Output(doc = "File to which results should be written", required = true) protected PrintStream out; + @Argument(doc="sample to emit", required = false) + protected String sample = null; + + @Argument(doc="only include physically-phased results", required = false) + protected boolean requirePQ = false; + public void initialize() { this.waitingHaplotypes = new HashMap(); @@ -104,6 +112,9 @@ public class CalcFullHaplotypesWalker extends RodWalker { if (vc.isFiltered()) continue; + if (sample != null) + vc = vc.subContextFromGenotypes(vc.getGenotype(sample)); + for (Map.Entry sampleGtEntry : vc.getGenotypes().entrySet()) { String sample = sampleGtEntry.getKey(); Genotype gt = sampleGtEntry.getValue(); @@ -114,7 +125,8 @@ public class CalcFullHaplotypesWalker extends RodWalker { throw new ReviewedStingException("EVERY sample should have a haplotype [by code above and getToolkit().getSamples()]"); sampleHap.incrementHetCount(); - if (!gt.isPhased()) { // Terminate the haplotype here: + // Terminate the haplotype here: + if (!gt.isPhased() || (requirePQ && !gt.hasAttribute(ReadBackedPhasingWalker.PQ_KEY))) { outputHaplotype(sampleHap); // Start a new haplotype from the next position [if it exists]: diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/PhasingEval.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/PhasingEval.java index dd86f2a31..a1ad03905 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/PhasingEval.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/PhasingEval.java @@ -26,17 +26,14 @@ package org.broadinstitute.sting.oneoffprojects.walkers.phasing; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.VariantContext; -import org.broad.tribble.vcf.VCFHeader; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.vcf.VCFUtils; @@ -47,36 +44,22 @@ import java.util.*; /** * Emits specific fields as dictated by the user from one or more VCF files. */ -@Requires(value={}) +@Requires(value = {}) public class PhasingEval extends RodWalker { - @Output(doc="File to which results should be written",required=true) + + @Output(doc = "File to which results should be written", required = true) protected PrintStream out; - @Argument(doc="sample to emit", required = false) + @Argument(doc = "sample to emit", required = false) protected String sample = null; - @Argument(doc="only include physical phased results", required = false) - protected boolean requirePQ = false; - - @Argument(doc="Analysis to perform", required = true) + @Argument(doc = "Analysis to perform", required = true) protected Analysis analysis; public enum Analysis { - HAPLOTYPE_SIZES, PHASING_BY_AC } - private class Haplotype { - GenomeLoc start, last; - List genotypes; - - public Haplotype(GenomeLoc start) { - this.start = start; - this.last = null; - this.genotypes = new ArrayList(); - } - } - private class PhasingByAC { int myAC = 0; int myAN = 0; @@ -89,50 +72,26 @@ public class PhasingEval extends RodWalker { } } - Map haplotypes = new Hashtable(); List phasingByACs = new ArrayList(); public void initialize() { - if ( analysis == Analysis.HAPLOTYPE_SIZES ) { - out.println(Utils.join("\t", Arrays.asList("sample", "haplotype.length", "n.genotypes", "start", "stop"))); - } - Set samples = SampleUtils.getSampleList(VCFUtils.getVCFHeadersFromRods(getToolkit(), null)); int AN = 2 * samples.size(); - for ( int i = 0; i <= AN; i++ ) { + for (int i = 0; i <= AN; i++) { phasingByACs.add(new PhasingByAC(i, AN)); } } public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( tracker == null ) // RodWalkers can make funky map calls + if (tracker == null) // RodWalkers can make funky map calls return 0; Collection vcs = tracker.getAllVariantContexts(ref, context.getLocation()); - for ( VariantContext vc : vcs) { - if ( sample != null ) + for (VariantContext vc : vcs) { + if (sample != null) vc = vc.subContextFromGenotypes(vc.getGenotype(sample)); - if ( analysis == Analysis.HAPLOTYPE_SIZES) { - GenomeLoc loc = getToolkit().getGenomeLocParser().parseGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd()); - for ( Genotype g : vc.getGenotypes().values() ) { - if ( ! haplotypes.containsKey(g.getSampleName()) ) - haplotypes.put(g.getSampleName(), new Haplotype(loc)); - - Haplotype h = haplotypes.get(g.getSampleName()); - - if ( g.isPhased() ) { - h.genotypes.add(g); - h.last = loc; - } else { - if ( ! requirePQ || isPhysicallyPhased(h.genotypes) ) - out.printf("%s %d %d %s %s%n", g.getSampleName(), - h.last != null ? h.start.distance(h.last) : 0, - h.genotypes.size(), h.start, h.last); - haplotypes.put(g.getSampleName(), new Haplotype(loc)); - } - } - } else if ( analysis == Analysis.PHASING_BY_AC ) { + if (analysis == Analysis.PHASING_BY_AC) { int homref = vc.getHomRefCount(); int homalt = vc.getHomVarCount(); int het = vc.getHetCount(); @@ -148,8 +107,8 @@ public class PhasingEval extends RodWalker { } private boolean isPhysicallyPhased(Collection genotypes) { - for ( Genotype g : genotypes ) { - if ( g.isHet() && g.hasAttribute("PQ") ) + for (Genotype g : genotypes) { + if (g.isHet() && g.hasAttribute(ReadBackedPhasingWalker.PQ_KEY)) return true; } @@ -165,9 +124,9 @@ public class PhasingEval extends RodWalker { } public void onTraversalDone(Integer sum) { - if ( analysis == Analysis.PHASING_BY_AC ) { + if (analysis == Analysis.PHASING_BY_AC) { out.println(Utils.join("\t", Arrays.asList("ac", "nhets", "nhetphased"))); - for ( PhasingByAC pac : phasingByACs ) { + for (PhasingByAC pac : phasingByACs) { out.printf("%d\t%d\t%d%n", pac.myAC, pac.nHets, pac.nHetsPhased); } }