Retain only a single walker to perform calculation of haplotype extents

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5110 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
fromer 2011-01-28 18:33:32 +00:00
parent 88ea60b864
commit ffd5f407a5
2 changed files with 29 additions and 58 deletions

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers.phasing;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.VCFHeader;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -34,6 +35,7 @@ import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -51,9 +53,15 @@ import static org.broadinstitute.sting.utils.vcf.VCFUtils.getVCFHeadersFromRods;
public class CalcFullHaplotypesWalker extends RodWalker<Integer, Integer> {
private Map<String, Haplotype> waitingHaplotypes = null;
@Output
@Output(doc = "File to which results should be written", required = true)
protected PrintStream out;
@Argument(doc="sample to emit", required = false)
protected String sample = null;
@Argument(doc="only include physically-phased results", required = false)
protected boolean requirePQ = false;
public void initialize() {
this.waitingHaplotypes = new HashMap<String, Haplotype>();
@ -104,6 +112,9 @@ public class CalcFullHaplotypesWalker extends RodWalker<Integer, Integer> {
if (vc.isFiltered())
continue;
if (sample != null)
vc = vc.subContextFromGenotypes(vc.getGenotype(sample));
for (Map.Entry<String, Genotype> sampleGtEntry : vc.getGenotypes().entrySet()) {
String sample = sampleGtEntry.getKey();
Genotype gt = sampleGtEntry.getValue();
@ -114,7 +125,8 @@ public class CalcFullHaplotypesWalker extends RodWalker<Integer, Integer> {
throw new ReviewedStingException("EVERY sample should have a haplotype [by code above and getToolkit().getSamples()]");
sampleHap.incrementHetCount();
if (!gt.isPhased()) { // Terminate the haplotype here:
// Terminate the haplotype here:
if (!gt.isPhased() || (requirePQ && !gt.hasAttribute(ReadBackedPhasingWalker.PQ_KEY))) {
outputHaplotype(sampleHap);
// Start a new haplotype from the next position [if it exists]:

View File

@ -26,17 +26,14 @@ package org.broadinstitute.sting.oneoffprojects.walkers.phasing;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.VCFHeader;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
@ -47,36 +44,22 @@ import java.util.*;
/**
* Emits specific fields as dictated by the user from one or more VCF files.
*/
@Requires(value={})
@Requires(value = {})
public class PhasingEval extends RodWalker<Integer, Integer> {
@Output(doc="File to which results should be written",required=true)
@Output(doc = "File to which results should be written", required = true)
protected PrintStream out;
@Argument(doc="sample to emit", required = false)
@Argument(doc = "sample to emit", required = false)
protected String sample = null;
@Argument(doc="only include physical phased results", required = false)
protected boolean requirePQ = false;
@Argument(doc="Analysis to perform", required = true)
@Argument(doc = "Analysis to perform", required = true)
protected Analysis analysis;
public enum Analysis {
HAPLOTYPE_SIZES,
PHASING_BY_AC
}
private class Haplotype {
GenomeLoc start, last;
List<Genotype> genotypes;
public Haplotype(GenomeLoc start) {
this.start = start;
this.last = null;
this.genotypes = new ArrayList<Genotype>();
}
}
private class PhasingByAC {
int myAC = 0;
int myAN = 0;
@ -89,50 +72,26 @@ public class PhasingEval extends RodWalker<Integer, Integer> {
}
}
Map<String, Haplotype> haplotypes = new Hashtable<String, Haplotype>();
List<PhasingByAC> phasingByACs = new ArrayList<PhasingByAC>();
public void initialize() {
if ( analysis == Analysis.HAPLOTYPE_SIZES ) {
out.println(Utils.join("\t", Arrays.asList("sample", "haplotype.length", "n.genotypes", "start", "stop")));
}
Set<String> samples = SampleUtils.getSampleList(VCFUtils.getVCFHeadersFromRods(getToolkit(), null));
int AN = 2 * samples.size();
for ( int i = 0; i <= AN; i++ ) {
for (int i = 0; i <= AN; i++) {
phasingByACs.add(new PhasingByAC(i, AN));
}
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null ) // RodWalkers can make funky map calls
if (tracker == null) // RodWalkers can make funky map calls
return 0;
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
for ( VariantContext vc : vcs) {
if ( sample != null )
for (VariantContext vc : vcs) {
if (sample != null)
vc = vc.subContextFromGenotypes(vc.getGenotype(sample));
if ( analysis == Analysis.HAPLOTYPE_SIZES) {
GenomeLoc loc = getToolkit().getGenomeLocParser().parseGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd());
for ( Genotype g : vc.getGenotypes().values() ) {
if ( ! haplotypes.containsKey(g.getSampleName()) )
haplotypes.put(g.getSampleName(), new Haplotype(loc));
Haplotype h = haplotypes.get(g.getSampleName());
if ( g.isPhased() ) {
h.genotypes.add(g);
h.last = loc;
} else {
if ( ! requirePQ || isPhysicallyPhased(h.genotypes) )
out.printf("%s %d %d %s %s%n", g.getSampleName(),
h.last != null ? h.start.distance(h.last) : 0,
h.genotypes.size(), h.start, h.last);
haplotypes.put(g.getSampleName(), new Haplotype(loc));
}
}
} else if ( analysis == Analysis.PHASING_BY_AC ) {
if (analysis == Analysis.PHASING_BY_AC) {
int homref = vc.getHomRefCount();
int homalt = vc.getHomVarCount();
int het = vc.getHetCount();
@ -148,8 +107,8 @@ public class PhasingEval extends RodWalker<Integer, Integer> {
}
private boolean isPhysicallyPhased(Collection<Genotype> genotypes) {
for ( Genotype g : genotypes ) {
if ( g.isHet() && g.hasAttribute("PQ") )
for (Genotype g : genotypes) {
if (g.isHet() && g.hasAttribute(ReadBackedPhasingWalker.PQ_KEY))
return true;
}
@ -165,9 +124,9 @@ public class PhasingEval extends RodWalker<Integer, Integer> {
}
public void onTraversalDone(Integer sum) {
if ( analysis == Analysis.PHASING_BY_AC ) {
if (analysis == Analysis.PHASING_BY_AC) {
out.println(Utils.join("\t", Arrays.asList("ac", "nhets", "nhetphased")));
for ( PhasingByAC pac : phasingByACs ) {
for (PhasingByAC pac : phasingByACs) {
out.printf("%d\t%d\t%d%n", pac.myAC, pac.nHets, pac.nHetsPhased);
}
}