Retain only a single walker to perform calculation of haplotype extents
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5110 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
88ea60b864
commit
ffd5f407a5
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers.phasing;
|
||||||
import org.broad.tribble.util.variantcontext.Genotype;
|
import org.broad.tribble.util.variantcontext.Genotype;
|
||||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
import org.broad.tribble.vcf.VCFHeader;
|
import org.broad.tribble.vcf.VCFHeader;
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
|
@ -34,6 +35,7 @@ import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
|
@ -51,9 +53,15 @@ import static org.broadinstitute.sting.utils.vcf.VCFUtils.getVCFHeadersFromRods;
|
||||||
public class CalcFullHaplotypesWalker extends RodWalker<Integer, Integer> {
|
public class CalcFullHaplotypesWalker extends RodWalker<Integer, Integer> {
|
||||||
private Map<String, Haplotype> waitingHaplotypes = null;
|
private Map<String, Haplotype> waitingHaplotypes = null;
|
||||||
|
|
||||||
@Output
|
@Output(doc = "File to which results should be written", required = true)
|
||||||
protected PrintStream out;
|
protected PrintStream out;
|
||||||
|
|
||||||
|
@Argument(doc="sample to emit", required = false)
|
||||||
|
protected String sample = null;
|
||||||
|
|
||||||
|
@Argument(doc="only include physically-phased results", required = false)
|
||||||
|
protected boolean requirePQ = false;
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
this.waitingHaplotypes = new HashMap<String, Haplotype>();
|
this.waitingHaplotypes = new HashMap<String, Haplotype>();
|
||||||
|
|
||||||
|
|
@ -104,6 +112,9 @@ public class CalcFullHaplotypesWalker extends RodWalker<Integer, Integer> {
|
||||||
if (vc.isFiltered())
|
if (vc.isFiltered())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
if (sample != null)
|
||||||
|
vc = vc.subContextFromGenotypes(vc.getGenotype(sample));
|
||||||
|
|
||||||
for (Map.Entry<String, Genotype> sampleGtEntry : vc.getGenotypes().entrySet()) {
|
for (Map.Entry<String, Genotype> sampleGtEntry : vc.getGenotypes().entrySet()) {
|
||||||
String sample = sampleGtEntry.getKey();
|
String sample = sampleGtEntry.getKey();
|
||||||
Genotype gt = sampleGtEntry.getValue();
|
Genotype gt = sampleGtEntry.getValue();
|
||||||
|
|
@ -114,7 +125,8 @@ public class CalcFullHaplotypesWalker extends RodWalker<Integer, Integer> {
|
||||||
throw new ReviewedStingException("EVERY sample should have a haplotype [by code above and getToolkit().getSamples()]");
|
throw new ReviewedStingException("EVERY sample should have a haplotype [by code above and getToolkit().getSamples()]");
|
||||||
sampleHap.incrementHetCount();
|
sampleHap.incrementHetCount();
|
||||||
|
|
||||||
if (!gt.isPhased()) { // Terminate the haplotype here:
|
// Terminate the haplotype here:
|
||||||
|
if (!gt.isPhased() || (requirePQ && !gt.hasAttribute(ReadBackedPhasingWalker.PQ_KEY))) {
|
||||||
outputHaplotype(sampleHap);
|
outputHaplotype(sampleHap);
|
||||||
|
|
||||||
// Start a new haplotype from the next position [if it exists]:
|
// Start a new haplotype from the next position [if it exists]:
|
||||||
|
|
|
||||||
|
|
@ -26,17 +26,14 @@ package org.broadinstitute.sting.oneoffprojects.walkers.phasing;
|
||||||
|
|
||||||
import org.broad.tribble.util.variantcontext.Genotype;
|
import org.broad.tribble.util.variantcontext.Genotype;
|
||||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
import org.broad.tribble.vcf.VCFHeader;
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||||
|
|
@ -47,36 +44,22 @@ import java.util.*;
|
||||||
/**
|
/**
|
||||||
* Emits specific fields as dictated by the user from one or more VCF files.
|
* Emits specific fields as dictated by the user from one or more VCF files.
|
||||||
*/
|
*/
|
||||||
@Requires(value={})
|
@Requires(value = {})
|
||||||
public class PhasingEval extends RodWalker<Integer, Integer> {
|
public class PhasingEval extends RodWalker<Integer, Integer> {
|
||||||
@Output(doc="File to which results should be written",required=true)
|
|
||||||
|
@Output(doc = "File to which results should be written", required = true)
|
||||||
protected PrintStream out;
|
protected PrintStream out;
|
||||||
|
|
||||||
@Argument(doc="sample to emit", required = false)
|
@Argument(doc = "sample to emit", required = false)
|
||||||
protected String sample = null;
|
protected String sample = null;
|
||||||
|
|
||||||
@Argument(doc="only include physical phased results", required = false)
|
@Argument(doc = "Analysis to perform", required = true)
|
||||||
protected boolean requirePQ = false;
|
|
||||||
|
|
||||||
@Argument(doc="Analysis to perform", required = true)
|
|
||||||
protected Analysis analysis;
|
protected Analysis analysis;
|
||||||
|
|
||||||
public enum Analysis {
|
public enum Analysis {
|
||||||
HAPLOTYPE_SIZES,
|
|
||||||
PHASING_BY_AC
|
PHASING_BY_AC
|
||||||
}
|
}
|
||||||
|
|
||||||
private class Haplotype {
|
|
||||||
GenomeLoc start, last;
|
|
||||||
List<Genotype> genotypes;
|
|
||||||
|
|
||||||
public Haplotype(GenomeLoc start) {
|
|
||||||
this.start = start;
|
|
||||||
this.last = null;
|
|
||||||
this.genotypes = new ArrayList<Genotype>();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private class PhasingByAC {
|
private class PhasingByAC {
|
||||||
int myAC = 0;
|
int myAC = 0;
|
||||||
int myAN = 0;
|
int myAN = 0;
|
||||||
|
|
@ -89,50 +72,26 @@ public class PhasingEval extends RodWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Map<String, Haplotype> haplotypes = new Hashtable<String, Haplotype>();
|
|
||||||
List<PhasingByAC> phasingByACs = new ArrayList<PhasingByAC>();
|
List<PhasingByAC> phasingByACs = new ArrayList<PhasingByAC>();
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
if ( analysis == Analysis.HAPLOTYPE_SIZES ) {
|
|
||||||
out.println(Utils.join("\t", Arrays.asList("sample", "haplotype.length", "n.genotypes", "start", "stop")));
|
|
||||||
}
|
|
||||||
|
|
||||||
Set<String> samples = SampleUtils.getSampleList(VCFUtils.getVCFHeadersFromRods(getToolkit(), null));
|
Set<String> samples = SampleUtils.getSampleList(VCFUtils.getVCFHeadersFromRods(getToolkit(), null));
|
||||||
int AN = 2 * samples.size();
|
int AN = 2 * samples.size();
|
||||||
for ( int i = 0; i <= AN; i++ ) {
|
for (int i = 0; i <= AN; i++) {
|
||||||
phasingByACs.add(new PhasingByAC(i, AN));
|
phasingByACs.add(new PhasingByAC(i, AN));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
if ( tracker == null ) // RodWalkers can make funky map calls
|
if (tracker == null) // RodWalkers can make funky map calls
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
|
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
|
||||||
for ( VariantContext vc : vcs) {
|
for (VariantContext vc : vcs) {
|
||||||
if ( sample != null )
|
if (sample != null)
|
||||||
vc = vc.subContextFromGenotypes(vc.getGenotype(sample));
|
vc = vc.subContextFromGenotypes(vc.getGenotype(sample));
|
||||||
|
|
||||||
if ( analysis == Analysis.HAPLOTYPE_SIZES) {
|
if (analysis == Analysis.PHASING_BY_AC) {
|
||||||
GenomeLoc loc = getToolkit().getGenomeLocParser().parseGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd());
|
|
||||||
for ( Genotype g : vc.getGenotypes().values() ) {
|
|
||||||
if ( ! haplotypes.containsKey(g.getSampleName()) )
|
|
||||||
haplotypes.put(g.getSampleName(), new Haplotype(loc));
|
|
||||||
|
|
||||||
Haplotype h = haplotypes.get(g.getSampleName());
|
|
||||||
|
|
||||||
if ( g.isPhased() ) {
|
|
||||||
h.genotypes.add(g);
|
|
||||||
h.last = loc;
|
|
||||||
} else {
|
|
||||||
if ( ! requirePQ || isPhysicallyPhased(h.genotypes) )
|
|
||||||
out.printf("%s %d %d %s %s%n", g.getSampleName(),
|
|
||||||
h.last != null ? h.start.distance(h.last) : 0,
|
|
||||||
h.genotypes.size(), h.start, h.last);
|
|
||||||
haplotypes.put(g.getSampleName(), new Haplotype(loc));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if ( analysis == Analysis.PHASING_BY_AC ) {
|
|
||||||
int homref = vc.getHomRefCount();
|
int homref = vc.getHomRefCount();
|
||||||
int homalt = vc.getHomVarCount();
|
int homalt = vc.getHomVarCount();
|
||||||
int het = vc.getHetCount();
|
int het = vc.getHetCount();
|
||||||
|
|
@ -148,8 +107,8 @@ public class PhasingEval extends RodWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isPhysicallyPhased(Collection<Genotype> genotypes) {
|
private boolean isPhysicallyPhased(Collection<Genotype> genotypes) {
|
||||||
for ( Genotype g : genotypes ) {
|
for (Genotype g : genotypes) {
|
||||||
if ( g.isHet() && g.hasAttribute("PQ") )
|
if (g.isHet() && g.hasAttribute(ReadBackedPhasingWalker.PQ_KEY))
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -165,9 +124,9 @@ public class PhasingEval extends RodWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void onTraversalDone(Integer sum) {
|
public void onTraversalDone(Integer sum) {
|
||||||
if ( analysis == Analysis.PHASING_BY_AC ) {
|
if (analysis == Analysis.PHASING_BY_AC) {
|
||||||
out.println(Utils.join("\t", Arrays.asList("ac", "nhets", "nhetphased")));
|
out.println(Utils.join("\t", Arrays.asList("ac", "nhets", "nhetphased")));
|
||||||
for ( PhasingByAC pac : phasingByACs ) {
|
for (PhasingByAC pac : phasingByACs) {
|
||||||
out.printf("%d\t%d\t%d%n", pac.myAC, pac.nHets, pac.nHetsPhased);
|
out.printf("%d\t%d\t%d%n", pac.myAC, pac.nHets, pac.nHetsPhased);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue