Updated haplotype calculator to correctly terminate haploptypes RIGHT BEFORE an unphased het

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5252 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
fromer 2011-02-16 17:10:01 +00:00
parent c0a4af3809
commit b304ced801
1 changed files with 22 additions and 21 deletions

View File

@ -31,8 +31,6 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker; import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker;
@ -56,10 +54,10 @@ public class CalcFullHaplotypesWalker extends RodWalker<Integer, Integer> {
@Output(doc = "File to which results should be written", required = true) @Output(doc = "File to which results should be written", required = true)
protected PrintStream out; protected PrintStream out;
@Argument(doc="sample to emit", required = false) @Argument(doc = "sample to emit", required = false)
protected String sample = null; protected String sample = null;
@Argument(doc="only include physically-phased results", required = false) @Argument(doc = "only include physically-phased results", required = false)
protected boolean requirePQ = false; protected boolean requirePQ = false;
public void initialize() { public void initialize() {
@ -93,18 +91,23 @@ public class CalcFullHaplotypesWalker extends RodWalker<Integer, Integer> {
GenomeLoc curLocus = ref.getLocus(); GenomeLoc curLocus = ref.getLocus();
outputDoneHaplotypes(curLocus); outputDoneHaplotypes(curLocus);
// Extend the haplotypes to include this position: int curPosition = curLocus.getStop();
int prevPosition = curPosition - 1;
// Extend the haplotypes to include up to this position (BUT EXCLUSIVE OF THIS POSITION):
for (Map.Entry<String, Haplotype> sampleHapEntry : waitingHaplotypes.entrySet()) { for (Map.Entry<String, Haplotype> sampleHapEntry : waitingHaplotypes.entrySet()) {
Haplotype waitingHaplotype = sampleHapEntry.getValue(); Haplotype waitingHaplotype = sampleHapEntry.getValue();
if (waitingHaplotype == null) {// changed to a new contig: if (waitingHaplotype == null) {// changed to a new contig:
// Set the new haplotype to extend from [1, curLocus] // Set the new haplotype to extend from [1, prevPosition]
GenomeLoc startInterval = getToolkit().getGenomeLocParser().parseGenomeLoc(curLocus.getContig(), 1, curLocus.getStop()); if (prevPosition >= 1) {
waitingHaplotype = new Haplotype(startInterval, sampleHapEntry.getKey()); GenomeLoc startInterval = getToolkit().getGenomeLocParser().parseGenomeLoc(curLocus.getContig(), 1, prevPosition);
sampleHapEntry.setValue(waitingHaplotype); waitingHaplotype = new Haplotype(startInterval, sampleHapEntry.getKey());
sampleHapEntry.setValue(waitingHaplotype);
}
} }
else else
waitingHaplotype.extend(curLocus.getStop()); waitingHaplotype.extend(prevPosition);
} }
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation()); Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
@ -123,22 +126,20 @@ public class CalcFullHaplotypesWalker extends RodWalker<Integer, Integer> {
Haplotype sampleHap = waitingHaplotypes.get(sample); Haplotype sampleHap = waitingHaplotypes.get(sample);
if (sampleHap == null) if (sampleHap == null)
throw new ReviewedStingException("EVERY sample should have a haplotype [by code above and getToolkit().getSamples()]"); throw new ReviewedStingException("EVERY sample should have a haplotype [by code above and getToolkit().getSamples()]");
sampleHap.incrementHetCount();
// Terminate the haplotype here: // Terminate the haplotype before here:
if (!gt.isPhased() || (requirePQ && !gt.hasAttribute(ReadBackedPhasingWalker.PQ_KEY))) { if (!gt.isPhased() || (requirePQ && !gt.hasAttribute(ReadBackedPhasingWalker.PQ_KEY))) {
outputHaplotype(sampleHap); outputHaplotype(sampleHap);
// Start a new haplotype from the next position [if it exists]: // Start a new haplotype from the current position:
Haplotype nextHaplotype = null; sampleHap = new Haplotype(curLocus, sample);
int startNext = sampleHap.interval.getStop() + 1; waitingHaplotypes.put(sample, sampleHap);
if (startNext <= getContigLength(curLocus.getContig())) {
GenomeLoc nextInterval = getToolkit().getGenomeLocParser().parseGenomeLoc(curLocus.getContig(), startNext, startNext);
nextHaplotype = new Haplotype(nextInterval, sample);
}
waitingHaplotypes.put(sample, nextHaplotype);
} }
else {
sampleHap.extend(curPosition);
}
sampleHap.incrementHetCount();
} }
} }
} }