diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/CalcFullHaplotypesWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/CalcFullHaplotypesWalker.java new file mode 100755 index 000000000..be136b7b5 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/CalcFullHaplotypesWalker.java @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.oneoffprojects.walkers.phasing; + +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broad.tribble.vcf.VCFHeader; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.sample.Sample; +import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.PrintStream; +import java.util.*; + +import static org.broadinstitute.sting.utils.vcf.VCFUtils.getVCFHeadersFromRods; + +/** + * Walks along all variant ROD loci and verifies the phasing from the reads for user-defined pairs of sites. + */ +@Allows(value = {DataSource.REFERENCE}) +@Requires(value = {DataSource.REFERENCE}) + +public class CalcFullHaplotypesWalker extends RodWalker { + private Map waitingHaplotypes = null; + + @Output + protected PrintStream out; + + public void initialize() { + this.waitingHaplotypes = new HashMap(); + + Map rodNameToHeader = getVCFHeadersFromRods(getToolkit(), null); + for (VCFHeader header : rodNameToHeader.values()) { + for (String sample : header.getGenotypeSamples()) + waitingHaplotypes.put(sample, null); + } + } + + public boolean generateExtendedEvents() { + return false; + } + + public Integer reduceInit() { + return 0; + } + + /** + * @param tracker the meta-data tracker + * @param ref the reference base + * @param context the context for the given locus + * @return statistics of and list of all phased VariantContexts and their base pileup that have gone out of cacheWindow range. + */ + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if (tracker == null) + return null; + + GenomeLoc curLocus = ref.getLocus(); + outputDoneHaplotypes(curLocus); + + // Extend the haplotypes to include this position: + for (Map.Entry sampleHapEntry : waitingHaplotypes.entrySet()) { + Haplotype waitingHaplotype = sampleHapEntry.getValue(); + + if (waitingHaplotype == null) {// changed to a new contig: + // Set the new haplotype to extend from [1, curLocus] + GenomeLoc startInterval = getToolkit().getGenomeLocParser().parseGenomeLoc(curLocus.getContig(), 1, curLocus.getStop()); + waitingHaplotype = new Haplotype(startInterval, sampleHapEntry.getKey()); + sampleHapEntry.setValue(waitingHaplotype); + } + else + waitingHaplotype.extend(curLocus.getStop()); + } + + Collection vcs = tracker.getAllVariantContexts(ref, context.getLocation()); + for (VariantContext vc : vcs) { + if (vc.isFiltered()) + continue; + + for (Map.Entry sampleGtEntry : vc.getGenotypes().entrySet()) { + String sample = sampleGtEntry.getKey(); + Genotype gt = sampleGtEntry.getValue(); + + if (gt.isHet()) { + Haplotype sampleHap = waitingHaplotypes.get(sample); + if (sampleHap == null) + throw new ReviewedStingException("EVERY sample should have a haplotype [by code above and getToolkit().getSamples()]"); + sampleHap.incrementHetCount(); + + if (!gt.isPhased()) { // Terminate the haplotype here: + outputHaplotype(sampleHap); + + // Start a new haplotype from the next position [if it exists]: + Haplotype nextHaplotype = null; + int startNext = sampleHap.interval.getStop() + 1; + if (startNext <= getContigLength(curLocus.getContig())) { + GenomeLoc nextInterval = getToolkit().getGenomeLocParser().parseGenomeLoc(curLocus.getContig(), startNext, startNext); + nextHaplotype = new Haplotype(nextInterval, sample); + } + + waitingHaplotypes.put(sample, nextHaplotype); + } + } + } + } + + return 1; + } + + public Integer reduce(Integer addIn, Integer runningCount) { + if (addIn == null) + addIn = 0; + + return runningCount + addIn; + } + + private void outputDoneHaplotypes(GenomeLoc curLocus) { + for (Map.Entry sampleHapEntry : waitingHaplotypes.entrySet()) { + Haplotype waitingHaplotype = sampleHapEntry.getValue(); + + if (waitingHaplotype != null) { + if (curLocus == null || !waitingHaplotype.interval.onSameContig(curLocus)) { + sampleHapEntry.setValue(null); + + // Set the output haplotype to terminate at the end of its contig: + int contigLength = getContigLength(waitingHaplotype.interval.getContig()); + waitingHaplotype.extend(contigLength); + outputHaplotype(waitingHaplotype); + } + } + } + } + + private int getContigLength(String contig) { + return getToolkit().getGenomeLocParser().getContigInfo(contig).getSequenceLength(); + } + + private void outputHaplotype(Haplotype h) { + out.println(h); + } + + /** + * @param result the number of reads and VariantContexts seen. + */ + public void onTraversalDone(Integer result) { + outputDoneHaplotypes(null); + + System.out.println("map was called " + result + " times."); + } + + private class Haplotype { + public GenomeLoc interval; + public String sample; + public int hetCount; + + public Haplotype(GenomeLoc interval, String sample) { + this.interval = interval; + this.sample = sample; + this.hetCount = 0; + } + + public void extend(int stop) { + if (stop > interval.getStop()) + interval = getToolkit().getGenomeLocParser().parseGenomeLoc(interval.getContig(), interval.getStart(), stop); + } + + public void incrementHetCount() { + hetCount++; + } + + public String toString() { + return sample + "\t" + interval.toString() + "\t" + interval.size() + "\t" + hetCount; + } + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/phasing/ComparePhasingToTrioPhasingNoRecombinationWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/ComparePhasingToTrioPhasingNoRecombinationWalker.java similarity index 99% rename from java/src/org/broadinstitute/sting/oneoffprojects/phasing/ComparePhasingToTrioPhasingNoRecombinationWalker.java rename to java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/ComparePhasingToTrioPhasingNoRecombinationWalker.java index a5beda8d9..6c98726b8 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/phasing/ComparePhasingToTrioPhasingNoRecombinationWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/ComparePhasingToTrioPhasingNoRecombinationWalker.java @@ -22,7 +22,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.oneoffprojects.phasing; +package org.broadinstitute.sting.oneoffprojects.walkers.phasing; import org.broad.tribble.util.variantcontext.Allele; import org.broad.tribble.util.variantcontext.Genotype; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/phasing/CountHetPhasingInIntervalWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/CountHetPhasingInIntervalWalker.java similarity index 99% rename from java/src/org/broadinstitute/sting/oneoffprojects/phasing/CountHetPhasingInIntervalWalker.java rename to java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/CountHetPhasingInIntervalWalker.java index 106817fb8..e164e3d38 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/phasing/CountHetPhasingInIntervalWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/CountHetPhasingInIntervalWalker.java @@ -22,7 +22,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.oneoffprojects.phasing; +package org.broadinstitute.sting.oneoffprojects.walkers.phasing; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.VariantContext; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/PhasingEval.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/PhasingEval.java similarity index 99% rename from java/src/org/broadinstitute/sting/oneoffprojects/walkers/PhasingEval.java rename to java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/PhasingEval.java index 9f220db17..dd86f2a31 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/PhasingEval.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/PhasingEval.java @@ -22,7 +22,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.oneoffprojects.walkers; +package org.broadinstitute.sting.oneoffprojects.walkers.phasing; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.VariantContext;