diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergePhasedSegregatingPolymorphismsToMNPvcfWriter.java b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergePhasedSegregatingPolymorphismsToMNPvcfWriter.java new file mode 100644 index 000000000..a10a95e94 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergePhasedSegregatingPolymorphismsToMNPvcfWriter.java @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.phasing; + +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.picard.reference.ReferenceSequenceFile; +import org.apache.log4j.Logger; +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFWriter; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.util.LinkedList; +import java.util.List; + +// Streams in VariantContext objects and streams out VariantContexts produced by merging phased segregating polymorphisms into MNP VariantContexts +public class MergePhasedSegregatingPolymorphismsToMNPvcfWriter implements VCFWriter { + private VCFWriter innerWriter; + + private ReferenceSequenceFile referenceFileForMNPmerging; + private int maxGenomicDistanceForMNP; + + private VCFRecord vcfrWaitingToMerge; + private List filteredVcfrList; + private int numMergedRecords; + + private Logger logger; + + private static class VCFRecord { + public VariantContext vc; + public byte refBase; + + public VCFRecord(VariantContext vc, byte refBase) { + this.vc = vc; + this.refBase = refBase; + } + } + + public MergePhasedSegregatingPolymorphismsToMNPvcfWriter(VCFWriter innerWriter, File referenceFile, int maxGenomicDistanceForMNP, Logger logger) { + this.innerWriter = innerWriter; + this.referenceFileForMNPmerging = new IndexedFastaSequenceFile(referenceFile); + this.maxGenomicDistanceForMNP = maxGenomicDistanceForMNP; + this.vcfrWaitingToMerge = null; + this.filteredVcfrList = new LinkedList(); + this.numMergedRecords = 0; + this.logger = logger; + } + + public void writeHeader(VCFHeader header) { + innerWriter.writeHeader(header); + } + + public void close() { + flush(); + innerWriter.close(); + } + + public void flush() { + stopWaitingToMerge(); + innerWriter.flush(); + } + + public void add(VariantContext vc, byte refBase) { + logger.debug("Next VC input = " + VariantContextUtils.getLocation(vc)); + boolean curVcIsNotFiltered = vc.isNotFiltered(); + + if (vcfrWaitingToMerge == null) { + logger.debug("NOT Waiting to merge..."); + + if (!filteredVcfrList.isEmpty()) + throw new ReviewedStingException("filteredVcfrList should be empty if not waiting to merge a vc!"); + + if (curVcIsNotFiltered) { // still need to wait before can release vc + logger.debug("Waiting for new variant " + VariantContextUtils.getLocation(vc)); + vcfrWaitingToMerge = new VCFRecord(vc, refBase); + } + else { + logger.debug("DIRECTLY output " + VariantContextUtils.getLocation(vc)); + innerWriter.add(vc, refBase); + } + } + else { // waiting to merge vcfrWaitingToMerge + logger.debug("Waiting to merge " + VariantContextUtils.getLocation(vcfrWaitingToMerge.vc)); + + if (!curVcIsNotFiltered) { + logger.debug("Caching unprocessed output " + VariantContextUtils.getLocation(vc)); + filteredVcfrList.add(new VCFRecord(vc, refBase)); + } + else { // waiting to merge vcfrWaitingToMerge, and curVcIsNotFiltered. So, attempt to merge them: + boolean mergedRecords = false; + if (minDistance(vcfrWaitingToMerge.vc, vc) <= maxGenomicDistanceForMNP) { + VariantContext mergedVc = VariantContextUtils.mergeIntoMNP(vcfrWaitingToMerge.vc, vc, referenceFileForMNPmerging); + if (mergedVc != null) { + mergedRecords = true; + vcfrWaitingToMerge = new VCFRecord(mergedVc, vcfrWaitingToMerge.refBase); + numMergedRecords++; + } + } + if (!mergedRecords) { + stopWaitingToMerge(); + vcfrWaitingToMerge = new VCFRecord(vc, refBase); + } + logger.debug("Merged? = " + mergedRecords); + } + } + } + + private void stopWaitingToMerge() { + if (vcfrWaitingToMerge == null) { + if (!filteredVcfrList.isEmpty()) + throw new ReviewedStingException("filteredVcfrList should be empty if not waiting to merge a vc!"); + return; + } + + innerWriter.add(vcfrWaitingToMerge.vc, vcfrWaitingToMerge.refBase); + vcfrWaitingToMerge = null; + + for (VCFRecord vcfr : filteredVcfrList) + innerWriter.add(vcfr.vc, vcfr.refBase); + filteredVcfrList.clear(); + } + + public int getNumMergedRecords() { + return numMergedRecords; + } + + public static int minDistance(VariantContext vc1, VariantContext vc2) { + return VariantContextUtils.getLocation(vc1).minDistance(VariantContextUtils.getLocation(vc2)); + } + + /** + * Gets a string representation of this object. + * @return + */ + @Override + public String toString() { + return getClass().getName(); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingPolymorphismsWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingPolymorphismsWalker.java new file mode 100644 index 000000000..5bce26c80 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingPolymorphismsWalker.java @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.phasing; + +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broad.tribble.vcf.*; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.utils.vcf.VCFUtils; + +import java.util.*; + +import static org.broadinstitute.sting.utils.vcf.VCFUtils.getVCFHeadersFromRods; + +/** + * Walks along all variant ROD loci, and merges consecutive sites if they segregate in all samples in the ROD. + */ +@Allows(value = {DataSource.REFERENCE}) +@Requires(value = {DataSource.REFERENCE}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class)) +@By(DataSource.REFERENCE_ORDERED_DATA) + +public class MergeSegregatingPolymorphismsWalker extends RodWalker { + + @Output(doc = "File to which variants should be written", required = true) + protected VCFWriter writer = null; + private MergePhasedSegregatingPolymorphismsToMNPvcfWriter vcMergerWriter = null; + + @Argument(fullName = "maxGenomicDistanceForMNP", shortName = "maxDistMNP", doc = "The maximum reference-genome distance between consecutive heterozygous sites to permit merging phased VCF records into a MNP record; [default:1]", required = false) + protected int maxGenomicDistanceForMNP = 1; + + private LinkedList rodNames = null; + + public void initialize() { + rodNames = new LinkedList(); + rodNames.add("variant"); + + initializeVcfWriter(); + } + + private void initializeVcfWriter() { + vcMergerWriter = new MergePhasedSegregatingPolymorphismsToMNPvcfWriter(writer, getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, logger); + writer = null; // so it can't be accessed directly [i.e., not through vcMergerWriter] + + // setup the header fields: + Set hInfo = new HashSet(); + hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); + hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); + + Map rodNameToHeader = getVCFHeadersFromRods(getToolkit(), rodNames); + vcMergerWriter.writeHeader(new VCFHeader(hInfo, new TreeSet(rodNameToHeader.get(rodNames.get(0)).getGenotypeSamples()))); + } + + public boolean generateExtendedEvents() { + return false; + } + + public Integer reduceInit() { + return 0; + } + + /** + * For each site, send it to be (possibly) merged with previously observed sites. + * + * @param tracker the meta-data tracker + * @param ref the reference base + * @param context the context for the given locus + * @return dummy Integer + */ + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if (tracker == null) + return null; + + boolean requireStartHere = true; // only see each VariantContext once + boolean takeFirstOnly = false; // take as many entries as the VCF file has + for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, null, context.getLocation(), requireStartHere, takeFirstOnly)) + writeVCF(vc); + + return 0; + } + + private void writeVCF(VariantContext vc) { + byte refBase; + if (!vc.isIndel()) { + Allele varAllele = vc.getReference(); + refBase = SNPallelePair.getSingleBase(varAllele); + } + else { + refBase = vc.getReferenceBaseForIndel(); + } + + vcMergerWriter.add(vc, refBase); + } + + public Integer reduce(Integer result, Integer total) { + if (result == null) + return total; + + return total + result; + } + + /** + * Release any VariantContexts not yet processed. + * + * @param result Empty for now... + */ + public void onTraversalDone(Integer result) { + vcMergerWriter.flush(); + System.out.println("Number of records merged: " + vcMergerWriter.getNumMergedRecords()); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index 78a74b2e1..852da9322 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -39,6 +39,7 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broad.tribble.vcf.SortingVCFWriter; import org.broadinstitute.sting.utils.vcf.VCFUtils; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -82,6 +83,8 @@ public class ReadBackedPhasingWalker extends RodWalker unphasedSiteQueue = null; private DoublyLinkedList partiallyPhasedSites = null; // the phased VCs to be emitted, and the alignment bases at these positions @@ -96,11 +99,14 @@ public class ReadBackedPhasingWalker extends RodWalker mostDownstreamLocusReached is <= 2 * cacheWindow + + Therefore, can write the filtered records located at mostDownstreamLocusReached (if any) to SortingVCFWriter, even though partiallyPhasedSites.peek() has not yet been written. + + But, NOTE that map() is careful to pass out a list of records to be written that FIRST includes any records discarded due to having reached mostDownstreamLocusReached, + and only THEN records located at mostDownstreamLocusReached. The opposite order in map() would violate the startDistance limits imposed when contracting SortingVCFWriter with (2 * cacheWindow). + */ + writer = new SortingVCFWriter(writer, 2 * cacheWindow); + // setup the header fields: Set hInfo = new HashSet(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); @@ -152,18 +172,23 @@ public class ReadBackedPhasingWalker extends RodWalker unprocessedList = new LinkedList(); boolean requireStartHere = true; // only see each VariantContext once boolean takeFirstOnly = false; // take as many entries as the VCF file has for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, null, context.getLocation(), requireStartHere, takeFirstOnly)) { - boolean processVariant = true; - if (!isUnfilteredBiallelicSNP(vc)) - processVariant = false; - - VariantAndReads vr = new VariantAndReads(vc, context, processVariant); - unphasedSiteQueue.add(vr); - logger.debug("Added variant to queue = " + VariantContextUtils.getLocation(vr.variant)); + if (ReadBackedPhasingWalker.processVariantInPhasing(vc)) { + VariantAndReads vr = new VariantAndReads(vc, context); + unphasedSiteQueue.add(vr); + logger.debug("Added variant to queue = " + VariantContextUtils.getLocation(vr.variant)); + } + else { + unprocessedList.add(vc); // Finished with the unprocessed variant, and writer can enforce sorting on-the-fly + } int numReads = 0; if (context.hasBasePileup()) { @@ -175,30 +200,28 @@ public class ReadBackedPhasingWalker extends RodWalker phasedList = processQueue(phaseStats, false); - return new PhasingStatsAndOutput(phaseStats, phasedList); + List completedList = processQueue(phaseStats, false); + completedList.addAll(unprocessedList); // add unprocessedList on to the END of completedList so that the processQueue() results, which are necessarily more upstream, are first! + + return new PhasingStatsAndOutput(phaseStats, completedList); } private List processQueue(PhasingStats phaseStats, boolean processAll) { List oldPhasedList = new LinkedList(); if (!unphasedSiteQueue.isEmpty()) { - GenomeLoc lastLocus = null; - if (!processAll) - lastLocus = VariantContextUtils.getLocation(unphasedSiteQueue.peekLast().variant); - while (!unphasedSiteQueue.isEmpty()) { if (!processAll) { // otherwise, phase until the end of unphasedSiteQueue VariantContext nextToPhaseVc = unphasedSiteQueue.peek().variant; - if (isInWindowRange(lastLocus, VariantContextUtils.getLocation(nextToPhaseVc))) { - /* lastLocus is still not far enough ahead of nextToPhaseVc to have all phasing information for nextToPhaseVc + if (startDistancesAreInWindowRange(mostDownstreamLocusReached, VariantContextUtils.getLocation(nextToPhaseVc))) { + /* mostDownstreamLocusReached is still not far enough ahead of nextToPhaseVc to have all phasing information for nextToPhaseVc (note that we ASSUME that the VCF is ordered by ). - Note that this will always leave at least one entry (the last one), since lastLocus is in range of itself. + Note that this will always leave at least one entry (the last one), since mostDownstreamLocusReached is in range of itself. */ break; } - // Already saw all variant positions within cacheWindow distance ahead of vc (on its contig) + // Already saw all variant positions within cacheWindow startDistance ahead of vc (on its contig) } // Update partiallyPhasedSites before it's used in phaseSite: oldPhasedList.addAll(discardIrrelevantPhasedSites()); @@ -213,6 +236,7 @@ public class ReadBackedPhasingWalker extends RodWalker handledGtAttribs = new HashMap(handledGt.getAttributes()); - handledGtAttribs.put(PQ_KEY, pr.phaseQuality); - Genotype phasedHomGt = new Genotype(handledGt.getSampleName(), handledGt.getAlleles(), handledGt.getNegLog10PError(), handledGt.getFilters(), handledGtAttribs, genotypesArePhased); - interiorUvc.setGenotype(samp, phasedHomGt); - } + if (genotypesArePhased) { + Map handledGtAttribs = new HashMap(handledGt.getAttributes()); + handledGtAttribs.put(PQ_KEY, pr.phaseQuality); + Genotype phasedHomGt = new Genotype(handledGt.getSampleName(), handledGt.getAlleles(), handledGt.getNegLog10PError(), handledGt.getFilters(), handledGtAttribs, genotypesArePhased); + interiorUvc.setGenotype(samp, phasedHomGt); } } if (statsWriter != null) - statsWriter.addStat(samp, VariantContextUtils.getLocation(vc), distance(prevUvc, vc), pr.phaseQuality, phaseWindow.readsAtHetSites.size(), phaseWindow.hetGenotypes.length); + statsWriter.addStat(samp, VariantContextUtils.getLocation(vc), startDistance(prevUvc, vc), pr.phaseQuality, phaseWindow.readsAtHetSites.size(), phaseWindow.hetGenotypes.length); PhaseCounts sampPhaseCounts = samplePhaseStats.get(samp); if (sampPhaseCounts == null) { @@ -343,7 +362,7 @@ public class ReadBackedPhasingWalker extends RodWalker 0; } - // ASSUMES that: isCalledDiploidGenotype(gt) && gt.isHet() [gt = vr.unfinishedVariant.getGenotype(sample)] + // ASSUMES that: isUnfilteredCalledDiploidGenotype(vrGt) && vrGt.isHet() [vrGt = vr.variant.getGenotype(sample)] + public PhasingWindow(VariantAndReads vr, String sample) { List listHetGenotypes = new LinkedList(); @@ -381,17 +401,15 @@ public class ReadBackedPhasingWalker extends RodWalker phasedIt = partiallyPhasedSites.iterator(); while (phasedIt.hasNext()) { UnfinishedVariantAndReads phasedVr = phasedIt.next(); - if (phasedVr.processVariant) { - Genotype gt = phasedVr.unfinishedVariant.getGenotype(sample); - if (gt == null || !isCalledDiploidGenotype(gt)) { // constructed haplotype must start AFTER this "break" - listHetGenotypes.clear(); // clear out any history - } - else if (gt.isHet()) { - GenotypeAndReadBases grb = new GenotypeAndReadBases(gt, phasedVr.sampleReadBases.get(sample), phasedVr.unfinishedVariant.getLocation()); - listHetGenotypes.add(grb); - logger.debug("Using UPSTREAM het site = " + grb.loc); - prevHetAndInteriorIt = phasedIt.clone(); - } + Genotype gt = phasedVr.unfinishedVariant.getGenotype(sample); + if (gt == null || !isUnfilteredCalledDiploidGenotype(gt)) { // constructed haplotype must start AFTER this "break" + listHetGenotypes.clear(); // clear out any history + } + else if (gt.isHet()) { + GenotypeAndReadBases grb = new GenotypeAndReadBases(gt, phasedVr.sampleReadBases.get(sample), phasedVr.unfinishedVariant.getLocation()); + listHetGenotypes.add(grb); + logger.debug("Using UPSTREAM het site = " + grb.loc); + prevHetAndInteriorIt = phasedIt.clone(); } } phasingSiteIndex = listHetGenotypes.size(); @@ -410,18 +428,16 @@ public class ReadBackedPhasingWalker extends RodWalker 2!"); @@ -877,54 +895,21 @@ public class ReadBackedPhasingWalker extends RodWalker finalList = processQueue(result, true); - writeVarContList(finalList); + List finalList = processQueue(result, true); // process all remaining data + writeVcList(finalList); + writer.flush(); + if (statsWriter != null) statsWriter.close(); @@ -954,30 +941,45 @@ public class ReadBackedPhasingWalker extends RodWalker varContList) { + private void writeVcList(List varContList) { for (VariantContext vc : varContList) writeVCF(vc); } + private void writeVCF(VariantContext vc) { + byte refBase; + if (!vc.isIndel()) { + Allele varAllele = vc.getReference(); + refBase = SNPallelePair.getSingleBase(varAllele); + } + else { + refBase = vc.getReferenceBaseForIndel(); + } + + writer.add(vc, refBase); + } + + public static boolean processVariantInPhasing(VariantContext vc) { + return isUnfilteredBiallelicSNP(vc); + } + /* - Inner classes: - */ + Inner classes: + */ + private class VariantAndReads { public VariantContext variant; public HashMap sampleReadBases; - public boolean processVariant; - public VariantAndReads(VariantContext variant, HashMap sampleReadBases, boolean processVariant) { + public VariantAndReads(VariantContext variant, HashMap sampleReadBases) { this.variant = variant; this.sampleReadBases = sampleReadBases; - this.processVariant = processVariant; } - public VariantAndReads(VariantContext variant, AlignmentContext alignment, boolean processVariant) { + public VariantAndReads(VariantContext variant, AlignmentContext alignment) { this.variant = variant; this.sampleReadBases = new HashMap(); - this.processVariant = processVariant; if (alignment != null) { ReadBackedPileup pileup = null; @@ -1006,6 +1008,62 @@ public class ReadBackedPhasingWalker extends RodWalker sampleReadBases; + + public UnfinishedVariantAndReads(VariantAndReads vr) { + this.unfinishedVariant = new UnfinishedVariantContext(vr.variant); + this.sampleReadBases = vr.sampleReadBases; + } + } + + // COULD replace with MutableVariantContext if it worked [didn't throw exceptions when trying to call its set() methods]... + + private static class UnfinishedVariantContext { + private String name; + private String contig; + private long start; + private long stop; + private Collection alleles; + private Map genotypes; + private double negLog10PError; + private Set filters; + private Map attributes; + + public UnfinishedVariantContext(VariantContext vc) { + this.name = vc.getName(); + this.contig = vc.getChr(); + this.start = vc.getStart(); + this.stop = vc.getEnd(); + this.alleles = vc.getAlleles(); + this.genotypes = new HashMap(vc.getGenotypes()); // since vc.getGenotypes() is unmodifiable + this.negLog10PError = vc.getNegLog10PError(); + this.filters = vc.getFilters(); + this.attributes = new HashMap(vc.getAttributes()); + } + + public VariantContext toVariantContext() { + return new VariantContext(name, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes); + } + + public GenomeLoc getLocation() { + return GenomeLocParser.createGenomeLoc(contig, start, stop); + } + + public Genotype getGenotype(String sample) { + return genotypes.get(sample); + } + + public void setGenotype(String sample, Genotype newGt) { + genotypes.put(sample, newGt); + } + + public void setPhasingInconsistent() { + attributes.put(PHASING_INCONSISTENT_KEY, true); + } + } + private static String toStringGRL(List grbList) { boolean first = true; StringBuilder sb = new StringBuilder(); @@ -1034,72 +1092,6 @@ public class ReadBackedPhasingWalker extends RodWalker sampleReadBases; - public boolean processVariant; - - public UnfinishedVariantAndReads(UnfinishedVariantContext unfinishedVariant, HashMap sampleReadBases, boolean processVariant) { - this.unfinishedVariant = unfinishedVariant; - this.sampleReadBases = sampleReadBases; - this.processVariant = processVariant; - } - - public UnfinishedVariantAndReads(VariantAndReads vr) { - this.unfinishedVariant = new UnfinishedVariantContext(vr.variant); - this.sampleReadBases = vr.sampleReadBases; - this.processVariant = vr.processVariant; - } - } - - // COULD replace with MutableVariantContext if it worked [didn't throw exceptions when trying to call its set() methods]... - private static class UnfinishedVariantContext { - private String name; - private String contig; - private long start; - private long stop; - private Collection alleles; - private Map genotypes; - private double negLog10PError; - private Set filters; - private Map attributes; - - public UnfinishedVariantContext(VariantContext vc) { - this.name = vc.getName(); - this.contig = vc.getChr(); - this.start = vc.getStart(); - this.stop = vc.getEnd(); - this.alleles = vc.getAlleles(); - this.genotypes = new HashMap(vc.getGenotypes()); // since vc.getGenotypes() is unmodifiable - this.negLog10PError = vc.getNegLog10PError(); - this.filters = new HashSet(vc.getFilters()); - this.attributes = new HashMap(vc.getAttributes()); - } - - public VariantContext toVariantContext() { - return new VariantContext(name, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes); - } - - public GenomeLoc getLocation() { - return GenomeLocParser.createGenomeLoc(contig, start, stop); - } - - public Genotype getGenotype(String sample) { - return genotypes.get(sample); - } - - public void setGenotype(String sample, Genotype newGt) { - genotypes.put(sample, newGt); - } - - public void setPhasingInconsistent(boolean addAsFilter) { - attributes.put(PHASING_INCONSISTENT_KEY, true); - - if (addAsFilter) - filters.add(PHASING_INCONSISTENT_KEY); - } - } - private static class ReadBase { public String readName; public byte base; @@ -1235,6 +1227,7 @@ public class ReadBackedPhasingWalker extends RodWalker= 10", spec); } @@ -35,7 +35,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) + " -L chr20:1232503-1332503", 1, - Arrays.asList("44b711c53d435015d32039114860ff0d")); + Arrays.asList("2342c346c4a586f6d18a432c04a451a9")); executeTest("MAX 10 het sites [TEST TWO]; require PQ >= 10", spec); } @@ -45,7 +45,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 2, 30) + " -L chr20:332341-382503", 1, - Arrays.asList("71eb6bfc7568897f023200d13a60eca0")); + Arrays.asList("57e35f44652903493f6ff3fef6b9b9ed")); executeTest("MAX 2 het sites [TEST THREE]; require PQ >= 30", spec); } @@ -55,7 +55,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 5, 100) + " -L chr20:332341-382503", 1, - Arrays.asList("b09d2a1415045b963292f9e04675111b")); + Arrays.asList("1520496b65bc4b4b348cacef9660b2d0")); executeTest("MAX 5 het sites [TEST FOUR]; require PQ >= 100", spec); } @@ -65,7 +65,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 1000, 7, 10) + " -L chr20:332341-482503", 1, - Arrays.asList("7639dd25101081288520b343bfe1fa61")); + Arrays.asList("202760f43e3a2852195688b562c360fd")); executeTest("MAX 7 het sites [TEST FIVE]; require PQ >= 10; cacheWindow = 1000", spec); } @@ -75,7 +75,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) + " -L chr20:652810-681757", 1, - Arrays.asList("5c60272590daa6cdbafac234137528ef")); + Arrays.asList("353c260c81f287bc6c2734f69cb63d39")); executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec); }