diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/BaseArray.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/BaseArray.java index 5a32479ab..54838b55e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/BaseArray.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/BaseArray.java @@ -29,7 +29,7 @@ import java.util.Arrays; import java.util.LinkedList; import java.util.List; -public abstract class BaseArray implements Comparable { +abstract class BaseArray implements Comparable { protected Byte[] bases; public BaseArray(byte[] bases) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/CardinalityCounter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/CardinalityCounter.java index 06f4d3ab2..45a1ab04c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/CardinalityCounter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/CardinalityCounter.java @@ -30,7 +30,7 @@ import java.util.Iterator; /* * CardinalityCounter object allows user to iterate over all assignment of arbitrary-cardinality variables. */ -public class CardinalityCounter implements Iterator, Iterable { +class CardinalityCounter implements Iterator, Iterable { private int[] cards; private int[] valList; private boolean hasNext; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/CloneableIteratorLinkedList.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/CloneableIteratorLinkedList.java index 4ec940f4f..e88a7104d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/CloneableIteratorLinkedList.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/CloneableIteratorLinkedList.java @@ -30,7 +30,7 @@ import java.util.NoSuchElementException; It is UNIQUE in the fact that its iterator (BidirectionalIterator) can be cloned to save the current pointer for a later time (while the original iterator can continue to iterate). */ -public class CloneableIteratorLinkedList { +class CloneableIteratorLinkedList { private CloneableIteratorDoublyLinkedNode first; private CloneableIteratorDoublyLinkedNode last; private int size; diff --git a/public/java/src/org/broadinstitute/sting/utils/DisjointSet.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/DisjointSet.java similarity index 97% rename from public/java/src/org/broadinstitute/sting/utils/DisjointSet.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/DisjointSet.java index 52c18e6d6..c054af5d6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/DisjointSet.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/DisjointSet.java @@ -21,13 +21,13 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils; +package org.broadinstitute.sting.gatk.walkers.phasing; import java.util.Collection; import java.util.Set; import java.util.TreeSet; -public class DisjointSet { +class DisjointSet { private ItemNode[] nodes; public DisjointSet(int numItems) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/Haplotype.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/Haplotype.java index 3c20a311e..61d5a725e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/Haplotype.java @@ -27,7 +27,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.Arrays; -public class Haplotype extends BaseArray implements Cloneable { +class Haplotype extends BaseArray implements Cloneable { public Haplotype(byte[] bases) { super(bases); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java deleted file mode 100644 index 809772c05..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2010, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.phasing; - -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.*; - -import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFromRods; - - -/** - * Walks along all variant ROD loci, and merges consecutive sites if they segregate in all samples in the ROD. - */ -@Allows(value = {DataSource.REFERENCE}) -@Requires(value = {DataSource.REFERENCE}) -@By(DataSource.REFERENCE_ORDERED_DATA) - -public class MergeMNPsWalker extends RodWalker { - - @Output(doc = "File to which variants should be written", required = true) - protected VCFWriter writer = null; - private MergeSegregatingAlternateAllelesVCFWriter vcMergerWriter = null; - - @Argument(fullName = "maxGenomicDistanceForMNP", shortName = "maxDistMNP", doc = "The maximum reference-genome distance between consecutive heterozygous sites to permit merging phased VCF records into a MNP record; [default:1]", required = false) - protected int maxGenomicDistanceForMNP = 1; - - @Input(fullName="variant", shortName = "V", doc="Select variants from this VCF file", required=true) - public RodBinding variants; - - public void initialize() { - initializeVcfWriter(); - } - - private void initializeVcfWriter() { - // false <-> don't take control of writer, since didn't create it: - vcMergerWriter = new MergeSegregatingAlternateAllelesVCFWriter(writer, getToolkit().getGenomeLocParser(), getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, logger, false); - writer = null; // so it can't be accessed directly [i.e., not through vcMergerWriter] - - // setup the header fields: - Set hInfo = new HashSet(); - hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); - hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); - - Map rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getName())); - vcMergerWriter.writeHeader(new VCFHeader(hInfo, new TreeSet(rodNameToHeader.get(variants.getName()).getGenotypeSamples()))); - } - - public boolean generateExtendedEvents() { - return false; - } - - public Integer reduceInit() { - return 0; - } - - /** - * For each site, send it to be (possibly) merged with previously observed sites. - * - * @param tracker the meta-data tracker - * @param ref the reference base - * @param context the context for the given locus - * @return dummy Integer - */ - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if (tracker == null) - return null; - - for (VariantContext vc : tracker.getValues(variants, context.getLocation())) - writeVCF(vc); - - return 0; - } - - private void writeVCF(VariantContext vc) { - WriteVCF.writeVCF(vc, vcMergerWriter, logger); - } - - public Integer reduce(Integer result, Integer total) { - if (result == null) - return total; - - return total + result; - } - - /** - * Release any VariantContexts not yet processed. - * - * @param result Empty for now... - */ - public void onTraversalDone(Integer result) { - vcMergerWriter.close(); - - System.out.println("Number of successive pairs of records: " + vcMergerWriter.getNumRecordsAttemptToMerge()); - System.out.println("Number of potentially merged records (" + vcMergerWriter.getVcMergeRule() + "): " + vcMergerWriter.getNumRecordsSatisfyingMergeRule()); - System.out.println("Number of records merged ("+ vcMergerWriter.getAlleleMergeRule() + "): " + vcMergerWriter.getNumMergedRecords()); - System.out.println(vcMergerWriter.getAltAlleleStats()); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java index 5ae034b0a..b935600b2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010, The Broad Institute + * Copyright (c) 2011, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -44,7 +44,7 @@ import java.util.*; // Streams in VariantContext objects and streams out VariantContexts produced by merging phased segregating polymorphisms into MNP VariantContexts -public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter { +class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter { private VCFWriter innerWriter; private GenomeLocParser genomeLocParser; @@ -52,7 +52,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter { private ReferenceSequenceFile referenceFileForMNPmerging; private VariantContextMergeRule vcMergeRule; - private VariantContextUtils.AlleleMergeRule alleleMergeRule; + private PhasingUtils.AlleleMergeRule alleleMergeRule; private String useSingleSample = null; @@ -71,7 +71,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter { // Should we call innerWriter.close() in close() private boolean takeOwnershipOfInner; - public MergeSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, GenomeLocParser genomeLocParser, File referenceFile, VariantContextMergeRule vcMergeRule, VariantContextUtils.AlleleMergeRule alleleMergeRule, String singleSample, boolean emitOnlyMergedRecords, Logger logger, boolean takeOwnershipOfInner, boolean trackAltAlleleStats) { + public MergeSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, GenomeLocParser genomeLocParser, File referenceFile, VariantContextMergeRule vcMergeRule, PhasingUtils.AlleleMergeRule alleleMergeRule, String singleSample, boolean emitOnlyMergedRecords, Logger logger, boolean takeOwnershipOfInner, boolean trackAltAlleleStats) { this.innerWriter = innerWriter; this.genomeLocParser = genomeLocParser; try { @@ -179,7 +179,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter { boolean mergedRecords = false; if (shouldAttemptToMerge) { numRecordsSatisfyingMergeRule++; - VariantContext mergedVc = VariantContextUtils.mergeIntoMNP(genomeLocParser, vcfrWaitingToMerge.vc, vc, referenceFileForMNPmerging, alleleMergeRule); + VariantContext mergedVc = PhasingUtils.mergeIntoMNP(genomeLocParser, vcfrWaitingToMerge.vc, vc, referenceFileForMNPmerging, alleleMergeRule); if (mergedVc != null) { mergedRecords = true; @@ -218,26 +218,6 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter { filteredVcfrList.clear(); } - public int getNumRecordsAttemptToMerge() { - return numRecordsAttemptToMerge; - } - - public int getNumRecordsSatisfyingMergeRule() { - return numRecordsSatisfyingMergeRule; - } - - public int getNumMergedRecords() { - return numMergedRecords; - } - - public VariantContextMergeRule getVcMergeRule() { - return vcMergeRule; - } - - public VariantContextUtils.AlleleMergeRule getAlleleMergeRule() { - return alleleMergeRule; - } - /** * Gets a string representation of this object. * @@ -248,13 +228,6 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter { return getClass().getName(); } - public String getAltAlleleStats() { - if (altAlleleStats == null) - return ""; - - return "\n" + altAlleleStats.toString(); - } - private static class VCFRecord { public VariantContext vc; public boolean resultedFromMerge; @@ -373,7 +346,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter { if (shouldAttemptToMerge) { aas.numSuccessiveGenotypesAttemptedToBeMerged++; - if (!VariantContextUtils.alleleSegregationIsKnown(gt1, gt2)) { + if (!PhasingUtils.alleleSegregationIsKnown(gt1, gt2)) { aas.segregationUnknown++; logger.debug("Unknown segregation of alleles [not phased] for " + samp + " at " + VariantContextUtils.getLocation(genomeLocParser, vc1) + ", " + VariantContextUtils.getLocation(genomeLocParser, vc2)); } @@ -498,9 +471,9 @@ class DistanceMergeRule extends VariantContextMergeRule { } -class ExistsDoubleAltAlleleMergeRule extends VariantContextUtils.AlleleMergeRule { +class ExistsDoubleAltAlleleMergeRule extends PhasingUtils.AlleleMergeRule { public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2) { - return VariantContextUtils.someSampleHasDoubleNonReferenceAllele(vc1, vc2); + return PhasingUtils.someSampleHasDoubleNonReferenceAllele(vc1, vc2); } public String toString() { @@ -515,7 +488,7 @@ class SegregatingMNPmergeAllelesRule extends ExistsDoubleAltAlleleMergeRule { public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2) { // Must be interesting AND consistent: - return super.allelesShouldBeMerged(vc1, vc2) && VariantContextUtils.doubleAllelesSegregatePerfectlyAmongSamples(vc1, vc2); + return super.allelesShouldBeMerged(vc1, vc2) && PhasingUtils.doubleAllelesSegregatePerfectlyAmongSamples(vc1, vc2); } public String toString() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java deleted file mode 100644 index 96d5c471f..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java +++ /dev/null @@ -1,236 +0,0 @@ -/* - * Copyright (c) 2010, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.phasing; - -import org.broadinstitute.sting.commandline.*; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; - -import java.util.*; - -import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFromRods; - -/** - * Walks along all variant ROD loci, and merges consecutive sites if some sample has segregating alt alleles in the ROD. - */ -@Allows(value = {DataSource.REFERENCE}) -@Requires(value = {DataSource.REFERENCE}) -@By(DataSource.REFERENCE_ORDERED_DATA) - -public class MergeSegregatingAlternateAllelesWalker extends RodWalker { - - @Output(doc = "File to which variants should be written", required = true) - protected VCFWriter writer = null; - private MergeSegregatingAlternateAllelesVCFWriter vcMergerWriter = null; - - @Argument(fullName = "maxGenomicDistance", shortName = "maxDist", doc = "The maximum reference-genome distance between consecutive heterozygous sites to permit merging phased VCF records; [default:1]", required = false) - protected int maxGenomicDistance = 1; - - @Argument(fullName = "useSingleSample", shortName = "useSample", doc = "Only output genotypes for the single sample given; [default:use all samples]", required = false) - protected String useSingleSample = null; - - @Hidden - @Argument(fullName = "emitOnlyMergedRecords", shortName = "emitOnlyMerged", doc = "Only output records that resulted from merging [For DEBUGGING purposes only - DO NOT USE, since it disregards the semantics of '|' as 'phased relative to previous non-filtered VC']; [default:false]", required = false) - protected boolean emitOnlyMergedRecords = false; - - @Argument(fullName = "disablePrintAltAlleleStats", shortName = "noAlleleStats", doc = "Should the print-out of alternate allele statistics be disabled?; [default:false]", required = false) - protected boolean disablePrintAlternateAlleleStatistics = false; - - public final static String IGNORE_REFSEQ = "IGNORE"; - public final static String UNION_REFSEQ = "UNION"; - public final static String INTERSECT_REFSEQ = "INTERSECT"; - - @Argument(fullName = "mergeBasedOnRefSeqAnnotation", shortName = "mergeBasedOnRefSeqAnnotation", doc = "'Should merging be performed if two sites lie on the same RefSeq sequence in the INFO field {" + IGNORE_REFSEQ + ", " + UNION_REFSEQ + ", " + INTERSECT_REFSEQ + "}; [default:" + IGNORE_REFSEQ + "]", required = false) - protected String mergeBasedOnRefSeqAnnotation = IGNORE_REFSEQ; - - @Argument(fullName = "dontRequireSomeSampleHasDoubleAltAllele", shortName = "dontRequireSomeSampleHasDoubleAltAllele", doc = "Should the requirement, that SUCCESSIVE records to be merged have at least one sample with a double alternate allele, be relaxed?; [default:false]", required = false) - protected boolean dontRequireSomeSampleHasDoubleAltAllele = false; - - @Input(fullName="variant", shortName = "V", doc="Select variants from this VCF file", required=true) - public RodBinding variants; - - public void initialize() { - initializeVcfWriter(); - } - - private void initializeVcfWriter() { - GenomeLocParser genomeLocParser = getToolkit().getGenomeLocParser(); - - VariantContextMergeRule vcMergeRule; - if (mergeBasedOnRefSeqAnnotation.equals(IGNORE_REFSEQ)) - vcMergeRule = new DistanceMergeRule(maxGenomicDistance, genomeLocParser); - else - vcMergeRule = new SameGenePlusWithinDistanceMergeRule(maxGenomicDistance, genomeLocParser, mergeBasedOnRefSeqAnnotation); - - VariantContextUtils.AlleleMergeRule alleleMergeRule; - if (dontRequireSomeSampleHasDoubleAltAllele) // if a pair of VariantContext passes the vcMergeRule, then always merge them if there is a trailing prefix of polymorphisms (i.e., upstream polymorphic site): - alleleMergeRule = new PrefixPolymorphismMergeAllelesRule(); - else - alleleMergeRule = new ExistsDoubleAltAlleleMergeRule(); - - // false <-> don't take control of writer, since didn't create it: - vcMergerWriter = new MergeSegregatingAlternateAllelesVCFWriter(writer, genomeLocParser, getToolkit().getArguments().referenceFile, vcMergeRule, alleleMergeRule, useSingleSample, emitOnlyMergedRecords, logger, false, !disablePrintAlternateAlleleStatistics); - writer = null; // so it can't be accessed directly [i.e., not through vcMergerWriter] - - // setup the header fields: - Set hInfo = new HashSet(); - hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); - hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); - - Map rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getName())); - vcMergerWriter.writeHeader(new VCFHeader(hInfo, new TreeSet(rodNameToHeader.get(variants.getName()).getGenotypeSamples()))); - } - - public boolean generateExtendedEvents() { - return false; - } - - public Integer reduceInit() { - return 0; - } - - /** - * For each site, send it to be (possibly) merged with previously observed sites. - * - * @param tracker the meta-data tracker - * @param ref the reference base - * @param context the context for the given locus - * @return dummy Integer - */ - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if (tracker == null) - return null; - - for (VariantContext vc : tracker.getValues(variants, context.getLocation())) - writeVCF(vc); - - return 0; - } - - private void writeVCF(VariantContext vc) { - WriteVCF.writeVCF(vc, vcMergerWriter, logger); - } - - public Integer reduce(Integer result, Integer total) { - if (result == null) - return total; - - return total + result; - } - - /** - * Release any VariantContexts not yet processed. - * - * @param result Empty for now... - */ - public void onTraversalDone(Integer result) { - vcMergerWriter.close(); - - if (useSingleSample != null) - System.out.println("Only considered single sample: " + useSingleSample); - - System.out.println("Number of successive pairs of records: " + vcMergerWriter.getNumRecordsAttemptToMerge()); - System.out.println("Number of potentially merged records (" + vcMergerWriter.getVcMergeRule() + "): " + vcMergerWriter.getNumRecordsSatisfyingMergeRule()); - System.out.println("Number of records merged ("+ vcMergerWriter.getAlleleMergeRule() + "): " + vcMergerWriter.getNumMergedRecords()); - System.out.println(vcMergerWriter.getAltAlleleStats()); - } -} - - -enum MergeBasedOnRefSeqAnnotation { - UNION_WITH_DIST, INTERSECT_WITH_DIST -} - -class SameGenePlusWithinDistanceMergeRule extends DistanceMergeRule { - private MergeBasedOnRefSeqAnnotation mergeBasedOnRefSeqAnnotation; - - public SameGenePlusWithinDistanceMergeRule(int maxGenomicDistanceForMNP, GenomeLocParser genomeLocParser, String mergeBasedOnRefSeqAnnotation) { - super(maxGenomicDistanceForMNP, genomeLocParser); - - if (mergeBasedOnRefSeqAnnotation.equals(MergeSegregatingAlternateAllelesWalker.UNION_REFSEQ)) - this.mergeBasedOnRefSeqAnnotation = MergeBasedOnRefSeqAnnotation.UNION_WITH_DIST; - else if (mergeBasedOnRefSeqAnnotation.equals(MergeSegregatingAlternateAllelesWalker.INTERSECT_REFSEQ)) - this.mergeBasedOnRefSeqAnnotation = MergeBasedOnRefSeqAnnotation.INTERSECT_WITH_DIST; - else - throw new UserException("Must provide " + MergeSegregatingAlternateAllelesWalker.IGNORE_REFSEQ + ", " + MergeSegregatingAlternateAllelesWalker.UNION_REFSEQ + ", or " + MergeSegregatingAlternateAllelesWalker.INTERSECT_REFSEQ + " as argument to mergeBasedOnRefSeqAnnotation!"); - } - - public boolean shouldAttemptToMerge(VariantContext vc1, VariantContext vc2) { - boolean withinDistance = super.shouldAttemptToMerge(vc1, vc2); - - if (mergeBasedOnRefSeqAnnotation == MergeBasedOnRefSeqAnnotation.UNION_WITH_DIST) - return withinDistance || sameGene(vc1, vc2); - else // mergeBasedOnRefSeqAnnotation == MergeBasedOnRefSeqAnnotation.INTERSECT_WITH_DIST - return withinDistance && sameGene(vc1, vc2); - } - - private boolean sameGene(VariantContext vc1, VariantContext vc2) { - Set names_vc1 = RefSeqDataParser.getRefSeqNames(vc1); - Set names_vc2 = RefSeqDataParser.getRefSeqNames(vc2); - names_vc1.retainAll(names_vc2); - - if (!names_vc1.isEmpty()) - return true; - - // Check refseq.name2: - Set names2_vc1 = RefSeqDataParser.getRefSeqNames(vc1, true); - Set names2_vc2 = RefSeqDataParser.getRefSeqNames(vc2, true); - names2_vc1.retainAll(names2_vc2); - - return !names2_vc1.isEmpty(); - } - - public String toString() { - return super.toString() + " " + (mergeBasedOnRefSeqAnnotation == MergeBasedOnRefSeqAnnotation.UNION_WITH_DIST ? "OR" : "AND") + " on the same gene"; - } - - public Map addToMergedAttributes(VariantContext vc1, VariantContext vc2) { - Map addedAttribs = super.addToMergedAttributes(vc1, vc2); - addedAttribs.putAll(RefSeqDataParser.getMergedRefSeqNameAttributes(vc1, vc2)); - return addedAttribs; - } -} - - - -class PrefixPolymorphismMergeAllelesRule extends VariantContextUtils.AlleleMergeRule { - public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2) { - return vc1.isPolymorphic(); - } - - public String toString() { - return super.toString() + ", there exists a polymorphism at the start of the merged allele"; - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingGraph.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingGraph.java index fe2792475..8f980ad72 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingGraph.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingGraph.java @@ -23,12 +23,10 @@ */ package org.broadinstitute.sting.gatk.walkers.phasing; -import org.broadinstitute.sting.utils.DisjointSet; - import java.util.*; // Represents an undirected graph with no self-edges: -public class PhasingGraph implements Iterable { +class PhasingGraph implements Iterable { private Neighbors[] adj; public PhasingGraph(int numVertices) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingGraphEdge.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingGraphEdge.java index 56197a85f..053b09439 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingGraphEdge.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingGraphEdge.java @@ -26,7 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.phasing; /* Edge class for PhasingGraph */ -public class PhasingGraphEdge implements Comparable { +class PhasingGraphEdge implements Comparable { protected int v1; protected int v2; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingRead.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingRead.java index 63fb33295..a95b13d68 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingRead.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingRead.java @@ -29,7 +29,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.Arrays; -public class PhasingRead extends BaseArray { +class PhasingRead extends BaseArray { private PreciseNonNegativeDouble mappingProb; // the probability that this read is mapped correctly private PreciseNonNegativeDouble[] baseProbs; // the probabilities that the base identities are CORRECT private PreciseNonNegativeDouble[] baseErrorProbs; // the probabilities that the base identities are INCORRECT diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java new file mode 100644 index 000000000..8b5455e50 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java @@ -0,0 +1,387 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.phasing; + +import net.sf.picard.reference.ReferenceSequenceFile; +import net.sf.samtools.util.StringUtil; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.*; + +import java.util.*; + +/** + * [Short one sentence description of this walker] + *

+ *

+ * [Functionality of this walker] + *

+ *

+ *

Input

+ *

+ * [Input description] + *

+ *

+ *

Output

+ *

+ * [Output description] + *

+ *

+ *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T $WalkerName
+ *  
+ * + * @author Your Name + * @since Date created + */ +class PhasingUtils { + static VariantContext mergeIntoMNP(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile, AlleleMergeRule alleleMergeRule) { + if (!mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2)) + return null; + + // Check that it's logically possible to merge the VCs: + if (!allSamplesAreMergeable(vc1, vc2)) + return null; + + // Check if there's a "point" in merging the VCs (e.g., annotations could be changed) + if (!alleleMergeRule.allelesShouldBeMerged(vc1, vc2)) + return null; + + return reallyMergeIntoMNP(vc1, vc2, referenceFile); + } + + static VariantContext reallyMergeIntoMNP(VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) { + int startInter = vc1.getEnd() + 1; + int endInter = vc2.getStart() - 1; + byte[] intermediateBases = null; + if (startInter <= endInter) { + intermediateBases = referenceFile.getSubsequenceAt(vc1.getChr(), startInter, endInter).getBases(); + StringUtil.toUpperCase(intermediateBases); + } + MergedAllelesData mergeData = new MergedAllelesData(intermediateBases, vc1, vc2); // ensures that the reference allele is added + + GenotypeCollection mergedGenotypes = GenotypeCollection.create(); + for (final Genotype gt1 : vc1.getGenotypes()) { + Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); + + List site1Alleles = gt1.getAlleles(); + List site2Alleles = gt2.getAlleles(); + + List mergedAllelesForSample = new LinkedList(); + + /* NOTE: Since merged alleles are added to mergedAllelesForSample in the SAME order as in the input VC records, + we preserve phase information (if any) relative to whatever precedes vc1: + */ + Iterator all2It = site2Alleles.iterator(); + for (Allele all1 : site1Alleles) { + Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable() + + Allele mergedAllele = mergeData.ensureMergedAllele(all1, all2); + mergedAllelesForSample.add(mergedAllele); + } + + double mergedGQ = Math.max(gt1.getNegLog10PError(), gt2.getNegLog10PError()); + Set mergedGtFilters = new HashSet(); // Since gt1 and gt2 were unfiltered, the Genotype remains unfiltered + + Map mergedGtAttribs = new HashMap(); + PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(gt1, gt2); + if (phaseQual.PQ != null) + mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ); + + Genotype mergedGt = new Genotype(gt1.getSampleName(), mergedAllelesForSample, mergedGQ, mergedGtFilters, mergedGtAttribs, phaseQual.isPhased); + mergedGenotypes.add(mergedGt); + } + + String mergedName = mergeVariantContextNames(vc1.getSource(), vc2.getSource()); + double mergedNegLog10PError = Math.max(vc1.getNegLog10PError(), vc2.getNegLog10PError()); + Set mergedFilters = new HashSet(); // Since vc1 and vc2 were unfiltered, the merged record remains unfiltered + Map mergedAttribs = mergeVariantContextAttributes(vc1, vc2); + + // ids + List mergedIDs = new ArrayList(); + if ( vc1.hasID() ) mergedIDs.add(vc1.getID()); + if ( vc2.hasID() ) mergedIDs.add(vc2.getID()); + String mergedID = Utils.join(VCFConstants.ID_FIELD_SEPARATOR, mergedIDs); + + // TODO -- FIX ID + VariantContext mergedVc = new VariantContext(mergedName, mergedID, vc1.getChr(), vc1.getStart(), vc2.getEnd(), mergeData.getAllMergedAlleles(), mergedGenotypes, mergedNegLog10PError, mergedFilters, mergedAttribs); + + mergedAttribs = new HashMap(mergedVc.getAttributes()); + VariantContextUtils.calculateChromosomeCounts(mergedVc, mergedAttribs, true); + mergedVc = VariantContext.modifyAttributes(mergedVc, mergedAttribs); + + return mergedVc; + } + + static String mergeVariantContextNames(String name1, String name2) { + return name1 + "_" + name2; + } + + static Map mergeVariantContextAttributes(VariantContext vc1, VariantContext vc2) { + Map mergedAttribs = new HashMap(); + + List vcList = new LinkedList(); + vcList.add(vc1); + vcList.add(vc2); + + String[] MERGE_OR_ATTRIBS = {VCFConstants.DBSNP_KEY}; + for (String orAttrib : MERGE_OR_ATTRIBS) { + boolean attribVal = false; + for (VariantContext vc : vcList) { + attribVal = vc.getAttributeAsBoolean(orAttrib, false); + if (attribVal) // already true, so no reason to continue: + break; + } + mergedAttribs.put(orAttrib, attribVal); + } + + return mergedAttribs; + } + + static boolean mergeIntoMNPvalidationCheck(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2) { + GenomeLoc loc1 = VariantContextUtils.getLocation(genomeLocParser, vc1); + GenomeLoc loc2 = VariantContextUtils.getLocation(genomeLocParser, vc2); + + if (!loc1.onSameContig(loc2)) + throw new ReviewedStingException("Can only merge vc1, vc2 if on the same chromosome"); + + if (!loc1.isBefore(loc2)) + throw new ReviewedStingException("Can only merge if vc1 is BEFORE vc2"); + + if (vc1.isFiltered() || vc2.isFiltered()) + return false; + + if (!vc1.getSampleNames().equals(vc2.getSampleNames())) // vc1, vc2 refer to different sample sets + return false; + + if (!allGenotypesAreUnfilteredAndCalled(vc1) || !allGenotypesAreUnfilteredAndCalled(vc2)) + return false; + + return true; + } + + static boolean allGenotypesAreUnfilteredAndCalled(VariantContext vc) { + for (final Genotype gt : vc.getGenotypes()) { + if (gt.isNoCall() || gt.isFiltered()) + return false; + } + + return true; + } + + static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) { + // Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1: + for (final Genotype gt1 : vc1.getGenotypes()) { + Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); + + if (!alleleSegregationIsKnown(gt1, gt2)) // can merge if: phased, or if either is a hom + return false; + } + + return true; + } + + static boolean alleleSegregationIsKnown(Genotype gt1, Genotype gt2) { + if (gt1.getPloidy() != gt2.getPloidy()) + return false; + + /* If gt2 is phased or hom, then could even be MERGED with gt1 [This is standard]. + + HOWEVER, EVEN if this is not the case, but gt1.isHom(), + it is trivially known that each of gt2's alleles segregate with the single allele type present in gt1. + */ + return (gt2.isPhased() || gt2.isHom() || gt1.isHom()); + } + + static PhaseAndQuality calcPhaseForMergedGenotypes(Genotype gt1, Genotype gt2) { + if (gt2.isPhased() || gt2.isHom()) + return new PhaseAndQuality(gt1); // maintain the phase of gt1 + + if (!gt1.isHom()) + throw new ReviewedStingException("alleleSegregationIsKnown(gt1, gt2) implies: gt2.genotypesArePhased() || gt2.isHom() || gt1.isHom()"); + + /* We're dealing with: gt1.isHom(), gt2.isHet(), !gt2.genotypesArePhased(); so, the merged (het) Genotype is not phased relative to the previous Genotype + + For example, if we're merging the third Genotype with the second one: + 0/1 + 1|1 + 0/1 + + Then, we want to output: + 0/1 + 1/2 + */ + return new PhaseAndQuality(gt2); // maintain the phase of gt2 [since !gt2.genotypesArePhased()] + } + + static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) { + for (final Genotype gt1 : vc1.getGenotypes()) { + Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); + + List site1Alleles = gt1.getAlleles(); + List site2Alleles = gt2.getAlleles(); + + Iterator all2It = site2Alleles.iterator(); + for (Allele all1 : site1Alleles) { + Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable() + + if (all1.isNonReference() && all2.isNonReference()) // corresponding alleles are alternate + return true; + } + } + + return false; + } + + static boolean doubleAllelesSegregatePerfectlyAmongSamples(VariantContext vc1, VariantContext vc2) { + // Check that Alleles at vc1 and at vc2 always segregate together in all samples (including reference): + Map allele1ToAllele2 = new HashMap(); + Map allele2ToAllele1 = new HashMap(); + + // Note the segregation of the alleles for the reference genome: + allele1ToAllele2.put(vc1.getReference(), vc2.getReference()); + allele2ToAllele1.put(vc2.getReference(), vc1.getReference()); + + // Note the segregation of the alleles for each sample (and check that it is consistent with the reference and all previous samples). + for (final Genotype gt1 : vc1.getGenotypes()) { + Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); + + List site1Alleles = gt1.getAlleles(); + List site2Alleles = gt2.getAlleles(); + + Iterator all2It = site2Alleles.iterator(); + for (Allele all1 : site1Alleles) { + Allele all2 = all2It.next(); + + Allele all1To2 = allele1ToAllele2.get(all1); + if (all1To2 == null) + allele1ToAllele2.put(all1, all2); + else if (!all1To2.equals(all2)) // all1 segregates with two different alleles at site 2 + return false; + + Allele all2To1 = allele2ToAllele1.get(all2); + if (all2To1 == null) + allele2ToAllele1.put(all2, all1); + else if (!all2To1.equals(all1)) // all2 segregates with two different alleles at site 1 + return false; + } + } + + return true; + } + + abstract static class AlleleMergeRule { + // vc1, vc2 are ONLY passed to allelesShouldBeMerged() if mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2) AND allSamplesAreMergeable(vc1, vc2): + abstract public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2); + + public String toString() { + return "all samples are mergeable"; + } + } + + static class AlleleOneAndTwo { + private Allele all1; + private Allele all2; + + public AlleleOneAndTwo(Allele all1, Allele all2) { + this.all1 = all1; + this.all2 = all2; + } + + public int hashCode() { + return all1.hashCode() + all2.hashCode(); + } + + public boolean equals(Object other) { + if (!(other instanceof AlleleOneAndTwo)) + return false; + + AlleleOneAndTwo otherAot = (AlleleOneAndTwo) other; + return (this.all1.equals(otherAot.all1) && this.all2.equals(otherAot.all2)); + } + } + + static class MergedAllelesData { + private Map mergedAlleles; + private byte[] intermediateBases; + private int intermediateLength; + + public MergedAllelesData(byte[] intermediateBases, VariantContext vc1, VariantContext vc2) { + this.mergedAlleles = new HashMap(); // implemented equals() and hashCode() for AlleleOneAndTwo + this.intermediateBases = intermediateBases; + this.intermediateLength = this.intermediateBases != null ? this.intermediateBases.length : 0; + + this.ensureMergedAllele(vc1.getReference(), vc2.getReference(), true); + } + + public Allele ensureMergedAllele(Allele all1, Allele all2) { + return ensureMergedAllele(all1, all2, false); // false <-> since even if all1+all2 = reference, it was already created in the constructor + } + + private Allele ensureMergedAllele(Allele all1, Allele all2, boolean creatingReferenceForFirstTime) { + AlleleOneAndTwo all12 = new AlleleOneAndTwo(all1, all2); + Allele mergedAllele = mergedAlleles.get(all12); + + if (mergedAllele == null) { + byte[] bases1 = all1.getBases(); + byte[] bases2 = all2.getBases(); + + byte[] mergedBases = new byte[bases1.length + intermediateLength + bases2.length]; + System.arraycopy(bases1, 0, mergedBases, 0, bases1.length); + if (intermediateBases != null) + System.arraycopy(intermediateBases, 0, mergedBases, bases1.length, intermediateLength); + System.arraycopy(bases2, 0, mergedBases, bases1.length + intermediateLength, bases2.length); + + mergedAllele = Allele.create(mergedBases, creatingReferenceForFirstTime); + mergedAlleles.put(all12, mergedAllele); + } + + return mergedAllele; + } + + public Set getAllMergedAlleles() { + return new HashSet(mergedAlleles.values()); + } + } + + static class PhaseAndQuality { + public boolean isPhased; + public Double PQ = null; + + public PhaseAndQuality(Genotype gt) { + this.isPhased = gt.isPhased(); + if (this.isPhased) { + this.PQ = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1); + if ( this.PQ == -1 ) this.PQ = null; + } + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PreciseNonNegativeDouble.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PreciseNonNegativeDouble.java index 99446705e..b68739b48 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PreciseNonNegativeDouble.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PreciseNonNegativeDouble.java @@ -26,7 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.phasing; /* PreciseNonNegativeDouble permits arithmetic operations on NON-NEGATIVE double values with precision (prevents underflow by representing in log10 space). */ -public class PreciseNonNegativeDouble implements Comparable { +class PreciseNonNegativeDouble implements Comparable { private static final double EQUALS_THRESH = 1e-6; private static final double INFINITY = Double.POSITIVE_INFINITY; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index 94127438d..8cc0d8856 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -34,7 +34,6 @@ import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.DisjointSet; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.HasGenomeLocation; import org.broadinstitute.sting.utils.codecs.vcf.*; @@ -1052,7 +1051,7 @@ public class ReadBackedPhasingWalker extends RodWalker { +class ReadBasesAtPosition implements Iterable { // list of: private LinkedList bases; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/RefSeqDataParser.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/RefSeqDataParser.java deleted file mode 100644 index f94140814..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/RefSeqDataParser.java +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Copyright (c) 2010, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.phasing; - -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.*; - -/* Some methods for extracting RefSeq-related data from annotated VCF INFO fields: - */ -public class RefSeqDataParser { - private static String REFSEQ_PREFIX = "refseq."; - - private static String NUM_RECORDS_KEY = REFSEQ_PREFIX + "numMatchingRecords"; - private static String NAME_KEY = REFSEQ_PREFIX + "name"; - private static String NAME2_KEY = REFSEQ_PREFIX + "name2"; - - private static String[] NAME_KEYS = {NAME_KEY, NAME2_KEY}; - - private static Map getRefSeqEntriesToNames(VariantContext vc, boolean getName2) { - String nameKeyToUse = getName2 ? NAME2_KEY : NAME_KEY; - String nameKeyToUseMultiplePrefix = nameKeyToUse + "_"; - - Map entriesToNames = new HashMap(); - int numRecords = vc.getAttributeAsInt(NUM_RECORDS_KEY, -1); - if (numRecords != -1) { - boolean done = false; - - if (numRecords == 1) { // Check if perhaps the single record doesn't end with "_1": - String name = vc.getAttributeAsString(nameKeyToUse, null); - if (name != null) { - entriesToNames.put(nameKeyToUse, name); - done = true; - } - } - - if (!done) { - for (int i = 1; i <= numRecords; i++) { - String key = nameKeyToUseMultiplePrefix + i; - String name = vc.getAttributeAsString(key, null); - if (name != null) - entriesToNames.put(key, name); - } - } - } - else { // no entry with the # of records: - String name = vc.getAttributeAsString(nameKeyToUse, null); - if (name != null) { - entriesToNames.put(nameKeyToUse, name); - } - else { // Check all INFO fields for a match (if there are multiple entries): - for (Map.Entry entry : vc.getAttributes().entrySet()) { - String key = entry.getKey(); - if (key.startsWith(nameKeyToUseMultiplePrefix)) - entriesToNames.put(key, entry.getValue().toString()); - } - } - } - return entriesToNames; - } - - private static Map getRefSeqEntriesToNames(VariantContext vc) { - return getRefSeqEntriesToNames(vc, false); - } - - public static Set getRefSeqNames(VariantContext vc, boolean getName2) { - return new TreeSet(getRefSeqEntriesToNames(vc, getName2).values()); - } - - public static Set getRefSeqNames(VariantContext vc) { - return getRefSeqNames(vc, false); - } - - public static Map getMergedRefSeqNameAttributes(VariantContext vc1, VariantContext vc2) { - Map refSeqNameAttribs = new HashMap(); - - Map entriesMap1 = getAllRefSeqEntriesByName(vc1); - Map entriesMap2 = getAllRefSeqEntriesByName(vc2); - - Set commonNames = entriesMap1.keySet(); - commonNames.retainAll(entriesMap2.keySet()); - boolean addSuffix = commonNames.size() > 1; - int nextCount = 1; - - for (String name : commonNames) { - RefSeqEntry refseq1 = entriesMap1.get(name); - RefSeqEntry refseq2 = entriesMap2.get(name); - - String keySuffix = ""; - if (addSuffix) - keySuffix = "_" + nextCount; - - boolean added = false; - for (String key : NAME_KEYS) { - Object obj1 = refseq1.info.get(key); - Object obj2 = refseq2.info.get(key); - if (obj1 != null && obj2 != null && obj1.equals(obj2)) { - added = true; - String useKey = key + keySuffix; - refSeqNameAttribs.put(useKey, obj1); - } - } - if (added) - nextCount++; - } - int totalCount = nextCount - 1; // since incremented count one extra time - if (totalCount > 1) - refSeqNameAttribs.put(NUM_RECORDS_KEY, totalCount); - - return refSeqNameAttribs; - } - - public static Map removeRefSeqAttributes(Map attributes) { - Map removedRefSeqAttributes = new HashMap(attributes); - - Iterator> attrIt = removedRefSeqAttributes.entrySet().iterator(); - while (attrIt.hasNext()) { - String key = attrIt.next().getKey(); - if (key.startsWith(REFSEQ_PREFIX)) - attrIt.remove(); - } - - return removedRefSeqAttributes; - } - - private static Map getAllRefSeqEntriesByName(VariantContext vc) { - Map nameToEntries = new TreeMap(); - - List allEntries = getAllRefSeqEntries(vc); - for (RefSeqEntry entry : allEntries) { - Object name = entry.info.get(NAME_KEY); - if (name != null) - nameToEntries.put(name.toString(), entry); - } - - return nameToEntries; - } - - // Returns a List of SEPARATE Map for EACH RefSeq annotation (i.e., each gene), stripping out the "_1", "_2", etc. - private static List getAllRefSeqEntries(VariantContext vc) { - List allRefSeq = new LinkedList(); - - for (Map.Entry entryToName : getRefSeqEntriesToNames(vc).entrySet()) { - String entry = entryToName.getKey(); - String entrySuffix = entry.replaceFirst(NAME_KEY, ""); - allRefSeq.add(new RefSeqEntry(vc, entrySuffix)); - } - - return allRefSeq; - } - - private static class RefSeqEntry { - public Map info; - - public RefSeqEntry(VariantContext vc, String entrySuffix) { - this.info = new HashMap(); - - for (Map.Entry attribEntry : vc.getAttributes().entrySet()) { - String key = attribEntry.getKey(); - if (key.startsWith(REFSEQ_PREFIX) && key.endsWith(entrySuffix)) { - String genericKey = key.replaceAll(entrySuffix, ""); - this.info.put(genericKey, attribEntry.getValue()); - } - } - } - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/SNPallelePair.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/SNPallelePair.java index 153c4a23f..6a2381e29 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/SNPallelePair.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/SNPallelePair.java @@ -28,7 +28,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; -public class SNPallelePair extends AllelePair { +class SNPallelePair extends AllelePair { public SNPallelePair(Genotype gt) { super(gt); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java deleted file mode 100644 index c10eaa2da..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2010, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -package org.broadinstitute.sting.gatk.walkers.phasing; - -import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -public class WriteVCF { - public static void writeVCF(VariantContext vc, VCFWriter writer, Logger logger) { - writer.add(vc); - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index acd3ac0d2..161800009 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -25,13 +25,10 @@ package org.broadinstitute.sting.utils.variantcontext; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import net.sf.picard.reference.ReferenceSequenceFile; -import net.sf.samtools.util.StringUtil; import org.apache.commons.jexl2.Expression; import org.apache.commons.jexl2.JexlEngine; import org.apache.log4j.Logger; import org.broad.tribble.util.popgen.HardyWeinbergCalculation; -import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -929,340 +926,4 @@ public class VariantContextUtils { public static final GenomeLoc getLocation(GenomeLocParser genomeLocParser,VariantContext vc) { return genomeLocParser.createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd(), true); } - - public abstract static class AlleleMergeRule { - // vc1, vc2 are ONLY passed to allelesShouldBeMerged() if mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2) AND allSamplesAreMergeable(vc1, vc2): - abstract public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2); - - public String toString() { - return "all samples are mergeable"; - } - } - - // NOTE: returns null if vc1 and vc2 are not merged into a single MNP record - - public static VariantContext mergeIntoMNP(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile, AlleleMergeRule alleleMergeRule) { - if (!mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2)) - return null; - - // Check that it's logically possible to merge the VCs: - if (!allSamplesAreMergeable(vc1, vc2)) - return null; - - // Check if there's a "point" in merging the VCs (e.g., annotations could be changed) - if (!alleleMergeRule.allelesShouldBeMerged(vc1, vc2)) - return null; - - return reallyMergeIntoMNP(vc1, vc2, referenceFile); - } - - private static VariantContext reallyMergeIntoMNP(VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) { - int startInter = vc1.getEnd() + 1; - int endInter = vc2.getStart() - 1; - byte[] intermediateBases = null; - if (startInter <= endInter) { - intermediateBases = referenceFile.getSubsequenceAt(vc1.getChr(), startInter, endInter).getBases(); - StringUtil.toUpperCase(intermediateBases); - } - MergedAllelesData mergeData = new MergedAllelesData(intermediateBases, vc1, vc2); // ensures that the reference allele is added - - GenotypeCollection mergedGenotypes = GenotypeCollection.create(); - for (final Genotype gt1 : vc1.getGenotypes()) { - Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); - - List site1Alleles = gt1.getAlleles(); - List site2Alleles = gt2.getAlleles(); - - List mergedAllelesForSample = new LinkedList(); - - /* NOTE: Since merged alleles are added to mergedAllelesForSample in the SAME order as in the input VC records, - we preserve phase information (if any) relative to whatever precedes vc1: - */ - Iterator all2It = site2Alleles.iterator(); - for (Allele all1 : site1Alleles) { - Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable() - - Allele mergedAllele = mergeData.ensureMergedAllele(all1, all2); - mergedAllelesForSample.add(mergedAllele); - } - - double mergedGQ = Math.max(gt1.getNegLog10PError(), gt2.getNegLog10PError()); - Set mergedGtFilters = new HashSet(); // Since gt1 and gt2 were unfiltered, the Genotype remains unfiltered - - Map mergedGtAttribs = new HashMap(); - PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(gt1, gt2); - if (phaseQual.PQ != null) - mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ); - - Genotype mergedGt = new Genotype(gt1.getSampleName(), mergedAllelesForSample, mergedGQ, mergedGtFilters, mergedGtAttribs, phaseQual.isPhased); - mergedGenotypes.add(mergedGt); - } - - String mergedName = VariantContextUtils.mergeVariantContextNames(vc1.getSource(), vc2.getSource()); - double mergedNegLog10PError = Math.max(vc1.getNegLog10PError(), vc2.getNegLog10PError()); - Set mergedFilters = new HashSet(); // Since vc1 and vc2 were unfiltered, the merged record remains unfiltered - Map mergedAttribs = VariantContextUtils.mergeVariantContextAttributes(vc1, vc2); - - // ids - List mergedIDs = new ArrayList(); - if ( vc1.hasID() ) mergedIDs.add(vc1.getID()); - if ( vc2.hasID() ) mergedIDs.add(vc2.getID()); - String mergedID = Utils.join(VCFConstants.ID_FIELD_SEPARATOR, mergedIDs); - - // TODO -- FIX ID - VariantContext mergedVc = new VariantContext(mergedName, mergedID, vc1.getChr(), vc1.getStart(), vc2.getEnd(), mergeData.getAllMergedAlleles(), mergedGenotypes, mergedNegLog10PError, mergedFilters, mergedAttribs); - - mergedAttribs = new HashMap(mergedVc.getAttributes()); - VariantContextUtils.calculateChromosomeCounts(mergedVc, mergedAttribs, true); - mergedVc = VariantContext.modifyAttributes(mergedVc, mergedAttribs); - - return mergedVc; - } - - private static class AlleleOneAndTwo { - private Allele all1; - private Allele all2; - - public AlleleOneAndTwo(Allele all1, Allele all2) { - this.all1 = all1; - this.all2 = all2; - } - - public int hashCode() { - return all1.hashCode() + all2.hashCode(); - } - - public boolean equals(Object other) { - if (!(other instanceof AlleleOneAndTwo)) - return false; - - AlleleOneAndTwo otherAot = (AlleleOneAndTwo) other; - return (this.all1.equals(otherAot.all1) && this.all2.equals(otherAot.all2)); - } - } - - private static class MergedAllelesData { - private Map mergedAlleles; - private byte[] intermediateBases; - private int intermediateLength; - - public MergedAllelesData(byte[] intermediateBases, VariantContext vc1, VariantContext vc2) { - this.mergedAlleles = new HashMap(); // implemented equals() and hashCode() for AlleleOneAndTwo - this.intermediateBases = intermediateBases; - this.intermediateLength = this.intermediateBases != null ? this.intermediateBases.length : 0; - - this.ensureMergedAllele(vc1.getReference(), vc2.getReference(), true); - } - - public Allele ensureMergedAllele(Allele all1, Allele all2) { - return ensureMergedAllele(all1, all2, false); // false <-> since even if all1+all2 = reference, it was already created in the constructor - } - - private Allele ensureMergedAllele(Allele all1, Allele all2, boolean creatingReferenceForFirstTime) { - AlleleOneAndTwo all12 = new AlleleOneAndTwo(all1, all2); - Allele mergedAllele = mergedAlleles.get(all12); - - if (mergedAllele == null) { - byte[] bases1 = all1.getBases(); - byte[] bases2 = all2.getBases(); - - byte[] mergedBases = new byte[bases1.length + intermediateLength + bases2.length]; - System.arraycopy(bases1, 0, mergedBases, 0, bases1.length); - if (intermediateBases != null) - System.arraycopy(intermediateBases, 0, mergedBases, bases1.length, intermediateLength); - System.arraycopy(bases2, 0, mergedBases, bases1.length + intermediateLength, bases2.length); - - mergedAllele = Allele.create(mergedBases, creatingReferenceForFirstTime); - mergedAlleles.put(all12, mergedAllele); - } - - return mergedAllele; - } - - public Set getAllMergedAlleles() { - return new HashSet(mergedAlleles.values()); - } - } - - private static String mergeVariantContextNames(String name1, String name2) { - return name1 + "_" + name2; - } - - private static Map mergeVariantContextAttributes(VariantContext vc1, VariantContext vc2) { - Map mergedAttribs = new HashMap(); - - List vcList = new LinkedList(); - vcList.add(vc1); - vcList.add(vc2); - - String[] MERGE_OR_ATTRIBS = {VCFConstants.DBSNP_KEY}; - for (String orAttrib : MERGE_OR_ATTRIBS) { - boolean attribVal = false; - for (VariantContext vc : vcList) { - attribVal = vc.getAttributeAsBoolean(orAttrib, false); - if (attribVal) // already true, so no reason to continue: - break; - } - mergedAttribs.put(orAttrib, attribVal); - } - - return mergedAttribs; - } - - private static boolean mergeIntoMNPvalidationCheck(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2) { - GenomeLoc loc1 = VariantContextUtils.getLocation(genomeLocParser, vc1); - GenomeLoc loc2 = VariantContextUtils.getLocation(genomeLocParser, vc2); - - if (!loc1.onSameContig(loc2)) - throw new ReviewedStingException("Can only merge vc1, vc2 if on the same chromosome"); - - if (!loc1.isBefore(loc2)) - throw new ReviewedStingException("Can only merge if vc1 is BEFORE vc2"); - - if (vc1.isFiltered() || vc2.isFiltered()) - return false; - - if (!vc1.getSampleNames().equals(vc2.getSampleNames())) // vc1, vc2 refer to different sample sets - return false; - - if (!allGenotypesAreUnfilteredAndCalled(vc1) || !allGenotypesAreUnfilteredAndCalled(vc2)) - return false; - - return true; - } - - private static boolean allGenotypesAreUnfilteredAndCalled(VariantContext vc) { - for (final Genotype gt : vc.getGenotypes()) { - if (gt.isNoCall() || gt.isFiltered()) - return false; - } - - return true; - } - - // Assumes that vc1 and vc2 were already checked to have the same sample names: - - private static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) { - // Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1: - for (final Genotype gt1 : vc1.getGenotypes()) { - Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); - - if (!alleleSegregationIsKnown(gt1, gt2)) // can merge if: phased, or if either is a hom - return false; - } - - return true; - } - - public static boolean alleleSegregationIsKnown(Genotype gt1, Genotype gt2) { - if (gt1.getPloidy() != gt2.getPloidy()) - return false; - - /* If gt2 is phased or hom, then could even be MERGED with gt1 [This is standard]. - - HOWEVER, EVEN if this is not the case, but gt1.isHom(), - it is trivially known that each of gt2's alleles segregate with the single allele type present in gt1. - */ - return (gt2.isPhased() || gt2.isHom() || gt1.isHom()); - } - - private static class PhaseAndQuality { - public boolean isPhased; - public Double PQ = null; - - public PhaseAndQuality(Genotype gt) { - this.isPhased = gt.isPhased(); - if (this.isPhased) { - this.PQ = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1); - if ( this.PQ == -1 ) this.PQ = null; - } - } - } - - // Assumes that alleleSegregationIsKnown(gt1, gt2): - - private static PhaseAndQuality calcPhaseForMergedGenotypes(Genotype gt1, Genotype gt2) { - if (gt2.isPhased() || gt2.isHom()) - return new PhaseAndQuality(gt1); // maintain the phase of gt1 - - if (!gt1.isHom()) - throw new ReviewedStingException("alleleSegregationIsKnown(gt1, gt2) implies: gt2.genotypesArePhased() || gt2.isHom() || gt1.isHom()"); - - /* We're dealing with: gt1.isHom(), gt2.isHet(), !gt2.genotypesArePhased(); so, the merged (het) Genotype is not phased relative to the previous Genotype - - For example, if we're merging the third Genotype with the second one: - 0/1 - 1|1 - 0/1 - - Then, we want to output: - 0/1 - 1/2 - */ - return new PhaseAndQuality(gt2); // maintain the phase of gt2 [since !gt2.genotypesArePhased()] - } - - /* Checks if any sample has a MNP of ALT alleles (segregating together): - [Assumes that vc1 and vc2 were already checked to have the same sample names && allSamplesAreMergeable(vc1, vc2)] - */ - - public static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) { - for (final Genotype gt1 : vc1.getGenotypes()) { - Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); - - List site1Alleles = gt1.getAlleles(); - List site2Alleles = gt2.getAlleles(); - - Iterator all2It = site2Alleles.iterator(); - for (Allele all1 : site1Alleles) { - Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable() - - if (all1.isNonReference() && all2.isNonReference()) // corresponding alleles are alternate - return true; - } - } - - return false; - } - - /* Checks if all samples are consistent in their haplotypes: - [Assumes that vc1 and vc2 were already checked to have the same sample names && allSamplesAreMergeable(vc1, vc2)] - */ - - public static boolean doubleAllelesSegregatePerfectlyAmongSamples(VariantContext vc1, VariantContext vc2) { - // Check that Alleles at vc1 and at vc2 always segregate together in all samples (including reference): - Map allele1ToAllele2 = new HashMap(); - Map allele2ToAllele1 = new HashMap(); - - // Note the segregation of the alleles for the reference genome: - allele1ToAllele2.put(vc1.getReference(), vc2.getReference()); - allele2ToAllele1.put(vc2.getReference(), vc1.getReference()); - - // Note the segregation of the alleles for each sample (and check that it is consistent with the reference and all previous samples). - for (final Genotype gt1 : vc1.getGenotypes()) { - Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); - - List site1Alleles = gt1.getAlleles(); - List site2Alleles = gt2.getAlleles(); - - Iterator all2It = site2Alleles.iterator(); - for (Allele all1 : site1Alleles) { - Allele all2 = all2It.next(); - - Allele all1To2 = allele1ToAllele2.get(all1); - if (all1To2 == null) - allele1ToAllele2.put(all1, all2); - else if (!all1To2.equals(all2)) // all1 segregates with two different alleles at site 2 - return false; - - Allele all2To1 = allele2ToAllele1.get(all2); - if (all2To1 == null) - allele2ToAllele1.put(all2, all1); - else if (!all2To1.equals(all1)) // all2 segregates with two different alleles at site 1 - return false; - } - } - - return true; - } -} \ No newline at end of file +}