Moved many unused phasing walkers and utilities to archive
This commit is contained in:
parent
460a51f473
commit
2b2514dad2
|
|
@ -29,7 +29,7 @@ import java.util.Arrays;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
public abstract class BaseArray implements Comparable<BaseArray> {
|
abstract class BaseArray implements Comparable<BaseArray> {
|
||||||
protected Byte[] bases;
|
protected Byte[] bases;
|
||||||
|
|
||||||
public BaseArray(byte[] bases) {
|
public BaseArray(byte[] bases) {
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ import java.util.Iterator;
|
||||||
/*
|
/*
|
||||||
* CardinalityCounter object allows user to iterate over all assignment of arbitrary-cardinality variables.
|
* CardinalityCounter object allows user to iterate over all assignment of arbitrary-cardinality variables.
|
||||||
*/
|
*/
|
||||||
public class CardinalityCounter implements Iterator<int[]>, Iterable<int[]> {
|
class CardinalityCounter implements Iterator<int[]>, Iterable<int[]> {
|
||||||
private int[] cards;
|
private int[] cards;
|
||||||
private int[] valList;
|
private int[] valList;
|
||||||
private boolean hasNext;
|
private boolean hasNext;
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ import java.util.NoSuchElementException;
|
||||||
It is UNIQUE in the fact that its iterator (BidirectionalIterator) can be cloned
|
It is UNIQUE in the fact that its iterator (BidirectionalIterator) can be cloned
|
||||||
to save the current pointer for a later time (while the original iterator can continue to iterate).
|
to save the current pointer for a later time (while the original iterator can continue to iterate).
|
||||||
*/
|
*/
|
||||||
public class CloneableIteratorLinkedList<E> {
|
class CloneableIteratorLinkedList<E> {
|
||||||
private CloneableIteratorDoublyLinkedNode<E> first;
|
private CloneableIteratorDoublyLinkedNode<E> first;
|
||||||
private CloneableIteratorDoublyLinkedNode<E> last;
|
private CloneableIteratorDoublyLinkedNode<E> last;
|
||||||
private int size;
|
private int size;
|
||||||
|
|
|
||||||
|
|
@ -21,13 +21,13 @@
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
package org.broadinstitute.sting.utils;
|
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
|
||||||
public class DisjointSet {
|
class DisjointSet {
|
||||||
private ItemNode[] nodes;
|
private ItemNode[] nodes;
|
||||||
|
|
||||||
public DisjointSet(int numItems) {
|
public DisjointSet(int numItems) {
|
||||||
|
|
@ -27,7 +27,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
public class Haplotype extends BaseArray implements Cloneable {
|
class Haplotype extends BaseArray implements Cloneable {
|
||||||
public Haplotype(byte[] bases) {
|
public Haplotype(byte[] bases) {
|
||||||
super(bases);
|
super(bases);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,133 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010, The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
|
||||||
import org.broadinstitute.sting.commandline.Input;
|
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
|
||||||
import org.broadinstitute.sting.commandline.RodBinding;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFromRods;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Walks along all variant ROD loci, and merges consecutive sites if they segregate in all samples in the ROD.
|
|
||||||
*/
|
|
||||||
@Allows(value = {DataSource.REFERENCE})
|
|
||||||
@Requires(value = {DataSource.REFERENCE})
|
|
||||||
@By(DataSource.REFERENCE_ORDERED_DATA)
|
|
||||||
|
|
||||||
public class MergeMNPsWalker extends RodWalker<Integer, Integer> {
|
|
||||||
|
|
||||||
@Output(doc = "File to which variants should be written", required = true)
|
|
||||||
protected VCFWriter writer = null;
|
|
||||||
private MergeSegregatingAlternateAllelesVCFWriter vcMergerWriter = null;
|
|
||||||
|
|
||||||
@Argument(fullName = "maxGenomicDistanceForMNP", shortName = "maxDistMNP", doc = "The maximum reference-genome distance between consecutive heterozygous sites to permit merging phased VCF records into a MNP record; [default:1]", required = false)
|
|
||||||
protected int maxGenomicDistanceForMNP = 1;
|
|
||||||
|
|
||||||
@Input(fullName="variant", shortName = "V", doc="Select variants from this VCF file", required=true)
|
|
||||||
public RodBinding<VariantContext> variants;
|
|
||||||
|
|
||||||
public void initialize() {
|
|
||||||
initializeVcfWriter();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void initializeVcfWriter() {
|
|
||||||
// false <-> don't take control of writer, since didn't create it:
|
|
||||||
vcMergerWriter = new MergeSegregatingAlternateAllelesVCFWriter(writer, getToolkit().getGenomeLocParser(), getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, logger, false);
|
|
||||||
writer = null; // so it can't be accessed directly [i.e., not through vcMergerWriter]
|
|
||||||
|
|
||||||
// setup the header fields:
|
|
||||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
|
||||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
|
||||||
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
|
||||||
|
|
||||||
Map<String, VCFHeader> rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getName()));
|
|
||||||
vcMergerWriter.writeHeader(new VCFHeader(hInfo, new TreeSet<String>(rodNameToHeader.get(variants.getName()).getGenotypeSamples())));
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean generateExtendedEvents() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Integer reduceInit() {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* For each site, send it to be (possibly) merged with previously observed sites.
|
|
||||||
*
|
|
||||||
* @param tracker the meta-data tracker
|
|
||||||
* @param ref the reference base
|
|
||||||
* @param context the context for the given locus
|
|
||||||
* @return dummy Integer
|
|
||||||
*/
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
|
||||||
if (tracker == null)
|
|
||||||
return null;
|
|
||||||
|
|
||||||
for (VariantContext vc : tracker.getValues(variants, context.getLocation()))
|
|
||||||
writeVCF(vc);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void writeVCF(VariantContext vc) {
|
|
||||||
WriteVCF.writeVCF(vc, vcMergerWriter, logger);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Integer reduce(Integer result, Integer total) {
|
|
||||||
if (result == null)
|
|
||||||
return total;
|
|
||||||
|
|
||||||
return total + result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Release any VariantContexts not yet processed.
|
|
||||||
*
|
|
||||||
* @param result Empty for now...
|
|
||||||
*/
|
|
||||||
public void onTraversalDone(Integer result) {
|
|
||||||
vcMergerWriter.close();
|
|
||||||
|
|
||||||
System.out.println("Number of successive pairs of records: " + vcMergerWriter.getNumRecordsAttemptToMerge());
|
|
||||||
System.out.println("Number of potentially merged records (" + vcMergerWriter.getVcMergeRule() + "): " + vcMergerWriter.getNumRecordsSatisfyingMergeRule());
|
|
||||||
System.out.println("Number of records merged ("+ vcMergerWriter.getAlleleMergeRule() + "): " + vcMergerWriter.getNumMergedRecords());
|
|
||||||
System.out.println(vcMergerWriter.getAltAlleleStats());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2010, The Broad Institute
|
* Copyright (c) 2011, The Broad Institute
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person
|
* Permission is hereby granted, free of charge, to any person
|
||||||
* obtaining a copy of this software and associated documentation
|
* obtaining a copy of this software and associated documentation
|
||||||
|
|
@ -44,7 +44,7 @@ import java.util.*;
|
||||||
|
|
||||||
// Streams in VariantContext objects and streams out VariantContexts produced by merging phased segregating polymorphisms into MNP VariantContexts
|
// Streams in VariantContext objects and streams out VariantContexts produced by merging phased segregating polymorphisms into MNP VariantContexts
|
||||||
|
|
||||||
public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
||||||
private VCFWriter innerWriter;
|
private VCFWriter innerWriter;
|
||||||
|
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
@ -52,7 +52,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
||||||
private ReferenceSequenceFile referenceFileForMNPmerging;
|
private ReferenceSequenceFile referenceFileForMNPmerging;
|
||||||
|
|
||||||
private VariantContextMergeRule vcMergeRule;
|
private VariantContextMergeRule vcMergeRule;
|
||||||
private VariantContextUtils.AlleleMergeRule alleleMergeRule;
|
private PhasingUtils.AlleleMergeRule alleleMergeRule;
|
||||||
|
|
||||||
private String useSingleSample = null;
|
private String useSingleSample = null;
|
||||||
|
|
||||||
|
|
@ -71,7 +71,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
||||||
// Should we call innerWriter.close() in close()
|
// Should we call innerWriter.close() in close()
|
||||||
private boolean takeOwnershipOfInner;
|
private boolean takeOwnershipOfInner;
|
||||||
|
|
||||||
public MergeSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, GenomeLocParser genomeLocParser, File referenceFile, VariantContextMergeRule vcMergeRule, VariantContextUtils.AlleleMergeRule alleleMergeRule, String singleSample, boolean emitOnlyMergedRecords, Logger logger, boolean takeOwnershipOfInner, boolean trackAltAlleleStats) {
|
public MergeSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, GenomeLocParser genomeLocParser, File referenceFile, VariantContextMergeRule vcMergeRule, PhasingUtils.AlleleMergeRule alleleMergeRule, String singleSample, boolean emitOnlyMergedRecords, Logger logger, boolean takeOwnershipOfInner, boolean trackAltAlleleStats) {
|
||||||
this.innerWriter = innerWriter;
|
this.innerWriter = innerWriter;
|
||||||
this.genomeLocParser = genomeLocParser;
|
this.genomeLocParser = genomeLocParser;
|
||||||
try {
|
try {
|
||||||
|
|
@ -179,7 +179,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
||||||
boolean mergedRecords = false;
|
boolean mergedRecords = false;
|
||||||
if (shouldAttemptToMerge) {
|
if (shouldAttemptToMerge) {
|
||||||
numRecordsSatisfyingMergeRule++;
|
numRecordsSatisfyingMergeRule++;
|
||||||
VariantContext mergedVc = VariantContextUtils.mergeIntoMNP(genomeLocParser, vcfrWaitingToMerge.vc, vc, referenceFileForMNPmerging, alleleMergeRule);
|
VariantContext mergedVc = PhasingUtils.mergeIntoMNP(genomeLocParser, vcfrWaitingToMerge.vc, vc, referenceFileForMNPmerging, alleleMergeRule);
|
||||||
|
|
||||||
if (mergedVc != null) {
|
if (mergedVc != null) {
|
||||||
mergedRecords = true;
|
mergedRecords = true;
|
||||||
|
|
@ -218,26 +218,6 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
||||||
filteredVcfrList.clear();
|
filteredVcfrList.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getNumRecordsAttemptToMerge() {
|
|
||||||
return numRecordsAttemptToMerge;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getNumRecordsSatisfyingMergeRule() {
|
|
||||||
return numRecordsSatisfyingMergeRule;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getNumMergedRecords() {
|
|
||||||
return numMergedRecords;
|
|
||||||
}
|
|
||||||
|
|
||||||
public VariantContextMergeRule getVcMergeRule() {
|
|
||||||
return vcMergeRule;
|
|
||||||
}
|
|
||||||
|
|
||||||
public VariantContextUtils.AlleleMergeRule getAlleleMergeRule() {
|
|
||||||
return alleleMergeRule;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets a string representation of this object.
|
* Gets a string representation of this object.
|
||||||
*
|
*
|
||||||
|
|
@ -248,13 +228,6 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
||||||
return getClass().getName();
|
return getClass().getName();
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getAltAlleleStats() {
|
|
||||||
if (altAlleleStats == null)
|
|
||||||
return "";
|
|
||||||
|
|
||||||
return "\n" + altAlleleStats.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class VCFRecord {
|
private static class VCFRecord {
|
||||||
public VariantContext vc;
|
public VariantContext vc;
|
||||||
public boolean resultedFromMerge;
|
public boolean resultedFromMerge;
|
||||||
|
|
@ -373,7 +346,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
||||||
if (shouldAttemptToMerge) {
|
if (shouldAttemptToMerge) {
|
||||||
aas.numSuccessiveGenotypesAttemptedToBeMerged++;
|
aas.numSuccessiveGenotypesAttemptedToBeMerged++;
|
||||||
|
|
||||||
if (!VariantContextUtils.alleleSegregationIsKnown(gt1, gt2)) {
|
if (!PhasingUtils.alleleSegregationIsKnown(gt1, gt2)) {
|
||||||
aas.segregationUnknown++;
|
aas.segregationUnknown++;
|
||||||
logger.debug("Unknown segregation of alleles [not phased] for " + samp + " at " + VariantContextUtils.getLocation(genomeLocParser, vc1) + ", " + VariantContextUtils.getLocation(genomeLocParser, vc2));
|
logger.debug("Unknown segregation of alleles [not phased] for " + samp + " at " + VariantContextUtils.getLocation(genomeLocParser, vc1) + ", " + VariantContextUtils.getLocation(genomeLocParser, vc2));
|
||||||
}
|
}
|
||||||
|
|
@ -498,9 +471,9 @@ class DistanceMergeRule extends VariantContextMergeRule {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class ExistsDoubleAltAlleleMergeRule extends VariantContextUtils.AlleleMergeRule {
|
class ExistsDoubleAltAlleleMergeRule extends PhasingUtils.AlleleMergeRule {
|
||||||
public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2) {
|
public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2) {
|
||||||
return VariantContextUtils.someSampleHasDoubleNonReferenceAllele(vc1, vc2);
|
return PhasingUtils.someSampleHasDoubleNonReferenceAllele(vc1, vc2);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
|
@ -515,7 +488,7 @@ class SegregatingMNPmergeAllelesRule extends ExistsDoubleAltAlleleMergeRule {
|
||||||
|
|
||||||
public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2) {
|
public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2) {
|
||||||
// Must be interesting AND consistent:
|
// Must be interesting AND consistent:
|
||||||
return super.allelesShouldBeMerged(vc1, vc2) && VariantContextUtils.doubleAllelesSegregatePerfectlyAmongSamples(vc1, vc2);
|
return super.allelesShouldBeMerged(vc1, vc2) && PhasingUtils.doubleAllelesSegregatePerfectlyAmongSamples(vc1, vc2);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
|
|
||||||
|
|
@ -1,236 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010, The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.*;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFromRods;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Walks along all variant ROD loci, and merges consecutive sites if some sample has segregating alt alleles in the ROD.
|
|
||||||
*/
|
|
||||||
@Allows(value = {DataSource.REFERENCE})
|
|
||||||
@Requires(value = {DataSource.REFERENCE})
|
|
||||||
@By(DataSource.REFERENCE_ORDERED_DATA)
|
|
||||||
|
|
||||||
public class MergeSegregatingAlternateAllelesWalker extends RodWalker<Integer, Integer> {
|
|
||||||
|
|
||||||
@Output(doc = "File to which variants should be written", required = true)
|
|
||||||
protected VCFWriter writer = null;
|
|
||||||
private MergeSegregatingAlternateAllelesVCFWriter vcMergerWriter = null;
|
|
||||||
|
|
||||||
@Argument(fullName = "maxGenomicDistance", shortName = "maxDist", doc = "The maximum reference-genome distance between consecutive heterozygous sites to permit merging phased VCF records; [default:1]", required = false)
|
|
||||||
protected int maxGenomicDistance = 1;
|
|
||||||
|
|
||||||
@Argument(fullName = "useSingleSample", shortName = "useSample", doc = "Only output genotypes for the single sample given; [default:use all samples]", required = false)
|
|
||||||
protected String useSingleSample = null;
|
|
||||||
|
|
||||||
@Hidden
|
|
||||||
@Argument(fullName = "emitOnlyMergedRecords", shortName = "emitOnlyMerged", doc = "Only output records that resulted from merging [For DEBUGGING purposes only - DO NOT USE, since it disregards the semantics of '|' as 'phased relative to previous non-filtered VC']; [default:false]", required = false)
|
|
||||||
protected boolean emitOnlyMergedRecords = false;
|
|
||||||
|
|
||||||
@Argument(fullName = "disablePrintAltAlleleStats", shortName = "noAlleleStats", doc = "Should the print-out of alternate allele statistics be disabled?; [default:false]", required = false)
|
|
||||||
protected boolean disablePrintAlternateAlleleStatistics = false;
|
|
||||||
|
|
||||||
public final static String IGNORE_REFSEQ = "IGNORE";
|
|
||||||
public final static String UNION_REFSEQ = "UNION";
|
|
||||||
public final static String INTERSECT_REFSEQ = "INTERSECT";
|
|
||||||
|
|
||||||
@Argument(fullName = "mergeBasedOnRefSeqAnnotation", shortName = "mergeBasedOnRefSeqAnnotation", doc = "'Should merging be performed if two sites lie on the same RefSeq sequence in the INFO field {" + IGNORE_REFSEQ + ", " + UNION_REFSEQ + ", " + INTERSECT_REFSEQ + "}; [default:" + IGNORE_REFSEQ + "]", required = false)
|
|
||||||
protected String mergeBasedOnRefSeqAnnotation = IGNORE_REFSEQ;
|
|
||||||
|
|
||||||
@Argument(fullName = "dontRequireSomeSampleHasDoubleAltAllele", shortName = "dontRequireSomeSampleHasDoubleAltAllele", doc = "Should the requirement, that SUCCESSIVE records to be merged have at least one sample with a double alternate allele, be relaxed?; [default:false]", required = false)
|
|
||||||
protected boolean dontRequireSomeSampleHasDoubleAltAllele = false;
|
|
||||||
|
|
||||||
@Input(fullName="variant", shortName = "V", doc="Select variants from this VCF file", required=true)
|
|
||||||
public RodBinding<VariantContext> variants;
|
|
||||||
|
|
||||||
public void initialize() {
|
|
||||||
initializeVcfWriter();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void initializeVcfWriter() {
|
|
||||||
GenomeLocParser genomeLocParser = getToolkit().getGenomeLocParser();
|
|
||||||
|
|
||||||
VariantContextMergeRule vcMergeRule;
|
|
||||||
if (mergeBasedOnRefSeqAnnotation.equals(IGNORE_REFSEQ))
|
|
||||||
vcMergeRule = new DistanceMergeRule(maxGenomicDistance, genomeLocParser);
|
|
||||||
else
|
|
||||||
vcMergeRule = new SameGenePlusWithinDistanceMergeRule(maxGenomicDistance, genomeLocParser, mergeBasedOnRefSeqAnnotation);
|
|
||||||
|
|
||||||
VariantContextUtils.AlleleMergeRule alleleMergeRule;
|
|
||||||
if (dontRequireSomeSampleHasDoubleAltAllele) // if a pair of VariantContext passes the vcMergeRule, then always merge them if there is a trailing prefix of polymorphisms (i.e., upstream polymorphic site):
|
|
||||||
alleleMergeRule = new PrefixPolymorphismMergeAllelesRule();
|
|
||||||
else
|
|
||||||
alleleMergeRule = new ExistsDoubleAltAlleleMergeRule();
|
|
||||||
|
|
||||||
// false <-> don't take control of writer, since didn't create it:
|
|
||||||
vcMergerWriter = new MergeSegregatingAlternateAllelesVCFWriter(writer, genomeLocParser, getToolkit().getArguments().referenceFile, vcMergeRule, alleleMergeRule, useSingleSample, emitOnlyMergedRecords, logger, false, !disablePrintAlternateAlleleStatistics);
|
|
||||||
writer = null; // so it can't be accessed directly [i.e., not through vcMergerWriter]
|
|
||||||
|
|
||||||
// setup the header fields:
|
|
||||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
|
||||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
|
||||||
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
|
||||||
|
|
||||||
Map<String, VCFHeader> rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getName()));
|
|
||||||
vcMergerWriter.writeHeader(new VCFHeader(hInfo, new TreeSet<String>(rodNameToHeader.get(variants.getName()).getGenotypeSamples())));
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean generateExtendedEvents() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Integer reduceInit() {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* For each site, send it to be (possibly) merged with previously observed sites.
|
|
||||||
*
|
|
||||||
* @param tracker the meta-data tracker
|
|
||||||
* @param ref the reference base
|
|
||||||
* @param context the context for the given locus
|
|
||||||
* @return dummy Integer
|
|
||||||
*/
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
|
||||||
if (tracker == null)
|
|
||||||
return null;
|
|
||||||
|
|
||||||
for (VariantContext vc : tracker.getValues(variants, context.getLocation()))
|
|
||||||
writeVCF(vc);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void writeVCF(VariantContext vc) {
|
|
||||||
WriteVCF.writeVCF(vc, vcMergerWriter, logger);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Integer reduce(Integer result, Integer total) {
|
|
||||||
if (result == null)
|
|
||||||
return total;
|
|
||||||
|
|
||||||
return total + result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Release any VariantContexts not yet processed.
|
|
||||||
*
|
|
||||||
* @param result Empty for now...
|
|
||||||
*/
|
|
||||||
public void onTraversalDone(Integer result) {
|
|
||||||
vcMergerWriter.close();
|
|
||||||
|
|
||||||
if (useSingleSample != null)
|
|
||||||
System.out.println("Only considered single sample: " + useSingleSample);
|
|
||||||
|
|
||||||
System.out.println("Number of successive pairs of records: " + vcMergerWriter.getNumRecordsAttemptToMerge());
|
|
||||||
System.out.println("Number of potentially merged records (" + vcMergerWriter.getVcMergeRule() + "): " + vcMergerWriter.getNumRecordsSatisfyingMergeRule());
|
|
||||||
System.out.println("Number of records merged ("+ vcMergerWriter.getAlleleMergeRule() + "): " + vcMergerWriter.getNumMergedRecords());
|
|
||||||
System.out.println(vcMergerWriter.getAltAlleleStats());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
enum MergeBasedOnRefSeqAnnotation {
|
|
||||||
UNION_WITH_DIST, INTERSECT_WITH_DIST
|
|
||||||
}
|
|
||||||
|
|
||||||
class SameGenePlusWithinDistanceMergeRule extends DistanceMergeRule {
|
|
||||||
private MergeBasedOnRefSeqAnnotation mergeBasedOnRefSeqAnnotation;
|
|
||||||
|
|
||||||
public SameGenePlusWithinDistanceMergeRule(int maxGenomicDistanceForMNP, GenomeLocParser genomeLocParser, String mergeBasedOnRefSeqAnnotation) {
|
|
||||||
super(maxGenomicDistanceForMNP, genomeLocParser);
|
|
||||||
|
|
||||||
if (mergeBasedOnRefSeqAnnotation.equals(MergeSegregatingAlternateAllelesWalker.UNION_REFSEQ))
|
|
||||||
this.mergeBasedOnRefSeqAnnotation = MergeBasedOnRefSeqAnnotation.UNION_WITH_DIST;
|
|
||||||
else if (mergeBasedOnRefSeqAnnotation.equals(MergeSegregatingAlternateAllelesWalker.INTERSECT_REFSEQ))
|
|
||||||
this.mergeBasedOnRefSeqAnnotation = MergeBasedOnRefSeqAnnotation.INTERSECT_WITH_DIST;
|
|
||||||
else
|
|
||||||
throw new UserException("Must provide " + MergeSegregatingAlternateAllelesWalker.IGNORE_REFSEQ + ", " + MergeSegregatingAlternateAllelesWalker.UNION_REFSEQ + ", or " + MergeSegregatingAlternateAllelesWalker.INTERSECT_REFSEQ + " as argument to mergeBasedOnRefSeqAnnotation!");
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean shouldAttemptToMerge(VariantContext vc1, VariantContext vc2) {
|
|
||||||
boolean withinDistance = super.shouldAttemptToMerge(vc1, vc2);
|
|
||||||
|
|
||||||
if (mergeBasedOnRefSeqAnnotation == MergeBasedOnRefSeqAnnotation.UNION_WITH_DIST)
|
|
||||||
return withinDistance || sameGene(vc1, vc2);
|
|
||||||
else // mergeBasedOnRefSeqAnnotation == MergeBasedOnRefSeqAnnotation.INTERSECT_WITH_DIST
|
|
||||||
return withinDistance && sameGene(vc1, vc2);
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean sameGene(VariantContext vc1, VariantContext vc2) {
|
|
||||||
Set<String> names_vc1 = RefSeqDataParser.getRefSeqNames(vc1);
|
|
||||||
Set<String> names_vc2 = RefSeqDataParser.getRefSeqNames(vc2);
|
|
||||||
names_vc1.retainAll(names_vc2);
|
|
||||||
|
|
||||||
if (!names_vc1.isEmpty())
|
|
||||||
return true;
|
|
||||||
|
|
||||||
// Check refseq.name2:
|
|
||||||
Set<String> names2_vc1 = RefSeqDataParser.getRefSeqNames(vc1, true);
|
|
||||||
Set<String> names2_vc2 = RefSeqDataParser.getRefSeqNames(vc2, true);
|
|
||||||
names2_vc1.retainAll(names2_vc2);
|
|
||||||
|
|
||||||
return !names2_vc1.isEmpty();
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
return super.toString() + " " + (mergeBasedOnRefSeqAnnotation == MergeBasedOnRefSeqAnnotation.UNION_WITH_DIST ? "OR" : "AND") + " on the same gene";
|
|
||||||
}
|
|
||||||
|
|
||||||
public Map<String, Object> addToMergedAttributes(VariantContext vc1, VariantContext vc2) {
|
|
||||||
Map<String, Object> addedAttribs = super.addToMergedAttributes(vc1, vc2);
|
|
||||||
addedAttribs.putAll(RefSeqDataParser.getMergedRefSeqNameAttributes(vc1, vc2));
|
|
||||||
return addedAttribs;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class PrefixPolymorphismMergeAllelesRule extends VariantContextUtils.AlleleMergeRule {
|
|
||||||
public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2) {
|
|
||||||
return vc1.isPolymorphic();
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
return super.toString() + ", there exists a polymorphism at the start of the merged allele";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -23,12 +23,10 @@
|
||||||
*/
|
*/
|
||||||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.DisjointSet;
|
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
// Represents an undirected graph with no self-edges:
|
// Represents an undirected graph with no self-edges:
|
||||||
public class PhasingGraph implements Iterable<PhasingGraphEdge> {
|
class PhasingGraph implements Iterable<PhasingGraphEdge> {
|
||||||
private Neighbors[] adj;
|
private Neighbors[] adj;
|
||||||
|
|
||||||
public PhasingGraph(int numVertices) {
|
public PhasingGraph(int numVertices) {
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||||
/*
|
/*
|
||||||
Edge class for PhasingGraph
|
Edge class for PhasingGraph
|
||||||
*/
|
*/
|
||||||
public class PhasingGraphEdge implements Comparable<PhasingGraphEdge> {
|
class PhasingGraphEdge implements Comparable<PhasingGraphEdge> {
|
||||||
protected int v1;
|
protected int v1;
|
||||||
protected int v2;
|
protected int v2;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
public class PhasingRead extends BaseArray {
|
class PhasingRead extends BaseArray {
|
||||||
private PreciseNonNegativeDouble mappingProb; // the probability that this read is mapped correctly
|
private PreciseNonNegativeDouble mappingProb; // the probability that this read is mapped correctly
|
||||||
private PreciseNonNegativeDouble[] baseProbs; // the probabilities that the base identities are CORRECT
|
private PreciseNonNegativeDouble[] baseProbs; // the probabilities that the base identities are CORRECT
|
||||||
private PreciseNonNegativeDouble[] baseErrorProbs; // the probabilities that the base identities are INCORRECT
|
private PreciseNonNegativeDouble[] baseErrorProbs; // the probabilities that the base identities are INCORRECT
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,387 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||||
|
import net.sf.samtools.util.StringUtil;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* [Short one sentence description of this walker]
|
||||||
|
* <p/>
|
||||||
|
* <p>
|
||||||
|
* [Functionality of this walker]
|
||||||
|
* </p>
|
||||||
|
* <p/>
|
||||||
|
* <h2>Input</h2>
|
||||||
|
* <p>
|
||||||
|
* [Input description]
|
||||||
|
* </p>
|
||||||
|
* <p/>
|
||||||
|
* <h2>Output</h2>
|
||||||
|
* <p>
|
||||||
|
* [Output description]
|
||||||
|
* </p>
|
||||||
|
* <p/>
|
||||||
|
* <h2>Examples</h2>
|
||||||
|
* <pre>
|
||||||
|
* java
|
||||||
|
* -jar GenomeAnalysisTK.jar
|
||||||
|
* -T $WalkerName
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* @author Your Name
|
||||||
|
* @since Date created
|
||||||
|
*/
|
||||||
|
class PhasingUtils {
|
||||||
|
static VariantContext mergeIntoMNP(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile, AlleleMergeRule alleleMergeRule) {
|
||||||
|
if (!mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2))
|
||||||
|
return null;
|
||||||
|
|
||||||
|
// Check that it's logically possible to merge the VCs:
|
||||||
|
if (!allSamplesAreMergeable(vc1, vc2))
|
||||||
|
return null;
|
||||||
|
|
||||||
|
// Check if there's a "point" in merging the VCs (e.g., annotations could be changed)
|
||||||
|
if (!alleleMergeRule.allelesShouldBeMerged(vc1, vc2))
|
||||||
|
return null;
|
||||||
|
|
||||||
|
return reallyMergeIntoMNP(vc1, vc2, referenceFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
static VariantContext reallyMergeIntoMNP(VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) {
|
||||||
|
int startInter = vc1.getEnd() + 1;
|
||||||
|
int endInter = vc2.getStart() - 1;
|
||||||
|
byte[] intermediateBases = null;
|
||||||
|
if (startInter <= endInter) {
|
||||||
|
intermediateBases = referenceFile.getSubsequenceAt(vc1.getChr(), startInter, endInter).getBases();
|
||||||
|
StringUtil.toUpperCase(intermediateBases);
|
||||||
|
}
|
||||||
|
MergedAllelesData mergeData = new MergedAllelesData(intermediateBases, vc1, vc2); // ensures that the reference allele is added
|
||||||
|
|
||||||
|
GenotypeCollection mergedGenotypes = GenotypeCollection.create();
|
||||||
|
for (final Genotype gt1 : vc1.getGenotypes()) {
|
||||||
|
Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
||||||
|
|
||||||
|
List<Allele> site1Alleles = gt1.getAlleles();
|
||||||
|
List<Allele> site2Alleles = gt2.getAlleles();
|
||||||
|
|
||||||
|
List<Allele> mergedAllelesForSample = new LinkedList<Allele>();
|
||||||
|
|
||||||
|
/* NOTE: Since merged alleles are added to mergedAllelesForSample in the SAME order as in the input VC records,
|
||||||
|
we preserve phase information (if any) relative to whatever precedes vc1:
|
||||||
|
*/
|
||||||
|
Iterator<Allele> all2It = site2Alleles.iterator();
|
||||||
|
for (Allele all1 : site1Alleles) {
|
||||||
|
Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable()
|
||||||
|
|
||||||
|
Allele mergedAllele = mergeData.ensureMergedAllele(all1, all2);
|
||||||
|
mergedAllelesForSample.add(mergedAllele);
|
||||||
|
}
|
||||||
|
|
||||||
|
double mergedGQ = Math.max(gt1.getNegLog10PError(), gt2.getNegLog10PError());
|
||||||
|
Set<String> mergedGtFilters = new HashSet<String>(); // Since gt1 and gt2 were unfiltered, the Genotype remains unfiltered
|
||||||
|
|
||||||
|
Map<String, Object> mergedGtAttribs = new HashMap<String, Object>();
|
||||||
|
PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(gt1, gt2);
|
||||||
|
if (phaseQual.PQ != null)
|
||||||
|
mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ);
|
||||||
|
|
||||||
|
Genotype mergedGt = new Genotype(gt1.getSampleName(), mergedAllelesForSample, mergedGQ, mergedGtFilters, mergedGtAttribs, phaseQual.isPhased);
|
||||||
|
mergedGenotypes.add(mergedGt);
|
||||||
|
}
|
||||||
|
|
||||||
|
String mergedName = mergeVariantContextNames(vc1.getSource(), vc2.getSource());
|
||||||
|
double mergedNegLog10PError = Math.max(vc1.getNegLog10PError(), vc2.getNegLog10PError());
|
||||||
|
Set<String> mergedFilters = new HashSet<String>(); // Since vc1 and vc2 were unfiltered, the merged record remains unfiltered
|
||||||
|
Map<String, Object> mergedAttribs = mergeVariantContextAttributes(vc1, vc2);
|
||||||
|
|
||||||
|
// ids
|
||||||
|
List<String> mergedIDs = new ArrayList<String>();
|
||||||
|
if ( vc1.hasID() ) mergedIDs.add(vc1.getID());
|
||||||
|
if ( vc2.hasID() ) mergedIDs.add(vc2.getID());
|
||||||
|
String mergedID = Utils.join(VCFConstants.ID_FIELD_SEPARATOR, mergedIDs);
|
||||||
|
|
||||||
|
// TODO -- FIX ID
|
||||||
|
VariantContext mergedVc = new VariantContext(mergedName, mergedID, vc1.getChr(), vc1.getStart(), vc2.getEnd(), mergeData.getAllMergedAlleles(), mergedGenotypes, mergedNegLog10PError, mergedFilters, mergedAttribs);
|
||||||
|
|
||||||
|
mergedAttribs = new HashMap<String, Object>(mergedVc.getAttributes());
|
||||||
|
VariantContextUtils.calculateChromosomeCounts(mergedVc, mergedAttribs, true);
|
||||||
|
mergedVc = VariantContext.modifyAttributes(mergedVc, mergedAttribs);
|
||||||
|
|
||||||
|
return mergedVc;
|
||||||
|
}
|
||||||
|
|
||||||
|
static String mergeVariantContextNames(String name1, String name2) {
|
||||||
|
return name1 + "_" + name2;
|
||||||
|
}
|
||||||
|
|
||||||
|
static Map<String, Object> mergeVariantContextAttributes(VariantContext vc1, VariantContext vc2) {
|
||||||
|
Map<String, Object> mergedAttribs = new HashMap<String, Object>();
|
||||||
|
|
||||||
|
List<VariantContext> vcList = new LinkedList<VariantContext>();
|
||||||
|
vcList.add(vc1);
|
||||||
|
vcList.add(vc2);
|
||||||
|
|
||||||
|
String[] MERGE_OR_ATTRIBS = {VCFConstants.DBSNP_KEY};
|
||||||
|
for (String orAttrib : MERGE_OR_ATTRIBS) {
|
||||||
|
boolean attribVal = false;
|
||||||
|
for (VariantContext vc : vcList) {
|
||||||
|
attribVal = vc.getAttributeAsBoolean(orAttrib, false);
|
||||||
|
if (attribVal) // already true, so no reason to continue:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
mergedAttribs.put(orAttrib, attribVal);
|
||||||
|
}
|
||||||
|
|
||||||
|
return mergedAttribs;
|
||||||
|
}
|
||||||
|
|
||||||
|
static boolean mergeIntoMNPvalidationCheck(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2) {
|
||||||
|
GenomeLoc loc1 = VariantContextUtils.getLocation(genomeLocParser, vc1);
|
||||||
|
GenomeLoc loc2 = VariantContextUtils.getLocation(genomeLocParser, vc2);
|
||||||
|
|
||||||
|
if (!loc1.onSameContig(loc2))
|
||||||
|
throw new ReviewedStingException("Can only merge vc1, vc2 if on the same chromosome");
|
||||||
|
|
||||||
|
if (!loc1.isBefore(loc2))
|
||||||
|
throw new ReviewedStingException("Can only merge if vc1 is BEFORE vc2");
|
||||||
|
|
||||||
|
if (vc1.isFiltered() || vc2.isFiltered())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (!vc1.getSampleNames().equals(vc2.getSampleNames())) // vc1, vc2 refer to different sample sets
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (!allGenotypesAreUnfilteredAndCalled(vc1) || !allGenotypesAreUnfilteredAndCalled(vc2))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static boolean allGenotypesAreUnfilteredAndCalled(VariantContext vc) {
|
||||||
|
for (final Genotype gt : vc.getGenotypes()) {
|
||||||
|
if (gt.isNoCall() || gt.isFiltered())
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) {
|
||||||
|
// Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1:
|
||||||
|
for (final Genotype gt1 : vc1.getGenotypes()) {
|
||||||
|
Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
||||||
|
|
||||||
|
if (!alleleSegregationIsKnown(gt1, gt2)) // can merge if: phased, or if either is a hom
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static boolean alleleSegregationIsKnown(Genotype gt1, Genotype gt2) {
|
||||||
|
if (gt1.getPloidy() != gt2.getPloidy())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* If gt2 is phased or hom, then could even be MERGED with gt1 [This is standard].
|
||||||
|
|
||||||
|
HOWEVER, EVEN if this is not the case, but gt1.isHom(),
|
||||||
|
it is trivially known that each of gt2's alleles segregate with the single allele type present in gt1.
|
||||||
|
*/
|
||||||
|
return (gt2.isPhased() || gt2.isHom() || gt1.isHom());
|
||||||
|
}
|
||||||
|
|
||||||
|
static PhaseAndQuality calcPhaseForMergedGenotypes(Genotype gt1, Genotype gt2) {
|
||||||
|
if (gt2.isPhased() || gt2.isHom())
|
||||||
|
return new PhaseAndQuality(gt1); // maintain the phase of gt1
|
||||||
|
|
||||||
|
if (!gt1.isHom())
|
||||||
|
throw new ReviewedStingException("alleleSegregationIsKnown(gt1, gt2) implies: gt2.genotypesArePhased() || gt2.isHom() || gt1.isHom()");
|
||||||
|
|
||||||
|
/* We're dealing with: gt1.isHom(), gt2.isHet(), !gt2.genotypesArePhased(); so, the merged (het) Genotype is not phased relative to the previous Genotype
|
||||||
|
|
||||||
|
For example, if we're merging the third Genotype with the second one:
|
||||||
|
0/1
|
||||||
|
1|1
|
||||||
|
0/1
|
||||||
|
|
||||||
|
Then, we want to output:
|
||||||
|
0/1
|
||||||
|
1/2
|
||||||
|
*/
|
||||||
|
return new PhaseAndQuality(gt2); // maintain the phase of gt2 [since !gt2.genotypesArePhased()]
|
||||||
|
}
|
||||||
|
|
||||||
|
static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) {
|
||||||
|
for (final Genotype gt1 : vc1.getGenotypes()) {
|
||||||
|
Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
||||||
|
|
||||||
|
List<Allele> site1Alleles = gt1.getAlleles();
|
||||||
|
List<Allele> site2Alleles = gt2.getAlleles();
|
||||||
|
|
||||||
|
Iterator<Allele> all2It = site2Alleles.iterator();
|
||||||
|
for (Allele all1 : site1Alleles) {
|
||||||
|
Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable()
|
||||||
|
|
||||||
|
if (all1.isNonReference() && all2.isNonReference()) // corresponding alleles are alternate
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static boolean doubleAllelesSegregatePerfectlyAmongSamples(VariantContext vc1, VariantContext vc2) {
|
||||||
|
// Check that Alleles at vc1 and at vc2 always segregate together in all samples (including reference):
|
||||||
|
Map<Allele, Allele> allele1ToAllele2 = new HashMap<Allele, Allele>();
|
||||||
|
Map<Allele, Allele> allele2ToAllele1 = new HashMap<Allele, Allele>();
|
||||||
|
|
||||||
|
// Note the segregation of the alleles for the reference genome:
|
||||||
|
allele1ToAllele2.put(vc1.getReference(), vc2.getReference());
|
||||||
|
allele2ToAllele1.put(vc2.getReference(), vc1.getReference());
|
||||||
|
|
||||||
|
// Note the segregation of the alleles for each sample (and check that it is consistent with the reference and all previous samples).
|
||||||
|
for (final Genotype gt1 : vc1.getGenotypes()) {
|
||||||
|
Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
||||||
|
|
||||||
|
List<Allele> site1Alleles = gt1.getAlleles();
|
||||||
|
List<Allele> site2Alleles = gt2.getAlleles();
|
||||||
|
|
||||||
|
Iterator<Allele> all2It = site2Alleles.iterator();
|
||||||
|
for (Allele all1 : site1Alleles) {
|
||||||
|
Allele all2 = all2It.next();
|
||||||
|
|
||||||
|
Allele all1To2 = allele1ToAllele2.get(all1);
|
||||||
|
if (all1To2 == null)
|
||||||
|
allele1ToAllele2.put(all1, all2);
|
||||||
|
else if (!all1To2.equals(all2)) // all1 segregates with two different alleles at site 2
|
||||||
|
return false;
|
||||||
|
|
||||||
|
Allele all2To1 = allele2ToAllele1.get(all2);
|
||||||
|
if (all2To1 == null)
|
||||||
|
allele2ToAllele1.put(all2, all1);
|
||||||
|
else if (!all2To1.equals(all1)) // all2 segregates with two different alleles at site 1
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
abstract static class AlleleMergeRule {
|
||||||
|
// vc1, vc2 are ONLY passed to allelesShouldBeMerged() if mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2) AND allSamplesAreMergeable(vc1, vc2):
|
||||||
|
abstract public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2);
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return "all samples are mergeable";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static class AlleleOneAndTwo {
|
||||||
|
private Allele all1;
|
||||||
|
private Allele all2;
|
||||||
|
|
||||||
|
public AlleleOneAndTwo(Allele all1, Allele all2) {
|
||||||
|
this.all1 = all1;
|
||||||
|
this.all2 = all2;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int hashCode() {
|
||||||
|
return all1.hashCode() + all2.hashCode();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean equals(Object other) {
|
||||||
|
if (!(other instanceof AlleleOneAndTwo))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
AlleleOneAndTwo otherAot = (AlleleOneAndTwo) other;
|
||||||
|
return (this.all1.equals(otherAot.all1) && this.all2.equals(otherAot.all2));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static class MergedAllelesData {
|
||||||
|
private Map<AlleleOneAndTwo, Allele> mergedAlleles;
|
||||||
|
private byte[] intermediateBases;
|
||||||
|
private int intermediateLength;
|
||||||
|
|
||||||
|
public MergedAllelesData(byte[] intermediateBases, VariantContext vc1, VariantContext vc2) {
|
||||||
|
this.mergedAlleles = new HashMap<AlleleOneAndTwo, Allele>(); // implemented equals() and hashCode() for AlleleOneAndTwo
|
||||||
|
this.intermediateBases = intermediateBases;
|
||||||
|
this.intermediateLength = this.intermediateBases != null ? this.intermediateBases.length : 0;
|
||||||
|
|
||||||
|
this.ensureMergedAllele(vc1.getReference(), vc2.getReference(), true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Allele ensureMergedAllele(Allele all1, Allele all2) {
|
||||||
|
return ensureMergedAllele(all1, all2, false); // false <-> since even if all1+all2 = reference, it was already created in the constructor
|
||||||
|
}
|
||||||
|
|
||||||
|
private Allele ensureMergedAllele(Allele all1, Allele all2, boolean creatingReferenceForFirstTime) {
|
||||||
|
AlleleOneAndTwo all12 = new AlleleOneAndTwo(all1, all2);
|
||||||
|
Allele mergedAllele = mergedAlleles.get(all12);
|
||||||
|
|
||||||
|
if (mergedAllele == null) {
|
||||||
|
byte[] bases1 = all1.getBases();
|
||||||
|
byte[] bases2 = all2.getBases();
|
||||||
|
|
||||||
|
byte[] mergedBases = new byte[bases1.length + intermediateLength + bases2.length];
|
||||||
|
System.arraycopy(bases1, 0, mergedBases, 0, bases1.length);
|
||||||
|
if (intermediateBases != null)
|
||||||
|
System.arraycopy(intermediateBases, 0, mergedBases, bases1.length, intermediateLength);
|
||||||
|
System.arraycopy(bases2, 0, mergedBases, bases1.length + intermediateLength, bases2.length);
|
||||||
|
|
||||||
|
mergedAllele = Allele.create(mergedBases, creatingReferenceForFirstTime);
|
||||||
|
mergedAlleles.put(all12, mergedAllele);
|
||||||
|
}
|
||||||
|
|
||||||
|
return mergedAllele;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Set<Allele> getAllMergedAlleles() {
|
||||||
|
return new HashSet<Allele>(mergedAlleles.values());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static class PhaseAndQuality {
|
||||||
|
public boolean isPhased;
|
||||||
|
public Double PQ = null;
|
||||||
|
|
||||||
|
public PhaseAndQuality(Genotype gt) {
|
||||||
|
this.isPhased = gt.isPhased();
|
||||||
|
if (this.isPhased) {
|
||||||
|
this.PQ = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1);
|
||||||
|
if ( this.PQ == -1 ) this.PQ = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -26,7 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||||
/* PreciseNonNegativeDouble permits arithmetic operations on NON-NEGATIVE double values
|
/* PreciseNonNegativeDouble permits arithmetic operations on NON-NEGATIVE double values
|
||||||
with precision (prevents underflow by representing in log10 space).
|
with precision (prevents underflow by representing in log10 space).
|
||||||
*/
|
*/
|
||||||
public class PreciseNonNegativeDouble implements Comparable<PreciseNonNegativeDouble> {
|
class PreciseNonNegativeDouble implements Comparable<PreciseNonNegativeDouble> {
|
||||||
private static final double EQUALS_THRESH = 1e-6;
|
private static final double EQUALS_THRESH = 1e-6;
|
||||||
private static final double INFINITY = Double.POSITIVE_INFINITY;
|
private static final double INFINITY = Double.POSITIVE_INFINITY;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -34,7 +34,6 @@ import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.DisjointSet;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
|
|
@ -1052,7 +1051,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
private void writeVCF(VariantContext vc) {
|
private void writeVCF(VariantContext vc) {
|
||||||
if (samplesToPhase == null || vc.isNotFiltered())
|
if (samplesToPhase == null || vc.isNotFiltered())
|
||||||
//if ( samplesToPhase == null || (vc.isVariant() && vc.isNotFiltered())) // if we are only operating on specific samples, don't write out all sites, just those where the VC is variant
|
//if ( samplesToPhase == null || (vc.isVariant() && vc.isNotFiltered())) // if we are only operating on specific samples, don't write out all sites, just those where the VC is variant
|
||||||
WriteVCF.writeVCF(vc, writer, logger);
|
writer.add(vc);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean processVariantInPhasing(VariantContext vc) {
|
public static boolean processVariantInPhasing(VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@
|
||||||
*/
|
*/
|
||||||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||||
|
|
||||||
public class ReadBase {
|
class ReadBase {
|
||||||
public String readName;
|
public String readName;
|
||||||
public byte base;
|
public byte base;
|
||||||
public int mappingQual;
|
public int mappingQual;
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,7 @@ import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
|
|
||||||
public class ReadBasesAtPosition implements Iterable<ReadBase> {
|
class ReadBasesAtPosition implements Iterable<ReadBase> {
|
||||||
// list of: <read name, base>
|
// list of: <read name, base>
|
||||||
private LinkedList<ReadBase> bases;
|
private LinkedList<ReadBase> bases;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,189 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010, The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
/* Some methods for extracting RefSeq-related data from annotated VCF INFO fields:
|
|
||||||
*/
|
|
||||||
public class RefSeqDataParser {
|
|
||||||
private static String REFSEQ_PREFIX = "refseq.";
|
|
||||||
|
|
||||||
private static String NUM_RECORDS_KEY = REFSEQ_PREFIX + "numMatchingRecords";
|
|
||||||
private static String NAME_KEY = REFSEQ_PREFIX + "name";
|
|
||||||
private static String NAME2_KEY = REFSEQ_PREFIX + "name2";
|
|
||||||
|
|
||||||
private static String[] NAME_KEYS = {NAME_KEY, NAME2_KEY};
|
|
||||||
|
|
||||||
private static Map<String, String> getRefSeqEntriesToNames(VariantContext vc, boolean getName2) {
|
|
||||||
String nameKeyToUse = getName2 ? NAME2_KEY : NAME_KEY;
|
|
||||||
String nameKeyToUseMultiplePrefix = nameKeyToUse + "_";
|
|
||||||
|
|
||||||
Map<String, String> entriesToNames = new HashMap<String, String>();
|
|
||||||
int numRecords = vc.getAttributeAsInt(NUM_RECORDS_KEY, -1);
|
|
||||||
if (numRecords != -1) {
|
|
||||||
boolean done = false;
|
|
||||||
|
|
||||||
if (numRecords == 1) { // Check if perhaps the single record doesn't end with "_1":
|
|
||||||
String name = vc.getAttributeAsString(nameKeyToUse, null);
|
|
||||||
if (name != null) {
|
|
||||||
entriesToNames.put(nameKeyToUse, name);
|
|
||||||
done = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!done) {
|
|
||||||
for (int i = 1; i <= numRecords; i++) {
|
|
||||||
String key = nameKeyToUseMultiplePrefix + i;
|
|
||||||
String name = vc.getAttributeAsString(key, null);
|
|
||||||
if (name != null)
|
|
||||||
entriesToNames.put(key, name);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else { // no entry with the # of records:
|
|
||||||
String name = vc.getAttributeAsString(nameKeyToUse, null);
|
|
||||||
if (name != null) {
|
|
||||||
entriesToNames.put(nameKeyToUse, name);
|
|
||||||
}
|
|
||||||
else { // Check all INFO fields for a match (if there are multiple entries):
|
|
||||||
for (Map.Entry<String, Object> entry : vc.getAttributes().entrySet()) {
|
|
||||||
String key = entry.getKey();
|
|
||||||
if (key.startsWith(nameKeyToUseMultiplePrefix))
|
|
||||||
entriesToNames.put(key, entry.getValue().toString());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return entriesToNames;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Map<String, String> getRefSeqEntriesToNames(VariantContext vc) {
|
|
||||||
return getRefSeqEntriesToNames(vc, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Set<String> getRefSeqNames(VariantContext vc, boolean getName2) {
|
|
||||||
return new TreeSet<String>(getRefSeqEntriesToNames(vc, getName2).values());
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Set<String> getRefSeqNames(VariantContext vc) {
|
|
||||||
return getRefSeqNames(vc, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Map<String, Object> getMergedRefSeqNameAttributes(VariantContext vc1, VariantContext vc2) {
|
|
||||||
Map<String, Object> refSeqNameAttribs = new HashMap<String, Object>();
|
|
||||||
|
|
||||||
Map<String, RefSeqEntry> entriesMap1 = getAllRefSeqEntriesByName(vc1);
|
|
||||||
Map<String, RefSeqEntry> entriesMap2 = getAllRefSeqEntriesByName(vc2);
|
|
||||||
|
|
||||||
Set<String> commonNames = entriesMap1.keySet();
|
|
||||||
commonNames.retainAll(entriesMap2.keySet());
|
|
||||||
boolean addSuffix = commonNames.size() > 1;
|
|
||||||
int nextCount = 1;
|
|
||||||
|
|
||||||
for (String name : commonNames) {
|
|
||||||
RefSeqEntry refseq1 = entriesMap1.get(name);
|
|
||||||
RefSeqEntry refseq2 = entriesMap2.get(name);
|
|
||||||
|
|
||||||
String keySuffix = "";
|
|
||||||
if (addSuffix)
|
|
||||||
keySuffix = "_" + nextCount;
|
|
||||||
|
|
||||||
boolean added = false;
|
|
||||||
for (String key : NAME_KEYS) {
|
|
||||||
Object obj1 = refseq1.info.get(key);
|
|
||||||
Object obj2 = refseq2.info.get(key);
|
|
||||||
if (obj1 != null && obj2 != null && obj1.equals(obj2)) {
|
|
||||||
added = true;
|
|
||||||
String useKey = key + keySuffix;
|
|
||||||
refSeqNameAttribs.put(useKey, obj1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (added)
|
|
||||||
nextCount++;
|
|
||||||
}
|
|
||||||
int totalCount = nextCount - 1; // since incremented count one extra time
|
|
||||||
if (totalCount > 1)
|
|
||||||
refSeqNameAttribs.put(NUM_RECORDS_KEY, totalCount);
|
|
||||||
|
|
||||||
return refSeqNameAttribs;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Map<String, Object> removeRefSeqAttributes(Map<String, Object> attributes) {
|
|
||||||
Map<String, Object> removedRefSeqAttributes = new HashMap<String, Object>(attributes);
|
|
||||||
|
|
||||||
Iterator<Map.Entry<String, Object>> attrIt = removedRefSeqAttributes.entrySet().iterator();
|
|
||||||
while (attrIt.hasNext()) {
|
|
||||||
String key = attrIt.next().getKey();
|
|
||||||
if (key.startsWith(REFSEQ_PREFIX))
|
|
||||||
attrIt.remove();
|
|
||||||
}
|
|
||||||
|
|
||||||
return removedRefSeqAttributes;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Map<String, RefSeqEntry> getAllRefSeqEntriesByName(VariantContext vc) {
|
|
||||||
Map<String, RefSeqEntry> nameToEntries = new TreeMap<String, RefSeqEntry>();
|
|
||||||
|
|
||||||
List<RefSeqEntry> allEntries = getAllRefSeqEntries(vc);
|
|
||||||
for (RefSeqEntry entry : allEntries) {
|
|
||||||
Object name = entry.info.get(NAME_KEY);
|
|
||||||
if (name != null)
|
|
||||||
nameToEntries.put(name.toString(), entry);
|
|
||||||
}
|
|
||||||
|
|
||||||
return nameToEntries;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns a List of SEPARATE Map<refseq.ENTRY, refseq.VALUE> for EACH RefSeq annotation (i.e., each gene), stripping out the "_1", "_2", etc.
|
|
||||||
private static List<RefSeqEntry> getAllRefSeqEntries(VariantContext vc) {
|
|
||||||
List<RefSeqEntry> allRefSeq = new LinkedList<RefSeqEntry>();
|
|
||||||
|
|
||||||
for (Map.Entry<String, String> entryToName : getRefSeqEntriesToNames(vc).entrySet()) {
|
|
||||||
String entry = entryToName.getKey();
|
|
||||||
String entrySuffix = entry.replaceFirst(NAME_KEY, "");
|
|
||||||
allRefSeq.add(new RefSeqEntry(vc, entrySuffix));
|
|
||||||
}
|
|
||||||
|
|
||||||
return allRefSeq;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class RefSeqEntry {
|
|
||||||
public Map<String, Object> info;
|
|
||||||
|
|
||||||
public RefSeqEntry(VariantContext vc, String entrySuffix) {
|
|
||||||
this.info = new HashMap<String, Object>();
|
|
||||||
|
|
||||||
for (Map.Entry<String, Object> attribEntry : vc.getAttributes().entrySet()) {
|
|
||||||
String key = attribEntry.getKey();
|
|
||||||
if (key.startsWith(REFSEQ_PREFIX) && key.endsWith(entrySuffix)) {
|
|
||||||
String genericKey = key.replaceAll(entrySuffix, "");
|
|
||||||
this.info.put(genericKey, attribEntry.getValue());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -28,7 +28,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
|
|
||||||
public class SNPallelePair extends AllelePair {
|
class SNPallelePair extends AllelePair {
|
||||||
|
|
||||||
public SNPallelePair(Genotype gt) {
|
public SNPallelePair(Genotype gt) {
|
||||||
super(gt);
|
super(gt);
|
||||||
|
|
|
||||||
|
|
@ -1,34 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010, The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
|
|
||||||
public class WriteVCF {
|
|
||||||
public static void writeVCF(VariantContext vc, VCFWriter writer, Logger logger) {
|
|
||||||
writer.add(vc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -25,13 +25,10 @@ package org.broadinstitute.sting.utils.variantcontext;
|
||||||
|
|
||||||
import com.google.java.contract.Ensures;
|
import com.google.java.contract.Ensures;
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
|
||||||
import net.sf.samtools.util.StringUtil;
|
|
||||||
import org.apache.commons.jexl2.Expression;
|
import org.apache.commons.jexl2.Expression;
|
||||||
import org.apache.commons.jexl2.JexlEngine;
|
import org.apache.commons.jexl2.JexlEngine;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
|
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker;
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
@ -929,340 +926,4 @@ public class VariantContextUtils {
|
||||||
public static final GenomeLoc getLocation(GenomeLocParser genomeLocParser,VariantContext vc) {
|
public static final GenomeLoc getLocation(GenomeLocParser genomeLocParser,VariantContext vc) {
|
||||||
return genomeLocParser.createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd(), true);
|
return genomeLocParser.createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd(), true);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
public abstract static class AlleleMergeRule {
|
|
||||||
// vc1, vc2 are ONLY passed to allelesShouldBeMerged() if mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2) AND allSamplesAreMergeable(vc1, vc2):
|
|
||||||
abstract public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2);
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
return "all samples are mergeable";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// NOTE: returns null if vc1 and vc2 are not merged into a single MNP record
|
|
||||||
|
|
||||||
public static VariantContext mergeIntoMNP(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile, AlleleMergeRule alleleMergeRule) {
|
|
||||||
if (!mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2))
|
|
||||||
return null;
|
|
||||||
|
|
||||||
// Check that it's logically possible to merge the VCs:
|
|
||||||
if (!allSamplesAreMergeable(vc1, vc2))
|
|
||||||
return null;
|
|
||||||
|
|
||||||
// Check if there's a "point" in merging the VCs (e.g., annotations could be changed)
|
|
||||||
if (!alleleMergeRule.allelesShouldBeMerged(vc1, vc2))
|
|
||||||
return null;
|
|
||||||
|
|
||||||
return reallyMergeIntoMNP(vc1, vc2, referenceFile);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static VariantContext reallyMergeIntoMNP(VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) {
|
|
||||||
int startInter = vc1.getEnd() + 1;
|
|
||||||
int endInter = vc2.getStart() - 1;
|
|
||||||
byte[] intermediateBases = null;
|
|
||||||
if (startInter <= endInter) {
|
|
||||||
intermediateBases = referenceFile.getSubsequenceAt(vc1.getChr(), startInter, endInter).getBases();
|
|
||||||
StringUtil.toUpperCase(intermediateBases);
|
|
||||||
}
|
|
||||||
MergedAllelesData mergeData = new MergedAllelesData(intermediateBases, vc1, vc2); // ensures that the reference allele is added
|
|
||||||
|
|
||||||
GenotypeCollection mergedGenotypes = GenotypeCollection.create();
|
|
||||||
for (final Genotype gt1 : vc1.getGenotypes()) {
|
|
||||||
Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
|
||||||
|
|
||||||
List<Allele> site1Alleles = gt1.getAlleles();
|
|
||||||
List<Allele> site2Alleles = gt2.getAlleles();
|
|
||||||
|
|
||||||
List<Allele> mergedAllelesForSample = new LinkedList<Allele>();
|
|
||||||
|
|
||||||
/* NOTE: Since merged alleles are added to mergedAllelesForSample in the SAME order as in the input VC records,
|
|
||||||
we preserve phase information (if any) relative to whatever precedes vc1:
|
|
||||||
*/
|
|
||||||
Iterator<Allele> all2It = site2Alleles.iterator();
|
|
||||||
for (Allele all1 : site1Alleles) {
|
|
||||||
Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable()
|
|
||||||
|
|
||||||
Allele mergedAllele = mergeData.ensureMergedAllele(all1, all2);
|
|
||||||
mergedAllelesForSample.add(mergedAllele);
|
|
||||||
}
|
|
||||||
|
|
||||||
double mergedGQ = Math.max(gt1.getNegLog10PError(), gt2.getNegLog10PError());
|
|
||||||
Set<String> mergedGtFilters = new HashSet<String>(); // Since gt1 and gt2 were unfiltered, the Genotype remains unfiltered
|
|
||||||
|
|
||||||
Map<String, Object> mergedGtAttribs = new HashMap<String, Object>();
|
|
||||||
PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(gt1, gt2);
|
|
||||||
if (phaseQual.PQ != null)
|
|
||||||
mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ);
|
|
||||||
|
|
||||||
Genotype mergedGt = new Genotype(gt1.getSampleName(), mergedAllelesForSample, mergedGQ, mergedGtFilters, mergedGtAttribs, phaseQual.isPhased);
|
|
||||||
mergedGenotypes.add(mergedGt);
|
|
||||||
}
|
|
||||||
|
|
||||||
String mergedName = VariantContextUtils.mergeVariantContextNames(vc1.getSource(), vc2.getSource());
|
|
||||||
double mergedNegLog10PError = Math.max(vc1.getNegLog10PError(), vc2.getNegLog10PError());
|
|
||||||
Set<String> mergedFilters = new HashSet<String>(); // Since vc1 and vc2 were unfiltered, the merged record remains unfiltered
|
|
||||||
Map<String, Object> mergedAttribs = VariantContextUtils.mergeVariantContextAttributes(vc1, vc2);
|
|
||||||
|
|
||||||
// ids
|
|
||||||
List<String> mergedIDs = new ArrayList<String>();
|
|
||||||
if ( vc1.hasID() ) mergedIDs.add(vc1.getID());
|
|
||||||
if ( vc2.hasID() ) mergedIDs.add(vc2.getID());
|
|
||||||
String mergedID = Utils.join(VCFConstants.ID_FIELD_SEPARATOR, mergedIDs);
|
|
||||||
|
|
||||||
// TODO -- FIX ID
|
|
||||||
VariantContext mergedVc = new VariantContext(mergedName, mergedID, vc1.getChr(), vc1.getStart(), vc2.getEnd(), mergeData.getAllMergedAlleles(), mergedGenotypes, mergedNegLog10PError, mergedFilters, mergedAttribs);
|
|
||||||
|
|
||||||
mergedAttribs = new HashMap<String, Object>(mergedVc.getAttributes());
|
|
||||||
VariantContextUtils.calculateChromosomeCounts(mergedVc, mergedAttribs, true);
|
|
||||||
mergedVc = VariantContext.modifyAttributes(mergedVc, mergedAttribs);
|
|
||||||
|
|
||||||
return mergedVc;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class AlleleOneAndTwo {
|
|
||||||
private Allele all1;
|
|
||||||
private Allele all2;
|
|
||||||
|
|
||||||
public AlleleOneAndTwo(Allele all1, Allele all2) {
|
|
||||||
this.all1 = all1;
|
|
||||||
this.all2 = all2;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int hashCode() {
|
|
||||||
return all1.hashCode() + all2.hashCode();
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean equals(Object other) {
|
|
||||||
if (!(other instanceof AlleleOneAndTwo))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
AlleleOneAndTwo otherAot = (AlleleOneAndTwo) other;
|
|
||||||
return (this.all1.equals(otherAot.all1) && this.all2.equals(otherAot.all2));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class MergedAllelesData {
|
|
||||||
private Map<AlleleOneAndTwo, Allele> mergedAlleles;
|
|
||||||
private byte[] intermediateBases;
|
|
||||||
private int intermediateLength;
|
|
||||||
|
|
||||||
public MergedAllelesData(byte[] intermediateBases, VariantContext vc1, VariantContext vc2) {
|
|
||||||
this.mergedAlleles = new HashMap<AlleleOneAndTwo, Allele>(); // implemented equals() and hashCode() for AlleleOneAndTwo
|
|
||||||
this.intermediateBases = intermediateBases;
|
|
||||||
this.intermediateLength = this.intermediateBases != null ? this.intermediateBases.length : 0;
|
|
||||||
|
|
||||||
this.ensureMergedAllele(vc1.getReference(), vc2.getReference(), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Allele ensureMergedAllele(Allele all1, Allele all2) {
|
|
||||||
return ensureMergedAllele(all1, all2, false); // false <-> since even if all1+all2 = reference, it was already created in the constructor
|
|
||||||
}
|
|
||||||
|
|
||||||
private Allele ensureMergedAllele(Allele all1, Allele all2, boolean creatingReferenceForFirstTime) {
|
|
||||||
AlleleOneAndTwo all12 = new AlleleOneAndTwo(all1, all2);
|
|
||||||
Allele mergedAllele = mergedAlleles.get(all12);
|
|
||||||
|
|
||||||
if (mergedAllele == null) {
|
|
||||||
byte[] bases1 = all1.getBases();
|
|
||||||
byte[] bases2 = all2.getBases();
|
|
||||||
|
|
||||||
byte[] mergedBases = new byte[bases1.length + intermediateLength + bases2.length];
|
|
||||||
System.arraycopy(bases1, 0, mergedBases, 0, bases1.length);
|
|
||||||
if (intermediateBases != null)
|
|
||||||
System.arraycopy(intermediateBases, 0, mergedBases, bases1.length, intermediateLength);
|
|
||||||
System.arraycopy(bases2, 0, mergedBases, bases1.length + intermediateLength, bases2.length);
|
|
||||||
|
|
||||||
mergedAllele = Allele.create(mergedBases, creatingReferenceForFirstTime);
|
|
||||||
mergedAlleles.put(all12, mergedAllele);
|
|
||||||
}
|
|
||||||
|
|
||||||
return mergedAllele;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Set<Allele> getAllMergedAlleles() {
|
|
||||||
return new HashSet<Allele>(mergedAlleles.values());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static String mergeVariantContextNames(String name1, String name2) {
|
|
||||||
return name1 + "_" + name2;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Map<String, Object> mergeVariantContextAttributes(VariantContext vc1, VariantContext vc2) {
|
|
||||||
Map<String, Object> mergedAttribs = new HashMap<String, Object>();
|
|
||||||
|
|
||||||
List<VariantContext> vcList = new LinkedList<VariantContext>();
|
|
||||||
vcList.add(vc1);
|
|
||||||
vcList.add(vc2);
|
|
||||||
|
|
||||||
String[] MERGE_OR_ATTRIBS = {VCFConstants.DBSNP_KEY};
|
|
||||||
for (String orAttrib : MERGE_OR_ATTRIBS) {
|
|
||||||
boolean attribVal = false;
|
|
||||||
for (VariantContext vc : vcList) {
|
|
||||||
attribVal = vc.getAttributeAsBoolean(orAttrib, false);
|
|
||||||
if (attribVal) // already true, so no reason to continue:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
mergedAttribs.put(orAttrib, attribVal);
|
|
||||||
}
|
|
||||||
|
|
||||||
return mergedAttribs;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static boolean mergeIntoMNPvalidationCheck(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2) {
|
|
||||||
GenomeLoc loc1 = VariantContextUtils.getLocation(genomeLocParser, vc1);
|
|
||||||
GenomeLoc loc2 = VariantContextUtils.getLocation(genomeLocParser, vc2);
|
|
||||||
|
|
||||||
if (!loc1.onSameContig(loc2))
|
|
||||||
throw new ReviewedStingException("Can only merge vc1, vc2 if on the same chromosome");
|
|
||||||
|
|
||||||
if (!loc1.isBefore(loc2))
|
|
||||||
throw new ReviewedStingException("Can only merge if vc1 is BEFORE vc2");
|
|
||||||
|
|
||||||
if (vc1.isFiltered() || vc2.isFiltered())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (!vc1.getSampleNames().equals(vc2.getSampleNames())) // vc1, vc2 refer to different sample sets
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (!allGenotypesAreUnfilteredAndCalled(vc1) || !allGenotypesAreUnfilteredAndCalled(vc2))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static boolean allGenotypesAreUnfilteredAndCalled(VariantContext vc) {
|
|
||||||
for (final Genotype gt : vc.getGenotypes()) {
|
|
||||||
if (gt.isNoCall() || gt.isFiltered())
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Assumes that vc1 and vc2 were already checked to have the same sample names:
|
|
||||||
|
|
||||||
private static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) {
|
|
||||||
// Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1:
|
|
||||||
for (final Genotype gt1 : vc1.getGenotypes()) {
|
|
||||||
Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
|
||||||
|
|
||||||
if (!alleleSegregationIsKnown(gt1, gt2)) // can merge if: phased, or if either is a hom
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean alleleSegregationIsKnown(Genotype gt1, Genotype gt2) {
|
|
||||||
if (gt1.getPloidy() != gt2.getPloidy())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/* If gt2 is phased or hom, then could even be MERGED with gt1 [This is standard].
|
|
||||||
|
|
||||||
HOWEVER, EVEN if this is not the case, but gt1.isHom(),
|
|
||||||
it is trivially known that each of gt2's alleles segregate with the single allele type present in gt1.
|
|
||||||
*/
|
|
||||||
return (gt2.isPhased() || gt2.isHom() || gt1.isHom());
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class PhaseAndQuality {
|
|
||||||
public boolean isPhased;
|
|
||||||
public Double PQ = null;
|
|
||||||
|
|
||||||
public PhaseAndQuality(Genotype gt) {
|
|
||||||
this.isPhased = gt.isPhased();
|
|
||||||
if (this.isPhased) {
|
|
||||||
this.PQ = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1);
|
|
||||||
if ( this.PQ == -1 ) this.PQ = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Assumes that alleleSegregationIsKnown(gt1, gt2):
|
|
||||||
|
|
||||||
private static PhaseAndQuality calcPhaseForMergedGenotypes(Genotype gt1, Genotype gt2) {
|
|
||||||
if (gt2.isPhased() || gt2.isHom())
|
|
||||||
return new PhaseAndQuality(gt1); // maintain the phase of gt1
|
|
||||||
|
|
||||||
if (!gt1.isHom())
|
|
||||||
throw new ReviewedStingException("alleleSegregationIsKnown(gt1, gt2) implies: gt2.genotypesArePhased() || gt2.isHom() || gt1.isHom()");
|
|
||||||
|
|
||||||
/* We're dealing with: gt1.isHom(), gt2.isHet(), !gt2.genotypesArePhased(); so, the merged (het) Genotype is not phased relative to the previous Genotype
|
|
||||||
|
|
||||||
For example, if we're merging the third Genotype with the second one:
|
|
||||||
0/1
|
|
||||||
1|1
|
|
||||||
0/1
|
|
||||||
|
|
||||||
Then, we want to output:
|
|
||||||
0/1
|
|
||||||
1/2
|
|
||||||
*/
|
|
||||||
return new PhaseAndQuality(gt2); // maintain the phase of gt2 [since !gt2.genotypesArePhased()]
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Checks if any sample has a MNP of ALT alleles (segregating together):
|
|
||||||
[Assumes that vc1 and vc2 were already checked to have the same sample names && allSamplesAreMergeable(vc1, vc2)]
|
|
||||||
*/
|
|
||||||
|
|
||||||
public static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) {
|
|
||||||
for (final Genotype gt1 : vc1.getGenotypes()) {
|
|
||||||
Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
|
||||||
|
|
||||||
List<Allele> site1Alleles = gt1.getAlleles();
|
|
||||||
List<Allele> site2Alleles = gt2.getAlleles();
|
|
||||||
|
|
||||||
Iterator<Allele> all2It = site2Alleles.iterator();
|
|
||||||
for (Allele all1 : site1Alleles) {
|
|
||||||
Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable()
|
|
||||||
|
|
||||||
if (all1.isNonReference() && all2.isNonReference()) // corresponding alleles are alternate
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Checks if all samples are consistent in their haplotypes:
|
|
||||||
[Assumes that vc1 and vc2 were already checked to have the same sample names && allSamplesAreMergeable(vc1, vc2)]
|
|
||||||
*/
|
|
||||||
|
|
||||||
public static boolean doubleAllelesSegregatePerfectlyAmongSamples(VariantContext vc1, VariantContext vc2) {
|
|
||||||
// Check that Alleles at vc1 and at vc2 always segregate together in all samples (including reference):
|
|
||||||
Map<Allele, Allele> allele1ToAllele2 = new HashMap<Allele, Allele>();
|
|
||||||
Map<Allele, Allele> allele2ToAllele1 = new HashMap<Allele, Allele>();
|
|
||||||
|
|
||||||
// Note the segregation of the alleles for the reference genome:
|
|
||||||
allele1ToAllele2.put(vc1.getReference(), vc2.getReference());
|
|
||||||
allele2ToAllele1.put(vc2.getReference(), vc1.getReference());
|
|
||||||
|
|
||||||
// Note the segregation of the alleles for each sample (and check that it is consistent with the reference and all previous samples).
|
|
||||||
for (final Genotype gt1 : vc1.getGenotypes()) {
|
|
||||||
Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
|
||||||
|
|
||||||
List<Allele> site1Alleles = gt1.getAlleles();
|
|
||||||
List<Allele> site2Alleles = gt2.getAlleles();
|
|
||||||
|
|
||||||
Iterator<Allele> all2It = site2Alleles.iterator();
|
|
||||||
for (Allele all1 : site1Alleles) {
|
|
||||||
Allele all2 = all2It.next();
|
|
||||||
|
|
||||||
Allele all1To2 = allele1ToAllele2.get(all1);
|
|
||||||
if (all1To2 == null)
|
|
||||||
allele1ToAllele2.put(all1, all2);
|
|
||||||
else if (!all1To2.equals(all2)) // all1 segregates with two different alleles at site 2
|
|
||||||
return false;
|
|
||||||
|
|
||||||
Allele all2To1 = allele2ToAllele1.get(all2);
|
|
||||||
if (all2To1 == null)
|
|
||||||
allele2ToAllele1.put(all2, all1);
|
|
||||||
else if (!all2To1.equals(all1)) // all2 segregates with two different alleles at site 1
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue