Moved many unused phasing walkers and utilities to archive
This commit is contained in:
parent
460a51f473
commit
2b2514dad2
|
|
@ -29,7 +29,7 @@ import java.util.Arrays;
|
|||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
public abstract class BaseArray implements Comparable<BaseArray> {
|
||||
abstract class BaseArray implements Comparable<BaseArray> {
|
||||
protected Byte[] bases;
|
||||
|
||||
public BaseArray(byte[] bases) {
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ import java.util.Iterator;
|
|||
/*
|
||||
* CardinalityCounter object allows user to iterate over all assignment of arbitrary-cardinality variables.
|
||||
*/
|
||||
public class CardinalityCounter implements Iterator<int[]>, Iterable<int[]> {
|
||||
class CardinalityCounter implements Iterator<int[]>, Iterable<int[]> {
|
||||
private int[] cards;
|
||||
private int[] valList;
|
||||
private boolean hasNext;
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ import java.util.NoSuchElementException;
|
|||
It is UNIQUE in the fact that its iterator (BidirectionalIterator) can be cloned
|
||||
to save the current pointer for a later time (while the original iterator can continue to iterate).
|
||||
*/
|
||||
public class CloneableIteratorLinkedList<E> {
|
||||
class CloneableIteratorLinkedList<E> {
|
||||
private CloneableIteratorDoublyLinkedNode<E> first;
|
||||
private CloneableIteratorDoublyLinkedNode<E> last;
|
||||
private int size;
|
||||
|
|
|
|||
|
|
@ -21,13 +21,13 @@
|
|||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.utils;
|
||||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
public class DisjointSet {
|
||||
class DisjointSet {
|
||||
private ItemNode[] nodes;
|
||||
|
||||
public DisjointSet(int numItems) {
|
||||
|
|
@ -27,7 +27,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class Haplotype extends BaseArray implements Cloneable {
|
||||
class Haplotype extends BaseArray implements Cloneable {
|
||||
public Haplotype(byte[] bases) {
|
||||
super(bases);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,133 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFromRods;
|
||||
|
||||
|
||||
/**
|
||||
* Walks along all variant ROD loci, and merges consecutive sites if they segregate in all samples in the ROD.
|
||||
*/
|
||||
@Allows(value = {DataSource.REFERENCE})
|
||||
@Requires(value = {DataSource.REFERENCE})
|
||||
@By(DataSource.REFERENCE_ORDERED_DATA)
|
||||
|
||||
public class MergeMNPsWalker extends RodWalker<Integer, Integer> {
|
||||
|
||||
@Output(doc = "File to which variants should be written", required = true)
|
||||
protected VCFWriter writer = null;
|
||||
private MergeSegregatingAlternateAllelesVCFWriter vcMergerWriter = null;
|
||||
|
||||
@Argument(fullName = "maxGenomicDistanceForMNP", shortName = "maxDistMNP", doc = "The maximum reference-genome distance between consecutive heterozygous sites to permit merging phased VCF records into a MNP record; [default:1]", required = false)
|
||||
protected int maxGenomicDistanceForMNP = 1;
|
||||
|
||||
@Input(fullName="variant", shortName = "V", doc="Select variants from this VCF file", required=true)
|
||||
public RodBinding<VariantContext> variants;
|
||||
|
||||
public void initialize() {
|
||||
initializeVcfWriter();
|
||||
}
|
||||
|
||||
private void initializeVcfWriter() {
|
||||
// false <-> don't take control of writer, since didn't create it:
|
||||
vcMergerWriter = new MergeSegregatingAlternateAllelesVCFWriter(writer, getToolkit().getGenomeLocParser(), getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, logger, false);
|
||||
writer = null; // so it can't be accessed directly [i.e., not through vcMergerWriter]
|
||||
|
||||
// setup the header fields:
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
||||
|
||||
Map<String, VCFHeader> rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getName()));
|
||||
vcMergerWriter.writeHeader(new VCFHeader(hInfo, new TreeSet<String>(rodNameToHeader.get(variants.getName()).getGenotypeSamples())));
|
||||
}
|
||||
|
||||
public boolean generateExtendedEvents() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* For each site, send it to be (possibly) merged with previously observed sites.
|
||||
*
|
||||
* @param tracker the meta-data tracker
|
||||
* @param ref the reference base
|
||||
* @param context the context for the given locus
|
||||
* @return dummy Integer
|
||||
*/
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (tracker == null)
|
||||
return null;
|
||||
|
||||
for (VariantContext vc : tracker.getValues(variants, context.getLocation()))
|
||||
writeVCF(vc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
private void writeVCF(VariantContext vc) {
|
||||
WriteVCF.writeVCF(vc, vcMergerWriter, logger);
|
||||
}
|
||||
|
||||
public Integer reduce(Integer result, Integer total) {
|
||||
if (result == null)
|
||||
return total;
|
||||
|
||||
return total + result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Release any VariantContexts not yet processed.
|
||||
*
|
||||
* @param result Empty for now...
|
||||
*/
|
||||
public void onTraversalDone(Integer result) {
|
||||
vcMergerWriter.close();
|
||||
|
||||
System.out.println("Number of successive pairs of records: " + vcMergerWriter.getNumRecordsAttemptToMerge());
|
||||
System.out.println("Number of potentially merged records (" + vcMergerWriter.getVcMergeRule() + "): " + vcMergerWriter.getNumRecordsSatisfyingMergeRule());
|
||||
System.out.println("Number of records merged ("+ vcMergerWriter.getAlleleMergeRule() + "): " + vcMergerWriter.getNumMergedRecords());
|
||||
System.out.println(vcMergerWriter.getAltAlleleStats());
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -44,7 +44,7 @@ import java.util.*;
|
|||
|
||||
// Streams in VariantContext objects and streams out VariantContexts produced by merging phased segregating polymorphisms into MNP VariantContexts
|
||||
|
||||
public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
||||
class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
||||
private VCFWriter innerWriter;
|
||||
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
|
@ -52,7 +52,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
|||
private ReferenceSequenceFile referenceFileForMNPmerging;
|
||||
|
||||
private VariantContextMergeRule vcMergeRule;
|
||||
private VariantContextUtils.AlleleMergeRule alleleMergeRule;
|
||||
private PhasingUtils.AlleleMergeRule alleleMergeRule;
|
||||
|
||||
private String useSingleSample = null;
|
||||
|
||||
|
|
@ -71,7 +71,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
|||
// Should we call innerWriter.close() in close()
|
||||
private boolean takeOwnershipOfInner;
|
||||
|
||||
public MergeSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, GenomeLocParser genomeLocParser, File referenceFile, VariantContextMergeRule vcMergeRule, VariantContextUtils.AlleleMergeRule alleleMergeRule, String singleSample, boolean emitOnlyMergedRecords, Logger logger, boolean takeOwnershipOfInner, boolean trackAltAlleleStats) {
|
||||
public MergeSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, GenomeLocParser genomeLocParser, File referenceFile, VariantContextMergeRule vcMergeRule, PhasingUtils.AlleleMergeRule alleleMergeRule, String singleSample, boolean emitOnlyMergedRecords, Logger logger, boolean takeOwnershipOfInner, boolean trackAltAlleleStats) {
|
||||
this.innerWriter = innerWriter;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
try {
|
||||
|
|
@ -179,7 +179,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
|||
boolean mergedRecords = false;
|
||||
if (shouldAttemptToMerge) {
|
||||
numRecordsSatisfyingMergeRule++;
|
||||
VariantContext mergedVc = VariantContextUtils.mergeIntoMNP(genomeLocParser, vcfrWaitingToMerge.vc, vc, referenceFileForMNPmerging, alleleMergeRule);
|
||||
VariantContext mergedVc = PhasingUtils.mergeIntoMNP(genomeLocParser, vcfrWaitingToMerge.vc, vc, referenceFileForMNPmerging, alleleMergeRule);
|
||||
|
||||
if (mergedVc != null) {
|
||||
mergedRecords = true;
|
||||
|
|
@ -218,26 +218,6 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
|||
filteredVcfrList.clear();
|
||||
}
|
||||
|
||||
public int getNumRecordsAttemptToMerge() {
|
||||
return numRecordsAttemptToMerge;
|
||||
}
|
||||
|
||||
public int getNumRecordsSatisfyingMergeRule() {
|
||||
return numRecordsSatisfyingMergeRule;
|
||||
}
|
||||
|
||||
public int getNumMergedRecords() {
|
||||
return numMergedRecords;
|
||||
}
|
||||
|
||||
public VariantContextMergeRule getVcMergeRule() {
|
||||
return vcMergeRule;
|
||||
}
|
||||
|
||||
public VariantContextUtils.AlleleMergeRule getAlleleMergeRule() {
|
||||
return alleleMergeRule;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a string representation of this object.
|
||||
*
|
||||
|
|
@ -248,13 +228,6 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
|||
return getClass().getName();
|
||||
}
|
||||
|
||||
public String getAltAlleleStats() {
|
||||
if (altAlleleStats == null)
|
||||
return "";
|
||||
|
||||
return "\n" + altAlleleStats.toString();
|
||||
}
|
||||
|
||||
private static class VCFRecord {
|
||||
public VariantContext vc;
|
||||
public boolean resultedFromMerge;
|
||||
|
|
@ -373,7 +346,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
|||
if (shouldAttemptToMerge) {
|
||||
aas.numSuccessiveGenotypesAttemptedToBeMerged++;
|
||||
|
||||
if (!VariantContextUtils.alleleSegregationIsKnown(gt1, gt2)) {
|
||||
if (!PhasingUtils.alleleSegregationIsKnown(gt1, gt2)) {
|
||||
aas.segregationUnknown++;
|
||||
logger.debug("Unknown segregation of alleles [not phased] for " + samp + " at " + VariantContextUtils.getLocation(genomeLocParser, vc1) + ", " + VariantContextUtils.getLocation(genomeLocParser, vc2));
|
||||
}
|
||||
|
|
@ -498,9 +471,9 @@ class DistanceMergeRule extends VariantContextMergeRule {
|
|||
}
|
||||
|
||||
|
||||
class ExistsDoubleAltAlleleMergeRule extends VariantContextUtils.AlleleMergeRule {
|
||||
class ExistsDoubleAltAlleleMergeRule extends PhasingUtils.AlleleMergeRule {
|
||||
public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2) {
|
||||
return VariantContextUtils.someSampleHasDoubleNonReferenceAllele(vc1, vc2);
|
||||
return PhasingUtils.someSampleHasDoubleNonReferenceAllele(vc1, vc2);
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
|
|
@ -515,7 +488,7 @@ class SegregatingMNPmergeAllelesRule extends ExistsDoubleAltAlleleMergeRule {
|
|||
|
||||
public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2) {
|
||||
// Must be interesting AND consistent:
|
||||
return super.allelesShouldBeMerged(vc1, vc2) && VariantContextUtils.doubleAllelesSegregatePerfectlyAmongSamples(vc1, vc2);
|
||||
return super.allelesShouldBeMerged(vc1, vc2) && PhasingUtils.doubleAllelesSegregatePerfectlyAmongSamples(vc1, vc2);
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
|
|
|
|||
|
|
@ -1,236 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFromRods;
|
||||
|
||||
/**
|
||||
* Walks along all variant ROD loci, and merges consecutive sites if some sample has segregating alt alleles in the ROD.
|
||||
*/
|
||||
@Allows(value = {DataSource.REFERENCE})
|
||||
@Requires(value = {DataSource.REFERENCE})
|
||||
@By(DataSource.REFERENCE_ORDERED_DATA)
|
||||
|
||||
public class MergeSegregatingAlternateAllelesWalker extends RodWalker<Integer, Integer> {
|
||||
|
||||
@Output(doc = "File to which variants should be written", required = true)
|
||||
protected VCFWriter writer = null;
|
||||
private MergeSegregatingAlternateAllelesVCFWriter vcMergerWriter = null;
|
||||
|
||||
@Argument(fullName = "maxGenomicDistance", shortName = "maxDist", doc = "The maximum reference-genome distance between consecutive heterozygous sites to permit merging phased VCF records; [default:1]", required = false)
|
||||
protected int maxGenomicDistance = 1;
|
||||
|
||||
@Argument(fullName = "useSingleSample", shortName = "useSample", doc = "Only output genotypes for the single sample given; [default:use all samples]", required = false)
|
||||
protected String useSingleSample = null;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "emitOnlyMergedRecords", shortName = "emitOnlyMerged", doc = "Only output records that resulted from merging [For DEBUGGING purposes only - DO NOT USE, since it disregards the semantics of '|' as 'phased relative to previous non-filtered VC']; [default:false]", required = false)
|
||||
protected boolean emitOnlyMergedRecords = false;
|
||||
|
||||
@Argument(fullName = "disablePrintAltAlleleStats", shortName = "noAlleleStats", doc = "Should the print-out of alternate allele statistics be disabled?; [default:false]", required = false)
|
||||
protected boolean disablePrintAlternateAlleleStatistics = false;
|
||||
|
||||
public final static String IGNORE_REFSEQ = "IGNORE";
|
||||
public final static String UNION_REFSEQ = "UNION";
|
||||
public final static String INTERSECT_REFSEQ = "INTERSECT";
|
||||
|
||||
@Argument(fullName = "mergeBasedOnRefSeqAnnotation", shortName = "mergeBasedOnRefSeqAnnotation", doc = "'Should merging be performed if two sites lie on the same RefSeq sequence in the INFO field {" + IGNORE_REFSEQ + ", " + UNION_REFSEQ + ", " + INTERSECT_REFSEQ + "}; [default:" + IGNORE_REFSEQ + "]", required = false)
|
||||
protected String mergeBasedOnRefSeqAnnotation = IGNORE_REFSEQ;
|
||||
|
||||
@Argument(fullName = "dontRequireSomeSampleHasDoubleAltAllele", shortName = "dontRequireSomeSampleHasDoubleAltAllele", doc = "Should the requirement, that SUCCESSIVE records to be merged have at least one sample with a double alternate allele, be relaxed?; [default:false]", required = false)
|
||||
protected boolean dontRequireSomeSampleHasDoubleAltAllele = false;
|
||||
|
||||
@Input(fullName="variant", shortName = "V", doc="Select variants from this VCF file", required=true)
|
||||
public RodBinding<VariantContext> variants;
|
||||
|
||||
public void initialize() {
|
||||
initializeVcfWriter();
|
||||
}
|
||||
|
||||
private void initializeVcfWriter() {
|
||||
GenomeLocParser genomeLocParser = getToolkit().getGenomeLocParser();
|
||||
|
||||
VariantContextMergeRule vcMergeRule;
|
||||
if (mergeBasedOnRefSeqAnnotation.equals(IGNORE_REFSEQ))
|
||||
vcMergeRule = new DistanceMergeRule(maxGenomicDistance, genomeLocParser);
|
||||
else
|
||||
vcMergeRule = new SameGenePlusWithinDistanceMergeRule(maxGenomicDistance, genomeLocParser, mergeBasedOnRefSeqAnnotation);
|
||||
|
||||
VariantContextUtils.AlleleMergeRule alleleMergeRule;
|
||||
if (dontRequireSomeSampleHasDoubleAltAllele) // if a pair of VariantContext passes the vcMergeRule, then always merge them if there is a trailing prefix of polymorphisms (i.e., upstream polymorphic site):
|
||||
alleleMergeRule = new PrefixPolymorphismMergeAllelesRule();
|
||||
else
|
||||
alleleMergeRule = new ExistsDoubleAltAlleleMergeRule();
|
||||
|
||||
// false <-> don't take control of writer, since didn't create it:
|
||||
vcMergerWriter = new MergeSegregatingAlternateAllelesVCFWriter(writer, genomeLocParser, getToolkit().getArguments().referenceFile, vcMergeRule, alleleMergeRule, useSingleSample, emitOnlyMergedRecords, logger, false, !disablePrintAlternateAlleleStatistics);
|
||||
writer = null; // so it can't be accessed directly [i.e., not through vcMergerWriter]
|
||||
|
||||
// setup the header fields:
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
||||
|
||||
Map<String, VCFHeader> rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getName()));
|
||||
vcMergerWriter.writeHeader(new VCFHeader(hInfo, new TreeSet<String>(rodNameToHeader.get(variants.getName()).getGenotypeSamples())));
|
||||
}
|
||||
|
||||
public boolean generateExtendedEvents() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* For each site, send it to be (possibly) merged with previously observed sites.
|
||||
*
|
||||
* @param tracker the meta-data tracker
|
||||
* @param ref the reference base
|
||||
* @param context the context for the given locus
|
||||
* @return dummy Integer
|
||||
*/
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (tracker == null)
|
||||
return null;
|
||||
|
||||
for (VariantContext vc : tracker.getValues(variants, context.getLocation()))
|
||||
writeVCF(vc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
private void writeVCF(VariantContext vc) {
|
||||
WriteVCF.writeVCF(vc, vcMergerWriter, logger);
|
||||
}
|
||||
|
||||
public Integer reduce(Integer result, Integer total) {
|
||||
if (result == null)
|
||||
return total;
|
||||
|
||||
return total + result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Release any VariantContexts not yet processed.
|
||||
*
|
||||
* @param result Empty for now...
|
||||
*/
|
||||
public void onTraversalDone(Integer result) {
|
||||
vcMergerWriter.close();
|
||||
|
||||
if (useSingleSample != null)
|
||||
System.out.println("Only considered single sample: " + useSingleSample);
|
||||
|
||||
System.out.println("Number of successive pairs of records: " + vcMergerWriter.getNumRecordsAttemptToMerge());
|
||||
System.out.println("Number of potentially merged records (" + vcMergerWriter.getVcMergeRule() + "): " + vcMergerWriter.getNumRecordsSatisfyingMergeRule());
|
||||
System.out.println("Number of records merged ("+ vcMergerWriter.getAlleleMergeRule() + "): " + vcMergerWriter.getNumMergedRecords());
|
||||
System.out.println(vcMergerWriter.getAltAlleleStats());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
enum MergeBasedOnRefSeqAnnotation {
|
||||
UNION_WITH_DIST, INTERSECT_WITH_DIST
|
||||
}
|
||||
|
||||
class SameGenePlusWithinDistanceMergeRule extends DistanceMergeRule {
|
||||
private MergeBasedOnRefSeqAnnotation mergeBasedOnRefSeqAnnotation;
|
||||
|
||||
public SameGenePlusWithinDistanceMergeRule(int maxGenomicDistanceForMNP, GenomeLocParser genomeLocParser, String mergeBasedOnRefSeqAnnotation) {
|
||||
super(maxGenomicDistanceForMNP, genomeLocParser);
|
||||
|
||||
if (mergeBasedOnRefSeqAnnotation.equals(MergeSegregatingAlternateAllelesWalker.UNION_REFSEQ))
|
||||
this.mergeBasedOnRefSeqAnnotation = MergeBasedOnRefSeqAnnotation.UNION_WITH_DIST;
|
||||
else if (mergeBasedOnRefSeqAnnotation.equals(MergeSegregatingAlternateAllelesWalker.INTERSECT_REFSEQ))
|
||||
this.mergeBasedOnRefSeqAnnotation = MergeBasedOnRefSeqAnnotation.INTERSECT_WITH_DIST;
|
||||
else
|
||||
throw new UserException("Must provide " + MergeSegregatingAlternateAllelesWalker.IGNORE_REFSEQ + ", " + MergeSegregatingAlternateAllelesWalker.UNION_REFSEQ + ", or " + MergeSegregatingAlternateAllelesWalker.INTERSECT_REFSEQ + " as argument to mergeBasedOnRefSeqAnnotation!");
|
||||
}
|
||||
|
||||
public boolean shouldAttemptToMerge(VariantContext vc1, VariantContext vc2) {
|
||||
boolean withinDistance = super.shouldAttemptToMerge(vc1, vc2);
|
||||
|
||||
if (mergeBasedOnRefSeqAnnotation == MergeBasedOnRefSeqAnnotation.UNION_WITH_DIST)
|
||||
return withinDistance || sameGene(vc1, vc2);
|
||||
else // mergeBasedOnRefSeqAnnotation == MergeBasedOnRefSeqAnnotation.INTERSECT_WITH_DIST
|
||||
return withinDistance && sameGene(vc1, vc2);
|
||||
}
|
||||
|
||||
private boolean sameGene(VariantContext vc1, VariantContext vc2) {
|
||||
Set<String> names_vc1 = RefSeqDataParser.getRefSeqNames(vc1);
|
||||
Set<String> names_vc2 = RefSeqDataParser.getRefSeqNames(vc2);
|
||||
names_vc1.retainAll(names_vc2);
|
||||
|
||||
if (!names_vc1.isEmpty())
|
||||
return true;
|
||||
|
||||
// Check refseq.name2:
|
||||
Set<String> names2_vc1 = RefSeqDataParser.getRefSeqNames(vc1, true);
|
||||
Set<String> names2_vc2 = RefSeqDataParser.getRefSeqNames(vc2, true);
|
||||
names2_vc1.retainAll(names2_vc2);
|
||||
|
||||
return !names2_vc1.isEmpty();
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return super.toString() + " " + (mergeBasedOnRefSeqAnnotation == MergeBasedOnRefSeqAnnotation.UNION_WITH_DIST ? "OR" : "AND") + " on the same gene";
|
||||
}
|
||||
|
||||
public Map<String, Object> addToMergedAttributes(VariantContext vc1, VariantContext vc2) {
|
||||
Map<String, Object> addedAttribs = super.addToMergedAttributes(vc1, vc2);
|
||||
addedAttribs.putAll(RefSeqDataParser.getMergedRefSeqNameAttributes(vc1, vc2));
|
||||
return addedAttribs;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
class PrefixPolymorphismMergeAllelesRule extends VariantContextUtils.AlleleMergeRule {
|
||||
public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2) {
|
||||
return vc1.isPolymorphic();
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return super.toString() + ", there exists a polymorphism at the start of the merged allele";
|
||||
}
|
||||
}
|
||||
|
|
@ -23,12 +23,10 @@
|
|||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||
|
||||
import org.broadinstitute.sting.utils.DisjointSet;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
// Represents an undirected graph with no self-edges:
|
||||
public class PhasingGraph implements Iterable<PhasingGraphEdge> {
|
||||
class PhasingGraph implements Iterable<PhasingGraphEdge> {
|
||||
private Neighbors[] adj;
|
||||
|
||||
public PhasingGraph(int numVertices) {
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.phasing;
|
|||
/*
|
||||
Edge class for PhasingGraph
|
||||
*/
|
||||
public class PhasingGraphEdge implements Comparable<PhasingGraphEdge> {
|
||||
class PhasingGraphEdge implements Comparable<PhasingGraphEdge> {
|
||||
protected int v1;
|
||||
protected int v2;
|
||||
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class PhasingRead extends BaseArray {
|
||||
class PhasingRead extends BaseArray {
|
||||
private PreciseNonNegativeDouble mappingProb; // the probability that this read is mapped correctly
|
||||
private PreciseNonNegativeDouble[] baseProbs; // the probabilities that the base identities are CORRECT
|
||||
private PreciseNonNegativeDouble[] baseErrorProbs; // the probabilities that the base identities are INCORRECT
|
||||
|
|
|
|||
|
|
@ -0,0 +1,387 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* [Short one sentence description of this walker]
|
||||
* <p/>
|
||||
* <p>
|
||||
* [Functionality of this walker]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* [Input description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* [Output description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T $WalkerName
|
||||
* </pre>
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
class PhasingUtils {
|
||||
static VariantContext mergeIntoMNP(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile, AlleleMergeRule alleleMergeRule) {
|
||||
if (!mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2))
|
||||
return null;
|
||||
|
||||
// Check that it's logically possible to merge the VCs:
|
||||
if (!allSamplesAreMergeable(vc1, vc2))
|
||||
return null;
|
||||
|
||||
// Check if there's a "point" in merging the VCs (e.g., annotations could be changed)
|
||||
if (!alleleMergeRule.allelesShouldBeMerged(vc1, vc2))
|
||||
return null;
|
||||
|
||||
return reallyMergeIntoMNP(vc1, vc2, referenceFile);
|
||||
}
|
||||
|
||||
static VariantContext reallyMergeIntoMNP(VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) {
|
||||
int startInter = vc1.getEnd() + 1;
|
||||
int endInter = vc2.getStart() - 1;
|
||||
byte[] intermediateBases = null;
|
||||
if (startInter <= endInter) {
|
||||
intermediateBases = referenceFile.getSubsequenceAt(vc1.getChr(), startInter, endInter).getBases();
|
||||
StringUtil.toUpperCase(intermediateBases);
|
||||
}
|
||||
MergedAllelesData mergeData = new MergedAllelesData(intermediateBases, vc1, vc2); // ensures that the reference allele is added
|
||||
|
||||
GenotypeCollection mergedGenotypes = GenotypeCollection.create();
|
||||
for (final Genotype gt1 : vc1.getGenotypes()) {
|
||||
Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
||||
|
||||
List<Allele> site1Alleles = gt1.getAlleles();
|
||||
List<Allele> site2Alleles = gt2.getAlleles();
|
||||
|
||||
List<Allele> mergedAllelesForSample = new LinkedList<Allele>();
|
||||
|
||||
/* NOTE: Since merged alleles are added to mergedAllelesForSample in the SAME order as in the input VC records,
|
||||
we preserve phase information (if any) relative to whatever precedes vc1:
|
||||
*/
|
||||
Iterator<Allele> all2It = site2Alleles.iterator();
|
||||
for (Allele all1 : site1Alleles) {
|
||||
Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable()
|
||||
|
||||
Allele mergedAllele = mergeData.ensureMergedAllele(all1, all2);
|
||||
mergedAllelesForSample.add(mergedAllele);
|
||||
}
|
||||
|
||||
double mergedGQ = Math.max(gt1.getNegLog10PError(), gt2.getNegLog10PError());
|
||||
Set<String> mergedGtFilters = new HashSet<String>(); // Since gt1 and gt2 were unfiltered, the Genotype remains unfiltered
|
||||
|
||||
Map<String, Object> mergedGtAttribs = new HashMap<String, Object>();
|
||||
PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(gt1, gt2);
|
||||
if (phaseQual.PQ != null)
|
||||
mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ);
|
||||
|
||||
Genotype mergedGt = new Genotype(gt1.getSampleName(), mergedAllelesForSample, mergedGQ, mergedGtFilters, mergedGtAttribs, phaseQual.isPhased);
|
||||
mergedGenotypes.add(mergedGt);
|
||||
}
|
||||
|
||||
String mergedName = mergeVariantContextNames(vc1.getSource(), vc2.getSource());
|
||||
double mergedNegLog10PError = Math.max(vc1.getNegLog10PError(), vc2.getNegLog10PError());
|
||||
Set<String> mergedFilters = new HashSet<String>(); // Since vc1 and vc2 were unfiltered, the merged record remains unfiltered
|
||||
Map<String, Object> mergedAttribs = mergeVariantContextAttributes(vc1, vc2);
|
||||
|
||||
// ids
|
||||
List<String> mergedIDs = new ArrayList<String>();
|
||||
if ( vc1.hasID() ) mergedIDs.add(vc1.getID());
|
||||
if ( vc2.hasID() ) mergedIDs.add(vc2.getID());
|
||||
String mergedID = Utils.join(VCFConstants.ID_FIELD_SEPARATOR, mergedIDs);
|
||||
|
||||
// TODO -- FIX ID
|
||||
VariantContext mergedVc = new VariantContext(mergedName, mergedID, vc1.getChr(), vc1.getStart(), vc2.getEnd(), mergeData.getAllMergedAlleles(), mergedGenotypes, mergedNegLog10PError, mergedFilters, mergedAttribs);
|
||||
|
||||
mergedAttribs = new HashMap<String, Object>(mergedVc.getAttributes());
|
||||
VariantContextUtils.calculateChromosomeCounts(mergedVc, mergedAttribs, true);
|
||||
mergedVc = VariantContext.modifyAttributes(mergedVc, mergedAttribs);
|
||||
|
||||
return mergedVc;
|
||||
}
|
||||
|
||||
static String mergeVariantContextNames(String name1, String name2) {
|
||||
return name1 + "_" + name2;
|
||||
}
|
||||
|
||||
static Map<String, Object> mergeVariantContextAttributes(VariantContext vc1, VariantContext vc2) {
|
||||
Map<String, Object> mergedAttribs = new HashMap<String, Object>();
|
||||
|
||||
List<VariantContext> vcList = new LinkedList<VariantContext>();
|
||||
vcList.add(vc1);
|
||||
vcList.add(vc2);
|
||||
|
||||
String[] MERGE_OR_ATTRIBS = {VCFConstants.DBSNP_KEY};
|
||||
for (String orAttrib : MERGE_OR_ATTRIBS) {
|
||||
boolean attribVal = false;
|
||||
for (VariantContext vc : vcList) {
|
||||
attribVal = vc.getAttributeAsBoolean(orAttrib, false);
|
||||
if (attribVal) // already true, so no reason to continue:
|
||||
break;
|
||||
}
|
||||
mergedAttribs.put(orAttrib, attribVal);
|
||||
}
|
||||
|
||||
return mergedAttribs;
|
||||
}
|
||||
|
||||
static boolean mergeIntoMNPvalidationCheck(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2) {
|
||||
GenomeLoc loc1 = VariantContextUtils.getLocation(genomeLocParser, vc1);
|
||||
GenomeLoc loc2 = VariantContextUtils.getLocation(genomeLocParser, vc2);
|
||||
|
||||
if (!loc1.onSameContig(loc2))
|
||||
throw new ReviewedStingException("Can only merge vc1, vc2 if on the same chromosome");
|
||||
|
||||
if (!loc1.isBefore(loc2))
|
||||
throw new ReviewedStingException("Can only merge if vc1 is BEFORE vc2");
|
||||
|
||||
if (vc1.isFiltered() || vc2.isFiltered())
|
||||
return false;
|
||||
|
||||
if (!vc1.getSampleNames().equals(vc2.getSampleNames())) // vc1, vc2 refer to different sample sets
|
||||
return false;
|
||||
|
||||
if (!allGenotypesAreUnfilteredAndCalled(vc1) || !allGenotypesAreUnfilteredAndCalled(vc2))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static boolean allGenotypesAreUnfilteredAndCalled(VariantContext vc) {
|
||||
for (final Genotype gt : vc.getGenotypes()) {
|
||||
if (gt.isNoCall() || gt.isFiltered())
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) {
|
||||
// Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1:
|
||||
for (final Genotype gt1 : vc1.getGenotypes()) {
|
||||
Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
||||
|
||||
if (!alleleSegregationIsKnown(gt1, gt2)) // can merge if: phased, or if either is a hom
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static boolean alleleSegregationIsKnown(Genotype gt1, Genotype gt2) {
|
||||
if (gt1.getPloidy() != gt2.getPloidy())
|
||||
return false;
|
||||
|
||||
/* If gt2 is phased or hom, then could even be MERGED with gt1 [This is standard].
|
||||
|
||||
HOWEVER, EVEN if this is not the case, but gt1.isHom(),
|
||||
it is trivially known that each of gt2's alleles segregate with the single allele type present in gt1.
|
||||
*/
|
||||
return (gt2.isPhased() || gt2.isHom() || gt1.isHom());
|
||||
}
|
||||
|
||||
static PhaseAndQuality calcPhaseForMergedGenotypes(Genotype gt1, Genotype gt2) {
|
||||
if (gt2.isPhased() || gt2.isHom())
|
||||
return new PhaseAndQuality(gt1); // maintain the phase of gt1
|
||||
|
||||
if (!gt1.isHom())
|
||||
throw new ReviewedStingException("alleleSegregationIsKnown(gt1, gt2) implies: gt2.genotypesArePhased() || gt2.isHom() || gt1.isHom()");
|
||||
|
||||
/* We're dealing with: gt1.isHom(), gt2.isHet(), !gt2.genotypesArePhased(); so, the merged (het) Genotype is not phased relative to the previous Genotype
|
||||
|
||||
For example, if we're merging the third Genotype with the second one:
|
||||
0/1
|
||||
1|1
|
||||
0/1
|
||||
|
||||
Then, we want to output:
|
||||
0/1
|
||||
1/2
|
||||
*/
|
||||
return new PhaseAndQuality(gt2); // maintain the phase of gt2 [since !gt2.genotypesArePhased()]
|
||||
}
|
||||
|
||||
static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) {
|
||||
for (final Genotype gt1 : vc1.getGenotypes()) {
|
||||
Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
||||
|
||||
List<Allele> site1Alleles = gt1.getAlleles();
|
||||
List<Allele> site2Alleles = gt2.getAlleles();
|
||||
|
||||
Iterator<Allele> all2It = site2Alleles.iterator();
|
||||
for (Allele all1 : site1Alleles) {
|
||||
Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable()
|
||||
|
||||
if (all1.isNonReference() && all2.isNonReference()) // corresponding alleles are alternate
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static boolean doubleAllelesSegregatePerfectlyAmongSamples(VariantContext vc1, VariantContext vc2) {
|
||||
// Check that Alleles at vc1 and at vc2 always segregate together in all samples (including reference):
|
||||
Map<Allele, Allele> allele1ToAllele2 = new HashMap<Allele, Allele>();
|
||||
Map<Allele, Allele> allele2ToAllele1 = new HashMap<Allele, Allele>();
|
||||
|
||||
// Note the segregation of the alleles for the reference genome:
|
||||
allele1ToAllele2.put(vc1.getReference(), vc2.getReference());
|
||||
allele2ToAllele1.put(vc2.getReference(), vc1.getReference());
|
||||
|
||||
// Note the segregation of the alleles for each sample (and check that it is consistent with the reference and all previous samples).
|
||||
for (final Genotype gt1 : vc1.getGenotypes()) {
|
||||
Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
||||
|
||||
List<Allele> site1Alleles = gt1.getAlleles();
|
||||
List<Allele> site2Alleles = gt2.getAlleles();
|
||||
|
||||
Iterator<Allele> all2It = site2Alleles.iterator();
|
||||
for (Allele all1 : site1Alleles) {
|
||||
Allele all2 = all2It.next();
|
||||
|
||||
Allele all1To2 = allele1ToAllele2.get(all1);
|
||||
if (all1To2 == null)
|
||||
allele1ToAllele2.put(all1, all2);
|
||||
else if (!all1To2.equals(all2)) // all1 segregates with two different alleles at site 2
|
||||
return false;
|
||||
|
||||
Allele all2To1 = allele2ToAllele1.get(all2);
|
||||
if (all2To1 == null)
|
||||
allele2ToAllele1.put(all2, all1);
|
||||
else if (!all2To1.equals(all1)) // all2 segregates with two different alleles at site 1
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
abstract static class AlleleMergeRule {
|
||||
// vc1, vc2 are ONLY passed to allelesShouldBeMerged() if mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2) AND allSamplesAreMergeable(vc1, vc2):
|
||||
abstract public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2);
|
||||
|
||||
public String toString() {
|
||||
return "all samples are mergeable";
|
||||
}
|
||||
}
|
||||
|
||||
static class AlleleOneAndTwo {
|
||||
private Allele all1;
|
||||
private Allele all2;
|
||||
|
||||
public AlleleOneAndTwo(Allele all1, Allele all2) {
|
||||
this.all1 = all1;
|
||||
this.all2 = all2;
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
return all1.hashCode() + all2.hashCode();
|
||||
}
|
||||
|
||||
public boolean equals(Object other) {
|
||||
if (!(other instanceof AlleleOneAndTwo))
|
||||
return false;
|
||||
|
||||
AlleleOneAndTwo otherAot = (AlleleOneAndTwo) other;
|
||||
return (this.all1.equals(otherAot.all1) && this.all2.equals(otherAot.all2));
|
||||
}
|
||||
}
|
||||
|
||||
static class MergedAllelesData {
|
||||
private Map<AlleleOneAndTwo, Allele> mergedAlleles;
|
||||
private byte[] intermediateBases;
|
||||
private int intermediateLength;
|
||||
|
||||
public MergedAllelesData(byte[] intermediateBases, VariantContext vc1, VariantContext vc2) {
|
||||
this.mergedAlleles = new HashMap<AlleleOneAndTwo, Allele>(); // implemented equals() and hashCode() for AlleleOneAndTwo
|
||||
this.intermediateBases = intermediateBases;
|
||||
this.intermediateLength = this.intermediateBases != null ? this.intermediateBases.length : 0;
|
||||
|
||||
this.ensureMergedAllele(vc1.getReference(), vc2.getReference(), true);
|
||||
}
|
||||
|
||||
public Allele ensureMergedAllele(Allele all1, Allele all2) {
|
||||
return ensureMergedAllele(all1, all2, false); // false <-> since even if all1+all2 = reference, it was already created in the constructor
|
||||
}
|
||||
|
||||
private Allele ensureMergedAllele(Allele all1, Allele all2, boolean creatingReferenceForFirstTime) {
|
||||
AlleleOneAndTwo all12 = new AlleleOneAndTwo(all1, all2);
|
||||
Allele mergedAllele = mergedAlleles.get(all12);
|
||||
|
||||
if (mergedAllele == null) {
|
||||
byte[] bases1 = all1.getBases();
|
||||
byte[] bases2 = all2.getBases();
|
||||
|
||||
byte[] mergedBases = new byte[bases1.length + intermediateLength + bases2.length];
|
||||
System.arraycopy(bases1, 0, mergedBases, 0, bases1.length);
|
||||
if (intermediateBases != null)
|
||||
System.arraycopy(intermediateBases, 0, mergedBases, bases1.length, intermediateLength);
|
||||
System.arraycopy(bases2, 0, mergedBases, bases1.length + intermediateLength, bases2.length);
|
||||
|
||||
mergedAllele = Allele.create(mergedBases, creatingReferenceForFirstTime);
|
||||
mergedAlleles.put(all12, mergedAllele);
|
||||
}
|
||||
|
||||
return mergedAllele;
|
||||
}
|
||||
|
||||
public Set<Allele> getAllMergedAlleles() {
|
||||
return new HashSet<Allele>(mergedAlleles.values());
|
||||
}
|
||||
}
|
||||
|
||||
static class PhaseAndQuality {
|
||||
public boolean isPhased;
|
||||
public Double PQ = null;
|
||||
|
||||
public PhaseAndQuality(Genotype gt) {
|
||||
this.isPhased = gt.isPhased();
|
||||
if (this.isPhased) {
|
||||
this.PQ = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1);
|
||||
if ( this.PQ == -1 ) this.PQ = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -26,7 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.phasing;
|
|||
/* PreciseNonNegativeDouble permits arithmetic operations on NON-NEGATIVE double values
|
||||
with precision (prevents underflow by representing in log10 space).
|
||||
*/
|
||||
public class PreciseNonNegativeDouble implements Comparable<PreciseNonNegativeDouble> {
|
||||
class PreciseNonNegativeDouble implements Comparable<PreciseNonNegativeDouble> {
|
||||
private static final double EQUALS_THRESH = 1e-6;
|
||||
private static final double INFINITY = Double.POSITIVE_INFINITY;
|
||||
|
||||
|
|
|
|||
|
|
@ -34,7 +34,6 @@ import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.DisjointSet;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
|
|
@ -1052,7 +1051,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
private void writeVCF(VariantContext vc) {
|
||||
if (samplesToPhase == null || vc.isNotFiltered())
|
||||
//if ( samplesToPhase == null || (vc.isVariant() && vc.isNotFiltered())) // if we are only operating on specific samples, don't write out all sites, just those where the VC is variant
|
||||
WriteVCF.writeVCF(vc, writer, logger);
|
||||
writer.add(vc);
|
||||
}
|
||||
|
||||
public static boolean processVariantInPhasing(VariantContext vc) {
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@
|
|||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||
|
||||
public class ReadBase {
|
||||
class ReadBase {
|
||||
public String readName;
|
||||
public byte base;
|
||||
public int mappingQual;
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ import org.broadinstitute.sting.utils.pileup.PileupElement;
|
|||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
|
||||
public class ReadBasesAtPosition implements Iterable<ReadBase> {
|
||||
class ReadBasesAtPosition implements Iterable<ReadBase> {
|
||||
// list of: <read name, base>
|
||||
private LinkedList<ReadBase> bases;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,189 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/* Some methods for extracting RefSeq-related data from annotated VCF INFO fields:
|
||||
*/
|
||||
public class RefSeqDataParser {
|
||||
private static String REFSEQ_PREFIX = "refseq.";
|
||||
|
||||
private static String NUM_RECORDS_KEY = REFSEQ_PREFIX + "numMatchingRecords";
|
||||
private static String NAME_KEY = REFSEQ_PREFIX + "name";
|
||||
private static String NAME2_KEY = REFSEQ_PREFIX + "name2";
|
||||
|
||||
private static String[] NAME_KEYS = {NAME_KEY, NAME2_KEY};
|
||||
|
||||
private static Map<String, String> getRefSeqEntriesToNames(VariantContext vc, boolean getName2) {
|
||||
String nameKeyToUse = getName2 ? NAME2_KEY : NAME_KEY;
|
||||
String nameKeyToUseMultiplePrefix = nameKeyToUse + "_";
|
||||
|
||||
Map<String, String> entriesToNames = new HashMap<String, String>();
|
||||
int numRecords = vc.getAttributeAsInt(NUM_RECORDS_KEY, -1);
|
||||
if (numRecords != -1) {
|
||||
boolean done = false;
|
||||
|
||||
if (numRecords == 1) { // Check if perhaps the single record doesn't end with "_1":
|
||||
String name = vc.getAttributeAsString(nameKeyToUse, null);
|
||||
if (name != null) {
|
||||
entriesToNames.put(nameKeyToUse, name);
|
||||
done = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!done) {
|
||||
for (int i = 1; i <= numRecords; i++) {
|
||||
String key = nameKeyToUseMultiplePrefix + i;
|
||||
String name = vc.getAttributeAsString(key, null);
|
||||
if (name != null)
|
||||
entriesToNames.put(key, name);
|
||||
}
|
||||
}
|
||||
}
|
||||
else { // no entry with the # of records:
|
||||
String name = vc.getAttributeAsString(nameKeyToUse, null);
|
||||
if (name != null) {
|
||||
entriesToNames.put(nameKeyToUse, name);
|
||||
}
|
||||
else { // Check all INFO fields for a match (if there are multiple entries):
|
||||
for (Map.Entry<String, Object> entry : vc.getAttributes().entrySet()) {
|
||||
String key = entry.getKey();
|
||||
if (key.startsWith(nameKeyToUseMultiplePrefix))
|
||||
entriesToNames.put(key, entry.getValue().toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
return entriesToNames;
|
||||
}
|
||||
|
||||
private static Map<String, String> getRefSeqEntriesToNames(VariantContext vc) {
|
||||
return getRefSeqEntriesToNames(vc, false);
|
||||
}
|
||||
|
||||
public static Set<String> getRefSeqNames(VariantContext vc, boolean getName2) {
|
||||
return new TreeSet<String>(getRefSeqEntriesToNames(vc, getName2).values());
|
||||
}
|
||||
|
||||
public static Set<String> getRefSeqNames(VariantContext vc) {
|
||||
return getRefSeqNames(vc, false);
|
||||
}
|
||||
|
||||
public static Map<String, Object> getMergedRefSeqNameAttributes(VariantContext vc1, VariantContext vc2) {
|
||||
Map<String, Object> refSeqNameAttribs = new HashMap<String, Object>();
|
||||
|
||||
Map<String, RefSeqEntry> entriesMap1 = getAllRefSeqEntriesByName(vc1);
|
||||
Map<String, RefSeqEntry> entriesMap2 = getAllRefSeqEntriesByName(vc2);
|
||||
|
||||
Set<String> commonNames = entriesMap1.keySet();
|
||||
commonNames.retainAll(entriesMap2.keySet());
|
||||
boolean addSuffix = commonNames.size() > 1;
|
||||
int nextCount = 1;
|
||||
|
||||
for (String name : commonNames) {
|
||||
RefSeqEntry refseq1 = entriesMap1.get(name);
|
||||
RefSeqEntry refseq2 = entriesMap2.get(name);
|
||||
|
||||
String keySuffix = "";
|
||||
if (addSuffix)
|
||||
keySuffix = "_" + nextCount;
|
||||
|
||||
boolean added = false;
|
||||
for (String key : NAME_KEYS) {
|
||||
Object obj1 = refseq1.info.get(key);
|
||||
Object obj2 = refseq2.info.get(key);
|
||||
if (obj1 != null && obj2 != null && obj1.equals(obj2)) {
|
||||
added = true;
|
||||
String useKey = key + keySuffix;
|
||||
refSeqNameAttribs.put(useKey, obj1);
|
||||
}
|
||||
}
|
||||
if (added)
|
||||
nextCount++;
|
||||
}
|
||||
int totalCount = nextCount - 1; // since incremented count one extra time
|
||||
if (totalCount > 1)
|
||||
refSeqNameAttribs.put(NUM_RECORDS_KEY, totalCount);
|
||||
|
||||
return refSeqNameAttribs;
|
||||
}
|
||||
|
||||
public static Map<String, Object> removeRefSeqAttributes(Map<String, Object> attributes) {
|
||||
Map<String, Object> removedRefSeqAttributes = new HashMap<String, Object>(attributes);
|
||||
|
||||
Iterator<Map.Entry<String, Object>> attrIt = removedRefSeqAttributes.entrySet().iterator();
|
||||
while (attrIt.hasNext()) {
|
||||
String key = attrIt.next().getKey();
|
||||
if (key.startsWith(REFSEQ_PREFIX))
|
||||
attrIt.remove();
|
||||
}
|
||||
|
||||
return removedRefSeqAttributes;
|
||||
}
|
||||
|
||||
private static Map<String, RefSeqEntry> getAllRefSeqEntriesByName(VariantContext vc) {
|
||||
Map<String, RefSeqEntry> nameToEntries = new TreeMap<String, RefSeqEntry>();
|
||||
|
||||
List<RefSeqEntry> allEntries = getAllRefSeqEntries(vc);
|
||||
for (RefSeqEntry entry : allEntries) {
|
||||
Object name = entry.info.get(NAME_KEY);
|
||||
if (name != null)
|
||||
nameToEntries.put(name.toString(), entry);
|
||||
}
|
||||
|
||||
return nameToEntries;
|
||||
}
|
||||
|
||||
// Returns a List of SEPARATE Map<refseq.ENTRY, refseq.VALUE> for EACH RefSeq annotation (i.e., each gene), stripping out the "_1", "_2", etc.
|
||||
private static List<RefSeqEntry> getAllRefSeqEntries(VariantContext vc) {
|
||||
List<RefSeqEntry> allRefSeq = new LinkedList<RefSeqEntry>();
|
||||
|
||||
for (Map.Entry<String, String> entryToName : getRefSeqEntriesToNames(vc).entrySet()) {
|
||||
String entry = entryToName.getKey();
|
||||
String entrySuffix = entry.replaceFirst(NAME_KEY, "");
|
||||
allRefSeq.add(new RefSeqEntry(vc, entrySuffix));
|
||||
}
|
||||
|
||||
return allRefSeq;
|
||||
}
|
||||
|
||||
private static class RefSeqEntry {
|
||||
public Map<String, Object> info;
|
||||
|
||||
public RefSeqEntry(VariantContext vc, String entrySuffix) {
|
||||
this.info = new HashMap<String, Object>();
|
||||
|
||||
for (Map.Entry<String, Object> attribEntry : vc.getAttributes().entrySet()) {
|
||||
String key = attribEntry.getKey();
|
||||
if (key.startsWith(REFSEQ_PREFIX) && key.endsWith(entrySuffix)) {
|
||||
String genericKey = key.replaceAll(entrySuffix, "");
|
||||
this.info.put(genericKey, attribEntry.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -28,7 +28,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
|
||||
public class SNPallelePair extends AllelePair {
|
||||
class SNPallelePair extends AllelePair {
|
||||
|
||||
public SNPallelePair(Genotype gt) {
|
||||
super(gt);
|
||||
|
|
|
|||
|
|
@ -1,34 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
public class WriteVCF {
|
||||
public static void writeVCF(VariantContext vc, VCFWriter writer, Logger logger) {
|
||||
writer.add(vc);
|
||||
}
|
||||
}
|
||||
|
|
@ -25,13 +25,10 @@ package org.broadinstitute.sting.utils.variantcontext;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import org.apache.commons.jexl2.Expression;
|
||||
import org.apache.commons.jexl2.JexlEngine;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
|
||||
import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -929,340 +926,4 @@ public class VariantContextUtils {
|
|||
public static final GenomeLoc getLocation(GenomeLocParser genomeLocParser,VariantContext vc) {
|
||||
return genomeLocParser.createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd(), true);
|
||||
}
|
||||
|
||||
public abstract static class AlleleMergeRule {
|
||||
// vc1, vc2 are ONLY passed to allelesShouldBeMerged() if mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2) AND allSamplesAreMergeable(vc1, vc2):
|
||||
abstract public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2);
|
||||
|
||||
public String toString() {
|
||||
return "all samples are mergeable";
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: returns null if vc1 and vc2 are not merged into a single MNP record
|
||||
|
||||
public static VariantContext mergeIntoMNP(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile, AlleleMergeRule alleleMergeRule) {
|
||||
if (!mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2))
|
||||
return null;
|
||||
|
||||
// Check that it's logically possible to merge the VCs:
|
||||
if (!allSamplesAreMergeable(vc1, vc2))
|
||||
return null;
|
||||
|
||||
// Check if there's a "point" in merging the VCs (e.g., annotations could be changed)
|
||||
if (!alleleMergeRule.allelesShouldBeMerged(vc1, vc2))
|
||||
return null;
|
||||
|
||||
return reallyMergeIntoMNP(vc1, vc2, referenceFile);
|
||||
}
|
||||
|
||||
private static VariantContext reallyMergeIntoMNP(VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) {
|
||||
int startInter = vc1.getEnd() + 1;
|
||||
int endInter = vc2.getStart() - 1;
|
||||
byte[] intermediateBases = null;
|
||||
if (startInter <= endInter) {
|
||||
intermediateBases = referenceFile.getSubsequenceAt(vc1.getChr(), startInter, endInter).getBases();
|
||||
StringUtil.toUpperCase(intermediateBases);
|
||||
}
|
||||
MergedAllelesData mergeData = new MergedAllelesData(intermediateBases, vc1, vc2); // ensures that the reference allele is added
|
||||
|
||||
GenotypeCollection mergedGenotypes = GenotypeCollection.create();
|
||||
for (final Genotype gt1 : vc1.getGenotypes()) {
|
||||
Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
||||
|
||||
List<Allele> site1Alleles = gt1.getAlleles();
|
||||
List<Allele> site2Alleles = gt2.getAlleles();
|
||||
|
||||
List<Allele> mergedAllelesForSample = new LinkedList<Allele>();
|
||||
|
||||
/* NOTE: Since merged alleles are added to mergedAllelesForSample in the SAME order as in the input VC records,
|
||||
we preserve phase information (if any) relative to whatever precedes vc1:
|
||||
*/
|
||||
Iterator<Allele> all2It = site2Alleles.iterator();
|
||||
for (Allele all1 : site1Alleles) {
|
||||
Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable()
|
||||
|
||||
Allele mergedAllele = mergeData.ensureMergedAllele(all1, all2);
|
||||
mergedAllelesForSample.add(mergedAllele);
|
||||
}
|
||||
|
||||
double mergedGQ = Math.max(gt1.getNegLog10PError(), gt2.getNegLog10PError());
|
||||
Set<String> mergedGtFilters = new HashSet<String>(); // Since gt1 and gt2 were unfiltered, the Genotype remains unfiltered
|
||||
|
||||
Map<String, Object> mergedGtAttribs = new HashMap<String, Object>();
|
||||
PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(gt1, gt2);
|
||||
if (phaseQual.PQ != null)
|
||||
mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ);
|
||||
|
||||
Genotype mergedGt = new Genotype(gt1.getSampleName(), mergedAllelesForSample, mergedGQ, mergedGtFilters, mergedGtAttribs, phaseQual.isPhased);
|
||||
mergedGenotypes.add(mergedGt);
|
||||
}
|
||||
|
||||
String mergedName = VariantContextUtils.mergeVariantContextNames(vc1.getSource(), vc2.getSource());
|
||||
double mergedNegLog10PError = Math.max(vc1.getNegLog10PError(), vc2.getNegLog10PError());
|
||||
Set<String> mergedFilters = new HashSet<String>(); // Since vc1 and vc2 were unfiltered, the merged record remains unfiltered
|
||||
Map<String, Object> mergedAttribs = VariantContextUtils.mergeVariantContextAttributes(vc1, vc2);
|
||||
|
||||
// ids
|
||||
List<String> mergedIDs = new ArrayList<String>();
|
||||
if ( vc1.hasID() ) mergedIDs.add(vc1.getID());
|
||||
if ( vc2.hasID() ) mergedIDs.add(vc2.getID());
|
||||
String mergedID = Utils.join(VCFConstants.ID_FIELD_SEPARATOR, mergedIDs);
|
||||
|
||||
// TODO -- FIX ID
|
||||
VariantContext mergedVc = new VariantContext(mergedName, mergedID, vc1.getChr(), vc1.getStart(), vc2.getEnd(), mergeData.getAllMergedAlleles(), mergedGenotypes, mergedNegLog10PError, mergedFilters, mergedAttribs);
|
||||
|
||||
mergedAttribs = new HashMap<String, Object>(mergedVc.getAttributes());
|
||||
VariantContextUtils.calculateChromosomeCounts(mergedVc, mergedAttribs, true);
|
||||
mergedVc = VariantContext.modifyAttributes(mergedVc, mergedAttribs);
|
||||
|
||||
return mergedVc;
|
||||
}
|
||||
|
||||
private static class AlleleOneAndTwo {
|
||||
private Allele all1;
|
||||
private Allele all2;
|
||||
|
||||
public AlleleOneAndTwo(Allele all1, Allele all2) {
|
||||
this.all1 = all1;
|
||||
this.all2 = all2;
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
return all1.hashCode() + all2.hashCode();
|
||||
}
|
||||
|
||||
public boolean equals(Object other) {
|
||||
if (!(other instanceof AlleleOneAndTwo))
|
||||
return false;
|
||||
|
||||
AlleleOneAndTwo otherAot = (AlleleOneAndTwo) other;
|
||||
return (this.all1.equals(otherAot.all1) && this.all2.equals(otherAot.all2));
|
||||
}
|
||||
}
|
||||
|
||||
private static class MergedAllelesData {
|
||||
private Map<AlleleOneAndTwo, Allele> mergedAlleles;
|
||||
private byte[] intermediateBases;
|
||||
private int intermediateLength;
|
||||
|
||||
public MergedAllelesData(byte[] intermediateBases, VariantContext vc1, VariantContext vc2) {
|
||||
this.mergedAlleles = new HashMap<AlleleOneAndTwo, Allele>(); // implemented equals() and hashCode() for AlleleOneAndTwo
|
||||
this.intermediateBases = intermediateBases;
|
||||
this.intermediateLength = this.intermediateBases != null ? this.intermediateBases.length : 0;
|
||||
|
||||
this.ensureMergedAllele(vc1.getReference(), vc2.getReference(), true);
|
||||
}
|
||||
|
||||
public Allele ensureMergedAllele(Allele all1, Allele all2) {
|
||||
return ensureMergedAllele(all1, all2, false); // false <-> since even if all1+all2 = reference, it was already created in the constructor
|
||||
}
|
||||
|
||||
private Allele ensureMergedAllele(Allele all1, Allele all2, boolean creatingReferenceForFirstTime) {
|
||||
AlleleOneAndTwo all12 = new AlleleOneAndTwo(all1, all2);
|
||||
Allele mergedAllele = mergedAlleles.get(all12);
|
||||
|
||||
if (mergedAllele == null) {
|
||||
byte[] bases1 = all1.getBases();
|
||||
byte[] bases2 = all2.getBases();
|
||||
|
||||
byte[] mergedBases = new byte[bases1.length + intermediateLength + bases2.length];
|
||||
System.arraycopy(bases1, 0, mergedBases, 0, bases1.length);
|
||||
if (intermediateBases != null)
|
||||
System.arraycopy(intermediateBases, 0, mergedBases, bases1.length, intermediateLength);
|
||||
System.arraycopy(bases2, 0, mergedBases, bases1.length + intermediateLength, bases2.length);
|
||||
|
||||
mergedAllele = Allele.create(mergedBases, creatingReferenceForFirstTime);
|
||||
mergedAlleles.put(all12, mergedAllele);
|
||||
}
|
||||
|
||||
return mergedAllele;
|
||||
}
|
||||
|
||||
public Set<Allele> getAllMergedAlleles() {
|
||||
return new HashSet<Allele>(mergedAlleles.values());
|
||||
}
|
||||
}
|
||||
|
||||
private static String mergeVariantContextNames(String name1, String name2) {
|
||||
return name1 + "_" + name2;
|
||||
}
|
||||
|
||||
private static Map<String, Object> mergeVariantContextAttributes(VariantContext vc1, VariantContext vc2) {
|
||||
Map<String, Object> mergedAttribs = new HashMap<String, Object>();
|
||||
|
||||
List<VariantContext> vcList = new LinkedList<VariantContext>();
|
||||
vcList.add(vc1);
|
||||
vcList.add(vc2);
|
||||
|
||||
String[] MERGE_OR_ATTRIBS = {VCFConstants.DBSNP_KEY};
|
||||
for (String orAttrib : MERGE_OR_ATTRIBS) {
|
||||
boolean attribVal = false;
|
||||
for (VariantContext vc : vcList) {
|
||||
attribVal = vc.getAttributeAsBoolean(orAttrib, false);
|
||||
if (attribVal) // already true, so no reason to continue:
|
||||
break;
|
||||
}
|
||||
mergedAttribs.put(orAttrib, attribVal);
|
||||
}
|
||||
|
||||
return mergedAttribs;
|
||||
}
|
||||
|
||||
private static boolean mergeIntoMNPvalidationCheck(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2) {
|
||||
GenomeLoc loc1 = VariantContextUtils.getLocation(genomeLocParser, vc1);
|
||||
GenomeLoc loc2 = VariantContextUtils.getLocation(genomeLocParser, vc2);
|
||||
|
||||
if (!loc1.onSameContig(loc2))
|
||||
throw new ReviewedStingException("Can only merge vc1, vc2 if on the same chromosome");
|
||||
|
||||
if (!loc1.isBefore(loc2))
|
||||
throw new ReviewedStingException("Can only merge if vc1 is BEFORE vc2");
|
||||
|
||||
if (vc1.isFiltered() || vc2.isFiltered())
|
||||
return false;
|
||||
|
||||
if (!vc1.getSampleNames().equals(vc2.getSampleNames())) // vc1, vc2 refer to different sample sets
|
||||
return false;
|
||||
|
||||
if (!allGenotypesAreUnfilteredAndCalled(vc1) || !allGenotypesAreUnfilteredAndCalled(vc2))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private static boolean allGenotypesAreUnfilteredAndCalled(VariantContext vc) {
|
||||
for (final Genotype gt : vc.getGenotypes()) {
|
||||
if (gt.isNoCall() || gt.isFiltered())
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Assumes that vc1 and vc2 were already checked to have the same sample names:
|
||||
|
||||
private static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) {
|
||||
// Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1:
|
||||
for (final Genotype gt1 : vc1.getGenotypes()) {
|
||||
Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
||||
|
||||
if (!alleleSegregationIsKnown(gt1, gt2)) // can merge if: phased, or if either is a hom
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public static boolean alleleSegregationIsKnown(Genotype gt1, Genotype gt2) {
|
||||
if (gt1.getPloidy() != gt2.getPloidy())
|
||||
return false;
|
||||
|
||||
/* If gt2 is phased or hom, then could even be MERGED with gt1 [This is standard].
|
||||
|
||||
HOWEVER, EVEN if this is not the case, but gt1.isHom(),
|
||||
it is trivially known that each of gt2's alleles segregate with the single allele type present in gt1.
|
||||
*/
|
||||
return (gt2.isPhased() || gt2.isHom() || gt1.isHom());
|
||||
}
|
||||
|
||||
private static class PhaseAndQuality {
|
||||
public boolean isPhased;
|
||||
public Double PQ = null;
|
||||
|
||||
public PhaseAndQuality(Genotype gt) {
|
||||
this.isPhased = gt.isPhased();
|
||||
if (this.isPhased) {
|
||||
this.PQ = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1);
|
||||
if ( this.PQ == -1 ) this.PQ = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Assumes that alleleSegregationIsKnown(gt1, gt2):
|
||||
|
||||
private static PhaseAndQuality calcPhaseForMergedGenotypes(Genotype gt1, Genotype gt2) {
|
||||
if (gt2.isPhased() || gt2.isHom())
|
||||
return new PhaseAndQuality(gt1); // maintain the phase of gt1
|
||||
|
||||
if (!gt1.isHom())
|
||||
throw new ReviewedStingException("alleleSegregationIsKnown(gt1, gt2) implies: gt2.genotypesArePhased() || gt2.isHom() || gt1.isHom()");
|
||||
|
||||
/* We're dealing with: gt1.isHom(), gt2.isHet(), !gt2.genotypesArePhased(); so, the merged (het) Genotype is not phased relative to the previous Genotype
|
||||
|
||||
For example, if we're merging the third Genotype with the second one:
|
||||
0/1
|
||||
1|1
|
||||
0/1
|
||||
|
||||
Then, we want to output:
|
||||
0/1
|
||||
1/2
|
||||
*/
|
||||
return new PhaseAndQuality(gt2); // maintain the phase of gt2 [since !gt2.genotypesArePhased()]
|
||||
}
|
||||
|
||||
/* Checks if any sample has a MNP of ALT alleles (segregating together):
|
||||
[Assumes that vc1 and vc2 were already checked to have the same sample names && allSamplesAreMergeable(vc1, vc2)]
|
||||
*/
|
||||
|
||||
public static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) {
|
||||
for (final Genotype gt1 : vc1.getGenotypes()) {
|
||||
Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
||||
|
||||
List<Allele> site1Alleles = gt1.getAlleles();
|
||||
List<Allele> site2Alleles = gt2.getAlleles();
|
||||
|
||||
Iterator<Allele> all2It = site2Alleles.iterator();
|
||||
for (Allele all1 : site1Alleles) {
|
||||
Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable()
|
||||
|
||||
if (all1.isNonReference() && all2.isNonReference()) // corresponding alleles are alternate
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Checks if all samples are consistent in their haplotypes:
|
||||
[Assumes that vc1 and vc2 were already checked to have the same sample names && allSamplesAreMergeable(vc1, vc2)]
|
||||
*/
|
||||
|
||||
public static boolean doubleAllelesSegregatePerfectlyAmongSamples(VariantContext vc1, VariantContext vc2) {
|
||||
// Check that Alleles at vc1 and at vc2 always segregate together in all samples (including reference):
|
||||
Map<Allele, Allele> allele1ToAllele2 = new HashMap<Allele, Allele>();
|
||||
Map<Allele, Allele> allele2ToAllele1 = new HashMap<Allele, Allele>();
|
||||
|
||||
// Note the segregation of the alleles for the reference genome:
|
||||
allele1ToAllele2.put(vc1.getReference(), vc2.getReference());
|
||||
allele2ToAllele1.put(vc2.getReference(), vc1.getReference());
|
||||
|
||||
// Note the segregation of the alleles for each sample (and check that it is consistent with the reference and all previous samples).
|
||||
for (final Genotype gt1 : vc1.getGenotypes()) {
|
||||
Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
||||
|
||||
List<Allele> site1Alleles = gt1.getAlleles();
|
||||
List<Allele> site2Alleles = gt2.getAlleles();
|
||||
|
||||
Iterator<Allele> all2It = site2Alleles.iterator();
|
||||
for (Allele all1 : site1Alleles) {
|
||||
Allele all2 = all2It.next();
|
||||
|
||||
Allele all1To2 = allele1ToAllele2.get(all1);
|
||||
if (all1To2 == null)
|
||||
allele1ToAllele2.put(all1, all2);
|
||||
else if (!all1To2.equals(all2)) // all1 segregates with two different alleles at site 2
|
||||
return false;
|
||||
|
||||
Allele all2To1 = allele2ToAllele1.get(all2);
|
||||
if (all2To1 == null)
|
||||
allele2ToAllele1.put(all2, all1);
|
||||
else if (!all2To1.equals(all1)) // all2 segregates with two different alleles at site 1
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue