Added validator for phasing using read information, e.g., PacBio: ReadBasedPhasingValidationWalker
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4940 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d203f5e39a
commit
4b37710bcd
|
|
@ -0,0 +1,110 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public abstract class BaseArray implements Comparable<BaseArray> {
|
||||||
|
protected Byte[] bases;
|
||||||
|
|
||||||
|
public BaseArray(byte[] bases) {
|
||||||
|
this.bases = new Byte[bases.length];
|
||||||
|
for (int i = 0; i < bases.length; i++)
|
||||||
|
this.bases[i] = bases[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
public BaseArray(Byte[] bases) {
|
||||||
|
this.bases = Arrays.copyOf(bases, bases.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
public BaseArray(int length) {
|
||||||
|
this.bases = new Byte[length];
|
||||||
|
Arrays.fill(this.bases, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public BaseArray(BaseArray other) {
|
||||||
|
this(other.bases);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void updateBase(int index, Byte base) {
|
||||||
|
bases[index] = base;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Byte getBase(int index) {
|
||||||
|
return bases[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
return bases.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
StringBuilder sb = new StringBuilder(bases.length);
|
||||||
|
for (Byte b : bases)
|
||||||
|
sb.append(b != null ? (char) b.byteValue() : "_");
|
||||||
|
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int compareTo(BaseArray that) {
|
||||||
|
int sz = this.bases.length;
|
||||||
|
if (sz != that.bases.length)
|
||||||
|
return (sz - that.bases.length);
|
||||||
|
|
||||||
|
for (int i = 0; i < sz; i++) {
|
||||||
|
Byte thisBase = this.getBase(i);
|
||||||
|
Byte thatBase = that.getBase(i);
|
||||||
|
if (thisBase == null || thatBase == null) {
|
||||||
|
if (thisBase == null && thatBase != null) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
else if (thisBase != null && thatBase == null) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (!BaseUtils.extendedBasesAreEqual(thisBase, thatBase)) {
|
||||||
|
return thisBase - thatBase;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int[] getNonNullIndices() {
|
||||||
|
List<Integer> nonNull = new LinkedList<Integer>();
|
||||||
|
for (int i = 0; i < bases.length; i++) {
|
||||||
|
if (getBase(i) != null)
|
||||||
|
nonNull.add(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
int[] nonNullArray = new int[nonNull.size()];
|
||||||
|
int index = 0;
|
||||||
|
for (int i : nonNull)
|
||||||
|
nonNullArray[index++] = i;
|
||||||
|
return nonNullArray;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,71 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
public class Haplotype extends BaseArray implements Cloneable {
|
||||||
|
public Haplotype(byte[] bases) {
|
||||||
|
super(bases);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Haplotype(Byte[] bases) {
|
||||||
|
super(bases);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Haplotype(Haplotype other) {
|
||||||
|
super(other);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Haplotype(BaseArray baseArr) {
|
||||||
|
super(baseArr.bases);
|
||||||
|
|
||||||
|
if (baseArr.getNonNullIndices().length != baseArr.bases.length)
|
||||||
|
throw new ReviewedStingException("Should NEVER call Haplotype ctor with null bases!");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void updateBase(int index, Byte base) {
|
||||||
|
if (base == null) {
|
||||||
|
throw new ReviewedStingException("Internal error: CANNOT have null for a missing Haplotype base!");
|
||||||
|
}
|
||||||
|
super.updateBase(index, base);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Haplotype clone() {
|
||||||
|
try {
|
||||||
|
super.clone();
|
||||||
|
} catch (CloneNotSupportedException e) {
|
||||||
|
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
||||||
|
}
|
||||||
|
return new Haplotype(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a new Haplotype containing the portion of this Haplotype between the specified fromIndex, inclusive, and toIndex, exclusive.
|
||||||
|
|
||||||
|
public Haplotype subHaplotype(int fromIndex, int toIndex) {
|
||||||
|
return new Haplotype(Arrays.copyOfRange(bases, fromIndex, Math.min(toIndex, size())));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,93 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.PreciseNonNegativeDouble;
|
||||||
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
public class PhasingRead extends BaseArray {
|
||||||
|
private PreciseNonNegativeDouble mappingProb; // the probability that this read is mapped correctly
|
||||||
|
private PreciseNonNegativeDouble[] baseProbs; // the probabilities that the base identities are CORRECT
|
||||||
|
private PreciseNonNegativeDouble[] baseErrorProbs; // the probabilities that the base identities are INCORRECT
|
||||||
|
|
||||||
|
public PhasingRead(int length, int mappingQual) {
|
||||||
|
super(length);
|
||||||
|
|
||||||
|
this.mappingProb = new PreciseNonNegativeDouble(QualityUtils.qualToProb(mappingQual));
|
||||||
|
|
||||||
|
this.baseProbs = new PreciseNonNegativeDouble[length];
|
||||||
|
Arrays.fill(this.baseProbs, null);
|
||||||
|
|
||||||
|
this.baseErrorProbs = new PreciseNonNegativeDouble[length];
|
||||||
|
Arrays.fill(this.baseErrorProbs, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void updateBaseAndQuality(int index, Byte base, byte baseQual) {
|
||||||
|
updateBase(index, base);
|
||||||
|
|
||||||
|
double errProb = QualityUtils.qualToErrorProb(baseQual);
|
||||||
|
|
||||||
|
// The base error should be AT LEAST AS HIGH as the mapping error [equivalent to capping the base quality (BQ) by the mapping quality (MQ)]:
|
||||||
|
errProb = Math.max(errProb, 1.0 - mappingProb.getValue());
|
||||||
|
|
||||||
|
baseProbs[index] = new PreciseNonNegativeDouble(1.0 - errProb); // The probability that the true base is the base called in the read
|
||||||
|
baseErrorProbs[index] = new PreciseNonNegativeDouble(errProb / 3.0); // DIVIDE up the error probability EQUALLY over the 3 non-called bases
|
||||||
|
}
|
||||||
|
|
||||||
|
public PhasingScore matchHaplotypeClassScore(HaplotypeClass hapClass) {
|
||||||
|
PreciseNonNegativeDouble value = new PreciseNonNegativeDouble(0.0);
|
||||||
|
for (Haplotype h : hapClass)
|
||||||
|
value.plusEqual(matchHaplotypeScore(h));
|
||||||
|
|
||||||
|
return new PhasingScore(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
private PreciseNonNegativeDouble matchHaplotypeScore(Haplotype hap) {
|
||||||
|
PreciseNonNegativeDouble score = new PreciseNonNegativeDouble(1.0);
|
||||||
|
|
||||||
|
int sz = this.bases.length;
|
||||||
|
if (sz != hap.bases.length)
|
||||||
|
throw new ReviewedStingException("Read and Haplotype should have same length to be compared!");
|
||||||
|
|
||||||
|
// Technically, this HAS NO EFFECT since it is multiplied in for ALL haplotype pairs, but do so for completeness:
|
||||||
|
score.timesEqual(mappingProb);
|
||||||
|
|
||||||
|
for (int i = 0; i < sz; i++) {
|
||||||
|
Byte thisBase = this.getBase(i);
|
||||||
|
Byte hapBase = hap.getBase(i);
|
||||||
|
if (thisBase != null && hapBase != null) {
|
||||||
|
if (BaseUtils.basesAreEqual(thisBase, hapBase))
|
||||||
|
score.timesEqual(baseProbs[i]);
|
||||||
|
else
|
||||||
|
score.timesEqual(baseErrorProbs[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return score;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -21,7 +21,6 @@
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||||
|
|
||||||
import org.broad.tribble.util.variantcontext.Allele;
|
import org.broad.tribble.util.variantcontext.Allele;
|
||||||
|
|
@ -202,7 +201,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
mostDownstreamLocusReached = ref.getLocus();
|
mostDownstreamLocusReached = ref.getLocus();
|
||||||
if ( DEBUG ) logger.debug("map() at: " + mostDownstreamLocusReached);
|
if (DEBUG) logger.debug("map() at: " + mostDownstreamLocusReached);
|
||||||
|
|
||||||
PhasingStats phaseStats = new PhasingStats();
|
PhasingStats phaseStats = new PhasingStats();
|
||||||
List<VariantContext> unprocessedList = new LinkedList<VariantContext>();
|
List<VariantContext> unprocessedList = new LinkedList<VariantContext>();
|
||||||
|
|
@ -210,13 +209,14 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
boolean requireStartHere = true; // only see each VariantContext once
|
boolean requireStartHere = true; // only see each VariantContext once
|
||||||
boolean takeFirstOnly = false; // take as many entries as the VCF file has
|
boolean takeFirstOnly = false; // take as many entries as the VCF file has
|
||||||
for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, null, context.getLocation(), requireStartHere, takeFirstOnly)) {
|
for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, null, context.getLocation(), requireStartHere, takeFirstOnly)) {
|
||||||
if ( samplesToPhase != null ) vc = reduceVCToSamples(vc, samplesToPhase);
|
if (samplesToPhase != null) vc = reduceVCToSamples(vc, samplesToPhase);
|
||||||
|
|
||||||
if (ReadBackedPhasingWalker.processVariantInPhasing(vc)) {
|
if (ReadBackedPhasingWalker.processVariantInPhasing(vc)) {
|
||||||
VariantAndReads vr = new VariantAndReads(vc, context);
|
VariantAndReads vr = new VariantAndReads(vc, context);
|
||||||
unphasedSiteQueue.add(vr);
|
unphasedSiteQueue.add(vr);
|
||||||
|
|
||||||
if ( DEBUG ) logger.debug("Added variant to queue = " + VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vr.variant));
|
if (DEBUG)
|
||||||
|
logger.debug("Added variant to queue = " + VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), vr.variant));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
unprocessedList.add(vc); // Finished with the unprocessed variant, and writer can enforce sorting on-the-fly
|
unprocessedList.add(vc); // Finished with the unprocessed variant, and writer can enforce sorting on-the-fly
|
||||||
|
|
@ -240,13 +240,14 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final Set<String> KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet<String>(Arrays.asList("PQ"));
|
private static final Set<String> KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet<String>(Arrays.asList("PQ"));
|
||||||
|
|
||||||
private VariantContext reduceVCToSamples(VariantContext vc, List<String> samplesToPhase) {
|
private VariantContext reduceVCToSamples(VariantContext vc, List<String> samplesToPhase) {
|
||||||
// for ( String sample : samplesToPhase )
|
// for ( String sample : samplesToPhase )
|
||||||
// logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() ));
|
// logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() ));
|
||||||
VariantContext subvc = vc.subContextFromGenotypes(vc.getGenotypes(samplesToPhase).values());
|
VariantContext subvc = vc.subContextFromGenotypes(vc.getGenotypes(samplesToPhase).values());
|
||||||
// logger.debug("original VC = " + vc);
|
// logger.debug("original VC = " + vc);
|
||||||
// logger.debug("sub VC = " + subvc);
|
// logger.debug("sub VC = " + subvc);
|
||||||
return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF );
|
return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<VariantContext> processQueue(PhasingStats phaseStats, boolean processAll) {
|
private List<VariantContext> processQueue(PhasingStats phaseStats, boolean processAll) {
|
||||||
|
|
@ -255,7 +256,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
while (!unphasedSiteQueue.isEmpty()) {
|
while (!unphasedSiteQueue.isEmpty()) {
|
||||||
if (!processAll) { // otherwise, phase until the end of unphasedSiteQueue
|
if (!processAll) { // otherwise, phase until the end of unphasedSiteQueue
|
||||||
VariantContext nextToPhaseVc = unphasedSiteQueue.peek().variant;
|
VariantContext nextToPhaseVc = unphasedSiteQueue.peek().variant;
|
||||||
if (startDistancesAreInWindowRange(mostDownstreamLocusReached, VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),nextToPhaseVc))) {
|
if (startDistancesAreInWindowRange(mostDownstreamLocusReached, VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), nextToPhaseVc))) {
|
||||||
/* mostDownstreamLocusReached is still not far enough ahead of nextToPhaseVc to have all phasing information for nextToPhaseVc
|
/* mostDownstreamLocusReached is still not far enough ahead of nextToPhaseVc to have all phasing information for nextToPhaseVc
|
||||||
(note that we ASSUME that the VCF is ordered by <contig,locus>).
|
(note that we ASSUME that the VCF is ordered by <contig,locus>).
|
||||||
Note that this will always leave at least one entry (the last one), since mostDownstreamLocusReached is in range of itself.
|
Note that this will always leave at least one entry (the last one), since mostDownstreamLocusReached is in range of itself.
|
||||||
|
|
@ -266,16 +267,17 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
// Update partiallyPhasedSites before it's used in phaseSite:
|
// Update partiallyPhasedSites before it's used in phaseSite:
|
||||||
oldPhasedList.addAll(discardIrrelevantPhasedSites());
|
oldPhasedList.addAll(discardIrrelevantPhasedSites());
|
||||||
if ( DEBUG ) logger.debug("oldPhasedList(1st) = " + toStringVCL(oldPhasedList));
|
if (DEBUG) logger.debug("oldPhasedList(1st) = " + toStringVCL(oldPhasedList));
|
||||||
|
|
||||||
VariantAndReads vr = unphasedSiteQueue.remove();
|
VariantAndReads vr = unphasedSiteQueue.remove();
|
||||||
if ( DEBUG ) logger.debug("Performing phasing for " + VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vr.variant));
|
if (DEBUG)
|
||||||
|
logger.debug("Performing phasing for " + VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), vr.variant));
|
||||||
phaseSite(vr, phaseStats);
|
phaseSite(vr, phaseStats);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update partiallyPhasedSites after phaseSite is done:
|
// Update partiallyPhasedSites after phaseSite is done:
|
||||||
oldPhasedList.addAll(discardIrrelevantPhasedSites());
|
oldPhasedList.addAll(discardIrrelevantPhasedSites());
|
||||||
if ( DEBUG ) logger.debug("oldPhasedList(2nd) = " + toStringVCL(oldPhasedList));
|
if (DEBUG) logger.debug("oldPhasedList(2nd) = " + toStringVCL(oldPhasedList));
|
||||||
|
|
||||||
if (outputMultipleBaseCountsWriter != null)
|
if (outputMultipleBaseCountsWriter != null)
|
||||||
outputMultipleBaseCountsWriter.outputMultipleBaseCounts();
|
outputMultipleBaseCountsWriter.outputMultipleBaseCounts();
|
||||||
|
|
@ -288,7 +290,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
|
|
||||||
GenomeLoc nextToPhaseLoc = null;
|
GenomeLoc nextToPhaseLoc = null;
|
||||||
if (!unphasedSiteQueue.isEmpty())
|
if (!unphasedSiteQueue.isEmpty())
|
||||||
nextToPhaseLoc = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),unphasedSiteQueue.peek().variant);
|
nextToPhaseLoc = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), unphasedSiteQueue.peek().variant);
|
||||||
|
|
||||||
while (!partiallyPhasedSites.isEmpty()) {
|
while (!partiallyPhasedSites.isEmpty()) {
|
||||||
if (nextToPhaseLoc != null) { // otherwise, unphasedSiteQueue.isEmpty(), and therefore no need to keep any of the "past"
|
if (nextToPhaseLoc != null) { // otherwise, unphasedSiteQueue.isEmpty(), and therefore no need to keep any of the "past"
|
||||||
|
|
@ -313,7 +315,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
|
|
||||||
private void phaseSite(VariantAndReads vr, PhasingStats phaseStats) {
|
private void phaseSite(VariantAndReads vr, PhasingStats phaseStats) {
|
||||||
VariantContext vc = vr.variant;
|
VariantContext vc = vr.variant;
|
||||||
logger.debug("Will phase vc = " + VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc));
|
logger.debug("Will phase vc = " + VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), vc));
|
||||||
|
|
||||||
UnfinishedVariantAndReads uvr = new UnfinishedVariantAndReads(vr);
|
UnfinishedVariantAndReads uvr = new UnfinishedVariantAndReads(vr);
|
||||||
UnfinishedVariantContext uvc = uvr.unfinishedVariant;
|
UnfinishedVariantContext uvc = uvr.unfinishedVariant;
|
||||||
|
|
@ -325,7 +327,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
String samp = sampGtEntry.getKey();
|
String samp = sampGtEntry.getKey();
|
||||||
Genotype gt = sampGtEntry.getValue();
|
Genotype gt = sampGtEntry.getValue();
|
||||||
|
|
||||||
if ( DEBUG ) logger.debug("sample = " + samp);
|
if (DEBUG) logger.debug("sample = " + samp);
|
||||||
if (isUnfilteredCalledDiploidGenotype(gt)) {
|
if (isUnfilteredCalledDiploidGenotype(gt)) {
|
||||||
if (gt.isHom()) { // Note that this Genotype may be replaced later to contain the PQ of a downstream het site that was phased relative to a het site lying upstream of this hom site:
|
if (gt.isHom()) { // Note that this Genotype may be replaced later to contain the PQ of a downstream het site that was phased relative to a het site lying upstream of this hom site:
|
||||||
// true <-> can trivially phase a hom site relative to ANY previous site:
|
// true <-> can trivially phase a hom site relative to ANY previous site:
|
||||||
|
|
@ -336,7 +338,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
PhasingWindow phaseWindow = new PhasingWindow(vr, samp);
|
PhasingWindow phaseWindow = new PhasingWindow(vr, samp);
|
||||||
if (phaseWindow.hasPreviousHets()) { // Otherwise, nothing to phase this against
|
if (phaseWindow.hasPreviousHets()) { // Otherwise, nothing to phase this against
|
||||||
SNPallelePair allelePair = new SNPallelePair(gt);
|
SNPallelePair allelePair = new SNPallelePair(gt);
|
||||||
if ( DEBUG ) logger.debug("Want to phase TOP vs. BOTTOM for: " + "\n" + allelePair);
|
if (DEBUG) logger.debug("Want to phase TOP vs. BOTTOM for: " + "\n" + allelePair);
|
||||||
|
|
||||||
DoublyLinkedList.BidirectionalIterator<UnfinishedVariantAndReads> prevHetAndInteriorIt = phaseWindow.prevHetAndInteriorIt;
|
DoublyLinkedList.BidirectionalIterator<UnfinishedVariantAndReads> prevHetAndInteriorIt = phaseWindow.prevHetAndInteriorIt;
|
||||||
/* Notes:
|
/* Notes:
|
||||||
|
|
@ -350,15 +352,17 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
boolean genotypesArePhased = passesPhasingThreshold(pr.phaseQuality);
|
boolean genotypesArePhased = passesPhasingThreshold(pr.phaseQuality);
|
||||||
|
|
||||||
if (pr.phasingContainsInconsistencies) {
|
if (pr.phasingContainsInconsistencies) {
|
||||||
if ( DEBUG ) logger.debug("MORE than " + (MAX_FRACTION_OF_INCONSISTENT_READS * 100) + "% of the reads are inconsistent for phasing of " + VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc));
|
if (DEBUG)
|
||||||
|
logger.debug("MORE than " + (MAX_FRACTION_OF_INCONSISTENT_READS * 100) + "% of the reads are inconsistent for phasing of " + VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), vc));
|
||||||
uvc.setPhasingInconsistent();
|
uvc.setPhasingInconsistent();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (genotypesArePhased) {
|
if (genotypesArePhased) {
|
||||||
SNPallelePair prevAllelePair = new SNPallelePair(prevHetGenotype);
|
SNPallelePair prevAllelePair = new SNPallelePair(prevHetGenotype);
|
||||||
|
|
||||||
if ( DEBUG ) logger.debug("THE PHASE PREVIOUSLY CHOSEN FOR PREVIOUS:\n" + prevAllelePair + "\n");
|
if (DEBUG)
|
||||||
if ( DEBUG ) logger.debug("THE PHASE CHOSEN HERE:\n" + allelePair + "\n\n");
|
logger.debug("THE PHASE PREVIOUSLY CHOSEN FOR PREVIOUS:\n" + prevAllelePair + "\n");
|
||||||
|
if (DEBUG) logger.debug("THE PHASE CHOSEN HERE:\n" + allelePair + "\n\n");
|
||||||
|
|
||||||
ensurePhasing(allelePair, prevAllelePair, pr.haplotype);
|
ensurePhasing(allelePair, prevAllelePair, pr.haplotype);
|
||||||
Map<String, Object> gtAttribs = new HashMap<String, Object>(gt.getAttributes());
|
Map<String, Object> gtAttribs = new HashMap<String, Object>(gt.getAttributes());
|
||||||
|
|
@ -389,7 +393,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
|
|
||||||
if (statsWriter != null)
|
if (statsWriter != null)
|
||||||
statsWriter.addStat(samp, VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc), startDistance(prevUvc, vc), pr.phaseQuality, phaseWindow.readsAtHetSites.size(), phaseWindow.hetGenotypes.length);
|
statsWriter.addStat(samp, VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), vc), startDistance(prevUvc, vc), pr.phaseQuality, phaseWindow.readsAtHetSites.size(), phaseWindow.hetGenotypes.length);
|
||||||
|
|
||||||
PhaseCounts sampPhaseCounts = samplePhaseStats.get(samp);
|
PhaseCounts sampPhaseCounts = samplePhaseStats.get(samp);
|
||||||
if (sampPhaseCounts == null) {
|
if (sampPhaseCounts == null) {
|
||||||
|
|
@ -436,7 +440,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
private Genotype[] hetGenotypes = null;
|
private Genotype[] hetGenotypes = null;
|
||||||
private DoublyLinkedList.BidirectionalIterator<UnfinishedVariantAndReads> prevHetAndInteriorIt = null;
|
private DoublyLinkedList.BidirectionalIterator<UnfinishedVariantAndReads> prevHetAndInteriorIt = null;
|
||||||
private int phasingSiteIndex = -1;
|
private int phasingSiteIndex = -1;
|
||||||
private Map<String, Read> readsAtHetSites = null;
|
private Map<String, PhasingRead> readsAtHetSites = null;
|
||||||
|
|
||||||
public boolean hasPreviousHets() {
|
public boolean hasPreviousHets() {
|
||||||
return phasingSiteIndex > 0;
|
return phasingSiteIndex > 0;
|
||||||
|
|
@ -458,7 +462,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
else if (gt.isHet()) {
|
else if (gt.isHet()) {
|
||||||
GenotypeAndReadBases grb = new GenotypeAndReadBases(gt, phasedVr.sampleReadBases.get(sample), phasedVr.unfinishedVariant.getLocation());
|
GenotypeAndReadBases grb = new GenotypeAndReadBases(gt, phasedVr.sampleReadBases.get(sample), phasedVr.unfinishedVariant.getLocation());
|
||||||
listHetGenotypes.add(grb);
|
listHetGenotypes.add(grb);
|
||||||
if ( DEBUG ) logger.debug("Using UPSTREAM het site = " + grb.loc);
|
if (DEBUG) logger.debug("Using UPSTREAM het site = " + grb.loc);
|
||||||
prevHetAndInteriorIt = phasedIt.clone();
|
prevHetAndInteriorIt = phasedIt.clone();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -471,10 +475,11 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
prevHetAndInteriorIt.previous(); // so that it points to the previous het site [and NOT one after it, due to the last call to next()]
|
prevHetAndInteriorIt.previous(); // so that it points to the previous het site [and NOT one after it, due to the last call to next()]
|
||||||
|
|
||||||
// Add the (het) position to be phased:
|
// Add the (het) position to be phased:
|
||||||
GenomeLoc phaseLocus = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vr.variant);
|
GenomeLoc phaseLocus = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), vr.variant);
|
||||||
GenotypeAndReadBases grbPhase = new GenotypeAndReadBases(vr.variant.getGenotype(sample), vr.sampleReadBases.get(sample), phaseLocus);
|
GenotypeAndReadBases grbPhase = new GenotypeAndReadBases(vr.variant.getGenotype(sample), vr.sampleReadBases.get(sample), phaseLocus);
|
||||||
listHetGenotypes.add(grbPhase);
|
listHetGenotypes.add(grbPhase);
|
||||||
if ( DEBUG ) logger.debug("PHASING het site = " + grbPhase.loc + " [phasingSiteIndex = " + phasingSiteIndex + "]");
|
if (DEBUG)
|
||||||
|
logger.debug("PHASING het site = " + grbPhase.loc + " [phasingSiteIndex = " + phasingSiteIndex + "]");
|
||||||
|
|
||||||
// Include as-of-yet unphased sites in the phasing computation:
|
// Include as-of-yet unphased sites in the phasing computation:
|
||||||
for (VariantAndReads nextVr : unphasedSiteQueue) {
|
for (VariantAndReads nextVr : unphasedSiteQueue) {
|
||||||
|
|
@ -485,9 +490,9 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
else if (gt.isHet()) {
|
else if (gt.isHet()) {
|
||||||
GenotypeAndReadBases grb = new GenotypeAndReadBases(gt, nextVr.sampleReadBases.get(sample), VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),nextVr.variant));
|
GenotypeAndReadBases grb = new GenotypeAndReadBases(gt, nextVr.sampleReadBases.get(sample), VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), nextVr.variant));
|
||||||
listHetGenotypes.add(grb);
|
listHetGenotypes.add(grb);
|
||||||
if ( DEBUG ) logger.debug("Using DOWNSTREAM het site = " + grb.loc);
|
if (DEBUG) logger.debug("Using DOWNSTREAM het site = " + grb.loc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -515,7 +520,8 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
buildReadsAtHetSites(listHetGenotypes, onlyKeepReads);
|
buildReadsAtHetSites(listHetGenotypes, onlyKeepReads);
|
||||||
|
|
||||||
// Copy to a fixed-size array:
|
// Copy to a fixed-size array:
|
||||||
if ( DEBUG ) logger.debug("FINAL phasing window of " + listHetGenotypes.size() + " sites:\n" + toStringGRL(listHetGenotypes));
|
if (DEBUG)
|
||||||
|
logger.debug("FINAL phasing window of " + listHetGenotypes.size() + " sites:\n" + toStringGRL(listHetGenotypes));
|
||||||
hetGenotypes = new Genotype[listHetGenotypes.size()];
|
hetGenotypes = new Genotype[listHetGenotypes.size()];
|
||||||
int index = 0;
|
int index = 0;
|
||||||
for (GenotypeAndReadBases copyGrb : listHetGenotypes)
|
for (GenotypeAndReadBases copyGrb : listHetGenotypes)
|
||||||
|
|
@ -531,7 +537,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
|
|
||||||
private void buildReadsAtHetSites(List<GenotypeAndReadBases> listHetGenotypes, String sample, GenomeLoc phasingLoc, Set<String> onlyKeepReads) {
|
private void buildReadsAtHetSites(List<GenotypeAndReadBases> listHetGenotypes, String sample, GenomeLoc phasingLoc, Set<String> onlyKeepReads) {
|
||||||
readsAtHetSites = new HashMap<String, Read>();
|
readsAtHetSites = new HashMap<String, PhasingRead>();
|
||||||
|
|
||||||
int index = 0;
|
int index = 0;
|
||||||
for (GenotypeAndReadBases grb : listHetGenotypes) {
|
for (GenotypeAndReadBases grb : listHetGenotypes) {
|
||||||
|
|
@ -542,9 +548,9 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
if (onlyKeepReads != null && !onlyKeepReads.contains(readName)) // if onlyKeepReads exists, ignore reads not in onlyKeepReads
|
if (onlyKeepReads != null && !onlyKeepReads.contains(readName)) // if onlyKeepReads exists, ignore reads not in onlyKeepReads
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
Read rd = readsAtHetSites.get(readName);
|
PhasingRead rd = readsAtHetSites.get(readName);
|
||||||
if (rd == null) {
|
if (rd == null) {
|
||||||
rd = new Read(listHetGenotypes.size(), rb.mappingQual);
|
rd = new PhasingRead(listHetGenotypes.size(), rb.mappingQual);
|
||||||
readsAtHetSites.put(readName, rd);
|
readsAtHetSites.put(readName, rd);
|
||||||
}
|
}
|
||||||
else if (outputMultipleBaseCountsWriter != null && rd.getBase(index) != null // rd already has a base at index
|
else if (outputMultipleBaseCountsWriter != null && rd.getBase(index) != null // rd already has a base at index
|
||||||
|
|
@ -558,13 +564,13 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
if ( DEBUG ) logger.debug("Number of sites in window = " + index);
|
if (DEBUG) logger.debug("Number of sites in window = " + index);
|
||||||
|
|
||||||
if ( DEBUG && logger.isDebugEnabled()) {
|
if (DEBUG && logger.isDebugEnabled()) {
|
||||||
logger.debug("ALL READS [phasingSiteIndex = " + phasingSiteIndex + "]:");
|
logger.debug("ALL READS [phasingSiteIndex = " + phasingSiteIndex + "]:");
|
||||||
for (Map.Entry<String, Read> nameToReads : readsAtHetSites.entrySet()) {
|
for (Map.Entry<String, PhasingRead> nameToReads : readsAtHetSites.entrySet()) {
|
||||||
String rdName = nameToReads.getKey();
|
String rdName = nameToReads.getKey();
|
||||||
Read rd = nameToReads.getValue();
|
PhasingRead rd = nameToReads.getValue();
|
||||||
logger.debug(rd + "\t" + rdName);
|
logger.debug(rd + "\t" + rdName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -620,16 +626,16 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
EdgeToReads edgeToReads = new EdgeToReads();
|
EdgeToReads edgeToReads = new EdgeToReads();
|
||||||
Set<Integer> sitesWithEdges = new TreeSet<Integer>();
|
Set<Integer> sitesWithEdges = new TreeSet<Integer>();
|
||||||
|
|
||||||
for (Map.Entry<String, Read> nameToReads : readsAtHetSites.entrySet()) {
|
for (Map.Entry<String, PhasingRead> nameToReads : readsAtHetSites.entrySet()) {
|
||||||
String rdName = nameToReads.getKey();
|
String rdName = nameToReads.getKey();
|
||||||
Read rd = nameToReads.getValue();
|
PhasingRead rd = nameToReads.getValue();
|
||||||
|
|
||||||
int[] siteInds = rd.getNonNullIndices();
|
int[] siteInds = rd.getNonNullIndices();
|
||||||
// Connect each pair of non-null sites in rd:
|
// Connect each pair of non-null sites in rd:
|
||||||
for (int i = 0; i < siteInds.length; i++) {
|
for (int i = 0; i < siteInds.length; i++) {
|
||||||
for (int j = i + 1; j < siteInds.length; j++) {
|
for (int j = i + 1; j < siteInds.length; j++) {
|
||||||
GraphEdge e = new GraphEdge(siteInds[i], siteInds[j]);
|
GraphEdge e = new GraphEdge(siteInds[i], siteInds[j]);
|
||||||
if ( DEBUG ) logger.debug("Read = " + rdName + " is adding edge: " + e);
|
if (DEBUG) logger.debug("Read = " + rdName + " is adding edge: " + e);
|
||||||
readGraph.addEdge(e);
|
readGraph.addEdge(e);
|
||||||
|
|
||||||
edgeToReads.addRead(e, rdName);
|
edgeToReads.addRead(e, rdName);
|
||||||
|
|
@ -639,7 +645,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ( DEBUG ) logger.debug("Read graph:\n" + readGraph);
|
if (DEBUG) logger.debug("Read graph:\n" + readGraph);
|
||||||
Set<String> keepReads = new HashSet<String>();
|
Set<String> keepReads = new HashSet<String>();
|
||||||
|
|
||||||
/* Check which Reads are involved in acyclic paths from (phasingSiteIndex - 1) to (phasingSiteIndex):
|
/* Check which Reads are involved in acyclic paths from (phasingSiteIndex - 1) to (phasingSiteIndex):
|
||||||
|
|
@ -692,7 +698,8 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
int cur = phasingSiteIndex;
|
int cur = phasingSiteIndex;
|
||||||
|
|
||||||
if (!readGraph.getConnectedComponents().inSameSet(prev, cur)) { // There is NO path between cur and prev
|
if (!readGraph.getConnectedComponents().inSameSet(prev, cur)) { // There is NO path between cur and prev
|
||||||
if ( DEBUG ) logger.debug("NO READ PATH between PHASE site [" + cur + "] and UPSTREAM site [" + prev + "]");
|
if (DEBUG)
|
||||||
|
logger.debug("NO READ PATH between PHASE site [" + cur + "] and UPSTREAM site [" + prev + "]");
|
||||||
readsAtHetSites.clear();
|
readsAtHetSites.clear();
|
||||||
return keepReads;
|
return keepReads;
|
||||||
}
|
}
|
||||||
|
|
@ -703,7 +710,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
IntegerSet[] removedSiteSameCCAsPrev = new IntegerSet[numHetSites];
|
IntegerSet[] removedSiteSameCCAsPrev = new IntegerSet[numHetSites];
|
||||||
IntegerSet[] removedSiteSameCCAsCur = new IntegerSet[numHetSites];
|
IntegerSet[] removedSiteSameCCAsCur = new IntegerSet[numHetSites];
|
||||||
for (int i : sitesWithEdges) {
|
for (int i : sitesWithEdges) {
|
||||||
if ( DEBUG ) logger.debug("Calculating CC after removing edges of site: " + i);
|
if (DEBUG) logger.debug("Calculating CC after removing edges of site: " + i);
|
||||||
|
|
||||||
// Remove all edges incident to i and see which positions have paths to prev and cur:
|
// Remove all edges incident to i and see which positions have paths to prev and cur:
|
||||||
Collection<GraphEdge> removedEdges = readGraph.removeAllIncidentEdges(i);
|
Collection<GraphEdge> removedEdges = readGraph.removeAllIncidentEdges(i);
|
||||||
|
|
@ -713,15 +720,15 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
removedSiteSameCCAsPrev[i] = new IntegerSet(ccAfterRemove.inSameSetAs(prev, sitesWithEdges));
|
removedSiteSameCCAsPrev[i] = new IntegerSet(ccAfterRemove.inSameSetAs(prev, sitesWithEdges));
|
||||||
removedSiteSameCCAsCur[i] = new IntegerSet(ccAfterRemove.inSameSetAs(cur, sitesWithEdges));
|
removedSiteSameCCAsCur[i] = new IntegerSet(ccAfterRemove.inSameSetAs(cur, sitesWithEdges));
|
||||||
|
|
||||||
if ( DEBUG ) logger.debug("Same CC as previous [" + prev + "]: " + removedSiteSameCCAsPrev[i]);
|
if (DEBUG) logger.debug("Same CC as previous [" + prev + "]: " + removedSiteSameCCAsPrev[i]);
|
||||||
if ( DEBUG ) logger.debug("Same CC as current [" + cur + "]: " + removedSiteSameCCAsCur[i]);
|
if (DEBUG) logger.debug("Same CC as current [" + cur + "]: " + removedSiteSameCCAsCur[i]);
|
||||||
|
|
||||||
// Add the removed edges back in:
|
// Add the removed edges back in:
|
||||||
readGraph.addEdges(removedEdges);
|
readGraph.addEdges(removedEdges);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (GraphEdge e : readGraph) {
|
for (GraphEdge e : readGraph) {
|
||||||
if ( DEBUG ) logger.debug("Testing the path-connectivity of Edge: " + e);
|
if (DEBUG) logger.debug("Testing the path-connectivity of Edge: " + e);
|
||||||
|
|
||||||
/* Edge e={v1,v2} contributes a path between prev and cur for testRead iff:
|
/* Edge e={v1,v2} contributes a path between prev and cur for testRead iff:
|
||||||
testRead[v1] != null, testRead[v2] != null, and there is a path from prev ---> v1 -> v2 ---> cur [or vice versa].
|
testRead[v1] != null, testRead[v2] != null, and there is a path from prev ---> v1 -> v2 ---> cur [or vice versa].
|
||||||
|
|
@ -746,13 +753,13 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
|
|
||||||
// Retain only the reads that contain an edge in a path connecting prev and cur:
|
// Retain only the reads that contain an edge in a path connecting prev and cur:
|
||||||
Iterator<Map.Entry<String, Read>> readIt = readsAtHetSites.entrySet().iterator();
|
Iterator<Map.Entry<String, PhasingRead>> readIt = readsAtHetSites.entrySet().iterator();
|
||||||
while (readIt.hasNext()) {
|
while (readIt.hasNext()) {
|
||||||
Map.Entry<String, Read> nameToReads = readIt.next();
|
Map.Entry<String, PhasingRead> nameToReads = readIt.next();
|
||||||
String rdName = nameToReads.getKey();
|
String rdName = nameToReads.getKey();
|
||||||
if (!keepReads.contains(rdName)) {
|
if (!keepReads.contains(rdName)) {
|
||||||
readIt.remove();
|
readIt.remove();
|
||||||
if ( DEBUG ) logger.debug("Removing extraneous read: " + rdName);
|
if (DEBUG) logger.debug("Removing extraneous read: " + rdName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -761,8 +768,8 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
|
|
||||||
private List<GenotypeAndReadBases> removeExtraneousSites(List<GenotypeAndReadBases> listHetGenotypes) {
|
private List<GenotypeAndReadBases> removeExtraneousSites(List<GenotypeAndReadBases> listHetGenotypes) {
|
||||||
Set<Integer> sitesWithReads = new HashSet<Integer>();
|
Set<Integer> sitesWithReads = new HashSet<Integer>();
|
||||||
for (Map.Entry<String, Read> nameToReads : readsAtHetSites.entrySet()) {
|
for (Map.Entry<String, PhasingRead> nameToReads : readsAtHetSites.entrySet()) {
|
||||||
Read rd = nameToReads.getValue();
|
PhasingRead rd = nameToReads.getValue();
|
||||||
for (int i : rd.getNonNullIndices())
|
for (int i : rd.getNonNullIndices())
|
||||||
sitesWithReads.add(i);
|
sitesWithReads.add(i);
|
||||||
}
|
}
|
||||||
|
|
@ -779,7 +786,8 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
if (keepSite || index == phasingSiteIndex || index == phasingSiteIndex - 1) {
|
if (keepSite || index == phasingSiteIndex || index == phasingSiteIndex - 1) {
|
||||||
keepHetSites.add(grb);
|
keepHetSites.add(grb);
|
||||||
if (!keepSite)
|
if (!keepSite)
|
||||||
if ( DEBUG ) logger.debug("Although current or previous sites have no relevant reads, continuing empty attempt to phase them [for sake of program flow]...");
|
if (DEBUG)
|
||||||
|
logger.debug("Although current or previous sites have no relevant reads, continuing empty attempt to phase them [for sake of program flow]...");
|
||||||
}
|
}
|
||||||
else if (index <= phasingSiteIndex)
|
else if (index <= phasingSiteIndex)
|
||||||
numPrecedingRemoved++;
|
numPrecedingRemoved++;
|
||||||
|
|
@ -792,7 +800,8 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<GenotypeAndReadBases> trimWindow(List<GenotypeAndReadBases> listHetGenotypes, String sample, GenomeLoc phaseLocus) {
|
private List<GenotypeAndReadBases> trimWindow(List<GenotypeAndReadBases> listHetGenotypes, String sample, GenomeLoc phaseLocus) {
|
||||||
if ( DEBUG) logger.warn("Trying to phase sample " + sample + " at locus " + phaseLocus + " within a window of " + cacheWindow + " bases yields " + listHetGenotypes.size() + " heterozygous sites to phase:\n" + toStringGRL(listHetGenotypes));
|
if (DEBUG)
|
||||||
|
logger.warn("Trying to phase sample " + sample + " at locus " + phaseLocus + " within a window of " + cacheWindow + " bases yields " + listHetGenotypes.size() + " heterozygous sites to phase:\n" + toStringGRL(listHetGenotypes));
|
||||||
|
|
||||||
int prevSiteIndex = phasingSiteIndex - 1; // index of previous in listHetGenotypes
|
int prevSiteIndex = phasingSiteIndex - 1; // index of previous in listHetGenotypes
|
||||||
int numToUse = maxPhaseSites - 2; // since always keep previous and current het sites!
|
int numToUse = maxPhaseSites - 2; // since always keep previous and current het sites!
|
||||||
|
|
@ -815,7 +824,8 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
int stopIndex = phasingSiteIndex + useOnRight + 1; // put the index 1 past the desired index to keep
|
int stopIndex = phasingSiteIndex + useOnRight + 1; // put the index 1 past the desired index to keep
|
||||||
phasingSiteIndex -= startIndex;
|
phasingSiteIndex -= startIndex;
|
||||||
listHetGenotypes = listHetGenotypes.subList(startIndex, stopIndex);
|
listHetGenotypes = listHetGenotypes.subList(startIndex, stopIndex);
|
||||||
if ( DEBUG ) logger.warn("NAIVELY REDUCED to " + listHetGenotypes.size() + " sites:\n" + toStringGRL(listHetGenotypes));
|
if (DEBUG)
|
||||||
|
logger.warn("NAIVELY REDUCED to " + listHetGenotypes.size() + " sites:\n" + toStringGRL(listHetGenotypes));
|
||||||
|
|
||||||
return listHetGenotypes;
|
return listHetGenotypes;
|
||||||
}
|
}
|
||||||
|
|
@ -831,9 +841,9 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
if (DEBUG && logger.isDebugEnabled()) {
|
if (DEBUG && logger.isDebugEnabled()) {
|
||||||
logger.debug("Number of USED reads [connecting the two positions to be phased] at sites: " + phaseWindow.readsAtHetSites.size());
|
logger.debug("Number of USED reads [connecting the two positions to be phased] at sites: " + phaseWindow.readsAtHetSites.size());
|
||||||
logger.debug("USED READS:");
|
logger.debug("USED READS:");
|
||||||
for (Map.Entry<String, Read> nameToReads : phaseWindow.readsAtHetSites.entrySet()) {
|
for (Map.Entry<String, PhasingRead> nameToReads : phaseWindow.readsAtHetSites.entrySet()) {
|
||||||
String rdName = nameToReads.getKey();
|
String rdName = nameToReads.getKey();
|
||||||
Read rd = nameToReads.getValue();
|
PhasingRead rd = nameToReads.getValue();
|
||||||
logger.debug(rd + "\t" + rdName);
|
logger.debug(rd + "\t" + rdName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -847,19 +857,20 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
double totalAbsPQchange = 0;
|
double totalAbsPQchange = 0;
|
||||||
int numPQchangesObserved = 0;
|
int numPQchangesObserved = 0;
|
||||||
|
|
||||||
for (Map.Entry<String, Read> nameToReads : phaseWindow.readsAtHetSites.entrySet()) {
|
for (Map.Entry<String, PhasingRead> nameToReads : phaseWindow.readsAtHetSites.entrySet()) {
|
||||||
Read rd = nameToReads.getValue();
|
PhasingRead rd = nameToReads.getValue();
|
||||||
if ( DEBUG ) logger.debug("\nrd = " + rd + "\tname = " + nameToReads.getKey());
|
if (DEBUG) logger.debug("\nrd = " + rd + "\tname = " + nameToReads.getKey());
|
||||||
|
|
||||||
for (PhasingTable.PhasingTableEntry pte : sampleHaps) {
|
for (PhasingTable.PhasingTableEntry pte : sampleHaps) {
|
||||||
PhasingScore score = rd.matchHaplotypeClassScore(pte.getHaplotypeClass());
|
PhasingScore score = rd.matchHaplotypeClassScore(pte.getHaplotypeClass());
|
||||||
pte.getScore().integrateReadScore(score);
|
pte.getScore().integrateReadScore(score);
|
||||||
if ( DEBUG ) logger.debug("score(" + rd + ", " + pte.getHaplotypeClass() + ") = " + score);
|
if (DEBUG) logger.debug("score(" + rd + ", " + pte.getHaplotypeClass() + ") = " + score);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check the current best haplotype assignment and compare it to the previous one:
|
// Check the current best haplotype assignment and compare it to the previous one:
|
||||||
MaxHaplotypeAndQuality curMaxHapAndQual = new MaxHaplotypeAndQuality(sampleHaps, false);
|
MaxHaplotypeAndQuality curMaxHapAndQual = new MaxHaplotypeAndQuality(sampleHaps, false);
|
||||||
if ( DEBUG ) logger.debug("CUR MAX hap:\t" + curMaxHapAndQual.maxEntry.getHaplotypeClass() + "\tcurPhaseQuality:\t" + curMaxHapAndQual.phaseQuality);
|
if (DEBUG)
|
||||||
|
logger.debug("CUR MAX hap:\t" + curMaxHapAndQual.maxEntry.getHaplotypeClass() + "\tcurPhaseQuality:\t" + curMaxHapAndQual.phaseQuality);
|
||||||
if (prevMaxHapAndQual != null) {
|
if (prevMaxHapAndQual != null) {
|
||||||
double changeInPQ = prevMaxHapAndQual.phaseQuality - curMaxHapAndQual.phaseQuality;
|
double changeInPQ = prevMaxHapAndQual.phaseQuality - curMaxHapAndQual.phaseQuality;
|
||||||
|
|
||||||
|
|
@ -867,7 +878,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
numHighQualityIterations++;
|
numHighQualityIterations++;
|
||||||
if (!curMaxHapAndQual.hasSameRepresentativeHaplotype(prevMaxHapAndQual) || // switched phase
|
if (!curMaxHapAndQual.hasSameRepresentativeHaplotype(prevMaxHapAndQual) || // switched phase
|
||||||
(numPQchangesObserved > 0 && changeInPQ > FRACTION_OF_MEAN_PQ_CHANGES * (totalAbsPQchange / numPQchangesObserved))) { // a "significant" decrease in PQ
|
(numPQchangesObserved > 0 && changeInPQ > FRACTION_OF_MEAN_PQ_CHANGES * (totalAbsPQchange / numPQchangesObserved))) { // a "significant" decrease in PQ
|
||||||
if ( DEBUG ) logger.debug("Inconsistent read found!");
|
if (DEBUG) logger.debug("Inconsistent read found!");
|
||||||
numInconsistentIterations++;
|
numInconsistentIterations++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -878,12 +889,14 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
prevMaxHapAndQual = curMaxHapAndQual;
|
prevMaxHapAndQual = curMaxHapAndQual;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( DEBUG ) logger.debug("\nPhasing table [AFTER CALCULATION]:\n" + sampleHaps + "\n");
|
if (DEBUG) logger.debug("\nPhasing table [AFTER CALCULATION]:\n" + sampleHaps + "\n");
|
||||||
MaxHaplotypeAndQuality maxHapQual = new MaxHaplotypeAndQuality(sampleHaps, true);
|
MaxHaplotypeAndQuality maxHapQual = new MaxHaplotypeAndQuality(sampleHaps, true);
|
||||||
double posteriorProb = maxHapQual.maxEntry.getScore().getValue();
|
double posteriorProb = maxHapQual.maxEntry.getScore().getValue();
|
||||||
|
|
||||||
if ( DEBUG ) logger.debug("MAX hap:\t" + maxHapQual.maxEntry.getHaplotypeClass() + "\tposteriorProb:\t" + posteriorProb + "\tphaseQuality:\t" + maxHapQual.phaseQuality);
|
if (DEBUG)
|
||||||
if ( DEBUG ) logger.debug("Number of used reads " + phaseWindow.readsAtHetSites.size() + "; number of high PQ iterations " + numHighQualityIterations + "; number of inconsistencies " + numInconsistentIterations);
|
logger.debug("MAX hap:\t" + maxHapQual.maxEntry.getHaplotypeClass() + "\tposteriorProb:\t" + posteriorProb + "\tphaseQuality:\t" + maxHapQual.phaseQuality);
|
||||||
|
if (DEBUG)
|
||||||
|
logger.debug("Number of used reads " + phaseWindow.readsAtHetSites.size() + "; number of high PQ iterations " + numHighQualityIterations + "; number of inconsistencies " + numInconsistentIterations);
|
||||||
|
|
||||||
boolean phasingContainsInconsistencies = false;
|
boolean phasingContainsInconsistencies = false;
|
||||||
if (numInconsistentIterations / (double) numHighQualityIterations > MAX_FRACTION_OF_INCONSISTENT_READS)
|
if (numInconsistentIterations / (double) numHighQualityIterations > MAX_FRACTION_OF_INCONSISTENT_READS)
|
||||||
|
|
@ -951,7 +964,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean startDistancesAreInWindowRange(VariantContext vc1, VariantContext vc2) {
|
private boolean startDistancesAreInWindowRange(VariantContext vc1, VariantContext vc2) {
|
||||||
return startDistancesAreInWindowRange(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc1), VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc2));
|
return startDistancesAreInWindowRange(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), vc1), VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), vc2));
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean startDistancesAreInWindowRange(GenomeLoc loc1, GenomeLoc loc2) {
|
private boolean startDistancesAreInWindowRange(GenomeLoc loc1, GenomeLoc loc2) {
|
||||||
|
|
@ -959,7 +972,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
|
|
||||||
private int startDistance(UnfinishedVariantContext uvc1, VariantContext vc2) {
|
private int startDistance(UnfinishedVariantContext uvc1, VariantContext vc2) {
|
||||||
return uvc1.getLocation().distance(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc2));
|
return uvc1.getLocation().distance(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), vc2));
|
||||||
}
|
}
|
||||||
|
|
||||||
public PhasingStats reduce(PhasingStatsAndOutput statsAndList, PhasingStats stats) {
|
public PhasingStats reduce(PhasingStatsAndOutput statsAndList, PhasingStats stats) {
|
||||||
|
|
@ -1006,13 +1019,13 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
|
|
||||||
private void writeVCF(VariantContext vc) {
|
private void writeVCF(VariantContext vc) {
|
||||||
if ( samplesToPhase == null || vc.isNotFiltered() )
|
if (samplesToPhase == null || vc.isNotFiltered())
|
||||||
//if ( samplesToPhase == null || (vc.isVariant() && vc.isNotFiltered())) // if we are only operating on specific samples, don't write out all sites, just those where the VC is variant
|
//if ( samplesToPhase == null || (vc.isVariant() && vc.isNotFiltered())) // if we are only operating on specific samples, don't write out all sites, just those where the VC is variant
|
||||||
WriteVCF.writeVCF(vc, writer, logger);
|
WriteVCF.writeVCF(vc, writer, logger);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean processVariantInPhasing(VariantContext vc) {
|
public static boolean processVariantInPhasing(VariantContext vc) {
|
||||||
return vc.isNotFiltered() && ((vc.isSNP() && vc.isBiallelic()) || ! vc.isVariant()); // we can handle the non-variant case as well
|
return vc.isNotFiltered() && ((vc.isSNP() && vc.isBiallelic()) || !vc.isVariant()); // we can handle the non-variant case as well
|
||||||
//return isUnfilteredBiallelicSNP(vc);
|
//return isUnfilteredBiallelicSNP(vc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1140,47 +1153,11 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
else
|
else
|
||||||
sb.append(" -- ");
|
sb.append(" -- ");
|
||||||
|
|
||||||
sb.append(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc));
|
sb.append(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), vc));
|
||||||
}
|
}
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class ReadBase {
|
|
||||||
public String readName;
|
|
||||||
public byte base;
|
|
||||||
public int mappingQual;
|
|
||||||
public byte baseQual;
|
|
||||||
|
|
||||||
public ReadBase(String readName, byte base, int mappingQual, byte baseQual) {
|
|
||||||
this.readName = readName;
|
|
||||||
this.base = base;
|
|
||||||
this.mappingQual = mappingQual;
|
|
||||||
this.baseQual = baseQual;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class ReadBasesAtPosition implements Iterable<ReadBase> {
|
|
||||||
// list of: <read name, base>
|
|
||||||
private LinkedList<ReadBase> bases;
|
|
||||||
|
|
||||||
public ReadBasesAtPosition() {
|
|
||||||
this.bases = new LinkedList<ReadBase>();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void putReadBase(PileupElement pue) {
|
|
||||||
ReadBase rb = new ReadBase(pue.getRead().getReadName(), pue.getBase(), pue.getMappingQual(), pue.getQual());
|
|
||||||
bases.add(rb);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Iterator<ReadBase> iterator() {
|
|
||||||
return bases.iterator();
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isEmpty() {
|
|
||||||
return bases.isEmpty();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// THIS IMPLEMENTATION WILL FAIL WHEN NOT DEALING WITH SNP Alleles (e.g., MNP or INDEL), SINCE THEN THE Allele.getBases()
|
// THIS IMPLEMENTATION WILL FAIL WHEN NOT DEALING WITH SNP Alleles (e.g., MNP or INDEL), SINCE THEN THE Allele.getBases()
|
||||||
// FUNCTION WILL RETURN VARIABLE-LENGTH Byte ARRAYS. IN THAT CASE, BaseArray/Haplotype/Read WILL NEED TO BE REPLACED WITH
|
// FUNCTION WILL RETURN VARIABLE-LENGTH Byte ARRAYS. IN THAT CASE, BaseArray/Haplotype/Read WILL NEED TO BE REPLACED WITH
|
||||||
|
|
@ -1432,7 +1409,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
public void outputMultipleBaseCounts() {
|
public void outputMultipleBaseCounts() {
|
||||||
GenomeLoc nextToPhaseLoc = null;
|
GenomeLoc nextToPhaseLoc = null;
|
||||||
if (!unphasedSiteQueue.isEmpty())
|
if (!unphasedSiteQueue.isEmpty())
|
||||||
nextToPhaseLoc = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),unphasedSiteQueue.peek().variant);
|
nextToPhaseLoc = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), unphasedSiteQueue.peek().variant);
|
||||||
|
|
||||||
outputMultipleBaseCounts(nextToPhaseLoc);
|
outputMultipleBaseCounts(nextToPhaseLoc);
|
||||||
}
|
}
|
||||||
|
|
@ -1484,108 +1461,6 @@ class PhasingScore extends PreciseNonNegativeDouble {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
abstract class BaseArray implements Comparable<BaseArray> {
|
|
||||||
protected Byte[] bases;
|
|
||||||
|
|
||||||
public BaseArray(byte[] bases) {
|
|
||||||
this.bases = new Byte[bases.length];
|
|
||||||
for (int i = 0; i < bases.length; i++)
|
|
||||||
this.bases[i] = bases[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
public BaseArray(Byte[] bases) {
|
|
||||||
this.bases = Arrays.copyOf(bases, bases.length);
|
|
||||||
}
|
|
||||||
|
|
||||||
public BaseArray(int length) {
|
|
||||||
this.bases = new Byte[length];
|
|
||||||
Arrays.fill(this.bases, null);
|
|
||||||
}
|
|
||||||
|
|
||||||
public BaseArray(BaseArray other) {
|
|
||||||
this(other.bases);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void updateBase(int index, Byte base) {
|
|
||||||
bases[index] = base;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Byte getBase(int index) {
|
|
||||||
return bases[index];
|
|
||||||
}
|
|
||||||
|
|
||||||
public int size() {
|
|
||||||
return bases.length;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
StringBuilder sb = new StringBuilder(bases.length);
|
|
||||||
for (Byte b : bases)
|
|
||||||
sb.append(b != null ? (char) b.byteValue() : "_");
|
|
||||||
|
|
||||||
return sb.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
public int compareTo(BaseArray that) {
|
|
||||||
int sz = this.bases.length;
|
|
||||||
if (sz != that.bases.length)
|
|
||||||
return (sz - that.bases.length);
|
|
||||||
|
|
||||||
for (int i = 0; i < sz; i++) {
|
|
||||||
Byte thisBase = this.getBase(i);
|
|
||||||
Byte thatBase = that.getBase(i);
|
|
||||||
if (thisBase == null || thatBase == null) {
|
|
||||||
if (thisBase == null && thatBase != null) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
else if (thisBase != null && thatBase == null) {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (!BaseUtils.basesAreEqual(thisBase, thatBase)) {
|
|
||||||
return thisBase - thatBase;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
class Haplotype extends BaseArray implements Cloneable {
|
|
||||||
public Haplotype(byte[] bases) {
|
|
||||||
super(bases);
|
|
||||||
}
|
|
||||||
|
|
||||||
private Haplotype(Byte[] bases) {
|
|
||||||
super(bases);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Haplotype(Haplotype other) {
|
|
||||||
super(other);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void updateBase(int index, Byte base) {
|
|
||||||
if (base == null) {
|
|
||||||
throw new ReviewedStingException("Internal error: CANNOT have null for a missing Haplotype base!");
|
|
||||||
}
|
|
||||||
super.updateBase(index, base);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Haplotype clone() {
|
|
||||||
try {
|
|
||||||
super.clone();
|
|
||||||
} catch (CloneNotSupportedException e) {
|
|
||||||
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
|
||||||
}
|
|
||||||
return new Haplotype(this);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns a new Haplotype containing the portion of this Haplotype between the specified fromIndex, inclusive, and toIndex, exclusive.
|
|
||||||
|
|
||||||
public Haplotype subHaplotype(int fromIndex, int toIndex) {
|
|
||||||
return new Haplotype(Arrays.copyOfRange(bases, fromIndex, Math.min(toIndex, size())));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
class HaplotypeClass implements Iterable<Haplotype> {
|
class HaplotypeClass implements Iterable<Haplotype> {
|
||||||
private ArrayList<Haplotype> haps;
|
private ArrayList<Haplotype> haps;
|
||||||
private Haplotype rep;
|
private Haplotype rep;
|
||||||
|
|
@ -1619,82 +1494,6 @@ class HaplotypeClass implements Iterable<Haplotype> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class Read extends BaseArray {
|
|
||||||
private PreciseNonNegativeDouble mappingProb; // the probability that this read is mapped correctly
|
|
||||||
private PreciseNonNegativeDouble[] baseProbs; // the probabilities that the base identities are CORRECT
|
|
||||||
private PreciseNonNegativeDouble[] baseErrorProbs; // the probabilities that the base identities are INCORRECT
|
|
||||||
|
|
||||||
public Read(int length, int mappingQual) {
|
|
||||||
super(length);
|
|
||||||
|
|
||||||
this.mappingProb = new PreciseNonNegativeDouble(QualityUtils.qualToProb(mappingQual));
|
|
||||||
|
|
||||||
this.baseProbs = new PreciseNonNegativeDouble[length];
|
|
||||||
Arrays.fill(this.baseProbs, null);
|
|
||||||
|
|
||||||
this.baseErrorProbs = new PreciseNonNegativeDouble[length];
|
|
||||||
Arrays.fill(this.baseErrorProbs, null);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void updateBaseAndQuality(int index, Byte base, byte baseQual) {
|
|
||||||
updateBase(index, base);
|
|
||||||
|
|
||||||
double errProb = QualityUtils.qualToErrorProb(baseQual);
|
|
||||||
|
|
||||||
// The base error should be AT LEAST AS HIGH as the mapping error [equivalent to capping the base quality (BQ) by the mapping quality (MQ)]:
|
|
||||||
errProb = Math.max(errProb, 1.0 - mappingProb.getValue());
|
|
||||||
|
|
||||||
baseProbs[index] = new PreciseNonNegativeDouble(1.0 - errProb); // The probability that the true base is the base called in the read
|
|
||||||
baseErrorProbs[index] = new PreciseNonNegativeDouble(errProb / 3.0); // DIVIDE up the error probability EQUALLY over the 3 non-called bases
|
|
||||||
}
|
|
||||||
|
|
||||||
public PhasingScore matchHaplotypeClassScore(HaplotypeClass hapClass) {
|
|
||||||
PreciseNonNegativeDouble value = new PreciseNonNegativeDouble(0.0);
|
|
||||||
for (Haplotype h : hapClass)
|
|
||||||
value.plusEqual(matchHaplotypeScore(h));
|
|
||||||
|
|
||||||
return new PhasingScore(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
private PreciseNonNegativeDouble matchHaplotypeScore(Haplotype hap) {
|
|
||||||
PreciseNonNegativeDouble score = new PreciseNonNegativeDouble(1.0);
|
|
||||||
|
|
||||||
int sz = this.bases.length;
|
|
||||||
if (sz != hap.bases.length)
|
|
||||||
throw new ReviewedStingException("Read and Haplotype should have same length to be compared!");
|
|
||||||
|
|
||||||
// Technically, this HAS NO EFFECT since it is multiplied in for ALL haplotype pairs, but do so for completeness:
|
|
||||||
score.timesEqual(mappingProb);
|
|
||||||
|
|
||||||
for (int i = 0; i < sz; i++) {
|
|
||||||
Byte thisBase = this.getBase(i);
|
|
||||||
Byte hapBase = hap.getBase(i);
|
|
||||||
if (thisBase != null && hapBase != null) {
|
|
||||||
if (BaseUtils.basesAreEqual(thisBase, hapBase))
|
|
||||||
score.timesEqual(baseProbs[i]);
|
|
||||||
else
|
|
||||||
score.timesEqual(baseErrorProbs[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return score;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int[] getNonNullIndices() {
|
|
||||||
List<Integer> nonNull = new LinkedList<Integer>();
|
|
||||||
for (int i = 0; i < bases.length; i++) {
|
|
||||||
if (getBase(i) != null)
|
|
||||||
nonNull.add(i);
|
|
||||||
}
|
|
||||||
|
|
||||||
int[] nonNullArray = new int[nonNull.size()];
|
|
||||||
int index = 0;
|
|
||||||
for (int i : nonNull)
|
|
||||||
nonNullArray[index++] = i;
|
|
||||||
return nonNullArray;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
class PhasingStats {
|
class PhasingStats {
|
||||||
private int numReads;
|
private int numReads;
|
||||||
private int numVarSites;
|
private int numVarSites;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,38 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||||
|
|
||||||
|
public class ReadBase {
|
||||||
|
public String readName;
|
||||||
|
public byte base;
|
||||||
|
public int mappingQual;
|
||||||
|
public byte baseQual;
|
||||||
|
|
||||||
|
public ReadBase(String readName, byte base, int mappingQual, byte baseQual) {
|
||||||
|
this.readName = readName;
|
||||||
|
this.base = base;
|
||||||
|
this.mappingQual = mappingQual;
|
||||||
|
this.baseQual = baseQual;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,51 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
|
||||||
|
public class ReadBasesAtPosition implements Iterable<ReadBase> {
|
||||||
|
// list of: <read name, base>
|
||||||
|
private LinkedList<ReadBase> bases;
|
||||||
|
|
||||||
|
public ReadBasesAtPosition() {
|
||||||
|
this.bases = new LinkedList<ReadBase>();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void putReadBase(PileupElement pue) {
|
||||||
|
ReadBase rb = new ReadBase(pue.getRead().getReadName(), pue.getBase(), pue.getMappingQual(), pue.getQual());
|
||||||
|
bases.add(rb);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Iterator<ReadBase> iterator() {
|
||||||
|
return bases.iterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isEmpty() {
|
||||||
|
return bases.isEmpty();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,402 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.phasing;
|
||||||
|
|
||||||
|
import org.broad.tribble.readers.AsciiLineReader;
|
||||||
|
import org.broad.tribble.util.variantcontext.Allele;
|
||||||
|
import org.broad.tribble.util.variantcontext.Genotype;
|
||||||
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.phasing.*;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Walks along all variant ROD loci and verifies the phasing from the reads for user-defined pairs of sites.
|
||||||
|
*/
|
||||||
|
@Allows(value = {DataSource.READS, DataSource.REFERENCE})
|
||||||
|
@Requires(value = {DataSource.READS, DataSource.REFERENCE}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class))
|
||||||
|
@By(DataSource.READS)
|
||||||
|
|
||||||
|
@ReadFilters({ZeroMappingQualityReadFilter.class})
|
||||||
|
// Filter out all reads with zero mapping quality
|
||||||
|
|
||||||
|
public class ReadBasedPhasingValidationWalker extends RodWalker<Integer, Integer> {
|
||||||
|
private LinkedList<String> rodNames = null;
|
||||||
|
|
||||||
|
@Argument(fullName = "sitePairsFile", shortName = "sitePairsFile", doc = "File of pairs of variants for which phasing in ROD should be assessed using input reads", required = true)
|
||||||
|
protected File sitePairsFile = null;
|
||||||
|
|
||||||
|
@Output
|
||||||
|
protected PrintStream out;
|
||||||
|
|
||||||
|
private Set<SitePair> sitePairs = null;
|
||||||
|
private String sampleName = null;
|
||||||
|
|
||||||
|
SiteGenotypeAndReads prevSiteAndReads = null;
|
||||||
|
|
||||||
|
private final static int NUM_IN_PAIR = 2; // trivial
|
||||||
|
|
||||||
|
// enable deletions in the pileup
|
||||||
|
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
rodNames = new LinkedList<String>();
|
||||||
|
rodNames.add("variant");
|
||||||
|
|
||||||
|
sitePairs = new TreeSet<SitePair>();
|
||||||
|
GenomeLocParser locParser = getToolkit().getGenomeLocParser();
|
||||||
|
|
||||||
|
InputStream sitePairsStream = null;
|
||||||
|
try {
|
||||||
|
sitePairsStream = new FileInputStream(sitePairsFile);
|
||||||
|
}
|
||||||
|
catch (FileNotFoundException fnfe) {
|
||||||
|
fnfe.printStackTrace();
|
||||||
|
throw new UserException("Problem opening file: " + sitePairsFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
AsciiLineReader sitePairsReader = new AsciiLineReader(sitePairsStream);
|
||||||
|
while (true) {
|
||||||
|
String line = null;
|
||||||
|
try {
|
||||||
|
line = sitePairsReader.readLine();
|
||||||
|
}
|
||||||
|
catch (IOException ioe) {
|
||||||
|
ioe.printStackTrace();
|
||||||
|
throw new UserException("Problem reading file: " + sitePairsFile);
|
||||||
|
}
|
||||||
|
if (line == null)
|
||||||
|
break; // reached end of file
|
||||||
|
|
||||||
|
String[] twoSites = line.split("\t");
|
||||||
|
if (twoSites.length != 2)
|
||||||
|
throw new UserException("Must have PAIRS of sites in line " + line + " of " + sitePairsFile);
|
||||||
|
|
||||||
|
SitePair sp = new SitePair(locParser.parseGenomeLoc(twoSites[0]), locParser.parseGenomeLoc(twoSites[1]));
|
||||||
|
sitePairs.add(sp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean generateExtendedEvents() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduceInit() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param tracker the meta-data tracker
|
||||||
|
* @param ref the reference base
|
||||||
|
* @param context the context for the given locus
|
||||||
|
* @return statistics of and list of all phased VariantContexts and their base pileup that have gone out of cacheWindow range.
|
||||||
|
*/
|
||||||
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
if (tracker == null)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
boolean relevantSitePair = false;
|
||||||
|
SitePair sp = null;
|
||||||
|
if (prevSiteAndReads != null) {
|
||||||
|
// all vc's below start at ref.getLocus() [due to requireStartHere = true]:
|
||||||
|
sp = new SitePair(prevSiteAndReads.site, ref.getLocus());
|
||||||
|
relevantSitePair = sitePairs.contains(sp);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (context == null || !context.hasBasePileup())
|
||||||
|
return null;
|
||||||
|
ReadBackedPileup pileup = context.getBasePileup();
|
||||||
|
String nextName = null;
|
||||||
|
|
||||||
|
//
|
||||||
|
// TODO: ASSUMES THAT ALL READS COME FROM A SINGLE SAMPLE:
|
||||||
|
// TODO: CODE SHOULD BE AS FOLLOWS:
|
||||||
|
/*
|
||||||
|
Collection<String> sampNames = pileup.getSampleNames();
|
||||||
|
if (sampNames.size() != 1)
|
||||||
|
throw new UserException("Reads must be for exactly one sample [not multi-sample]");
|
||||||
|
nextName = sampNames.iterator().next();
|
||||||
|
if (nextName == null)
|
||||||
|
throw new UserException("Reads must be for exactly one sample");
|
||||||
|
|
||||||
|
if (sampleName == null)
|
||||||
|
sampleName = nextName;
|
||||||
|
else if (!nextName.equals(sampleName))
|
||||||
|
throw new UserException("Reads must have a single consistent sample name");
|
||||||
|
|
||||||
|
pileup = pileup.getPileupForSampleName(sampleName);
|
||||||
|
*/
|
||||||
|
//
|
||||||
|
|
||||||
|
ReadBasesAtPosition readBases = new ReadBasesAtPosition();
|
||||||
|
for (PileupElement p : pileup)
|
||||||
|
readBases.putReadBase(p);
|
||||||
|
|
||||||
|
ReadCounter rdCounts = null;
|
||||||
|
if (relevantSitePair) { // otherwise, processed the reads for their possible use in the future:
|
||||||
|
PhasingReadList buildReads = new PhasingReadList(NUM_IN_PAIR);
|
||||||
|
buildReads.updateBases(0, prevSiteAndReads.readBases);
|
||||||
|
buildReads.updateBases(1, readBases);
|
||||||
|
|
||||||
|
List<PhasingRead> reads = new LinkedList<PhasingRead>();
|
||||||
|
for (PhasingRead rd : buildReads) {
|
||||||
|
if (rd.getNonNullIndices().length == NUM_IN_PAIR) // only want reads with BOTH bases called [possibly as deleted ("D")]
|
||||||
|
reads.add(rd);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count the occurence of each "haplotype":
|
||||||
|
rdCounts = new ReadCounter();
|
||||||
|
for (PhasingRead rd : reads)
|
||||||
|
rdCounts.incrementCount(rd);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Now, read the ROD and note the genotypes and their phase to be validated:
|
||||||
|
Set<Haplotype> calledHaplotypes = null;
|
||||||
|
List<Haplotype> allPossibleHaplotypes = null;
|
||||||
|
|
||||||
|
boolean requireStartHere = true; // only see each VariantContext once
|
||||||
|
boolean takeFirstOnly = true; // take only the first entry from the ROD file
|
||||||
|
for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, null, context.getLocation(), requireStartHere, takeFirstOnly)) {
|
||||||
|
if (vc.isFiltered() || !vc.isSNP())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (vc.getNSamples() != 1)
|
||||||
|
throw new UserException("ROD file must have exactly one sample [not multi-sample]");
|
||||||
|
nextName = vc.getSampleNames().iterator().next();
|
||||||
|
if (sampleName == null)
|
||||||
|
sampleName = nextName;
|
||||||
|
else if (!nextName.equals(sampleName))
|
||||||
|
throw new UserException("ROD must have a single consistent sample name");
|
||||||
|
|
||||||
|
Genotype gt = vc.getGenotype(sampleName);
|
||||||
|
|
||||||
|
if (relevantSitePair) {
|
||||||
|
Genotype prevGt = prevSiteAndReads.gt;
|
||||||
|
List<Allele> prevAlleles = prevGt.getAlleles();
|
||||||
|
List<Allele> curAlleles = gt.getAlleles();
|
||||||
|
|
||||||
|
calledHaplotypes = new TreeSet<Haplotype>(); // implemented Haplotype.compareTo()
|
||||||
|
if (gt.isPhased()) {
|
||||||
|
if (gt.getPloidy() != prevGt.getPloidy())
|
||||||
|
throw new UserException("Invalid ROD file: cannot be phased AND have different ploidys!");
|
||||||
|
|
||||||
|
// Consider only the haplotypes called to be phased
|
||||||
|
Iterator<Allele> curAllIt = curAlleles.iterator();
|
||||||
|
for (Allele prevAll : prevAlleles) {
|
||||||
|
Allele curAll = curAllIt.next();
|
||||||
|
calledHaplotypes.add(successiveAllelesToHaplotype(prevAll, curAll));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Consider EVERY combination of alleles as haplotypes [IF PHASED, this will give the contingency table in the CORRECT order]:
|
||||||
|
allPossibleHaplotypes = new LinkedList<Haplotype>();
|
||||||
|
for (Allele prevAll : prevAlleles) {
|
||||||
|
for (Allele curAll : curAlleles) {
|
||||||
|
allPossibleHaplotypes.add(successiveAllelesToHaplotype(prevAll, curAll));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
prevSiteAndReads = new SiteGenotypeAndReads(ref.getLocus(), gt, readBases);
|
||||||
|
}
|
||||||
|
|
||||||
|
int processedPairs = 0;
|
||||||
|
if (relevantSitePair) {
|
||||||
|
Map<Haplotype, Integer> haplotypeCounts = new TreeMap<Haplotype, Integer>(); // implemented Haplotype.compareTo()
|
||||||
|
|
||||||
|
processedPairs = 1;
|
||||||
|
int totalCount = rdCounts.totalCount();
|
||||||
|
System.out.println("\nPair: " + sp + " [# reads = " + totalCount + "]");
|
||||||
|
|
||||||
|
int matchCount = 0;
|
||||||
|
for (Map.Entry<PhasingRead, Integer> rdEntry : rdCounts.entrySet()) {
|
||||||
|
PhasingRead read = rdEntry.getKey();
|
||||||
|
int count = rdEntry.getValue();
|
||||||
|
|
||||||
|
Haplotype readsHaplotype = new Haplotype(read);
|
||||||
|
haplotypeCounts.put(readsHaplotype, count);
|
||||||
|
|
||||||
|
boolean readMatchesCalledHaplotype = calledHaplotypes != null && calledHaplotypes.contains(readsHaplotype);
|
||||||
|
if (readMatchesCalledHaplotype)
|
||||||
|
matchCount += count;
|
||||||
|
|
||||||
|
System.out.println("read" + ": " + read + (readMatchesCalledHaplotype ? "*" : "") + "\tcount: " + count);
|
||||||
|
}
|
||||||
|
|
||||||
|
double percentMatchingReads = 100 * (matchCount / (double) totalCount);
|
||||||
|
System.out.println("% MATCHING reads: " + percentMatchingReads + " [of " + totalCount + " TOTAL reads]");
|
||||||
|
|
||||||
|
out.print(sp);
|
||||||
|
for (Haplotype hap : allPossibleHaplotypes) {
|
||||||
|
Integer count = haplotypeCounts.get(hap);
|
||||||
|
if (count == null) // haplotype may not have been observed in ANY reads
|
||||||
|
count = 0;
|
||||||
|
|
||||||
|
out.print("\t" + count);
|
||||||
|
}
|
||||||
|
out.println();
|
||||||
|
}
|
||||||
|
|
||||||
|
return processedPairs;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Haplotype successiveAllelesToHaplotype(Allele prevAll, Allele curAll) {
|
||||||
|
byte prevBase = SNPallelePair.getSingleBase(prevAll);
|
||||||
|
byte curBase = SNPallelePair.getSingleBase(curAll);
|
||||||
|
|
||||||
|
byte[] hapBases = new byte[NUM_IN_PAIR];
|
||||||
|
hapBases[0] = prevBase;
|
||||||
|
hapBases[1] = curBase;
|
||||||
|
return new Haplotype(hapBases);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduce(Integer addIn, Integer runningCount) {
|
||||||
|
if (addIn == null)
|
||||||
|
addIn = 0;
|
||||||
|
|
||||||
|
return runningCount + addIn;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param result the number of reads and VariantContexts seen.
|
||||||
|
*/
|
||||||
|
public void onTraversalDone(Integer result) {
|
||||||
|
System.out.println("Validated " + result + " pairs of sites.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class SitePair implements Comparable<SitePair> {
|
||||||
|
public GenomeLoc site1;
|
||||||
|
public GenomeLoc site2;
|
||||||
|
|
||||||
|
public SitePair(GenomeLoc site1, GenomeLoc site2) {
|
||||||
|
if (site1.size() > 1 || site2.size() > 1)
|
||||||
|
throw new UserException("Must give pairs of SINGLE-LOCUS record start sites");
|
||||||
|
|
||||||
|
this.site1 = site1;
|
||||||
|
this.site2 = site2;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return site1.toString() + "\t" + site2.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int compareTo(SitePair other) {
|
||||||
|
int comp1 = site1.compareTo(other.site1);
|
||||||
|
if (comp1 != 0)
|
||||||
|
return comp1;
|
||||||
|
|
||||||
|
return site2.compareTo(other.site2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class SiteGenotypeAndReads {
|
||||||
|
public GenomeLoc site;
|
||||||
|
public Genotype gt;
|
||||||
|
public ReadBasesAtPosition readBases;
|
||||||
|
|
||||||
|
public SiteGenotypeAndReads(GenomeLoc site, Genotype gt, ReadBasesAtPosition readBases) {
|
||||||
|
this.site = site;
|
||||||
|
this.gt = gt;
|
||||||
|
this.readBases = readBases;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class PhasingReadList implements Iterable<PhasingRead> {
|
||||||
|
private Map<String, PhasingRead> readsAtSites = null;
|
||||||
|
private int numSites;
|
||||||
|
|
||||||
|
public PhasingReadList(int numSites) {
|
||||||
|
this.readsAtSites = new HashMap<String, PhasingRead>();
|
||||||
|
this.numSites = numSites;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void updateBases(int index, ReadBasesAtPosition readBases) {
|
||||||
|
if (readBases == null)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (ReadBase rb : readBases) {
|
||||||
|
String readName = rb.readName;
|
||||||
|
|
||||||
|
PhasingRead rd = readsAtSites.get(readName);
|
||||||
|
if (rd == null) {
|
||||||
|
rd = new PhasingRead(numSites, rb.mappingQual);
|
||||||
|
readsAtSites.put(readName, rd);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Arbitrarily updates to the last base observed for this sample and read (rb.base):
|
||||||
|
rd.updateBaseAndQuality(index, rb.base, rb.baseQual);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Iterator<PhasingRead> iterator() {
|
||||||
|
return readsAtSites.values().iterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
return readsAtSites.size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class ReadCounter {
|
||||||
|
private Map<PhasingRead, Integer> counts;
|
||||||
|
private int totalCount;
|
||||||
|
|
||||||
|
public ReadCounter() {
|
||||||
|
this.counts = new TreeMap<PhasingRead, Integer>(); // implemented PhasingRead.compareTo()
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrementCount(PhasingRead rd) {
|
||||||
|
Integer cnt = counts.get(rd);
|
||||||
|
if (cnt == null)
|
||||||
|
cnt = 0;
|
||||||
|
|
||||||
|
counts.put(rd, cnt + 1);
|
||||||
|
totalCount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Set<Map.Entry<PhasingRead, Integer>> entrySet() {
|
||||||
|
return counts.entrySet();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int totalCount() {
|
||||||
|
return totalCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -93,6 +93,10 @@ public class BaseUtils {
|
||||||
return simpleBaseToBaseIndex(base1) == simpleBaseToBaseIndex(base2);
|
return simpleBaseToBaseIndex(base1) == simpleBaseToBaseIndex(base2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static public boolean extendedBasesAreEqual(byte base1, byte base2) {
|
||||||
|
return extendedBaseToBaseIndex(base1) == extendedBaseToBaseIndex(base2);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts a IUPAC nucleotide code to a pair of bases
|
* Converts a IUPAC nucleotide code to a pair of bases
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue