Changed ReadBackedPhasing to be a RodWalker (more efficient, since it is ROD-focused)
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4117 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
6eb1559c1d
commit
41e53d37e1
|
|
@ -52,11 +52,12 @@ import static org.broadinstitute.sting.utils.vcf.VCFUtils.getVCFHeadersFromRods;
|
||||||
* Walks along all loci, caching a user-defined window of VariantContext sites, and then finishes phasing them when they go out of range (using downstream reads).
|
* Walks along all loci, caching a user-defined window of VariantContext sites, and then finishes phasing them when they go out of range (using downstream reads).
|
||||||
* Use '-BTI variant' to only stop at positions in the VCF file bound to 'variant'.
|
* Use '-BTI variant' to only stop at positions in the VCF file bound to 'variant'.
|
||||||
*/
|
*/
|
||||||
@Requires(value = {}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class))
|
@Allows(value = {DataSource.READS, DataSource.REFERENCE})
|
||||||
|
@Requires(value = {DataSource.READS, DataSource.REFERENCE}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class))
|
||||||
|
|
||||||
@ReadFilters( {ZeroMappingQualityReadFilter.class} ) // Filter out all reads with zero mapping quality
|
@ReadFilters( {ZeroMappingQualityReadFilter.class} ) // Filter out all reads with zero mapping quality
|
||||||
|
|
||||||
public class ReadBackedPhasingWalker extends LocusWalker<PhasingStatsAndOutput, PhasingStats> {
|
public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, PhasingStats> {
|
||||||
|
|
||||||
@Output(doc="File to which variants should be written",required=true)
|
@Output(doc="File to which variants should be written",required=true)
|
||||||
protected VCFWriter writer = null;
|
protected VCFWriter writer = null;
|
||||||
|
|
@ -124,7 +125,7 @@ public class ReadBackedPhasingWalker extends LocusWalker<PhasingStatsAndOutput,
|
||||||
* @return statistics of and list of all phased VariantContexts and their base pileup that have gone out of cacheWindow range.
|
* @return statistics of and list of all phased VariantContexts and their base pileup that have gone out of cacheWindow range.
|
||||||
*/
|
*/
|
||||||
public PhasingStatsAndOutput map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public PhasingStatsAndOutput map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
if (tracker == null)
|
if (tracker == null || ref == null)
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
PhasingStats phaseStats = new PhasingStats();
|
PhasingStats phaseStats = new PhasingStats();
|
||||||
|
|
@ -138,6 +139,7 @@ public class ReadBackedPhasingWalker extends LocusWalker<PhasingStatsAndOutput,
|
||||||
|
|
||||||
VariantAndReads vr = new VariantAndReads(vc, context, processVariant);
|
VariantAndReads vr = new VariantAndReads(vc, context, processVariant);
|
||||||
unphasedSiteQueue.add(vr);
|
unphasedSiteQueue.add(vr);
|
||||||
|
logger.debug("Added variant to queue = " + VariantContextUtils.getLocation(vr.variant));
|
||||||
|
|
||||||
int numReads = 0;
|
int numReads = 0;
|
||||||
if (context.hasBasePileup()) {
|
if (context.hasBasePileup()) {
|
||||||
|
|
@ -155,11 +157,13 @@ public class ReadBackedPhasingWalker extends LocusWalker<PhasingStatsAndOutput,
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<VariantContext> processQueue(PhasingStats phaseStats, boolean processAll) {
|
private List<VariantContext> processQueue(PhasingStats phaseStats, boolean processAll) {
|
||||||
|
List<VariantContext> oldPhasedList = new LinkedList<VariantContext>();
|
||||||
|
|
||||||
|
if (!unphasedSiteQueue.isEmpty()) {
|
||||||
GenomeLoc lastLocus = null;
|
GenomeLoc lastLocus = null;
|
||||||
if (!processAll && !unphasedSiteQueue.isEmpty())
|
if (!processAll)
|
||||||
lastLocus = VariantContextUtils.getLocation(unphasedSiteQueue.peekLast().variant);
|
lastLocus = VariantContextUtils.getLocation(unphasedSiteQueue.peekLast().variant);
|
||||||
|
|
||||||
List<VariantContext> oldPhasedList = new LinkedList<VariantContext>();
|
|
||||||
while (!unphasedSiteQueue.isEmpty()) {
|
while (!unphasedSiteQueue.isEmpty()) {
|
||||||
if (!processAll) { // otherwise, phase until the end of unphasedSiteQueue
|
if (!processAll) { // otherwise, phase until the end of unphasedSiteQueue
|
||||||
VariantContext nextToPhaseVc = unphasedSiteQueue.peek().variant;
|
VariantContext nextToPhaseVc = unphasedSiteQueue.peek().variant;
|
||||||
|
|
@ -170,16 +174,19 @@ public class ReadBackedPhasingWalker extends LocusWalker<PhasingStatsAndOutput,
|
||||||
}
|
}
|
||||||
// Already saw all variant positions within cacheWindow distance ahead of vc (on its contig)
|
// Already saw all variant positions within cacheWindow distance ahead of vc (on its contig)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update phasedSites before it's used in finalizePhasing:
|
// Update phasedSites before it's used in finalizePhasing:
|
||||||
oldPhasedList.addAll(discardIrrelevantPhasedSites());
|
oldPhasedList.addAll(discardIrrelevantPhasedSites());
|
||||||
|
logger.debug("oldPhasedList(1) = " + toStringVCL(oldPhasedList));
|
||||||
|
|
||||||
VariantAndReads phasedVr = finalizePhasing(unphasedSiteQueue.remove(), phaseStats);
|
VariantAndReads phasedVr = finalizePhasing(unphasedSiteQueue.remove(), phaseStats);
|
||||||
|
logger.debug("Finalized phasing for " + VariantContextUtils.getLocation(phasedVr.variant));
|
||||||
phasedSites.add(phasedVr);
|
phasedSites.add(phasedVr);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Update phasedSites after finalizePhasing is done:
|
// Update phasedSites after finalizePhasing is done:
|
||||||
oldPhasedList.addAll(discardIrrelevantPhasedSites());
|
oldPhasedList.addAll(discardIrrelevantPhasedSites());
|
||||||
|
logger.debug("oldPhasedList(2) = " + toStringVCL(oldPhasedList));
|
||||||
return oldPhasedList;
|
return oldPhasedList;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -193,10 +200,8 @@ public class ReadBackedPhasingWalker extends LocusWalker<PhasingStatsAndOutput,
|
||||||
while (!phasedSites.isEmpty()) {
|
while (!phasedSites.isEmpty()) {
|
||||||
VariantAndReads phasedVr = phasedSites.peek();
|
VariantAndReads phasedVr = phasedSites.peek();
|
||||||
VariantContext phasedVc = phasedVr.variant;
|
VariantContext phasedVc = phasedVr.variant;
|
||||||
if (nextToPhaseVc != null && phasedVr.processVariant &&
|
if (nextToPhaseVc != null && phasedVr.processVariant && isInWindowRange(phasedVc, nextToPhaseVc)) {
|
||||||
(isInWindowRange(phasedVc, nextToPhaseVc) // nextToPhaseVc is still not far enough ahead of phasedVc to exclude phasedVc from calculations
|
// nextToPhaseVc is still not far enough ahead of phasedVc to exclude phasedVc from calculations
|
||||||
// since ALWAYS want the previous site to be included for phasing nextToPhaseVc:
|
|
||||||
|| (phasedSites.size() == 1 && VariantContextUtils.getLocation(phasedVc).onSameContig(VariantContextUtils.getLocation(nextToPhaseVc))))) {
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
vcList.add(phasedSites.remove().variant);
|
vcList.add(phasedSites.remove().variant);
|
||||||
|
|
@ -205,8 +210,8 @@ public class ReadBackedPhasingWalker extends LocusWalker<PhasingStatsAndOutput,
|
||||||
return vcList;
|
return vcList;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Finalize phasing of vc (head of unphasedSiteQueue) using all VariantContext objects in
|
/* Phase vc (removed head of unphasedSiteQueue) using all VariantContext objects in
|
||||||
phasedSites and all in unphasedSiteQueue that are within cacheWindow distance ahead of vc (on its contig).
|
phasedSites, and all in unphasedSiteQueue that are within cacheWindow distance ahead of vc (on its contig).
|
||||||
|
|
||||||
ASSUMES: All VariantContexts in unphasedSiteQueue are in positions downstream of vc (head of queue).
|
ASSUMES: All VariantContexts in unphasedSiteQueue are in positions downstream of vc (head of queue).
|
||||||
*/
|
*/
|
||||||
|
|
@ -214,12 +219,9 @@ public class ReadBackedPhasingWalker extends LocusWalker<PhasingStatsAndOutput,
|
||||||
if (!vr.processVariant)
|
if (!vr.processVariant)
|
||||||
return vr; // return vr as is
|
return vr; // return vr as is
|
||||||
|
|
||||||
VariantContext vc = vr.variant;
|
|
||||||
logger.debug("Will phase vc = " + VariantContextUtils.getLocation(vc));
|
|
||||||
|
|
||||||
// Find the previous VariantContext (that was processed and phased):
|
// Find the previous VariantContext (that was processed and phased):
|
||||||
VariantAndReads prevVr = null;
|
VariantAndReads prevVr = null;
|
||||||
Iterator<VariantAndReads> backwardsIt = phasedSites.descendingIterator();
|
Iterator<VariantAndReads> backwardsIt = phasedSites.descendingIterator(); // look at most recently phased sites
|
||||||
while (backwardsIt.hasNext()) {
|
while (backwardsIt.hasNext()) {
|
||||||
VariantAndReads backVr = backwardsIt.next();
|
VariantAndReads backVr = backwardsIt.next();
|
||||||
if (backVr.processVariant) {
|
if (backVr.processVariant) {
|
||||||
|
|
@ -227,11 +229,13 @@ public class ReadBackedPhasingWalker extends LocusWalker<PhasingStatsAndOutput,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
boolean hasPreviousSite = (prevVr != null);
|
if (prevVr == null)
|
||||||
|
return vr; // return vr as is, since cannot phase against "nothing" (vc is at the beginning of the chromosome, or the previous was so far back it was removed from phasedSites)
|
||||||
|
|
||||||
LinkedList<VariantAndReads> windowVaList = null;
|
VariantContext vc = vr.variant;
|
||||||
if (hasPreviousSite) {
|
logger.debug("Will phase vc = " + VariantContextUtils.getLocation(vc));
|
||||||
windowVaList = new LinkedList<VariantAndReads>();
|
|
||||||
|
LinkedList<VariantAndReads> windowVaList = new LinkedList<VariantAndReads>();
|
||||||
|
|
||||||
// Include previously phased sites in the phasing computation:
|
// Include previously phased sites in the phasing computation:
|
||||||
for (VariantAndReads phasedVr : phasedSites) {
|
for (VariantAndReads phasedVr : phasedSites) {
|
||||||
|
|
@ -254,7 +258,6 @@ public class ReadBackedPhasingWalker extends LocusWalker<PhasingStatsAndOutput,
|
||||||
for (VariantAndReads phaseInfoVr : windowVaList)
|
for (VariantAndReads phaseInfoVr : windowVaList)
|
||||||
logger.debug("Using phaseInfoVc = " + VariantContextUtils.getLocation(phaseInfoVr.variant));
|
logger.debug("Using phaseInfoVc = " + VariantContextUtils.getLocation(phaseInfoVr.variant));
|
||||||
}
|
}
|
||||||
}
|
|
||||||
logger.debug("");
|
logger.debug("");
|
||||||
|
|
||||||
Map<String, Genotype> sampGenotypes = vc.getGenotypes();
|
Map<String, Genotype> sampGenotypes = vc.getGenotypes();
|
||||||
|
|
@ -271,12 +274,11 @@ public class ReadBackedPhasingWalker extends LocusWalker<PhasingStatsAndOutput,
|
||||||
Biallele biall = new Biallele(gt);
|
Biallele biall = new Biallele(gt);
|
||||||
HashMap<String, Object> gtAttribs = new HashMap<String, Object>(gt.getAttributes());
|
HashMap<String, Object> gtAttribs = new HashMap<String, Object>(gt.getAttributes());
|
||||||
|
|
||||||
if (hasPreviousSite && gt.isHet()) {
|
if (gt.isHet()) {
|
||||||
VariantContext prevVc = prevVr.variant;
|
VariantContext prevVc = prevVr.variant;
|
||||||
Genotype prevGenotype = prevVc.getGenotype(samp);
|
Genotype prevGenotype = prevVc.getGenotype(samp);
|
||||||
if (prevGenotype.isHet()) { //otherwise, can trivially phase
|
if (prevGenotype.isHet()) { //otherwise, can trivially phase
|
||||||
logger.debug("NON-TRIVIALLY CARE about TOP vs. BOTTOM for: ");
|
logger.debug("NON-TRIVIALLY CARE about TOP vs. BOTTOM for: " + "\n" + biall);
|
||||||
logger.debug("\n" + biall);
|
|
||||||
|
|
||||||
List<VariantAndReads> sampleWindowVaList = new LinkedList<VariantAndReads>();
|
List<VariantAndReads> sampleWindowVaList = new LinkedList<VariantAndReads>();
|
||||||
int phasingSiteIndex = -1;
|
int phasingSiteIndex = -1;
|
||||||
|
|
@ -288,7 +290,7 @@ public class ReadBackedPhasingWalker extends LocusWalker<PhasingStatsAndOutput,
|
||||||
sampleWindowVaList.add(phaseInfoVr);
|
sampleWindowVaList.add(phaseInfoVr);
|
||||||
if (phasingSiteIndex == -1) {
|
if (phasingSiteIndex == -1) {
|
||||||
if (phaseInfoVr == vr)
|
if (phaseInfoVr == vr)
|
||||||
phasingSiteIndex = currentIndex;
|
phasingSiteIndex = currentIndex; // index of vr in sampleWindowVaList
|
||||||
else
|
else
|
||||||
currentIndex++;
|
currentIndex++;
|
||||||
}
|
}
|
||||||
|
|
@ -299,21 +301,22 @@ public class ReadBackedPhasingWalker extends LocusWalker<PhasingStatsAndOutput,
|
||||||
throw new StingException("Internal error: could NOT find vr and/or prevVr!");
|
throw new StingException("Internal error: could NOT find vr and/or prevVr!");
|
||||||
|
|
||||||
if (sampleWindowVaList.size() > maxPhaseSites) {
|
if (sampleWindowVaList.size() > maxPhaseSites) {
|
||||||
logger.warn("Trying to phase sample " + samp + " at locus " + VariantContextUtils.getLocation(vc) + " within a window of " + cacheWindow + " bases yields " + sampleWindowVaList.size() + " heterozygous sites to phase -- REDUCING to " + maxPhaseSites + " sites:\n" + toString(sampleWindowVaList));
|
logger.warn("Trying to phase sample " + samp + " at locus " + VariantContextUtils.getLocation(vc) + " within a window of " + cacheWindow + " bases yields " + sampleWindowVaList.size() + " heterozygous sites to phase:\n" + toStringVRL(sampleWindowVaList));
|
||||||
|
|
||||||
int prevSiteIndex = phasingSiteIndex - 1;
|
int prevSiteIndex = phasingSiteIndex - 1; // index of prevVr in sampleWindowVaList
|
||||||
int numToUse = maxPhaseSites - 2; // since always keep prevVr and vr
|
int numToUse = maxPhaseSites - 2; // since always keep prevVr and vr
|
||||||
int halfToUse = new Double(Math.floor(numToUse / 2.0)).intValue();
|
|
||||||
|
|
||||||
int numOnLeft = prevSiteIndex;
|
int numOnLeft = prevSiteIndex;
|
||||||
int numOnRight = sampleWindowVaList.size() - (phasingSiteIndex + 1);
|
int numOnRight = sampleWindowVaList.size() - (phasingSiteIndex + 1);
|
||||||
|
|
||||||
int useOnLeft, useOnRight;
|
int useOnLeft, useOnRight;
|
||||||
if (numOnLeft <= numOnRight) {
|
if (numOnLeft <= numOnRight) {
|
||||||
|
int halfToUse = new Double(Math.floor(numToUse / 2.0)).intValue(); // skimp on the left [floor], and be generous with the right side
|
||||||
useOnLeft = Math.min(halfToUse, numOnLeft);
|
useOnLeft = Math.min(halfToUse, numOnLeft);
|
||||||
useOnRight = Math.min(numToUse - useOnLeft, numOnRight);
|
useOnRight = Math.min(numToUse - useOnLeft, numOnRight);
|
||||||
}
|
}
|
||||||
else { // numOnRight < numOnLeft
|
else { // numOnRight < numOnLeft
|
||||||
|
int halfToUse = new Double(Math.ceil(numToUse / 2.0)).intValue(); // be generous with the right side [ceil]
|
||||||
useOnRight = Math.min(halfToUse, numOnRight);
|
useOnRight = Math.min(halfToUse, numOnRight);
|
||||||
useOnLeft = Math.min(numToUse - useOnRight, numOnLeft);
|
useOnLeft = Math.min(numToUse - useOnRight, numOnLeft);
|
||||||
}
|
}
|
||||||
|
|
@ -321,7 +324,7 @@ public class ReadBackedPhasingWalker extends LocusWalker<PhasingStatsAndOutput,
|
||||||
int stopIndex = phasingSiteIndex + useOnRight + 1; // put the index 1 past the desired index to keep
|
int stopIndex = phasingSiteIndex + useOnRight + 1; // put the index 1 past the desired index to keep
|
||||||
phasingSiteIndex -= startIndex;
|
phasingSiteIndex -= startIndex;
|
||||||
sampleWindowVaList = sampleWindowVaList.subList(startIndex, stopIndex);
|
sampleWindowVaList = sampleWindowVaList.subList(startIndex, stopIndex);
|
||||||
logger.warn("REDUCED to " + sampleWindowVaList.size() + " sites:\n" + toString(sampleWindowVaList));
|
logger.warn("REDUCED to " + sampleWindowVaList.size() + " sites:\n" + toStringVRL(sampleWindowVaList));
|
||||||
}
|
}
|
||||||
|
|
||||||
PhaseResult pr = phaseSample(samp, sampleWindowVaList, phasingSiteIndex);
|
PhaseResult pr = phaseSample(samp, sampleWindowVaList, phasingSiteIndex);
|
||||||
|
|
@ -329,8 +332,8 @@ public class ReadBackedPhasingWalker extends LocusWalker<PhasingStatsAndOutput,
|
||||||
if (genotypesArePhased) {
|
if (genotypesArePhased) {
|
||||||
Biallele prevBiall = new Biallele(prevGenotype);
|
Biallele prevBiall = new Biallele(prevGenotype);
|
||||||
|
|
||||||
logger.debug("CHOSEN PHASE FOR PREVIOUS:\n" + prevBiall + "\n");
|
logger.debug("THE PHASE PREVIOUSLY CHOSEN FOR PREVIOUS:\n" + prevBiall + "\n");
|
||||||
logger.debug("CHOSE PHASE:\n" + biall + "\n\n");
|
logger.debug("THE PHASE CHOSEN HERE:\n" + biall + "\n\n");
|
||||||
|
|
||||||
ensurePhasing(biall, prevBiall, pr.haplotype);
|
ensurePhasing(biall, prevBiall, pr.haplotype);
|
||||||
gtAttribs.put("PQ", pr.phaseQuality);
|
gtAttribs.put("PQ", pr.phaseQuality);
|
||||||
|
|
@ -370,6 +373,8 @@ public class ReadBackedPhasingWalker extends LocusWalker<PhasingStatsAndOutput,
|
||||||
LinkedList<ReadBasesAtPosition> allPositions = new LinkedList<ReadBasesAtPosition>();
|
LinkedList<ReadBasesAtPosition> allPositions = new LinkedList<ReadBasesAtPosition>();
|
||||||
for (VariantAndReads phaseInfoVr : variantList) {
|
for (VariantAndReads phaseInfoVr : variantList) {
|
||||||
ReadBasesAtPosition readBases = phaseInfoVr.sampleReadBases.get(sample);
|
ReadBasesAtPosition readBases = phaseInfoVr.sampleReadBases.get(sample);
|
||||||
|
if (readBases == null)
|
||||||
|
readBases = new ReadBasesAtPosition(); // for transparency, put an empty list of bases at this position for sample
|
||||||
allPositions.add(readBases);
|
allPositions.add(readBases);
|
||||||
}
|
}
|
||||||
HashMap<String, Read> allReads = convertReadBasesAtPositionToReads(allPositions);
|
HashMap<String, Read> allReads = convertReadBasesAtPositionToReads(allPositions);
|
||||||
|
|
@ -540,7 +545,6 @@ public class ReadBackedPhasingWalker extends LocusWalker<PhasingStatsAndOutput,
|
||||||
/*
|
/*
|
||||||
Inner classes:
|
Inner classes:
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private static class Biallele {
|
private static class Biallele {
|
||||||
public Allele top;
|
public Allele top;
|
||||||
public Allele bottom;
|
public Allele bottom;
|
||||||
|
|
@ -653,7 +657,7 @@ public class ReadBackedPhasingWalker extends LocusWalker<PhasingStatsAndOutput,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String toString(List<VariantAndReads> vrList) {
|
private static String toStringVRL(List<VariantAndReads> vrList) {
|
||||||
boolean first = true;
|
boolean first = true;
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
for (VariantAndReads vr : vrList) {
|
for (VariantAndReads vr : vrList) {
|
||||||
|
|
@ -667,6 +671,20 @@ public class ReadBackedPhasingWalker extends LocusWalker<PhasingStatsAndOutput,
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static String toStringVCL(List<VariantContext> vcList) {
|
||||||
|
boolean first = true;
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (VariantContext vc : vcList) {
|
||||||
|
if (first)
|
||||||
|
first = false;
|
||||||
|
else
|
||||||
|
sb.append(" -- ");
|
||||||
|
|
||||||
|
sb.append(VariantContextUtils.getLocation(vc));
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
private static class ReadBase {
|
private static class ReadBase {
|
||||||
public String readName;
|
public String readName;
|
||||||
public byte base;
|
public byte base;
|
||||||
|
|
@ -914,6 +932,8 @@ public class ReadBackedPhasingWalker extends LocusWalker<PhasingStatsAndOutput,
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class PhasingScore extends PreciseNonNegativeDouble {
|
class PhasingScore extends PreciseNonNegativeDouble {
|
||||||
public PhasingScore(double score) {
|
public PhasingScore(double score) {
|
||||||
super(score);
|
super(score);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue