From 41e53d37e1782721f8bbdc1ee06275c3fc1e69d6 Mon Sep 17 00:00:00 2001 From: fromer Date: Wed, 25 Aug 2010 19:43:57 +0000 Subject: [PATCH] Changed ReadBackedPhasing to be a RodWalker (more efficient, since it is ROD-focused) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4117 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/ReadBackedPhasingWalker.java | 150 ++++++++++-------- 1 file changed, 85 insertions(+), 65 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadBackedPhasingWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadBackedPhasingWalker.java index 35d9c3bee..a3569e475 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadBackedPhasingWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadBackedPhasingWalker.java @@ -52,11 +52,12 @@ import static org.broadinstitute.sting.utils.vcf.VCFUtils.getVCFHeadersFromRods; * Walks along all loci, caching a user-defined window of VariantContext sites, and then finishes phasing them when they go out of range (using downstream reads). * Use '-BTI variant' to only stop at positions in the VCF file bound to 'variant'. */ -@Requires(value = {}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class)) +@Allows(value = {DataSource.READS, DataSource.REFERENCE}) +@Requires(value = {DataSource.READS, DataSource.REFERENCE}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class)) @ReadFilters( {ZeroMappingQualityReadFilter.class} ) // Filter out all reads with zero mapping quality -public class ReadBackedPhasingWalker extends LocusWalker { +public class ReadBackedPhasingWalker extends RodWalker { @Output(doc="File to which variants should be written",required=true) protected VCFWriter writer = null; @@ -124,7 +125,7 @@ public class ReadBackedPhasingWalker extends LocusWalker windowVaList = null; - if (hasPreviousSite) { - windowVaList = new LinkedList(); + VariantContext vc = vr.variant; + logger.debug("Will phase vc = " + VariantContextUtils.getLocation(vc)); - // Include previously phased sites in the phasing computation: - for (VariantAndReads phasedVr : phasedSites) { - if (phasedVr.processVariant) - windowVaList.add(phasedVr); - } + LinkedList windowVaList = new LinkedList(); - // Add position to be phased: - windowVaList.add(vr); + // Include previously phased sites in the phasing computation: + for (VariantAndReads phasedVr : phasedSites) { + if (phasedVr.processVariant) + windowVaList.add(phasedVr); + } - // Include as of yet unphased sites in the phasing computation: - for (VariantAndReads nextVr : unphasedSiteQueue) { - if (!isInWindowRange(vc, nextVr.variant)) //nextVr too far ahead of the range used for phasing vc - break; - if (nextVr.processVariant) // include in the phasing computation - windowVaList.add(nextVr); - } + // Add position to be phased: + windowVaList.add(vr); - if (logger.isDebugEnabled()) { - for (VariantAndReads phaseInfoVr : windowVaList) - logger.debug("Using phaseInfoVc = " + VariantContextUtils.getLocation(phaseInfoVr.variant)); - } + // Include as of yet unphased sites in the phasing computation: + for (VariantAndReads nextVr : unphasedSiteQueue) { + if (!isInWindowRange(vc, nextVr.variant)) //nextVr too far ahead of the range used for phasing vc + break; + if (nextVr.processVariant) // include in the phasing computation + windowVaList.add(nextVr); + } + + if (logger.isDebugEnabled()) { + for (VariantAndReads phaseInfoVr : windowVaList) + logger.debug("Using phaseInfoVc = " + VariantContextUtils.getLocation(phaseInfoVr.variant)); } logger.debug(""); @@ -271,12 +274,11 @@ public class ReadBackedPhasingWalker extends LocusWalker gtAttribs = new HashMap(gt.getAttributes()); - if (hasPreviousSite && gt.isHet()) { + if (gt.isHet()) { VariantContext prevVc = prevVr.variant; Genotype prevGenotype = prevVc.getGenotype(samp); if (prevGenotype.isHet()) { //otherwise, can trivially phase - logger.debug("NON-TRIVIALLY CARE about TOP vs. BOTTOM for: "); - logger.debug("\n" + biall); + logger.debug("NON-TRIVIALLY CARE about TOP vs. BOTTOM for: " + "\n" + biall); List sampleWindowVaList = new LinkedList(); int phasingSiteIndex = -1; @@ -288,7 +290,7 @@ public class ReadBackedPhasingWalker extends LocusWalker maxPhaseSites) { - logger.warn("Trying to phase sample " + samp + " at locus " + VariantContextUtils.getLocation(vc) + " within a window of " + cacheWindow + " bases yields " + sampleWindowVaList.size() + " heterozygous sites to phase -- REDUCING to " + maxPhaseSites + " sites:\n" + toString(sampleWindowVaList)); + logger.warn("Trying to phase sample " + samp + " at locus " + VariantContextUtils.getLocation(vc) + " within a window of " + cacheWindow + " bases yields " + sampleWindowVaList.size() + " heterozygous sites to phase:\n" + toStringVRL(sampleWindowVaList)); - int prevSiteIndex = phasingSiteIndex - 1; + int prevSiteIndex = phasingSiteIndex - 1; // index of prevVr in sampleWindowVaList int numToUse = maxPhaseSites - 2; // since always keep prevVr and vr - int halfToUse = new Double(Math.floor(numToUse / 2.0)).intValue(); int numOnLeft = prevSiteIndex; int numOnRight = sampleWindowVaList.size() - (phasingSiteIndex + 1); int useOnLeft, useOnRight; if (numOnLeft <= numOnRight) { + int halfToUse = new Double(Math.floor(numToUse / 2.0)).intValue(); // skimp on the left [floor], and be generous with the right side useOnLeft = Math.min(halfToUse, numOnLeft); useOnRight = Math.min(numToUse - useOnLeft, numOnRight); } else { // numOnRight < numOnLeft + int halfToUse = new Double(Math.ceil(numToUse / 2.0)).intValue(); // be generous with the right side [ceil] useOnRight = Math.min(halfToUse, numOnRight); useOnLeft = Math.min(numToUse - useOnRight, numOnLeft); } @@ -321,7 +324,7 @@ public class ReadBackedPhasingWalker extends LocusWalker allPositions = new LinkedList(); for (VariantAndReads phaseInfoVr : variantList) { ReadBasesAtPosition readBases = phaseInfoVr.sampleReadBases.get(sample); + if (readBases == null) + readBases = new ReadBasesAtPosition(); // for transparency, put an empty list of bases at this position for sample allPositions.add(readBases); } HashMap allReads = convertReadBasesAtPositionToReads(allPositions); @@ -540,7 +545,6 @@ public class ReadBackedPhasingWalker extends LocusWalker vrList) { + private static String toStringVRL(List vrList) { boolean first = true; StringBuilder sb = new StringBuilder(); for (VariantAndReads vr : vrList) { @@ -667,6 +671,20 @@ public class ReadBackedPhasingWalker extends LocusWalker vcList) { + boolean first = true; + StringBuilder sb = new StringBuilder(); + for (VariantContext vc : vcList) { + if (first) + first = false; + else + sb.append(" -- "); + + sb.append(VariantContextUtils.getLocation(vc)); + } + return sb.toString(); + } + private static class ReadBase { public String readName; public byte base; @@ -914,6 +932,8 @@ public class ReadBackedPhasingWalker extends LocusWalker