diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index bcd286bde..5727abf65 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -28,6 +28,7 @@ import org.broad.tribble.util.variantcontext.Allele; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; @@ -74,7 +75,8 @@ public class ReadBackedPhasingWalker extends RodWalker P(error) = 10^(-10/10) = 0.1, P(correct) = 0.9 - @Argument(fullName = "variantStatsFilePrefix", shortName = "variantStats", doc = "The prefix of the VCF/phasing statistics files", required = false) + @Hidden + @Argument(fullName = "variantStatsFilePrefix", shortName = "variantStats", doc = "The prefix of the VCF/phasing statistics files [For DEBUGGING purposes only - DO NOT USE!]", required = false) protected String variantStatsFilePrefix = null; @Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for phasing [default: 10]", required = false) @@ -286,81 +288,88 @@ public class ReadBackedPhasingWalker extends RodWalker can trivially phase a hom site relative to ANY previous site: + Genotype phasedGt = new Genotype(gt.getSampleName(), gt.getAlleles(), gt.getNegLog10PError(), gt.getFilters(), gt.getAttributes(), true); + uvc.setGenotype(samp, phasedGt); + } + else if (gt.isHet()) { // Attempt to phase this het genotype relative to the previous het genotype + PhasingWindow phaseWindow = new PhasingWindow(vr, samp); + if (phaseWindow.hasPreviousHets()) { // Otherwise, nothing to phase this against + SNPallelePair allelePair = new SNPallelePair(gt); + logger.debug("Want to phase TOP vs. BOTTOM for: " + "\n" + allelePair); - DoublyLinkedList.BidirectionalIterator prevHetAndInteriorIt = phaseWindow.prevHetAndInteriorIt; - /* Notes: - 1. Call to next() advances iterator to next position in partiallyPhasedSites. - 2. prevHetGenotype != null, since otherwise prevHetAndInteriorIt would not have been chosen to point to its UnfinishedVariantAndReads. - */ - UnfinishedVariantContext prevUvc = prevHetAndInteriorIt.next().unfinishedVariant; - Genotype prevHetGenotype = prevUvc.getGenotype(samp); + DoublyLinkedList.BidirectionalIterator prevHetAndInteriorIt = phaseWindow.prevHetAndInteriorIt; + /* Notes: + 1. Call to next() advances iterator to next position in partiallyPhasedSites. + 2. prevHetGenotype != null, since otherwise prevHetAndInteriorIt would not have been chosen to point to its UnfinishedVariantAndReads. + */ + UnfinishedVariantContext prevUvc = prevHetAndInteriorIt.next().unfinishedVariant; + Genotype prevHetGenotype = prevUvc.getGenotype(samp); - PhaseResult pr = phaseSample(phaseWindow); - boolean genotypesArePhased = passesPhasingThreshold(pr.phaseQuality); + PhaseResult pr = phaseSampleAtSite(phaseWindow); + boolean genotypesArePhased = passesPhasingThreshold(pr.phaseQuality); - if (pr.phasingContainsInconsistencies) { - logger.debug("MORE than " + (MAX_FRACTION_OF_INCONSISTENT_READS * 100) + "% of the reads are inconsistent for phasing of " + VariantContextUtils.getLocation(vc)); - uvc.setPhasingInconsistent(); - } - - if (genotypesArePhased) { - SNPallelePair prevAllelePair = new SNPallelePair(prevHetGenotype); - - logger.debug("THE PHASE PREVIOUSLY CHOSEN FOR PREVIOUS:\n" + prevAllelePair + "\n"); - logger.debug("THE PHASE CHOSEN HERE:\n" + allelePair + "\n\n"); - - ensurePhasing(allelePair, prevAllelePair, pr.haplotype); - Map gtAttribs = new HashMap(gt.getAttributes()); - gtAttribs.put(PQ_KEY, pr.phaseQuality); - Genotype phasedGt = new Genotype(gt.getSampleName(), allelePair.getAllelesAsList(), gt.getNegLog10PError(), gt.getFilters(), gtAttribs, genotypesArePhased); - uvc.setGenotype(samp, phasedGt); - } - - // Now, update the 0 or more "interior" hom sites in between the previous het site and this het site: - while (prevHetAndInteriorIt.hasNext()) { - UnfinishedVariantContext interiorUvc = prevHetAndInteriorIt.next().unfinishedVariant; - Genotype handledGt = interiorUvc.getGenotype(samp); - if (handledGt == null || !isUnfilteredCalledDiploidGenotype(handledGt)) - throw new ReviewedStingException("LOGICAL error: should not have breaks WITHIN haplotype"); - if (!handledGt.isHom()) - throw new ReviewedStingException("LOGICAL error: should not have anything besides hom sites IN BETWEEN two het sites"); - - // Use the same phasing consistency and PQ for each hom site in the "interior" as for the het-het phase: - if (pr.phasingContainsInconsistencies) - interiorUvc.setPhasingInconsistent(); + if (pr.phasingContainsInconsistencies) { + logger.debug("MORE than " + (MAX_FRACTION_OF_INCONSISTENT_READS * 100) + "% of the reads are inconsistent for phasing of " + VariantContextUtils.getLocation(vc)); + uvc.setPhasingInconsistent(); + } if (genotypesArePhased) { - Map handledGtAttribs = new HashMap(handledGt.getAttributes()); - handledGtAttribs.put(PQ_KEY, pr.phaseQuality); - Genotype phasedHomGt = new Genotype(handledGt.getSampleName(), handledGt.getAlleles(), handledGt.getNegLog10PError(), handledGt.getFilters(), handledGtAttribs, genotypesArePhased); - interiorUvc.setGenotype(samp, phasedHomGt); + SNPallelePair prevAllelePair = new SNPallelePair(prevHetGenotype); + + logger.debug("THE PHASE PREVIOUSLY CHOSEN FOR PREVIOUS:\n" + prevAllelePair + "\n"); + logger.debug("THE PHASE CHOSEN HERE:\n" + allelePair + "\n\n"); + + ensurePhasing(allelePair, prevAllelePair, pr.haplotype); + Map gtAttribs = new HashMap(gt.getAttributes()); + gtAttribs.put(PQ_KEY, pr.phaseQuality); + Genotype phasedGt = new Genotype(gt.getSampleName(), allelePair.getAllelesAsList(), gt.getNegLog10PError(), gt.getFilters(), gtAttribs, genotypesArePhased); + uvc.setGenotype(samp, phasedGt); } - } - if (statsWriter != null) - statsWriter.addStat(samp, VariantContextUtils.getLocation(vc), startDistance(prevUvc, vc), pr.phaseQuality, phaseWindow.readsAtHetSites.size(), phaseWindow.hetGenotypes.length); + // Now, update the 0 or more "interior" hom sites in between the previous het site and this het site: + while (prevHetAndInteriorIt.hasNext()) { + UnfinishedVariantContext interiorUvc = prevHetAndInteriorIt.next().unfinishedVariant; + Genotype handledGt = interiorUvc.getGenotype(samp); + if (handledGt == null || !isUnfilteredCalledDiploidGenotype(handledGt)) + throw new ReviewedStingException("LOGICAL error: should not have breaks WITHIN haplotype"); + if (!handledGt.isHom()) + throw new ReviewedStingException("LOGICAL error: should not have anything besides hom sites IN BETWEEN two het sites"); - PhaseCounts sampPhaseCounts = samplePhaseStats.get(samp); - if (sampPhaseCounts == null) { - sampPhaseCounts = new PhaseCounts(); - samplePhaseStats.put(samp, sampPhaseCounts); - } - sampPhaseCounts.numTestedSites++; + // Use the same phasing consistency and PQ for each hom site in the "interior" as for the het-het phase: + if (pr.phasingContainsInconsistencies) + interiorUvc.setPhasingInconsistent(); + + if (genotypesArePhased) { + Map handledGtAttribs = new HashMap(handledGt.getAttributes()); + handledGtAttribs.put(PQ_KEY, pr.phaseQuality); + Genotype phasedHomGt = new Genotype(handledGt.getSampleName(), handledGt.getAlleles(), handledGt.getNegLog10PError(), handledGt.getFilters(), handledGtAttribs, genotypesArePhased); + interiorUvc.setGenotype(samp, phasedHomGt); + } + } + + if (statsWriter != null) + statsWriter.addStat(samp, VariantContextUtils.getLocation(vc), startDistance(prevUvc, vc), pr.phaseQuality, phaseWindow.readsAtHetSites.size(), phaseWindow.hetGenotypes.length); + + PhaseCounts sampPhaseCounts = samplePhaseStats.get(samp); + if (sampPhaseCounts == null) { + sampPhaseCounts = new PhaseCounts(); + samplePhaseStats.put(samp, sampPhaseCounts); + } + sampPhaseCounts.numTestedSites++; + + if (pr.phasingContainsInconsistencies) { + if (genotypesArePhased) + sampPhaseCounts.numInconsistentSitesPhased++; + else + sampPhaseCounts.numInconsistentSitesNotPhased++; + } - if (pr.phasingContainsInconsistencies) { if (genotypesArePhased) - sampPhaseCounts.numInconsistentSitesPhased++; - else - sampPhaseCounts.numInconsistentSitesNotPhased++; + sampPhaseCounts.numPhased++; } - - if (genotypesArePhased) - sampPhaseCounts.numPhased++; } } } @@ -769,7 +778,7 @@ public class ReadBackedPhasingWalker extends RodWalker= 10", spec); } @@ -36,7 +36,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) + " -L chr20:1232503-1332503", 1, - Arrays.asList("60da5d2da66ae51bf42ad2b1c9505739")); + Arrays.asList("61768bcdcbf9aeef62bfd44862155ec3")); executeTest("MAX 10 het sites [TEST TWO]; require PQ >= 10", spec); } @@ -46,7 +46,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 2, 30) + " -L chr20:332341-382503", 1, - Arrays.asList("26befb1f5b11117f0ccb326fd05f9be7")); + Arrays.asList("98261406ac4c587eb4444ad06d40507b")); executeTest("MAX 2 het sites [TEST THREE]; require PQ >= 30", spec); } @@ -56,7 +56,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 5, 100) + " -L chr20:332341-382503", 1, - Arrays.asList("51ce38de72cf4163a272f00ba34832ff")); + Arrays.asList("d8464f2d2cf07e344986e417be8fce14")); executeTest("MAX 5 het sites [TEST FOUR]; require PQ >= 100", spec); } @@ -66,7 +66,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 1000, 7, 10) + " -L chr20:332341-482503", 1, - Arrays.asList("252964ea02d83ccf1e229e01fbfaaefa")); + Arrays.asList("1dee7950ab0ec27e1ee081c064159776")); executeTest("MAX 7 het sites [TEST FIVE]; require PQ >= 10; cacheWindow = 1000", spec); } @@ -76,9 +76,8 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) + " -L chr20:652810-681757", 1, - Arrays.asList("6983a121363ef4131b217805ae558313")); + Arrays.asList("e4f4de185bdbfaf067004c187419ac4c")); executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec); } - } \ No newline at end of file