Trivially phases any hom site (since it is always correct to continue the previous haplotypes by appending the same allele onto both haplotypes)
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4568 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
da64183854
commit
c357ec775a
|
|
@ -28,6 +28,7 @@ import org.broad.tribble.util.variantcontext.Allele;
|
|||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
|
|
@ -74,7 +75,8 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
@Argument(fullName = "phaseQualityThresh", shortName = "phaseThresh", doc = "The minimum phasing quality score required to output phasing; [default:10.0]", required = false)
|
||||
protected Double phaseQualityThresh = 10.0; // PQ = 10.0 <=> P(error) = 10^(-10/10) = 0.1, P(correct) = 0.9
|
||||
|
||||
@Argument(fullName = "variantStatsFilePrefix", shortName = "variantStats", doc = "The prefix of the VCF/phasing statistics files", required = false)
|
||||
@Hidden
|
||||
@Argument(fullName = "variantStatsFilePrefix", shortName = "variantStats", doc = "The prefix of the VCF/phasing statistics files [For DEBUGGING purposes only - DO NOT USE!]", required = false)
|
||||
protected String variantStatsFilePrefix = null;
|
||||
|
||||
@Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for phasing [default: 10]", required = false)
|
||||
|
|
@ -286,81 +288,88 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
Genotype gt = sampGtEntry.getValue();
|
||||
|
||||
logger.debug("sample = " + samp);
|
||||
if (isUnfilteredCalledDiploidGenotype(gt) && gt.isHet()) { // Can attempt to phase this genotype
|
||||
PhasingWindow phaseWindow = new PhasingWindow(vr, samp);
|
||||
if (phaseWindow.hasPreviousHets()) { // Otherwise, nothing to phase this against
|
||||
SNPallelePair allelePair = new SNPallelePair(gt);
|
||||
logger.debug("Want to phase TOP vs. BOTTOM for: " + "\n" + allelePair);
|
||||
if (isUnfilteredCalledDiploidGenotype(gt)) {
|
||||
if (gt.isHom()) {
|
||||
// true <-> can trivially phase a hom site relative to ANY previous site:
|
||||
Genotype phasedGt = new Genotype(gt.getSampleName(), gt.getAlleles(), gt.getNegLog10PError(), gt.getFilters(), gt.getAttributes(), true);
|
||||
uvc.setGenotype(samp, phasedGt);
|
||||
}
|
||||
else if (gt.isHet()) { // Attempt to phase this het genotype relative to the previous het genotype
|
||||
PhasingWindow phaseWindow = new PhasingWindow(vr, samp);
|
||||
if (phaseWindow.hasPreviousHets()) { // Otherwise, nothing to phase this against
|
||||
SNPallelePair allelePair = new SNPallelePair(gt);
|
||||
logger.debug("Want to phase TOP vs. BOTTOM for: " + "\n" + allelePair);
|
||||
|
||||
DoublyLinkedList.BidirectionalIterator<UnfinishedVariantAndReads> prevHetAndInteriorIt = phaseWindow.prevHetAndInteriorIt;
|
||||
/* Notes:
|
||||
1. Call to next() advances iterator to next position in partiallyPhasedSites.
|
||||
2. prevHetGenotype != null, since otherwise prevHetAndInteriorIt would not have been chosen to point to its UnfinishedVariantAndReads.
|
||||
*/
|
||||
UnfinishedVariantContext prevUvc = prevHetAndInteriorIt.next().unfinishedVariant;
|
||||
Genotype prevHetGenotype = prevUvc.getGenotype(samp);
|
||||
DoublyLinkedList.BidirectionalIterator<UnfinishedVariantAndReads> prevHetAndInteriorIt = phaseWindow.prevHetAndInteriorIt;
|
||||
/* Notes:
|
||||
1. Call to next() advances iterator to next position in partiallyPhasedSites.
|
||||
2. prevHetGenotype != null, since otherwise prevHetAndInteriorIt would not have been chosen to point to its UnfinishedVariantAndReads.
|
||||
*/
|
||||
UnfinishedVariantContext prevUvc = prevHetAndInteriorIt.next().unfinishedVariant;
|
||||
Genotype prevHetGenotype = prevUvc.getGenotype(samp);
|
||||
|
||||
PhaseResult pr = phaseSample(phaseWindow);
|
||||
boolean genotypesArePhased = passesPhasingThreshold(pr.phaseQuality);
|
||||
PhaseResult pr = phaseSampleAtSite(phaseWindow);
|
||||
boolean genotypesArePhased = passesPhasingThreshold(pr.phaseQuality);
|
||||
|
||||
if (pr.phasingContainsInconsistencies) {
|
||||
logger.debug("MORE than " + (MAX_FRACTION_OF_INCONSISTENT_READS * 100) + "% of the reads are inconsistent for phasing of " + VariantContextUtils.getLocation(vc));
|
||||
uvc.setPhasingInconsistent();
|
||||
}
|
||||
|
||||
if (genotypesArePhased) {
|
||||
SNPallelePair prevAllelePair = new SNPallelePair(prevHetGenotype);
|
||||
|
||||
logger.debug("THE PHASE PREVIOUSLY CHOSEN FOR PREVIOUS:\n" + prevAllelePair + "\n");
|
||||
logger.debug("THE PHASE CHOSEN HERE:\n" + allelePair + "\n\n");
|
||||
|
||||
ensurePhasing(allelePair, prevAllelePair, pr.haplotype);
|
||||
Map<String, Object> gtAttribs = new HashMap<String, Object>(gt.getAttributes());
|
||||
gtAttribs.put(PQ_KEY, pr.phaseQuality);
|
||||
Genotype phasedGt = new Genotype(gt.getSampleName(), allelePair.getAllelesAsList(), gt.getNegLog10PError(), gt.getFilters(), gtAttribs, genotypesArePhased);
|
||||
uvc.setGenotype(samp, phasedGt);
|
||||
}
|
||||
|
||||
// Now, update the 0 or more "interior" hom sites in between the previous het site and this het site:
|
||||
while (prevHetAndInteriorIt.hasNext()) {
|
||||
UnfinishedVariantContext interiorUvc = prevHetAndInteriorIt.next().unfinishedVariant;
|
||||
Genotype handledGt = interiorUvc.getGenotype(samp);
|
||||
if (handledGt == null || !isUnfilteredCalledDiploidGenotype(handledGt))
|
||||
throw new ReviewedStingException("LOGICAL error: should not have breaks WITHIN haplotype");
|
||||
if (!handledGt.isHom())
|
||||
throw new ReviewedStingException("LOGICAL error: should not have anything besides hom sites IN BETWEEN two het sites");
|
||||
|
||||
// Use the same phasing consistency and PQ for each hom site in the "interior" as for the het-het phase:
|
||||
if (pr.phasingContainsInconsistencies)
|
||||
interiorUvc.setPhasingInconsistent();
|
||||
if (pr.phasingContainsInconsistencies) {
|
||||
logger.debug("MORE than " + (MAX_FRACTION_OF_INCONSISTENT_READS * 100) + "% of the reads are inconsistent for phasing of " + VariantContextUtils.getLocation(vc));
|
||||
uvc.setPhasingInconsistent();
|
||||
}
|
||||
|
||||
if (genotypesArePhased) {
|
||||
Map<String, Object> handledGtAttribs = new HashMap<String, Object>(handledGt.getAttributes());
|
||||
handledGtAttribs.put(PQ_KEY, pr.phaseQuality);
|
||||
Genotype phasedHomGt = new Genotype(handledGt.getSampleName(), handledGt.getAlleles(), handledGt.getNegLog10PError(), handledGt.getFilters(), handledGtAttribs, genotypesArePhased);
|
||||
interiorUvc.setGenotype(samp, phasedHomGt);
|
||||
SNPallelePair prevAllelePair = new SNPallelePair(prevHetGenotype);
|
||||
|
||||
logger.debug("THE PHASE PREVIOUSLY CHOSEN FOR PREVIOUS:\n" + prevAllelePair + "\n");
|
||||
logger.debug("THE PHASE CHOSEN HERE:\n" + allelePair + "\n\n");
|
||||
|
||||
ensurePhasing(allelePair, prevAllelePair, pr.haplotype);
|
||||
Map<String, Object> gtAttribs = new HashMap<String, Object>(gt.getAttributes());
|
||||
gtAttribs.put(PQ_KEY, pr.phaseQuality);
|
||||
Genotype phasedGt = new Genotype(gt.getSampleName(), allelePair.getAllelesAsList(), gt.getNegLog10PError(), gt.getFilters(), gtAttribs, genotypesArePhased);
|
||||
uvc.setGenotype(samp, phasedGt);
|
||||
}
|
||||
}
|
||||
|
||||
if (statsWriter != null)
|
||||
statsWriter.addStat(samp, VariantContextUtils.getLocation(vc), startDistance(prevUvc, vc), pr.phaseQuality, phaseWindow.readsAtHetSites.size(), phaseWindow.hetGenotypes.length);
|
||||
// Now, update the 0 or more "interior" hom sites in between the previous het site and this het site:
|
||||
while (prevHetAndInteriorIt.hasNext()) {
|
||||
UnfinishedVariantContext interiorUvc = prevHetAndInteriorIt.next().unfinishedVariant;
|
||||
Genotype handledGt = interiorUvc.getGenotype(samp);
|
||||
if (handledGt == null || !isUnfilteredCalledDiploidGenotype(handledGt))
|
||||
throw new ReviewedStingException("LOGICAL error: should not have breaks WITHIN haplotype");
|
||||
if (!handledGt.isHom())
|
||||
throw new ReviewedStingException("LOGICAL error: should not have anything besides hom sites IN BETWEEN two het sites");
|
||||
|
||||
PhaseCounts sampPhaseCounts = samplePhaseStats.get(samp);
|
||||
if (sampPhaseCounts == null) {
|
||||
sampPhaseCounts = new PhaseCounts();
|
||||
samplePhaseStats.put(samp, sampPhaseCounts);
|
||||
}
|
||||
sampPhaseCounts.numTestedSites++;
|
||||
// Use the same phasing consistency and PQ for each hom site in the "interior" as for the het-het phase:
|
||||
if (pr.phasingContainsInconsistencies)
|
||||
interiorUvc.setPhasingInconsistent();
|
||||
|
||||
if (genotypesArePhased) {
|
||||
Map<String, Object> handledGtAttribs = new HashMap<String, Object>(handledGt.getAttributes());
|
||||
handledGtAttribs.put(PQ_KEY, pr.phaseQuality);
|
||||
Genotype phasedHomGt = new Genotype(handledGt.getSampleName(), handledGt.getAlleles(), handledGt.getNegLog10PError(), handledGt.getFilters(), handledGtAttribs, genotypesArePhased);
|
||||
interiorUvc.setGenotype(samp, phasedHomGt);
|
||||
}
|
||||
}
|
||||
|
||||
if (statsWriter != null)
|
||||
statsWriter.addStat(samp, VariantContextUtils.getLocation(vc), startDistance(prevUvc, vc), pr.phaseQuality, phaseWindow.readsAtHetSites.size(), phaseWindow.hetGenotypes.length);
|
||||
|
||||
PhaseCounts sampPhaseCounts = samplePhaseStats.get(samp);
|
||||
if (sampPhaseCounts == null) {
|
||||
sampPhaseCounts = new PhaseCounts();
|
||||
samplePhaseStats.put(samp, sampPhaseCounts);
|
||||
}
|
||||
sampPhaseCounts.numTestedSites++;
|
||||
|
||||
if (pr.phasingContainsInconsistencies) {
|
||||
if (genotypesArePhased)
|
||||
sampPhaseCounts.numInconsistentSitesPhased++;
|
||||
else
|
||||
sampPhaseCounts.numInconsistentSitesNotPhased++;
|
||||
}
|
||||
|
||||
if (pr.phasingContainsInconsistencies) {
|
||||
if (genotypesArePhased)
|
||||
sampPhaseCounts.numInconsistentSitesPhased++;
|
||||
else
|
||||
sampPhaseCounts.numInconsistentSitesNotPhased++;
|
||||
sampPhaseCounts.numPhased++;
|
||||
}
|
||||
|
||||
if (genotypesArePhased)
|
||||
sampPhaseCounts.numPhased++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -769,7 +778,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
}
|
||||
}
|
||||
|
||||
private PhaseResult phaseSample(PhasingWindow phaseWindow) {
|
||||
private PhaseResult phaseSampleAtSite(PhasingWindow phaseWindow) {
|
||||
/* Will map a phase and its "complement" to a single representative phase,
|
||||
and marginalizeAsNewTable() marginalizes to 2 positions [starting at the previous position, and then the current position]:
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
|
||||
+ " -L chr20:332341-382503",
|
||||
1,
|
||||
Arrays.asList("cfa2a436008c0090ec03935b6efc6bb3"));
|
||||
Arrays.asList("3994aed2a117b93aa5010ea131033aea"));
|
||||
executeTest("MAX 10 het sites [TEST ONE]; require PQ >= 10", spec);
|
||||
}
|
||||
|
||||
|
|
@ -36,7 +36,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
|
||||
+ " -L chr20:1232503-1332503",
|
||||
1,
|
||||
Arrays.asList("60da5d2da66ae51bf42ad2b1c9505739"));
|
||||
Arrays.asList("61768bcdcbf9aeef62bfd44862155ec3"));
|
||||
executeTest("MAX 10 het sites [TEST TWO]; require PQ >= 10", spec);
|
||||
}
|
||||
|
||||
|
|
@ -46,7 +46,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 2, 30)
|
||||
+ " -L chr20:332341-382503",
|
||||
1,
|
||||
Arrays.asList("26befb1f5b11117f0ccb326fd05f9be7"));
|
||||
Arrays.asList("98261406ac4c587eb4444ad06d40507b"));
|
||||
executeTest("MAX 2 het sites [TEST THREE]; require PQ >= 30", spec);
|
||||
}
|
||||
|
||||
|
|
@ -56,7 +56,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 5, 100)
|
||||
+ " -L chr20:332341-382503",
|
||||
1,
|
||||
Arrays.asList("51ce38de72cf4163a272f00ba34832ff"));
|
||||
Arrays.asList("d8464f2d2cf07e344986e417be8fce14"));
|
||||
executeTest("MAX 5 het sites [TEST FOUR]; require PQ >= 100", spec);
|
||||
}
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 1000, 7, 10)
|
||||
+ " -L chr20:332341-482503",
|
||||
1,
|
||||
Arrays.asList("252964ea02d83ccf1e229e01fbfaaefa"));
|
||||
Arrays.asList("1dee7950ab0ec27e1ee081c064159776"));
|
||||
executeTest("MAX 7 het sites [TEST FIVE]; require PQ >= 10; cacheWindow = 1000", spec);
|
||||
}
|
||||
|
||||
|
|
@ -76,9 +76,8 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
|
||||
+ " -L chr20:652810-681757",
|
||||
1,
|
||||
Arrays.asList("6983a121363ef4131b217805ae558313"));
|
||||
Arrays.asList("e4f4de185bdbfaf067004c187419ac4c"));
|
||||
executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue