Trivially phase any homozygous (hom) site: because both alleles are identical, it is always correct to continue the previous haplotypes by appending the same allele onto both haplotypes.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4568 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
fromer 2010-10-25 16:58:41 +00:00
parent da64183854
commit c357ec775a
2 changed files with 81 additions and 73 deletions

View File

@ -28,6 +28,7 @@ import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.*; import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
@ -74,7 +75,8 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
@Argument(fullName = "phaseQualityThresh", shortName = "phaseThresh", doc = "The minimum phasing quality score required to output phasing; [default:10.0]", required = false) @Argument(fullName = "phaseQualityThresh", shortName = "phaseThresh", doc = "The minimum phasing quality score required to output phasing; [default:10.0]", required = false)
protected Double phaseQualityThresh = 10.0; // PQ = 10.0 <=> P(error) = 10^(-10/10) = 0.1, P(correct) = 0.9 protected Double phaseQualityThresh = 10.0; // PQ = 10.0 <=> P(error) = 10^(-10/10) = 0.1, P(correct) = 0.9
@Argument(fullName = "variantStatsFilePrefix", shortName = "variantStats", doc = "The prefix of the VCF/phasing statistics files", required = false) @Hidden
@Argument(fullName = "variantStatsFilePrefix", shortName = "variantStats", doc = "The prefix of the VCF/phasing statistics files [For DEBUGGING purposes only - DO NOT USE!]", required = false)
protected String variantStatsFilePrefix = null; protected String variantStatsFilePrefix = null;
@Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for phasing [default: 10]", required = false) @Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for phasing [default: 10]", required = false)
@ -286,7 +288,13 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
Genotype gt = sampGtEntry.getValue(); Genotype gt = sampGtEntry.getValue();
logger.debug("sample = " + samp); logger.debug("sample = " + samp);
if (isUnfilteredCalledDiploidGenotype(gt) && gt.isHet()) { // Can attempt to phase this genotype if (isUnfilteredCalledDiploidGenotype(gt)) {
if (gt.isHom()) {
// true <-> can trivially phase a hom site relative to ANY previous site:
Genotype phasedGt = new Genotype(gt.getSampleName(), gt.getAlleles(), gt.getNegLog10PError(), gt.getFilters(), gt.getAttributes(), true);
uvc.setGenotype(samp, phasedGt);
}
else if (gt.isHet()) { // Attempt to phase this het genotype relative to the previous het genotype
PhasingWindow phaseWindow = new PhasingWindow(vr, samp); PhasingWindow phaseWindow = new PhasingWindow(vr, samp);
if (phaseWindow.hasPreviousHets()) { // Otherwise, nothing to phase this against if (phaseWindow.hasPreviousHets()) { // Otherwise, nothing to phase this against
SNPallelePair allelePair = new SNPallelePair(gt); SNPallelePair allelePair = new SNPallelePair(gt);
@ -300,7 +308,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
UnfinishedVariantContext prevUvc = prevHetAndInteriorIt.next().unfinishedVariant; UnfinishedVariantContext prevUvc = prevHetAndInteriorIt.next().unfinishedVariant;
Genotype prevHetGenotype = prevUvc.getGenotype(samp); Genotype prevHetGenotype = prevUvc.getGenotype(samp);
PhaseResult pr = phaseSample(phaseWindow); PhaseResult pr = phaseSampleAtSite(phaseWindow);
boolean genotypesArePhased = passesPhasingThreshold(pr.phaseQuality); boolean genotypesArePhased = passesPhasingThreshold(pr.phaseQuality);
if (pr.phasingContainsInconsistencies) { if (pr.phasingContainsInconsistencies) {
@ -364,6 +372,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
} }
} }
} }
}
partiallyPhasedSites.add(uvr); // only add it in now, since don't want it to be there during phasing partiallyPhasedSites.add(uvr); // only add it in now, since don't want it to be there during phasing
phaseStats.addIn(new PhasingStats(samplePhaseStats)); phaseStats.addIn(new PhasingStats(samplePhaseStats));
@ -769,7 +778,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
} }
} }
private PhaseResult phaseSample(PhasingWindow phaseWindow) { private PhaseResult phaseSampleAtSite(PhasingWindow phaseWindow) {
/* Will map a phase and its "complement" to a single representative phase, /* Will map a phase and its "complement" to a single representative phase,
and marginalizeAsNewTable() marginalizes to 2 positions [starting at the previous position, and then the current position]: and marginalizeAsNewTable() marginalizes to 2 positions [starting at the previous position, and then the current position]:
*/ */

View File

@ -26,7 +26,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
+ " -L chr20:332341-382503", + " -L chr20:332341-382503",
1, 1,
Arrays.asList("cfa2a436008c0090ec03935b6efc6bb3")); Arrays.asList("3994aed2a117b93aa5010ea131033aea"));
executeTest("MAX 10 het sites [TEST ONE]; require PQ >= 10", spec); executeTest("MAX 10 het sites [TEST ONE]; require PQ >= 10", spec);
} }
@ -36,7 +36,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
+ " -L chr20:1232503-1332503", + " -L chr20:1232503-1332503",
1, 1,
Arrays.asList("60da5d2da66ae51bf42ad2b1c9505739")); Arrays.asList("61768bcdcbf9aeef62bfd44862155ec3"));
executeTest("MAX 10 het sites [TEST TWO]; require PQ >= 10", spec); executeTest("MAX 10 het sites [TEST TWO]; require PQ >= 10", spec);
} }
@ -46,7 +46,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 2, 30) baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 2, 30)
+ " -L chr20:332341-382503", + " -L chr20:332341-382503",
1, 1,
Arrays.asList("26befb1f5b11117f0ccb326fd05f9be7")); Arrays.asList("98261406ac4c587eb4444ad06d40507b"));
executeTest("MAX 2 het sites [TEST THREE]; require PQ >= 30", spec); executeTest("MAX 2 het sites [TEST THREE]; require PQ >= 30", spec);
} }
@ -56,7 +56,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 5, 100) baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 5, 100)
+ " -L chr20:332341-382503", + " -L chr20:332341-382503",
1, 1,
Arrays.asList("51ce38de72cf4163a272f00ba34832ff")); Arrays.asList("d8464f2d2cf07e344986e417be8fce14"));
executeTest("MAX 5 het sites [TEST FOUR]; require PQ >= 100", spec); executeTest("MAX 5 het sites [TEST FOUR]; require PQ >= 100", spec);
} }
@ -66,7 +66,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 1000, 7, 10) baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 1000, 7, 10)
+ " -L chr20:332341-482503", + " -L chr20:332341-482503",
1, 1,
Arrays.asList("252964ea02d83ccf1e229e01fbfaaefa")); Arrays.asList("1dee7950ab0ec27e1ee081c064159776"));
executeTest("MAX 7 het sites [TEST FIVE]; require PQ >= 10; cacheWindow = 1000", spec); executeTest("MAX 7 het sites [TEST FIVE]; require PQ >= 10; cacheWindow = 1000", spec);
} }
@ -76,9 +76,8 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
+ " -L chr20:652810-681757", + " -L chr20:652810-681757",
1, 1,
Arrays.asList("6983a121363ef4131b217805ae558313")); Arrays.asList("e4f4de185bdbfaf067004c187419ac4c"));
executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec); executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec);
} }
} }