diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtils.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtils.java index 24060876b..11b80a80f 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtils.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtils.java @@ -363,7 +363,7 @@ class PhasingUtils { * * @param gt1 genotype 1 * @param gt2 genotype 2 - * @return true if genotypes have the same number of chromosomes, haplotype, phased, number of attributes + * @return true if genotypes have the same number of chromosomes, haplotype, number of attributes * as chromosomes, and either genotype is homozygous, false otherwise */ static boolean alleleSegregationIsKnown(Genotype gt1, Genotype gt2) { @@ -371,10 +371,6 @@ class PhasingUtils { if (gt1.getPloidy() != gt2.getPloidy()) return false; - // If gt1 or gt2 are not phased, then can not be merged. - if (!gt1.isPhased() || !gt2.isPhased()) - return false; - // If gt1 or gt2 are homozygous, then could be merged. if (gt1.isHom() || gt2.isHom()) return true; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasing.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasing.java index a5d4f38f5..3728f5207 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasing.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasing.java @@ -52,6 +52,7 @@ package org.broadinstitute.gatk.tools.walkers.phasing; import org.broadinstitute.gatk.engine.walkers.*; +import org.broadinstitute.gatk.utils.collections.Pair; import org.broadinstitute.gatk.utils.commandline.Argument; import org.broadinstitute.gatk.utils.commandline.ArgumentCollection; import org.broadinstitute.gatk.utils.commandline.Hidden; @@ -63,6 +64,7 @@ import org.broadinstitute.gatk.utils.contexts.ReferenceContext; import org.broadinstitute.gatk.engine.filters.MappingQualityZeroFilter; import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.utils.help.HelpConstants; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; import org.broadinstitute.gatk.utils.sam.ReadUtils; import org.broadinstitute.gatk.engine.GATKVCFUtils; import org.broadinstitute.gatk.utils.variant.GATKVCFConstants; @@ -338,6 +340,7 @@ public class ReadBackedPhasing extends RodWalker processQueue(PhasingStats phaseStats, boolean processAll) { List oldPhasedList = new LinkedList(); + VariantAndReads prevVr = null; while (!unphasedSiteQueue.isEmpty()) { if (!processAll) { // otherwise, phase until the end of unphasedSiteQueue VariantContext nextToPhaseVc = unphasedSiteQueue.peek().variant; @@ -354,10 +357,35 @@ public class ReadBackedPhasing extends RodWalker sampleReadBases; + public Set variantReadNames; public VariantAndReads(VariantContext variant, HashMap sampleReadBases) { this.variant = variant; @@ -1232,6 +1270,22 @@ public class ReadBackedPhasing extends RodWalker(); + for ( final GATKSAMRecord read : pileup.getReads() ) { + // get the SNP position in the read + Pair pair = ReadUtils.getReadCoordinateForReferenceCoordinate(read, variant.getStart()); + + // get the reads containing the SNP + for (final Allele altAllele : variant.getAlternateAlleles()) { + if (read.getReadBases()[pair.first] == altAllele.getBases()[0]) { + variantReadNames.add(read.getReadName()); + } + } + } + } } } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtilsUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtilsUnitTest.java index 08be802f2..89a7e89d7 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtilsUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtilsUnitTest.java @@ -81,7 +81,6 @@ public class PhasingUtilsUnitTest extends BaseTest { private ReferenceSequenceFile referenceFile; private Genotype genotype1; private Genotype genotype2; - private Genotype genotype2unphased; private String contig; private List alleleList1; private List alleleList2; @@ -94,9 +93,8 @@ public class PhasingUtilsUnitTest extends BaseTest { genomeLocParser = new GenomeLocParser(referenceFile); alleleList1 = Arrays.asList(Allele.create("T", true), Allele.create("C", false)); alleleList2 = Arrays.asList(Allele.create("G", true), Allele.create("A", false)); - genotype1 = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-1", "10-2"}).attribute("PQ", 100.0).alleles(alleleList1).phased(true).make(); - genotype2 = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-2", "10-1"}).attribute("PQ", 200.0).alleles(alleleList2).phased(true).make(); - genotype2unphased = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-2", "10-1"}).attribute("PQ", 200.0).alleles(alleleList2).phased(false).make(); + genotype1 = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-1", "10-2"}).attribute("PQ", 100.0).alleles(alleleList1).make(); + genotype2 = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-2", "10-1"}).attribute("PQ", 200.0).alleles(alleleList2).make(); contig = new String("1"); vc1 = new VariantContextBuilder().chr(contig).id("id1").source("TC").start(start).stop(start).alleles(alleleList1).genotypes(genotype1).make(); vc2 = new VariantContextBuilder().chr(contig).id("id2").source("GA").start(start+1).stop(start+1).alleles(alleleList2).genotypes(genotype2).make(); @@ -186,7 +184,6 @@ public class PhasingUtilsUnitTest extends BaseTest { @Test public void TestAlleleSegregationIsKnown(){ Assert.assertTrue(PhasingUtils.alleleSegregationIsKnown(genotype1, genotype2)); - Assert.assertFalse(PhasingUtils.alleleSegregationIsKnown(genotype1, genotype2unphased)); } @Test diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasingIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasingIntegrationTest.java index 094163ea7..16a6f5df6 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasingIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasingIntegrationTest.java @@ -152,7 +152,22 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { " -o %s" + " --no_cmdline_in_header", 1, - Arrays.asList("d7797171d9ca4e173fab6b5af1e6d539")); + Arrays.asList("b251b4378fda9784f2175c7e3d80f032")); executeTest("Do not merge unphased SNPs", spec); } + + @Test + public void testMergeSNPsIfSameRead() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T ReadBackedPhasing" + + " -R " + b37KGReferenceWithDecoy + + " -I " + privateTestDir + "readBackedPhasing.bam" + + " --variant " + privateTestDir + "readBackedPhasing.vcf.gz" + + " -enableMergeToMNP -maxDistMNP 2 -L 1:1875000-1877000" + + " -o %s" + + " --no_cmdline_in_header", + 1, + Arrays.asList("630816da701b9ea8674c23c91fa61bec")); + executeTest("Merge SNPs if on the same read", spec); + } }