diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java new file mode 100644 index 000000000..4da049b39 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java @@ -0,0 +1,107 @@ +package org.broadinstitute.sting.gatk.walkers.phasing; + +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; + +import java.util.*; + +public class MergeAndMatchHaplotypes extends RodWalker { + @Output + protected VCFWriter vcfWriter = null; + + private Map pbtCache = new HashMap(); + private Map rbpCache = new HashMap(); + + private final String SOURCE_NAME = "MergeReadBackedAndTransmissionPhasedVariants"; + + public void initialize() { + ArrayList rodNames = new ArrayList(); + rodNames.add("pbt"); + + Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); + Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); + Set headerLines = new HashSet(); + headerLines.addAll(VCFUtils.getHeaderFields(this.getToolkit())); + + vcfWriter.writeHeader(new VCFHeader(headerLines, vcfSamples)); + } + + @Override + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if (tracker != null) { + Collection pbts = tracker.getVariantContexts(ref, "pbt", null, ref.getLocus(), true, true); + Collection rbps = tracker.getVariantContexts(ref, "rbp", null, ref.getLocus(), true, true); + + VariantContext pbt = pbts.iterator().hasNext() ? pbts.iterator().next() : null; + VariantContext rbp = rbps.iterator().hasNext() ? rbps.iterator().next() : null; + + if (pbt != null && rbp != null) { + Map genotypes = pbt.getGenotypes(); + + if (!rbp.isFiltered()) { + for (String sample : rbp.getSampleNames()) { + Genotype rbpg = rbp.getGenotype(sample); + Genotype pbtg = pbt.getGenotype(sample); + + // Propagate read-backed phasing information to genotypes unphased by transmission + //if (!pbtg.isPhased() && rbpCache.containsKey(sample)) { + if (!pbtg.isPhased() && rbpg.isPhased() && rbpCache.containsKey(sample)) { + boolean orientationMatches = rbpCache.get(sample).sameGenotype(pbtCache.get(sample), false); + + if (orientationMatches) { + pbtg = rbpg; + } else { + List fwdAlleles = rbpg.getAlleles(); + List revAlleles = new ArrayList(); + + for (int i = fwdAlleles.size() - 1; i >= 0; i--) { + revAlleles.add(fwdAlleles.get(i)); + } + + pbtg = new Genotype(sample, revAlleles, rbpg.getNegLog10PError(), rbpg.getFilters(), rbpg.getAttributes(), rbpg.isPhased()); + } + } + + genotypes.put(sample, pbtg); + + // Update the cache + if (/*rbpg.isPhased() &&*/ rbpg.isHet()) { + rbpCache.put(sample, rbpg); + pbtCache.put(sample, pbtg); + } else if (!rbpg.isPhased()) { + rbpCache.remove(sample); + pbtCache.remove(sample); + } + } + } + + VariantContext newvc = new VariantContext(SOURCE_NAME, pbt.getChr(), pbt.getStart(), pbt.getStart(), pbt.getAlleles(), genotypes, pbt.getNegLog10PError(), pbt.getFilters(), pbt.getAttributes()); + vcfWriter.add(newvc, ref.getBase()); + } + } + + return null; + } + + @Override + public Integer reduceInit() { + return null; + } + + @Override + public Integer reduce(Integer value, Integer sum) { + return null; + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java new file mode 100644 index 000000000..46d96da27 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java @@ -0,0 +1,28 @@ +package org.broadinstitute.sting.gatk.walkers.phasing; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import java.util.Arrays; + +public class MergeAndMatchHaplotypesIntegrationTest extends WalkerTest { + private static String mergeAndMatchHaplotypesTestDataRoot = validationDataLocation + "/MergeByHaplotypes"; + private static String fundamentalTestPBTVCF = mergeAndMatchHaplotypesTestDataRoot + "/" + "FundamentalsTest.pbt.vcf"; + private static String fundamentalTestRBPVCF = mergeAndMatchHaplotypesTestDataRoot + "/" + "FundamentalsTest.pbt.rbp.vcf"; + + @Test + public void testBasicFunctionality() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T MergeAndMatchHaplotypes", + "-R " + b37KGReference, + "-B:pbt,VCF " + fundamentalTestPBTVCF, + "-B:rbp,VCF " + fundamentalTestRBPVCF, + "-o %s" + ), + 1, + Arrays.asList("") + ); + executeTest("testBasicMergeAndMatchHaplotypesFunctionality", spec); + } +}