Moved (poorly named) MergeAndMatchHaplotypes to public. Added integration test

This commit is contained in:
Kiran V Garimella 2011-07-17 22:47:32 -04:00
parent afb506e128
commit 8167aba601
2 changed files with 135 additions and 0 deletions

View File

@ -0,0 +1,107 @@
package org.broadinstitute.sting.gatk.walkers.phasing;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import java.util.*;
public class MergeAndMatchHaplotypes extends RodWalker<Integer, Integer> {
@Output
protected VCFWriter vcfWriter = null;
private Map<String, Genotype> pbtCache = new HashMap<String, Genotype>();
private Map<String, Genotype> rbpCache = new HashMap<String, Genotype>();
private final String SOURCE_NAME = "MergeReadBackedAndTransmissionPhasedVariants";
public void initialize() {
ArrayList<String> rodNames = new ArrayList<String>();
rodNames.add("pbt");
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
headerLines.addAll(VCFUtils.getHeaderFields(this.getToolkit()));
vcfWriter.writeHeader(new VCFHeader(headerLines, vcfSamples));
}
@Override
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (tracker != null) {
Collection<VariantContext> pbts = tracker.getVariantContexts(ref, "pbt", null, ref.getLocus(), true, true);
Collection<VariantContext> rbps = tracker.getVariantContexts(ref, "rbp", null, ref.getLocus(), true, true);
VariantContext pbt = pbts.iterator().hasNext() ? pbts.iterator().next() : null;
VariantContext rbp = rbps.iterator().hasNext() ? rbps.iterator().next() : null;
if (pbt != null && rbp != null) {
Map<String, Genotype> genotypes = pbt.getGenotypes();
if (!rbp.isFiltered()) {
for (String sample : rbp.getSampleNames()) {
Genotype rbpg = rbp.getGenotype(sample);
Genotype pbtg = pbt.getGenotype(sample);
// Propagate read-backed phasing information to genotypes unphased by transmission
//if (!pbtg.isPhased() && rbpCache.containsKey(sample)) {
if (!pbtg.isPhased() && rbpg.isPhased() && rbpCache.containsKey(sample)) {
boolean orientationMatches = rbpCache.get(sample).sameGenotype(pbtCache.get(sample), false);
if (orientationMatches) {
pbtg = rbpg;
} else {
List<Allele> fwdAlleles = rbpg.getAlleles();
List<Allele> revAlleles = new ArrayList<Allele>();
for (int i = fwdAlleles.size() - 1; i >= 0; i--) {
revAlleles.add(fwdAlleles.get(i));
}
pbtg = new Genotype(sample, revAlleles, rbpg.getNegLog10PError(), rbpg.getFilters(), rbpg.getAttributes(), rbpg.isPhased());
}
}
genotypes.put(sample, pbtg);
// Update the cache
if (/*rbpg.isPhased() &&*/ rbpg.isHet()) {
rbpCache.put(sample, rbpg);
pbtCache.put(sample, pbtg);
} else if (!rbpg.isPhased()) {
rbpCache.remove(sample);
pbtCache.remove(sample);
}
}
}
VariantContext newvc = new VariantContext(SOURCE_NAME, pbt.getChr(), pbt.getStart(), pbt.getStart(), pbt.getAlleles(), genotypes, pbt.getNegLog10PError(), pbt.getFilters(), pbt.getAttributes());
vcfWriter.add(newvc, ref.getBase());
}
}
return null;
}
@Override
public Integer reduceInit() {
return null;
}
@Override
public Integer reduce(Integer value, Integer sum) {
return null;
}
}

View File

@ -0,0 +1,28 @@
package org.broadinstitute.sting.gatk.walkers.phasing;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import java.util.Arrays;
public class MergeAndMatchHaplotypesIntegrationTest extends WalkerTest {
private static String mergeAndMatchHaplotypesTestDataRoot = validationDataLocation + "/MergeByHaplotypes";
private static String fundamentalTestPBTVCF = mergeAndMatchHaplotypesTestDataRoot + "/" + "FundamentalsTest.pbt.vcf";
private static String fundamentalTestRBPVCF = mergeAndMatchHaplotypesTestDataRoot + "/" + "FundamentalsTest.pbt.rbp.vcf";
@Test
public void testBasicFunctionality() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T MergeAndMatchHaplotypes",
"-R " + b37KGReference,
"-B:pbt,VCF " + fundamentalTestPBTVCF,
"-B:rbp,VCF " + fundamentalTestRBPVCF,
"-o %s"
),
1,
Arrays.asList("")
);
executeTest("testBasicMergeAndMatchHaplotypesFunctionality", spec);
}
}