Two changes to CombineVariants.

1. Fix: VCs were padded before the merge, but they were never unpadded afterwards.  This leaves us with a VC that doesn't meet our spec.
2. Update: instead of running the merged VC through every standard annotation (which seems really wrong, since this isn't the annotator tool), just update the chromosome count annotations (AC,AF,AN) through VCUtils.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4734 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-11-25 04:52:12 +00:00
parent d775192631
commit 6934f83cc7
3 changed files with 19 additions and 85 deletions

View File

@ -33,6 +33,7 @@ import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
import org.broad.tribble.util.variantcontext.*;
import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker;
import org.broadinstitute.sting.utils.*;
import org.broad.tribble.vcf.VCFCodec;
import org.broad.tribble.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -467,6 +468,9 @@ public class VariantContextUtils {
attributes.put(VariantContext.ID_KEY, rsID);
VariantContext merged = new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, negLog10PError, filters, attributes);
// Trim the padded bases of all alleles if necessary
merged = VCFCodec.createVariantContextWithTrimmedAlleles(merged);
if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged);
return merged;
}
@ -564,70 +568,6 @@ public class VariantContextUtils {
}
}
public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
// see if we need to trim common reference base from all alleles
boolean trimVC = true;
// We need to trim common reference base from all alleles if a ref base is common to all alleles
Allele refAllele = inputVC.getReference();
if (!inputVC.isVariant())
trimVC = false;
else if (refAllele.isNull())
trimVC = false;
else {
for (Allele a : inputVC.getAlternateAlleles()) {
if (a.length() < 1 || (a.getBases()[0] != refAllele.getBases()[0]))
trimVC = false;
}
}
// nothing to do if we don't need to trim bases
if (trimVC) {
List<Allele> alleles = new ArrayList<Allele>();
Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();
Map<String, Genotype> inputGenotypes = inputVC.getGenotypes();
// set the reference base for indels in the attributes
Map<String,Object> attributes = new TreeMap<String,Object>();
for ( Map.Entry<String, Object> p : inputVC.getAttributes().entrySet() ) {
attributes.put(p.getKey(), p.getValue());
}
attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(inputVC.getReference().getBases()[0]));
for (Allele a : inputVC.getAlleles()) {
// get bases for current allele and create a new one with trimmed bases
byte[] newBases = Arrays.copyOfRange(a.getBases(),1,a.length());
alleles.add(Allele.create(newBases,a.isReference()));
}
// now we can recreate new genotypes with trimmed alleles
for (String sample : inputVC.getSampleNames()) {
Genotype g = inputGenotypes.get(sample);
List<Allele> inAlleles = g.getAlleles();
List<Allele> newGenotypeAlleles = new ArrayList<Allele>();
for (Allele a : inAlleles) {
byte[] newBases = Arrays.copyOfRange(a.getBases(),1,a.length());
newGenotypeAlleles.add(Allele.create(newBases, a.isReference()));
}
genotypes.put(sample, new Genotype(sample, newGenotypeAlleles, g.getNegLog10PError(),
g.getFilters(),g.getAttributes(),g.genotypesArePhased()));
}
return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(),
inputVC.getFilters(), attributes);
}
else
return inputVC;
}
static class CompareByPriority implements Comparator<VariantContext>, Serializable {
List<String> priorityListOfVCs;
public CompareByPriority(List<String> priorityListOfVCs) {

View File

@ -27,8 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.commandline.CommandLineUtils;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
@ -37,7 +35,6 @@ import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
@ -83,18 +80,12 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
private List<String> priority = null;
private VariantAnnotatorEngine engine;
public void initialize() {
validateAnnotateUnionArguments();
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), null);
Set<String> samples = SampleUtils.getSampleList(vcfRods, genotypeMergeOption);
ArrayList<String> annotationClassesToUse = new ArrayList<String>();
annotationClassesToUse.add("Standard");
engine = new VariantAnnotatorEngine(getToolkit(), annotationClassesToUse, new ArrayList<String>());
if ( SET_KEY.toLowerCase().equals("null") )
SET_KEY = null;
@ -136,7 +127,10 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
//out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC);
if ( mergedVC != null ) { // only operate at the start of events
VariantContext annotatedMergedVC = engine.annotateContext(tracker, ref, mergedVC);
HashMap<String, Object> attributes = new HashMap<String, Object>(mergedVC.getAttributes());
// re-compute chromosome counts
VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false);
VariantContext annotatedMergedVC = VariantContext.modifyAttributes(mergedVC, attributes);
if ( minimalVCF )
annotatedMergedVC = VariantContextUtils.pruneVariantContext(annotatedMergedVC, new HashSet(Arrays.asList(SET_KEY)));
vcfWriter.add(annotatedMergedVC, ref.getBase());

View File

@ -60,21 +60,21 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
}
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "3287ddd7c5003b3c791048cb2532578a"); }
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "1b8b58c2b231926f0ba45d29c5242df5", " -setKey foo"); }
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "5aaa695bc1754d6abcbed27f0c0b4c64", " -setKey null"); }
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "79e7e474b0a2bb82930201f143328d5d"); } // official project VCF files in tabix format
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "b9a1887dc8ff20a63a2bae07ea1131f4"); }
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "1dc8fedba840de1335d23300446c1c07", " -setKey foo"); }
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "33afd1b97340dfa192e5f886fcadd76f", " -setKey null"); }
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "99796c731462d5a60dd2789a8074fbc0"); } // official project VCF files in tabix format
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "d93ee8b9f36eedc80a3afa548bffc888"); }
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "26c99206407fb2c42beadcac5e5de246"); }
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "79566b450a71b295aef0285c73f36d6c"); }
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "0b08098d0519b26e58137c8b337a5119"); }
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "9b44bb39702b41240371815320e452d5"); } // official project VCF files in tabix format
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "f1749a11f05b383a8df52c98670b2d9a"); } // official project VCF files in tabix format
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "460bdbeaf7e9641395ac2ce6e1afc106"); } // official project VCF files in tabix format
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "02f0634d0176138e4195009eff7f2308"); }
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "67001f987e8d4aab237c506d6813970e"); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "d443358367211b96762ae64d1461b587"); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "96e473ce30b3aca3e5f6c51b8fc562ea"); }
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "ff7bdac468045715d4ac0309d8736c23"); }
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "0e6379c4a8c6ddc47ddc8059cf72d7bc"); }
@Test public void threeWayWithRefs() {
WalkerTestSpec spec = new WalkerTestSpec(
@ -87,7 +87,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
" -genotypeMergeOptions UNIQUIFY -L 1"),
1,
Arrays.asList("13cb75cf37ec370763a34910ba48e42f"));
Arrays.asList("070dd17f49231576e4f8713e384c8026"));
executeTest("threeWayWithRefs", spec);
}