Two changes to CombineVariants.
1. Fix: VCs were padded before the merge, but they were never unpadded afterwards. This leaves us with a VC that doesn't meet our spec. 2. Update: instead of running the merged VC through every standard annotation (which seems really wrong, since this isn't the annotator tool), just update the chromosome-count annotations (AC, AF, AN) through VCUtils. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4734 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d775192631
commit
6934f83cc7
|
|
@@ -33,6 +33,7 @@ import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
|
|||
import org.broad.tribble.util.variantcontext.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broad.tribble.vcf.VCFCodec;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@@ -467,6 +468,9 @@ public class VariantContextUtils {
|
|||
attributes.put(VariantContext.ID_KEY, rsID);
|
||||
|
||||
VariantContext merged = new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, negLog10PError, filters, attributes);
|
||||
// Trim the padded bases of all alleles if necessary
|
||||
merged = VCFCodec.createVariantContextWithTrimmedAlleles(merged);
|
||||
|
||||
if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged);
|
||||
return merged;
|
||||
}
|
||||
|
|
@@ -564,70 +568,6 @@ public class VariantContextUtils {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
|
||||
// see if we need to trim common reference base from all alleles
|
||||
boolean trimVC = true;
|
||||
|
||||
// We need to trim common reference base from all alleles if a ref base is common to all alleles
|
||||
Allele refAllele = inputVC.getReference();
|
||||
if (!inputVC.isVariant())
|
||||
trimVC = false;
|
||||
else if (refAllele.isNull())
|
||||
trimVC = false;
|
||||
else {
|
||||
for (Allele a : inputVC.getAlternateAlleles()) {
|
||||
if (a.length() < 1 || (a.getBases()[0] != refAllele.getBases()[0]))
|
||||
trimVC = false;
|
||||
}
|
||||
}
|
||||
|
||||
// nothing to do if we don't need to trim bases
|
||||
if (trimVC) {
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();
|
||||
|
||||
Map<String, Genotype> inputGenotypes = inputVC.getGenotypes();
|
||||
// set the reference base for indels in the attributes
|
||||
Map<String,Object> attributes = new TreeMap<String,Object>();
|
||||
|
||||
for ( Map.Entry<String, Object> p : inputVC.getAttributes().entrySet() ) {
|
||||
attributes.put(p.getKey(), p.getValue());
|
||||
}
|
||||
|
||||
attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(inputVC.getReference().getBases()[0]));
|
||||
|
||||
|
||||
for (Allele a : inputVC.getAlleles()) {
|
||||
// get bases for current allele and create a new one with trimmed bases
|
||||
byte[] newBases = Arrays.copyOfRange(a.getBases(),1,a.length());
|
||||
alleles.add(Allele.create(newBases,a.isReference()));
|
||||
}
|
||||
|
||||
// now we can recreate new genotypes with trimmed alleles
|
||||
for (String sample : inputVC.getSampleNames()) {
|
||||
Genotype g = inputGenotypes.get(sample);
|
||||
|
||||
List<Allele> inAlleles = g.getAlleles();
|
||||
List<Allele> newGenotypeAlleles = new ArrayList<Allele>();
|
||||
for (Allele a : inAlleles) {
|
||||
byte[] newBases = Arrays.copyOfRange(a.getBases(),1,a.length());
|
||||
newGenotypeAlleles.add(Allele.create(newBases, a.isReference()));
|
||||
}
|
||||
genotypes.put(sample, new Genotype(sample, newGenotypeAlleles, g.getNegLog10PError(),
|
||||
g.getFilters(),g.getAttributes(),g.genotypesArePhased()));
|
||||
|
||||
}
|
||||
return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(),
|
||||
inputVC.getFilters(), attributes);
|
||||
|
||||
}
|
||||
else
|
||||
return inputVC;
|
||||
|
||||
}
|
||||
|
||||
|
||||
static class CompareByPriority implements Comparator<VariantContext>, Serializable {
|
||||
List<String> priorityListOfVCs;
|
||||
public CompareByPriority(List<String> priorityListOfVCs) {
|
||||
|
|
|
|||
|
|
@@ -27,8 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
|
|||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.commandline.CommandLineUtils;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
|
|
@@ -37,7 +35,6 @@ import org.broadinstitute.sting.gatk.walkers.Reference;
|
|||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
|
|
@@ -83,18 +80,12 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
private List<String> priority = null;
|
||||
|
||||
private VariantAnnotatorEngine engine;
|
||||
|
||||
public void initialize() {
|
||||
validateAnnotateUnionArguments();
|
||||
|
||||
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), null);
|
||||
Set<String> samples = SampleUtils.getSampleList(vcfRods, genotypeMergeOption);
|
||||
|
||||
ArrayList<String> annotationClassesToUse = new ArrayList<String>();
|
||||
annotationClassesToUse.add("Standard");
|
||||
engine = new VariantAnnotatorEngine(getToolkit(), annotationClassesToUse, new ArrayList<String>());
|
||||
|
||||
if ( SET_KEY.toLowerCase().equals("null") )
|
||||
SET_KEY = null;
|
||||
|
||||
|
|
@@ -136,7 +127,10 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
//out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC);
|
||||
|
||||
if ( mergedVC != null ) { // only operate at the start of events
|
||||
VariantContext annotatedMergedVC = engine.annotateContext(tracker, ref, mergedVC);
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>(mergedVC.getAttributes());
|
||||
// re-compute chromosome counts
|
||||
VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false);
|
||||
VariantContext annotatedMergedVC = VariantContext.modifyAttributes(mergedVC, attributes);
|
||||
if ( minimalVCF )
|
||||
annotatedMergedVC = VariantContextUtils.pruneVariantContext(annotatedMergedVC, new HashSet(Arrays.asList(SET_KEY)));
|
||||
vcfWriter.add(annotatedMergedVC, ref.getBase());
|
||||
|
|
|
|||
|
|
@@ -60,21 +60,21 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
|
||||
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "3287ddd7c5003b3c791048cb2532578a"); }
|
||||
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "1b8b58c2b231926f0ba45d29c5242df5", " -setKey foo"); }
|
||||
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "5aaa695bc1754d6abcbed27f0c0b4c64", " -setKey null"); }
|
||||
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "79e7e474b0a2bb82930201f143328d5d"); } // official project VCF files in tabix format
|
||||
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "b9a1887dc8ff20a63a2bae07ea1131f4"); }
|
||||
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "1dc8fedba840de1335d23300446c1c07", " -setKey foo"); }
|
||||
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "33afd1b97340dfa192e5f886fcadd76f", " -setKey null"); }
|
||||
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "99796c731462d5a60dd2789a8074fbc0"); } // official project VCF files in tabix format
|
||||
|
||||
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "d93ee8b9f36eedc80a3afa548bffc888"); }
|
||||
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "26c99206407fb2c42beadcac5e5de246"); }
|
||||
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "79566b450a71b295aef0285c73f36d6c"); }
|
||||
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "0b08098d0519b26e58137c8b337a5119"); }
|
||||
|
||||
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "9b44bb39702b41240371815320e452d5"); } // official project VCF files in tabix format
|
||||
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "f1749a11f05b383a8df52c98670b2d9a"); } // official project VCF files in tabix format
|
||||
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "460bdbeaf7e9641395ac2ce6e1afc106"); } // official project VCF files in tabix format
|
||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "02f0634d0176138e4195009eff7f2308"); }
|
||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "67001f987e8d4aab237c506d6813970e"); }
|
||||
|
||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "d443358367211b96762ae64d1461b587"); }
|
||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "96e473ce30b3aca3e5f6c51b8fc562ea"); }
|
||||
|
||||
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "ff7bdac468045715d4ac0309d8736c23"); }
|
||||
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "0e6379c4a8c6ddc47ddc8059cf72d7bc"); }
|
||||
|
||||
@Test public void threeWayWithRefs() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
|
|
@@ -87,7 +87,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
||||
" -genotypeMergeOptions UNIQUIFY -L 1"),
|
||||
1,
|
||||
Arrays.asList("13cb75cf37ec370763a34910ba48e42f"));
|
||||
Arrays.asList("070dd17f49231576e4f8713e384c8026"));
|
||||
executeTest("threeWayWithRefs", spec);
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue