Refactor rsID and overlap detection in VariantOverlapAnnotator utility class
-- Variants will be considered matching if they have the same reference allele and at least 1 common alternative allele. This matching algorithm determines how rsID are added back into the VariantContext we want to annotate, and as well determining the overlap FLAG attribute field. -- Updated VariantAnnotator and VariantsToVCF to use this class, removing its old stale implementation -- Added unit tests for this VariantOverlapAnnotator class -- Removed GATKVCFUtils.rsIDOfFirstRealVariant as this is now better to use VariantOverlapAnnotator -- Now requires strict allele matching, without any option to just use site annotation.
This commit is contained in:
parent
3e979f30a9
commit
0d593cff70
|
|
@ -0,0 +1,164 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.*;
|
||||
|
||||
public class VariantOverlapAnnotatorUnitTest extends BaseTest {
|
||||
private GenomeLocParser genomeLocParser;
|
||||
private IndexedFastaSequenceFile seq;
|
||||
|
||||
@BeforeClass
|
||||
public void setup() throws FileNotFoundException {
|
||||
// sequence
|
||||
seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
genomeLocParser = new GenomeLocParser(seq);
|
||||
}
|
||||
|
||||
private VariantContext makeVC(final String source, final String id, final List<String> alleles) {
|
||||
final VariantContext vc = GATKVariantContextUtils.makeFromAlleles(source, "20", 10, alleles);
|
||||
return new VariantContextBuilder(vc).id(id).make();
|
||||
}
|
||||
|
||||
private VariantOverlapAnnotator makeAnnotator(final String dbSNP, final String ... overlaps) {
|
||||
final RodBinding<VariantContext> dbSNPBinding = dbSNP == null ? null : new RodBinding<>(VariantContext.class, dbSNP);
|
||||
final Map<RodBinding<VariantContext>, String> overlapBinding = new LinkedHashMap<>();
|
||||
for ( final String overlap : overlaps ) overlapBinding.put(new RodBinding<>(VariantContext.class, overlap), overlap);
|
||||
return new VariantOverlapAnnotator(dbSNPBinding, overlapBinding, genomeLocParser);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateWithSpecialNames() {
|
||||
final List<String> names = Arrays.asList("X", "Y", "Z");
|
||||
final Map<RodBinding<VariantContext>, String> overlapBinding = new LinkedHashMap<>();
|
||||
for ( final String overlap : names ) overlapBinding.put(new RodBinding<>(VariantContext.class, overlap + "Binding"), overlap);
|
||||
final VariantOverlapAnnotator annotator = new VariantOverlapAnnotator(null, overlapBinding, genomeLocParser);
|
||||
Assert.assertEquals(annotator.getOverlapNames(), names);
|
||||
}
|
||||
|
||||
@DataProvider(name = "AnnotateRsIDData")
|
||||
public Object[][] makeAnnotateRsIDData() {
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
// this functionality can be adapted to provide input data for whatever you might want in your data
|
||||
final VariantContext callNoIDAC = makeVC("call", VCFConstants.EMPTY_ID_FIELD, Arrays.asList("A", "C"));
|
||||
final VariantContext callNoIDAT = makeVC("call", VCFConstants.EMPTY_ID_FIELD, Arrays.asList("A", "T"));
|
||||
final VariantContext callIDAC = makeVC("call", "foo", Arrays.asList("A", "C"));
|
||||
final VariantContext callExistingIDAC = makeVC("call", "rsID1", Arrays.asList("A", "C"));
|
||||
|
||||
final VariantContext dbSNP_AC = makeVC("DBSNP", "rsID1", Arrays.asList("A", "C"));
|
||||
final VariantContext dbSNP_AT = makeVC("DBSNP", "rsID2", Arrays.asList("A", "T"));
|
||||
final VariantContext dbSNP_AG = makeVC("DBSNP", "rsID3", Arrays.asList("A", "G"));
|
||||
final VariantContext dbSNP_AC_AT = makeVC("DBSNP", "rsID1;rsID2", Arrays.asList("A", "C", "T"));
|
||||
final VariantContext dbSNP_AC_AG = makeVC("DBSNP", "rsID1;rsID3", Arrays.asList("A", "C", "G"));
|
||||
|
||||
tests.add(new Object[]{callNoIDAC, Arrays.asList(dbSNP_AC), dbSNP_AC.getID(), true});
|
||||
tests.add(new Object[]{callNoIDAC, Arrays.asList(dbSNP_AT), VCFConstants.EMPTY_ID_FIELD, false});
|
||||
tests.add(new Object[]{callIDAC, Arrays.asList(dbSNP_AC), "foo" + ";" + dbSNP_AC.getID(), true});
|
||||
tests.add(new Object[]{callIDAC, Arrays.asList(dbSNP_AT), "foo", false});
|
||||
tests.add(new Object[]{callExistingIDAC, Arrays.asList(dbSNP_AC), "rsID1", true});
|
||||
tests.add(new Object[]{callExistingIDAC, Arrays.asList(dbSNP_AT), "rsID1", false});
|
||||
|
||||
final VariantContext callNoIDACT = makeVC("call", VCFConstants.EMPTY_ID_FIELD, Arrays.asList("A", "C", "T"));
|
||||
tests.add(new Object[]{callNoIDACT, Arrays.asList(dbSNP_AC), dbSNP_AC.getID(), true});
|
||||
tests.add(new Object[]{callNoIDACT, Arrays.asList(dbSNP_AT), dbSNP_AT.getID(), true});
|
||||
tests.add(new Object[]{callNoIDACT, Arrays.asList(dbSNP_AG), VCFConstants.EMPTY_ID_FIELD, false});
|
||||
tests.add(new Object[]{callNoIDACT, Arrays.asList(dbSNP_AC_AT), dbSNP_AC_AT.getID(), true});
|
||||
tests.add(new Object[]{callNoIDACT, Arrays.asList(dbSNP_AC_AG), dbSNP_AC_AG.getID(), true});
|
||||
|
||||
// multiple options
|
||||
tests.add(new Object[]{callNoIDAC, Arrays.asList(dbSNP_AC, dbSNP_AT), "rsID1", true});
|
||||
tests.add(new Object[]{callNoIDAC, Arrays.asList(dbSNP_AT, dbSNP_AC), "rsID1", true});
|
||||
tests.add(new Object[]{callNoIDAC, Arrays.asList(dbSNP_AC_AT), "rsID1;rsID2", true});
|
||||
tests.add(new Object[]{callNoIDAT, Arrays.asList(dbSNP_AC_AT), "rsID1;rsID2", true});
|
||||
tests.add(new Object[]{callNoIDAC, Arrays.asList(dbSNP_AC_AG), "rsID1;rsID3", true});
|
||||
tests.add(new Object[]{callNoIDAT, Arrays.asList(dbSNP_AC_AG), VCFConstants.EMPTY_ID_FIELD, false});
|
||||
|
||||
final VariantContext dbSNP_AC_FAIL = new VariantContextBuilder(makeVC("DBSNP", "rsID1", Arrays.asList("A", "C"))).filter("FAIL").make();
|
||||
tests.add(new Object[]{callNoIDAC, Arrays.asList(dbSNP_AC_FAIL), VCFConstants.EMPTY_ID_FIELD, false});
|
||||
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "AnnotateRsIDData")
|
||||
public void testAnnotateRsID(final VariantContext toAnnotate, final List<VariantContext> dbSNPRecords, final String expectedID, final boolean expectOverlap) throws Exception {
|
||||
final VariantOverlapAnnotator annotator = makeAnnotator("dbnsp");
|
||||
final VariantContext annotated = annotator.annotateRsID(dbSNPRecords, toAnnotate);
|
||||
Assert.assertNotNull(annotated);
|
||||
Assert.assertEquals(annotated.getID(), expectedID);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "AnnotateRsIDData")
|
||||
public void testAnnotateOverlaps(final VariantContext toAnnotate, final List<VariantContext> records, final String expectedID, final boolean expectOverlap) throws Exception {
|
||||
final String name = "binding";
|
||||
final VariantOverlapAnnotator annotator = makeAnnotator(null, name);
|
||||
final VariantContext annotated = annotator.annotateOverlap(records, name, toAnnotate);
|
||||
Assert.assertNotNull(annotated);
|
||||
Assert.assertEquals(annotated.getID(), toAnnotate.getID(), "Shouldn't modify annotation");
|
||||
Assert.assertEquals(annotated.hasAttribute(name), expectOverlap);
|
||||
if ( expectOverlap ) {
|
||||
Assert.assertEquals(annotated.getAttribute(name), true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -73,7 +73,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("19f77f557150905ef3fa4713f611a1b9"));
|
||||
Arrays.asList("14ad6eeed46e9b6f4757370267b1a1cc"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
|
@ -101,7 +101,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("bb3dbad9666ebf38d338f0c9c211a42e"));
|
||||
Arrays.asList("cd184a2a5a1932dcf3e8f0424652176b"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -111,7 +111,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("8052390ca2b6a57c3ddf379a51225d64"));
|
||||
Arrays.asList("e8d98996eb81ece8cfb52437920ae2e0"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
||||
}
|
||||
|
||||
|
|
@ -121,7 +121,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("b6b9dba97fbabaeeb458a41051983e7b"));
|
||||
Arrays.asList("23a78c16f64bffe1dea3a5587fcabdad"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
||||
}
|
||||
|
||||
|
|
@ -136,7 +136,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1,
|
||||
Arrays.asList("38730c7030271f5d0ca0b59365d57814"));
|
||||
Arrays.asList("294183823d678d3668f4fa98b4de6e06"));
|
||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -176,7 +176,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction0() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
|
||||
Arrays.asList("264325878b988acc11d8e5d9d2ba0b7f"));
|
||||
Arrays.asList("e90256acfc360fc4bf377094732a673a"));
|
||||
executeTest("test minIndelFraction 0.0", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testWithAllelesPassedIn2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("698e54aeae3130779d246b9480a4052c"));
|
||||
Arrays.asList("60115af273fde49c76d4df6c9c0f6501"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -180,9 +180,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality threshold in order to annotate mendelian violation ratio")
|
||||
public double minGenotypeQualityP = 0.0;
|
||||
|
||||
@Argument(fullName="requireStrictAlleleMatch", shortName="strict", doc="If provided only comp tracks that exactly match both reference and alternate alleles will be counted as concordant", required=false)
|
||||
protected boolean requireStrictAlleleMatch = false;
|
||||
|
||||
private VariantAnnotatorEngine engine;
|
||||
|
||||
/**
|
||||
|
|
@ -204,7 +201,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
else
|
||||
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, annotationsToExclude, this, getToolkit());
|
||||
engine.initializeExpressions(expressionsToUse);
|
||||
engine.setRequireStrictAlleleMatch(requireStrictAlleleMatch);
|
||||
|
||||
// setup the header fields
|
||||
// note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones
|
||||
|
|
|
|||
|
|
@ -34,26 +34,23 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
public class VariantAnnotatorEngine {
|
||||
|
||||
private List<InfoFieldAnnotation> requestedInfoAnnotations = Collections.emptyList();
|
||||
private List<GenotypeAnnotation> requestedGenotypeAnnotations = Collections.emptyList();
|
||||
private List<VAExpression> requestedExpressions = new ArrayList<VAExpression>();
|
||||
private List<VAExpression> requestedExpressions = new ArrayList<>();
|
||||
|
||||
private final HashMap<RodBinding<VariantContext>, String> dbAnnotations = new HashMap<RodBinding<VariantContext>, String>();
|
||||
private final AnnotatorCompatible walker;
|
||||
private final GenomeAnalysisEngine toolkit;
|
||||
|
||||
private boolean requireStrictAlleleMatch = false;
|
||||
VariantOverlapAnnotator variantOverlapAnnotator = null;
|
||||
|
||||
protected static class VAExpression {
|
||||
|
||||
|
|
@ -85,7 +82,7 @@ public class VariantAnnotatorEngine {
|
|||
requestedInfoAnnotations = AnnotationInterfaceManager.createAllInfoFieldAnnotations();
|
||||
requestedGenotypeAnnotations = AnnotationInterfaceManager.createAllGenotypeAnnotations();
|
||||
excludeAnnotations(annotationsToExclude);
|
||||
initializeDBs();
|
||||
initializeDBs(toolkit);
|
||||
}
|
||||
|
||||
// use this constructor if you want to select specific annotations (and/or interfaces)
|
||||
|
|
@ -93,14 +90,7 @@ public class VariantAnnotatorEngine {
|
|||
this.walker = walker;
|
||||
this.toolkit = toolkit;
|
||||
initializeAnnotations(annotationGroupsToUse, annotationsToUse, annotationsToExclude);
|
||||
initializeDBs();
|
||||
}
|
||||
|
||||
// experimental constructor for active region traversal
|
||||
public VariantAnnotatorEngine(GenomeAnalysisEngine toolkit) {
|
||||
this.walker = null;
|
||||
this.toolkit = toolkit;
|
||||
requestedInfoAnnotations = AnnotationInterfaceManager.createInfoFieldAnnotations(Arrays.asList("ActiveRegionBasedAnnotation"), Collections.<String>emptyList());
|
||||
initializeDBs(toolkit);
|
||||
}
|
||||
|
||||
// select specific expressions to use
|
||||
|
|
@ -138,16 +128,19 @@ public class VariantAnnotatorEngine {
|
|||
requestedGenotypeAnnotations = tempRequestedGenotypeAnnotations;
|
||||
}
|
||||
|
||||
private void initializeDBs() {
|
||||
|
||||
private void initializeDBs(final GenomeAnalysisEngine engine) {
|
||||
// check to see whether comp rods were included
|
||||
final RodBinding<VariantContext> dbsnp = walker.getDbsnpRodBinding();
|
||||
if ( dbsnp != null && dbsnp.isBound() )
|
||||
dbAnnotations.put(dbsnp, VCFConstants.DBSNP_KEY);
|
||||
RodBinding<VariantContext> dbSNPBinding = walker.getDbsnpRodBinding();
|
||||
if ( dbSNPBinding != null && ! dbSNPBinding.isBound() )
|
||||
dbSNPBinding = null;
|
||||
|
||||
final List<RodBinding<VariantContext>> comps = walker.getCompRodBindings();
|
||||
for ( RodBinding<VariantContext> rod : comps )
|
||||
dbAnnotations.put(rod, rod.getName());
|
||||
final Map<RodBinding<VariantContext>, String> overlapBindings = new LinkedHashMap<>();
|
||||
for ( final RodBinding<VariantContext> b : walker.getCompRodBindings())
|
||||
if ( b.isBound() ) overlapBindings.put(b, b.getName());
|
||||
if ( dbSNPBinding != null && ! overlapBindings.keySet().contains(VCFConstants.DBSNP_KEY) )
|
||||
overlapBindings.put(dbSNPBinding, VCFConstants.DBSNP_KEY); // add overlap detection with DBSNP by default
|
||||
|
||||
variantOverlapAnnotator = new VariantOverlapAnnotator(dbSNPBinding, overlapBindings, engine.getGenomeLocParser());
|
||||
}
|
||||
|
||||
public void invokeAnnotationInitializationMethods( Set<VCFHeaderLine> headerLines ) {
|
||||
|
|
@ -161,14 +154,13 @@ public class VariantAnnotatorEngine {
|
|||
}
|
||||
|
||||
public Set<VCFHeaderLine> getVCFAnnotationDescriptions() {
|
||||
|
||||
Set<VCFHeaderLine> descriptions = new HashSet<VCFHeaderLine>();
|
||||
|
||||
for ( InfoFieldAnnotation annotation : requestedInfoAnnotations )
|
||||
descriptions.addAll(annotation.getDescriptions());
|
||||
for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations )
|
||||
descriptions.addAll(annotation.getDescriptions());
|
||||
for ( String db : dbAnnotations.values() ) {
|
||||
for ( String db : variantOverlapAnnotator.getOverlapNames() ) {
|
||||
if ( VCFStandardHeaderLines.getInfoLine(db, false) != null )
|
||||
descriptions.add(VCFStandardHeaderLines.getInfoLine(db));
|
||||
else
|
||||
|
|
@ -178,10 +170,6 @@ public class VariantAnnotatorEngine {
|
|||
return descriptions;
|
||||
}
|
||||
|
||||
public void setRequireStrictAlleleMatch( final boolean requireStrictAlleleMatch ) {
|
||||
this.requireStrictAlleleMatch = requireStrictAlleleMatch;
|
||||
}
|
||||
|
||||
public VariantContext annotateContext(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
|
|
@ -192,13 +180,10 @@ public class VariantAnnotatorEngine {
|
|||
public VariantContext annotateContext(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
VariantContext vc,
|
||||
final VariantContext vc,
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
Map<String, Object> infoAnnotations = new LinkedHashMap<String, Object>(vc.getAttributes());
|
||||
|
||||
// annotate db occurrences
|
||||
vc = annotateDBs(tracker, ref.getLocus(), vc, infoAnnotations);
|
||||
|
||||
// annotate expressions where available
|
||||
annotateExpressions(tracker, ref.getLocus(), infoAnnotations);
|
||||
|
||||
|
|
@ -213,7 +198,10 @@ public class VariantAnnotatorEngine {
|
|||
VariantContextBuilder builder = new VariantContextBuilder(vc).attributes(infoAnnotations);
|
||||
|
||||
// annotate genotypes, creating another new VC in the process
|
||||
return builder.genotypes(annotateGenotypes(tracker, ref, stratifiedContexts, vc, perReadAlleleLikelihoodMap)).make();
|
||||
final VariantContext annotated = builder.genotypes(annotateGenotypes(tracker, ref, stratifiedContexts, vc, perReadAlleleLikelihoodMap)).make();
|
||||
|
||||
// annotate db occurrences
|
||||
return annotateDBs(tracker, annotated);
|
||||
}
|
||||
|
||||
public VariantContext annotateContext(final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap, VariantContext vc) {
|
||||
|
|
@ -241,66 +229,13 @@ public class VariantAnnotatorEngine {
|
|||
* Annotate the ID field and other DBs for the given Variant Context
|
||||
*
|
||||
* @param tracker ref meta data tracker (cannot be null)
|
||||
* @param loc location of the vc
|
||||
* @param vc variant context to annotate
|
||||
* @return non-null annotated version of vc
|
||||
*/
|
||||
@Requires({"tracker != null && loc != null && vc != null"})
|
||||
@Ensures("result != null")
|
||||
public VariantContext annotateDBs(final RefMetaDataTracker tracker, final GenomeLoc loc, VariantContext vc) {
|
||||
final Map<String, Object> newInfoAnnotations = new HashMap<String, Object>(0);
|
||||
vc = annotateDBs(tracker, loc, vc, newInfoAnnotations);
|
||||
|
||||
if ( !newInfoAnnotations.isEmpty() ) {
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(vc).attributes(newInfoAnnotations);
|
||||
vc = builder.make();
|
||||
}
|
||||
|
||||
return vc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Annotate the ID field and other DBs for the given Variant Context
|
||||
*
|
||||
* @param tracker ref meta data tracker (cannot be null)
|
||||
* @param loc location of the vc
|
||||
* @param vc variant context to annotate
|
||||
* @param infoAnnotations info annotation map to populate
|
||||
* @return non-null annotated version of vc
|
||||
*/
|
||||
@Requires({"tracker != null && loc != null && vc != null && infoAnnotations != null"})
|
||||
@Ensures("result != null")
|
||||
private VariantContext annotateDBs(final RefMetaDataTracker tracker, final GenomeLoc loc, VariantContext vc, final Map<String, Object> infoAnnotations) {
|
||||
for ( Map.Entry<RodBinding<VariantContext>, String> dbSet : dbAnnotations.entrySet() ) {
|
||||
if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) {
|
||||
final String rsID = GATKVCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), loc), vc.getType());
|
||||
|
||||
// add the ID if appropriate
|
||||
if ( rsID != null ) {
|
||||
// put the DB key into the INFO field
|
||||
infoAnnotations.put(VCFConstants.DBSNP_KEY, true);
|
||||
|
||||
if ( vc.emptyID() ) {
|
||||
vc = new VariantContextBuilder(vc).id(rsID).make();
|
||||
} else if ( walker.alwaysAppendDbsnpId() && vc.getID().indexOf(rsID) == -1 ) {
|
||||
final String newRsID = vc.getID() + VCFConstants.ID_FIELD_SEPARATOR + rsID;
|
||||
vc = new VariantContextBuilder(vc).id(newRsID).make();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
boolean overlapsComp = false;
|
||||
for ( VariantContext comp : tracker.getValues(dbSet.getKey(), loc) ) {
|
||||
if ( !comp.isFiltered() && ( !requireStrictAlleleMatch || comp.getAlleles().equals(vc.getAlleles()) ) ) {
|
||||
overlapsComp = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( overlapsComp )
|
||||
infoAnnotations.put(dbSet.getValue(), overlapsComp);
|
||||
}
|
||||
}
|
||||
|
||||
return vc;
|
||||
private VariantContext annotateDBs(final RefMetaDataTracker tracker, VariantContext vc) {
|
||||
return variantOverlapAnnotator.annotateOverlaps(tracker, variantOverlapAnnotator.annotateRsID(tracker, vc));
|
||||
}
|
||||
|
||||
private void annotateExpressions(final RefMetaDataTracker tracker, final GenomeLoc loc, final Map<String, Object> infoAnnotations) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,224 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Annotate the ID field and attribute overlap FLAGs for a VariantContext against a RefMetaDataTracker or a list
|
||||
* of VariantContexts
|
||||
*/
|
||||
public final class VariantOverlapAnnotator {
|
||||
final RodBinding<VariantContext> dbSNPBinding;
|
||||
final Map<RodBinding<VariantContext>, String> overlapBindings;
|
||||
final GenomeLocParser genomeLocParser;
|
||||
|
||||
/**
|
||||
* Create a new VariantOverlapAnnotator without overall bindings
|
||||
*
|
||||
* @see #VariantOverlapAnnotator(org.broadinstitute.sting.commandline.RodBinding, java.util.Map, org.broadinstitute.sting.utils.GenomeLocParser)
|
||||
*/
|
||||
public VariantOverlapAnnotator(RodBinding<VariantContext> dbSNPBinding, GenomeLocParser genomeLocParser) {
|
||||
this(dbSNPBinding, Collections.<RodBinding<VariantContext>, String>emptyMap(), genomeLocParser);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new VariantOverlapAnnotator
|
||||
*
|
||||
* @param dbSNPBinding the RodBinding to use for updating ID field values, or null if that behavior isn't desired
|
||||
* @param overlapBindings a map of RodBindings / name to use for overlap annotation. Each binding will be used to
|
||||
* add name => true for variants that overlap with variants found to a
|
||||
* RefMetaDataTracker at each location. Can be empty but not null
|
||||
* @param genomeLocParser the genome loc parser we'll use to create GenomeLocs for VariantContexts
|
||||
*/
|
||||
public VariantOverlapAnnotator(RodBinding<VariantContext> dbSNPBinding, Map<RodBinding<VariantContext>, String> overlapBindings, GenomeLocParser genomeLocParser) {
|
||||
if ( overlapBindings == null ) throw new IllegalArgumentException("overlapBindings cannot be null");
|
||||
if ( genomeLocParser == null ) throw new IllegalArgumentException("genomeLocParser cannot be null");
|
||||
|
||||
this.dbSNPBinding = dbSNPBinding;
|
||||
this.overlapBindings = overlapBindings;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update rsID in vcToAnnotate with rsIDs from dbSNPBinding fetched from tracker
|
||||
* @see #annotateOverlap(java.util.List, String, org.broadinstitute.variant.variantcontext.VariantContext)
|
||||
*
|
||||
* @param tracker non-null tracker, which we will use to update the rsID of vcToAnnotate
|
||||
* for VariantContexts bound to dbSNPBinding that start at vcToAnnotate
|
||||
* @param vcToAnnotate a variant context to annotate
|
||||
* @return a VariantContext (may be == to vcToAnnotate) with updated rsID value
|
||||
*/
|
||||
public VariantContext annotateRsID(final RefMetaDataTracker tracker, final VariantContext vcToAnnotate) {
|
||||
if ( dbSNPBinding != null ) {
|
||||
final GenomeLoc loc = getLoc(vcToAnnotate);
|
||||
return annotateRsID(tracker.getValues(dbSNPBinding, loc), vcToAnnotate);
|
||||
} else {
|
||||
return vcToAnnotate;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Update rsID of vcToAnnotate with rsID match found in vcsAtLoc, if one exists
|
||||
*
|
||||
* @param vcsAtLoc a list of variant contexts starting at this location to use as sources for rsID values
|
||||
* @param vcToAnnotate a variant context to annotate
|
||||
* @return a VariantContext (may be == to vcToAnnotate) with updated rsID value
|
||||
*/
|
||||
public VariantContext annotateRsID(final List<VariantContext> vcsAtLoc, final VariantContext vcToAnnotate ) {
|
||||
final String rsID = getRsID(vcsAtLoc, vcToAnnotate);
|
||||
|
||||
// add the ID if appropriate
|
||||
if ( rsID != null ) {
|
||||
final VariantContextBuilder vcb = new VariantContextBuilder(vcToAnnotate);
|
||||
|
||||
if ( ! vcToAnnotate.hasID() ) {
|
||||
return vcb.id(rsID).make();
|
||||
} else if ( ! vcToAnnotate.getID().contains(rsID) ) {
|
||||
return vcb.id(vcToAnnotate.getID() + VCFConstants.ID_FIELD_SEPARATOR + rsID).make();
|
||||
} // falling through to return VC lower down
|
||||
}
|
||||
|
||||
// nothing to do, just return vc
|
||||
return vcToAnnotate;
|
||||
}
|
||||
|
||||
private GenomeLoc getLoc(final VariantContext vc) {
|
||||
return genomeLocParser.createGenomeLoc(vc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add overlap attributes to vcToAnnotate against all overlapBindings in tracker
|
||||
*
|
||||
* @see #annotateOverlap(java.util.List, , String, org.broadinstitute.variant.variantcontext.VariantContext)
|
||||
* for more information
|
||||
*
|
||||
* @param tracker non-null tracker, which we will use to update the rsID of vcToAnnotate
|
||||
* for VariantContexts bound to dbSNPBinding that start at vcToAnnotate
|
||||
* @param vcToAnnotate a variant context to annotate
|
||||
* @return a VariantContext (may be == to vcToAnnotate) with updated overlaps update fields value
|
||||
*/
|
||||
public VariantContext annotateOverlaps(final RefMetaDataTracker tracker, VariantContext vcToAnnotate) {
|
||||
if ( overlapBindings.isEmpty() ) return vcToAnnotate;
|
||||
|
||||
VariantContext annotated = vcToAnnotate;
|
||||
final GenomeLoc loc = getLoc(vcToAnnotate);
|
||||
for ( Map.Entry<RodBinding<VariantContext>, String> overlapBinding : overlapBindings.entrySet() ) {
|
||||
annotated = annotateOverlap(tracker.getValues(overlapBinding.getKey(), loc), overlapBinding.getValue(), vcToAnnotate);
|
||||
}
|
||||
|
||||
return annotated;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add overlaps flag attributes to vcToAnnotate binding overlapTestVCs.getSource() => true if
|
||||
* an overlapping variant context can be found in overlapTestVCs with vcToAnnotate
|
||||
*
|
||||
* Overlaps here means that the reference alleles are the same and at least one alt
|
||||
* allele in vcToAnnotate is equals to one of the alt alleles in overlapTestVCs
|
||||
*
|
||||
* @param overlapTestVCs a non-null list of potential overlaps that start at vcToAnnotate
|
||||
* @param attributeKey the key to set to true in the attribute map for vcToAnnotate if it overlaps
|
||||
* @param vcToAnnotate a non-null VariantContext to annotate
|
||||
* @return
|
||||
*/
|
||||
public VariantContext annotateOverlap(final List<VariantContext> overlapTestVCs, final String attributeKey, VariantContext vcToAnnotate) {
|
||||
if ( overlapBindings.isEmpty() ) return vcToAnnotate;
|
||||
|
||||
final boolean overlaps = overlaps(overlapTestVCs, vcToAnnotate);
|
||||
if ( overlaps ) {
|
||||
return new VariantContextBuilder(vcToAnnotate).attribute(attributeKey, true).make();
|
||||
} else {
|
||||
return vcToAnnotate;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the ID field of the first VariantContext in rsIDSourceVCs that has the same reference allele
|
||||
* as vcToAnnotate and all of the alternative alleles in vcToAnnotate.
|
||||
*
|
||||
* Doesn't require vcToAnnotate to be a complete match, so
|
||||
*
|
||||
* A/C/G in VC in rsIDSourceVCs
|
||||
*
|
||||
* would match the a VC with A/C but not A/T. Also we don't require all alleles to match
|
||||
* so we would also match A/C/T to A/C/G.
|
||||
*
|
||||
* Will only match rsIDSourceVCs that aren't failing filters.
|
||||
*
|
||||
* @param rsIDSourceVCs a non-null list of potential overlaps that start at vcToAnnotate
|
||||
* @param vcToAnnotate a non-null VariantContext to annotate
|
||||
* @return a String to use for the rsID from rsIDSourceVCs if one matches, or null if none matches
|
||||
*/
|
||||
private String getRsID(final List<VariantContext> rsIDSourceVCs, final VariantContext vcToAnnotate) {
|
||||
if ( rsIDSourceVCs == null ) throw new IllegalArgumentException("rsIDSourceVCs cannot be null");
|
||||
if ( vcToAnnotate == null ) throw new IllegalArgumentException("vcToAnnotate cannot be null");
|
||||
|
||||
for ( VariantContext vcComp : rsIDSourceVCs ) {
|
||||
if ( vcComp.isFiltered() ) continue; // don't process any failed VCs
|
||||
|
||||
if ( ! vcComp.getChr().equals(vcToAnnotate.getChr()) || vcComp.getStart() != vcToAnnotate.getStart() )
|
||||
throw new IllegalArgumentException("source rsID VariantContext " + vcComp + " doesn't start at the same position as vcToAnnotate " + vcToAnnotate);
|
||||
|
||||
if ( vcToAnnotate.getReference().equals(vcComp.getReference()) ) {
|
||||
for ( final Allele allele : vcToAnnotate.getAlternateAlleles() ) {
|
||||
if ( vcComp.getAlternateAlleles().contains(allele) )
|
||||
return vcComp.getID();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does vcToAnnotate overlap with any of the records in potentialOverlaps?
|
||||
*
|
||||
* @param potentialOverlaps a non-null list of potential overlaps that start at vcToAnnotate
|
||||
* @param vcToAnnotate a non-null VariantContext to annotate
|
||||
* @return true if vcToAnnotate overlaps (position and all alt alleles) with some variant in potentialOverlaps
|
||||
*/
|
||||
private boolean overlaps(final List<VariantContext> potentialOverlaps, final VariantContext vcToAnnotate) {
|
||||
return getRsID(potentialOverlaps, vcToAnnotate) != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the collection of the RodBinding names for those being used for overlap detection
|
||||
* @return a non-null collection of Strings
|
||||
*/
|
||||
public Collection<String> getOverlapNames() {
|
||||
return overlapBindings.values();
|
||||
}
|
||||
}
|
||||
|
|
@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
|||
import org.broadinstitute.sting.gatk.walkers.Reference;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantOverlapAnnotator;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
|
|
@ -112,24 +113,21 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
|
||||
// for dealing with indels in hapmap
|
||||
CloseableIterator<GATKFeature> dbsnpIterator = null;
|
||||
VariantOverlapAnnotator variantOverlapAnnotator = null;
|
||||
|
||||
public void initialize() {
|
||||
vcfwriter = VariantContextWriterFactory.sortOnTheFly(baseWriter, 40, false);
|
||||
variantOverlapAnnotator = new VariantOverlapAnnotator(dbsnp.dbsnp, getToolkit().getGenomeLocParser());
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) )
|
||||
return 0;
|
||||
|
||||
String rsID = dbsnp == null ? null : GATKVCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbsnp.dbsnp, context.getLocation()), VariantContext.Type.SNP);
|
||||
|
||||
Collection<VariantContext> contexts = getVariantContexts(tracker, ref);
|
||||
|
||||
for ( VariantContext vc : contexts ) {
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
if ( rsID != null && vc.emptyID() ) {
|
||||
builder.id(rsID).make();
|
||||
}
|
||||
|
||||
// set the appropriate sample name if necessary
|
||||
if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName()) ) {
|
||||
|
|
@ -137,7 +135,8 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
builder.genotypes(g);
|
||||
}
|
||||
|
||||
writeRecord(builder.make(), tracker, ref.getLocus());
|
||||
final VariantContext withID = variantOverlapAnnotator.annotateRsID(tracker, builder.make());
|
||||
writeRecord(withID, tracker, ref.getLocus());
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
|
|
|||
|
|
@ -149,21 +149,6 @@ public class GATKVCFUtils {
|
|||
return VCFUtils.withUpdatedContigs(header, engine.getArguments().referenceFile, engine.getMasterSequenceDictionary());
|
||||
}
|
||||
|
||||
public static String rsIDOfFirstRealVariant(List<VariantContext> VCs, VariantContext.Type type) {
|
||||
if ( VCs == null )
|
||||
return null;
|
||||
|
||||
String rsID = null;
|
||||
for ( VariantContext vc : VCs ) {
|
||||
if ( vc.getType() == type ) {
|
||||
rsID = vc.getID();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return rsID;
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility class to read all of the VC records from a file
|
||||
*
|
||||
|
|
|
|||
Loading…
Reference in New Issue