From 8e3c3324fa03ba9d9932327f675ad77bb23e446a Mon Sep 17 00:00:00 2001
From: ebanks
Date: Mon, 31 Aug 2009 04:32:32 +0000
Subject: [PATCH] Added filter for SNPs cleaned out by the realigner. It uses
 the realigner output for filtering; in addition, dbsnp indels partially work;
 IndelGenotyper calls don't yet work.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1489 348d0f76-0448-11de-a6fe-93d51630548a
---
 .../sting/gatk/refdata/CleanedOutSNPROD.java  | 29 +++++++++
 .../gatk/refdata/ReferenceOrderedData.java    |  1 +
 .../walkers/variants/VECIndelArtifact.java    | 74 ++++++++++++++++++++++++
 3 files changed, 104 insertions(+)
 create mode 100755 java/src/org/broadinstitute/sting/gatk/refdata/CleanedOutSNPROD.java
 create mode 100755 java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECIndelArtifact.java

diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/CleanedOutSNPROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/CleanedOutSNPROD.java
new file mode 100755
index 000000000..608b93748
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/CleanedOutSNPROD.java
@@ -0,0 +1,29 @@
+package org.broadinstitute.sting.gatk.refdata;
+
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+
+/**
+ * Tabular ROD for the realigner output used to filter SNPs: the field keyed "0"
+ * is parsed as the locus and the field keyed "1" is compared against SAME_SNP.
+ */
+public class CleanedOutSNPROD extends TabularROD {
+
+    /** Value of the field keyed "1" for which isRealSNP() returns true. */
+    private static final String REAL_SNP_STRING = "SAME_SNP";
+    // NOTE(review): unused in this class; presumably the value of field "1" for
+    // SNPs that were cleaned out -- confirm against the realigner output format.
+    private static final String FALSE_SNP_STRING = "NOT_SNP";
+
+    public CleanedOutSNPROD(String name) {
+        super(name);
+    }
+
+    /** @return the locus parsed from the field keyed "0" of this record */
+    public GenomeLoc getLocation() {
+        return GenomeLocParser.parseGenomeLoc(this.get("0"));
+    }
+
+    /** @return true only if the field keyed "1" equals SAME_SNP */
+    public boolean isRealSNP() { return this.get("1").equals(REAL_SNP_STRING); }
+}
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java
index 3d3082c83..e198bf0c2 100644
--- a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java
@@ -72,6 +72,7 @@ public class ReferenceOrderedData implements
         addModule("Table", TabularROD.class);
         addModule("PooledEM", PooledEMSNPROD.class);
         addModule("1KGSNPs", KGenomesSNPROD.class);
+        addModule("CleanedOutSNP", CleanedOutSNPROD.class);
         addModule("SangerSNP", SangerSNPROD.class);
         addModule("SimpleIndel", SimpleIndelROD.class);
         addModule("HapMapGenotype", HapMapGenotypeROD.class);
diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECIndelArtifact.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECIndelArtifact.java
new file mode 100755
index 000000000..239176a17
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECIndelArtifact.java
@@ -0,0 +1,74 @@
+package org.broadinstitute.sting.playground.gatk.walkers.variants;
+
+import org.broadinstitute.sting.gatk.contexts.VariantContext;
+import org.broadinstitute.sting.gatk.refdata.*;
+
+/**
+ * Exclusion criterion that fails a variant when one of three tracks flags its
+ * locus: the "cleaned" track (record whose isRealSNP() is false), the "indels"
+ * track (any record present), or the "dbSNP" track (record that is an indel).
+ */
+public class VECIndelArtifact implements VariantExclusionCriterion {
+    /** Source reported while no track has excluded the current variant. */
+    private static final String NO_SOURCE = "N/A";
+
+    private boolean exclude;
+    private String source = NO_SOURCE;
+
+    /** No configurable options yet, so {@code arguments} is ignored. */
+    public void initialize(String arguments) {
+    }
+
+    /**
+     * Evaluates the current context of the window and stores the verdict together
+     * with the first track that triggered it (order: cleaned, indels, dbSNP).
+     */
+    public void compute(VariantContextWindow contextWindow) {
+        VariantContext context = contextWindow.getContext();
+        RefMetaDataTracker tracker = context.getTracker();
+
+        // Reset both fields up front: if this instance is computed again, a passing
+        // variant must not report the source left by an earlier excluded one.
+        exclude = false;
+        source = NO_SOURCE;
+
+        CleanedOutSNPROD cleanedSNP = (CleanedOutSNPROD)tracker.lookup("cleaned", null);
+        if ( cleanedSNP != null && !cleanedSNP.isRealSNP() ) {
+            exclude = true;
+            source = "Cleaner";
+            return;
+        }
+
+        AllelicVariant indelCall = (AllelicVariant)tracker.lookup("indels", null);
+        // TODO - fix indel call capability to span full indel
+        if ( indelCall != null ) {
+            exclude = true;
+            source = "IndelCall";
+            return;
+        }
+
+        rodDbSNP dbsnp = (rodDbSNP)tracker.lookup("dbSNP", null);
+        // TODO - fix dbsnp capability to span full indel
+        if ( dbsnp != null && dbsnp.isIndel() ) {
+            exclude = true;
+            source = "dbsnp";
+        }
+    }
+
+    /** @return 0.0 if the last computed variant was excluded, 1.0 otherwise */
+    public double inclusionProbability() {
+        return exclude ? 0.0 : 1.0;
+    }
+
+    /** @return the tab-separated column names matching getStudyInfo() */
+    public String getStudyHeader() {
+        return "IndelArtifact\tSource";
+    }
+
+    /** @return "fail" or "pass", a tab, then the source recorded by compute() */
+    public String getStudyInfo() {
+        return (exclude ? "fail" : "pass") + "\t" + source;
+    }
+
+    public boolean useZeroQualityReads() { return false; }
+}
\ No newline at end of file