From ea8fd506bfb92b473da3351dd407249b629598ac Mon Sep 17 00:00:00 2001 From: chartl Date: Fri, 9 Jul 2010 04:03:19 +0000 Subject: [PATCH] Update to PickSequenomProbes: Option to ignore mask sites within X bp of a variant (very useful for indels where dbSNP entries near the indel are almost always false SNP calls). Also fixed an integration test where the variant site itself, being in dbSNP, was represented as [N/C] rather than [A/C]. Added integration test for 1bp no-mask window. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3753 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/sequenom/PickSequenomProbes.java | 26 ++++++++++++++++++- .../PickSequenomProbesIntegrationTest.java | 16 +++++++++++- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java b/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java index 78f9c0540..0254c3bac 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java @@ -29,6 +29,7 @@ import net.sf.samtools.util.CloseableIterator; import org.broad.tribble.dbsnp.DbSNPCodec; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder; @@ -63,6 +64,8 @@ public class PickSequenomProbes extends RodWalker { boolean omitWindow = false; @Argument(required = false, fullName="usePlinkRODNamingConvention", shortName="nameConvention",doc="Use the naming convention defined in PLINKROD") boolean useNamingConvention = false; + @Argument(required = false, fullName="noMaskWindow",shortName="nmw",doc="Do not mask bases within X bases of an event when designing probes") + int noMaskWindow = 0; private byte [] maskFlags = new byte[401]; @@ -139,7 +142,7 @@ public class PickSequenomProbes extends RodWalker { byte[] context_bases = ref.getBases(); for (int i = 0; i < 401; i++) { - if ( maskFlags[i] == 1 ) { + if ( maskFlags[i] == 1 && ( i < 200 - noMaskWindow || i > 200 + getNoMaskWindowRightEnd(vc,noMaskWindow) ) ) { context_bases[i] = 'N'; } true_offset += 1; @@ -188,5 +191,26 @@ public class PickSequenomProbes extends RodWalker { return ""; } + private int getNoMaskWindowRightEnd(VariantContext vc, int window) { + if ( window == 0 ) { + return 0; + } + + if ( vc.isInsertion() ) { + return window-1; + } + + int max = 0; + for (Allele a : vc.getAlleles() ) { + if ( vc.isInsertion() ) { + logger.debug("Getting length of allele "+a.toString()+" it is "+a.getBases().length+" (ref allele is "+vc.getReference().toString()+")"); + } + if ( a.getBases().length > max ) { + max = a.getBases().length; + } + } + return max+window-1; + } + public void onTraversalDone(String sum) {} } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java index 348f27880..9a3ff8d3d 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java @@ -23,7 +23,21 @@ public class PickSequenomProbesIntegrationTest extends WalkerTest { + "-project_id 1kgp3_s4_lf -T PickSequenomProbes -L " + validationDataLocation + "pickSeqIntegrationTest.interval_list -B input,VCF4,"+testVCF+" -o %s"; WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("cb1f57e8bcaec4b599be075b6d5288a1")); + Arrays.asList("8b5b715b9918a0b70f4868614f197b72")); + executeTest("Test probes", spec); + } + + // 03c8cef968ae2d0ef5f51ac82b24f891 + + @Test + public void testProbesUsingDbSNPMaskWithNMW1() { + String testVCF = validationDataLocation + "pickSeqIntegrationTest.vcf"; + String testArgs = "-snp_mask " + GATKDataLocation + "/dbsnp_130_b36.rod -R " + + oneKGLocation + "reference/human_b36_both.fasta -omitWindow -nameConvention " + + "-nmw 1 -project_id 1kgp3_s4_lf -T PickSequenomProbes -L " + validationDataLocation + + "pickSeqIntegrationTest.interval_list -B input,VCF4,"+testVCF+" -o %s"; + WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, + Arrays.asList("03c8cef968ae2d0ef5f51ac82b24f891")); executeTest("Test probes", spec); } }