Update to PickSequenomProbes: Option to ignore mask sites within X bp of a variant (very useful for indels where dbSNP entries near the indel are almost always false SNP calls). Also fixed an integration test where the variant site itself, being in dbSNP, was represented as [N/C] rather than [A/C]. Added integration test for 1bp no-mask window.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3753 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
179067e3f4
commit
ea8fd506bf
|
|
@ -29,6 +29,7 @@ import net.sf.samtools.util.CloseableIterator;
|
|||
import org.broad.tribble.dbsnp.DbSNPCodec;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
|
||||
|
|
@ -63,6 +64,8 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
|||
boolean omitWindow = false;
|
||||
@Argument(required = false, fullName="usePlinkRODNamingConvention", shortName="nameConvention",doc="Use the naming convention defined in PLINKROD")
|
||||
boolean useNamingConvention = false;
|
||||
@Argument(required = false, fullName="noMaskWindow",shortName="nmw",doc="Do not mask bases within X bases of an event when designing probes")
|
||||
int noMaskWindow = 0;
|
||||
|
||||
private byte [] maskFlags = new byte[401];
|
||||
|
||||
|
|
@ -139,7 +142,7 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
|||
|
||||
byte[] context_bases = ref.getBases();
|
||||
for (int i = 0; i < 401; i++) {
|
||||
if ( maskFlags[i] == 1 ) {
|
||||
if ( maskFlags[i] == 1 && ( i < 200 - noMaskWindow || i > 200 + getNoMaskWindowRightEnd(vc,noMaskWindow) ) ) {
|
||||
context_bases[i] = 'N';
|
||||
}
|
||||
true_offset += 1;
|
||||
|
|
@ -188,5 +191,26 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
|||
return "";
|
||||
}
|
||||
|
||||
private int getNoMaskWindowRightEnd(VariantContext vc, int window) {
|
||||
if ( window == 0 ) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ( vc.isInsertion() ) {
|
||||
return window-1;
|
||||
}
|
||||
|
||||
int max = 0;
|
||||
for (Allele a : vc.getAlleles() ) {
|
||||
if ( vc.isInsertion() ) {
|
||||
logger.debug("Getting length of allele "+a.toString()+" it is "+a.getBases().length+" (ref allele is "+vc.getReference().toString()+")");
|
||||
}
|
||||
if ( a.getBases().length > max ) {
|
||||
max = a.getBases().length;
|
||||
}
|
||||
}
|
||||
return max+window-1;
|
||||
}
|
||||
|
||||
public void onTraversalDone(String sum) {}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,7 +23,21 @@ public class PickSequenomProbesIntegrationTest extends WalkerTest {
|
|||
+ "-project_id 1kgp3_s4_lf -T PickSequenomProbes -L " + validationDataLocation +
|
||||
"pickSeqIntegrationTest.interval_list -B input,VCF4,"+testVCF+" -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
|
||||
Arrays.asList("cb1f57e8bcaec4b599be075b6d5288a1"));
|
||||
Arrays.asList("8b5b715b9918a0b70f4868614f197b72"));
|
||||
executeTest("Test probes", spec);
|
||||
}
|
||||
|
||||
// Expected MD5 of the probe output when run with -nmw 1: 03c8cef968ae2d0ef5f51ac82b24f891
|
||||
|
||||
@Test
|
||||
public void testProbesUsingDbSNPMaskWithNMW1() {
|
||||
String testVCF = validationDataLocation + "pickSeqIntegrationTest.vcf";
|
||||
String testArgs = "-snp_mask " + GATKDataLocation + "/dbsnp_130_b36.rod -R "
|
||||
+ oneKGLocation + "reference/human_b36_both.fasta -omitWindow -nameConvention "
|
||||
+ "-nmw 1 -project_id 1kgp3_s4_lf -T PickSequenomProbes -L " + validationDataLocation +
|
||||
"pickSeqIntegrationTest.interval_list -B input,VCF4,"+testVCF+" -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
|
||||
Arrays.asList("03c8cef968ae2d0ef5f51ac82b24f891"));
|
||||
executeTest("Test probes", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue