From 83a7012d69d95d5e1e9b60f768663c49e1ff6f36 Mon Sep 17 00:00:00 2001 From: Ron Levine Date: Wed, 9 Sep 2015 10:37:39 -0400 Subject: [PATCH] Mask snps with --snpmask --- ...astaAlternateReferenceIntegrationTest.java | 28 +++++++++++-- .../fasta/FastaAlternateReferenceMaker.java | 42 +++++++++++++++---- 2 files changed, 59 insertions(+), 11 deletions(-) diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaAlternateReferenceIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaAlternateReferenceIntegrationTest.java index 9916196b4..de63ed550 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaAlternateReferenceIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaAlternateReferenceIntegrationTest.java @@ -115,13 +115,33 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest { } @Test - public void testIndelsAndSnpMask() { + public void testSnpMask() { + + WalkerTestSpec spec = new WalkerTestSpec( + "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + b36dbSNP129 + " --snpmask:vcf " + b36dbSNP129 + " -L 1:10,271,272-10,271,302 -o %s", + 1, + Arrays.asList("01a0dffc62fc940c97e29276457f1ff0")); + executeTest("test snp mask", spec); + } + + @Test + public void testSnpMaskPriority() { + + WalkerTestSpec spec = new WalkerTestSpec( + "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + b36dbSNP129 + " --snpmaskPriority --snpmask:vcf " + b36dbSNP129 + " -L 1:10,271,272-10,271,302 -o %s", + 1, + Arrays.asList("0950493e5038f7d588034ce4dd21292a")); + executeTest("test snp mask priority", spec); + } + + @Test + public void testIndelsAndSnpMask() { WalkerTestSpec spec = new WalkerTestSpec( "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s", - 1, - Arrays.asList("375efb2feb017f01339f680fdffac6cd")); - executeTest("test indels", spec); + 1, + Arrays.asList("375efb2feb017f01339f680fdffac6cd")); + executeTest("test indels and snp mask", spec); } @Test diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaAlternateReferenceMaker.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaAlternateReferenceMaker.java index f9c2eb9d5..5be2e8933 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaAlternateReferenceMaker.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaAlternateReferenceMaker.java @@ -102,12 +102,17 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker { protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); /** - * Snps from this file are used as a mask (inserting N's in the sequence) when constructing the alternate reference - * (regardless of whether they overlap a variant site). + * SNPs from this file are used as a mask (inserting N's in the sequence) when constructing the alternate reference */ @Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=false) protected RodBinding snpmask; + /** + * Gives priority to a SNP mask over an input VCF for a site. Only has an effect if the --snpmask argument is used. + */ + @Argument(fullName="snpmaskPriority", shortName = "snpmaskPriority", doc="SNP mask priority", required=false) + protected Boolean snpmaskPriority = false; + /** * This option will generate an error if the specified sample does not exist in the VCF. * Non-diploid (or non-called) genotypes are ignored. @@ -138,6 +143,13 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker { final String refBase = String.valueOf((char)ref.getBase()); + // If we have a mask at this site, use it + if ( snpmaskPriority ){ + final Pair mask = maskSnp(tracker, context); + if ( mask != null ) + return mask; + } + // Check to see if we have a called snp for ( final VariantContext vc : tracker.getValues(variantCollection.variants, ref.getLocus()) ) { if ( vc.isFiltered() ) @@ -155,17 +167,33 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker { } } - // if we don't have a called site, and we have a mask at this site, mask it - for ( final VariantContext vc : tracker.getValues(snpmask) ) { - if ( vc.isSNP()) { - return new Pair<>(context.getLocation(), "N"); - } + if ( !snpmaskPriority ){ + final Pair mask = maskSnp(tracker, context); + if ( mask != null ) + return mask; } // if we got here then we're just ref return new Pair<>(context.getLocation(), refBase); } + /** + * Mask a SNP (inserting N's in the sequence) + * + * @param tracker the Reference Metadata available at a particular site in the genome + * @param context the locus context data + * @return mask at the locus or null if no SNP at that locus + */ + private Pair maskSnp(final RefMetaDataTracker tracker, final AlignmentContext context){ + for (final VariantContext vc : tracker.getValues(snpmask)) { + if (vc.isSNP()) { + return new Pair<>(context.getLocation(), "N"); + } + } + + return null; + } + /** * Returns the IUPAC encoding for the given genotype or the reference base if not possible *