Mask snps with --snpmask

This commit is contained in:
Ron Levine 2015-09-09 10:37:39 -04:00
parent b0dea2ccca
commit 83a7012d69
2 changed files with 59 additions and 11 deletions

View File

@ -115,13 +115,33 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
} }
@Test @Test
public void testIndelsAndSnpMask() { public void testSnpMask() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + b36dbSNP129 + " --snpmask:vcf " + b36dbSNP129 + " -L 1:10,271,272-10,271,302 -o %s",
1,
Arrays.asList("01a0dffc62fc940c97e29276457f1ff0"));
executeTest("test snp mask", spec);
}
@Test
public void testSnpMaskPriority() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + b36dbSNP129 + " --snpmaskPriority --snpmask:vcf " + b36dbSNP129 + " -L 1:10,271,272-10,271,302 -o %s",
1,
Arrays.asList("0950493e5038f7d588034ce4dd21292a"));
executeTest("test snp mask priority", spec);
}
@Test
public void testIndelsAndSnpMask() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s", "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s",
1, 1,
Arrays.asList("375efb2feb017f01339f680fdffac6cd")); Arrays.asList("375efb2feb017f01339f680fdffac6cd"));
executeTest("test indels", spec); executeTest("test indels and snp mask", spec);
} }
@Test @Test

View File

@ -102,12 +102,17 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker {
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
/** /**
* Snps from this file are used as a mask (inserting N's in the sequence) when constructing the alternate reference * SNPs from this file are used as a mask (inserting N's in the sequence) when constructing the alternate reference
* (regardless of whether they overlap a variant site).
*/ */
@Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=false) @Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=false)
protected RodBinding<VariantContext> snpmask; protected RodBinding<VariantContext> snpmask;
/**
* Gives priority to a SNP mask over an input VCF for a site. Only has an effect if the --snpmask argument is used.
*/
@Argument(fullName="snpmaskPriority", shortName = "snpmaskPriority", doc="SNP mask priority", required=false)
protected Boolean snpmaskPriority = false;
/** /**
* This option will generate an error if the specified sample does not exist in the VCF. * This option will generate an error if the specified sample does not exist in the VCF.
* Non-diploid (or non-called) genotypes are ignored. * Non-diploid (or non-called) genotypes are ignored.
@ -138,6 +143,13 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker {
final String refBase = String.valueOf((char)ref.getBase()); final String refBase = String.valueOf((char)ref.getBase());
// If we have a mask at this site, use it
if ( snpmaskPriority ){
final Pair<GenomeLoc, String> mask = maskSnp(tracker, context);
if ( mask != null )
return mask;
}
// Check to see if we have a called snp // Check to see if we have a called snp
for ( final VariantContext vc : tracker.getValues(variantCollection.variants, ref.getLocus()) ) { for ( final VariantContext vc : tracker.getValues(variantCollection.variants, ref.getLocus()) ) {
if ( vc.isFiltered() ) if ( vc.isFiltered() )
@ -155,17 +167,33 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker {
} }
} }
// if we don't have a called site, and we have a mask at this site, mask it if ( !snpmaskPriority ){
for ( final VariantContext vc : tracker.getValues(snpmask) ) { final Pair<GenomeLoc, String> mask = maskSnp(tracker, context);
if ( vc.isSNP()) { if ( mask != null )
return new Pair<>(context.getLocation(), "N"); return mask;
}
} }
// if we got here then we're just ref // if we got here then we're just ref
return new Pair<>(context.getLocation(), refBase); return new Pair<>(context.getLocation(), refBase);
} }
/**
* Mask a SNP (inserting N's in the sequence)
*
* @param tracker the Reference Metadata available at a particular site in the genome
* @param context the locus context data
* @return mask at the locus or null if no SNP at that locus
*/
private Pair<GenomeLoc, String> maskSnp(final RefMetaDataTracker tracker, final AlignmentContext context){
for (final VariantContext vc : tracker.getValues(snpmask)) {
if (vc.isSNP()) {
return new Pair<>(context.getLocation(), "N");
}
}
return null;
}
/** /**
* Returns the IUPAC encoding for the given genotype or the reference base if not possible * Returns the IUPAC encoding for the given genotype or the reference base if not possible
* *