Mask snps with --snpmask
This commit is contained in:
parent
b0dea2ccca
commit
83a7012d69
|
|
@ -115,13 +115,33 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testIndelsAndSnpMask() {
|
||||
public void testSnpMask() {
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + b36dbSNP129 + " --snpmask:vcf " + b36dbSNP129 + " -L 1:10,271,272-10,271,302 -o %s",
|
||||
1,
|
||||
Arrays.asList("01a0dffc62fc940c97e29276457f1ff0"));
|
||||
executeTest("test snp mask", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSnpMaskPriority() {
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + b36dbSNP129 + " --snpmaskPriority --snpmask:vcf " + b36dbSNP129 + " -L 1:10,271,272-10,271,302 -o %s",
|
||||
1,
|
||||
Arrays.asList("0950493e5038f7d588034ce4dd21292a"));
|
||||
executeTest("test snp mask priority", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIndelsAndSnpMask() {
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s",
|
||||
1,
|
||||
Arrays.asList("375efb2feb017f01339f680fdffac6cd"));
|
||||
executeTest("test indels", spec);
|
||||
1,
|
||||
Arrays.asList("375efb2feb017f01339f680fdffac6cd"));
|
||||
executeTest("test indels and snp mask", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -102,12 +102,17 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker {
|
|||
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||
|
||||
/**
|
||||
* Snps from this file are used as a mask (inserting N's in the sequence) when constructing the alternate reference
|
||||
* (regardless of whether they overlap a variant site).
|
||||
* SNPs from this file are used as a mask (inserting N's in the sequence) when constructing the alternate reference
|
||||
*/
|
||||
@Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=false)
|
||||
protected RodBinding<VariantContext> snpmask;
|
||||
|
||||
/**
|
||||
* Gives priority to a SNP mask over an input VCF for a site. Only has an effect if the --snpmask argument is used.
|
||||
*/
|
||||
@Argument(fullName="snpmaskPriority", shortName = "snpmaskPriority", doc="SNP mask priority", required=false)
|
||||
protected Boolean snpmaskPriority = false;
|
||||
|
||||
/**
|
||||
* This option will generate an error if the specified sample does not exist in the VCF.
|
||||
* Non-diploid (or non-called) genotypes are ignored.
|
||||
|
|
@ -138,6 +143,13 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker {
|
|||
|
||||
final String refBase = String.valueOf((char)ref.getBase());
|
||||
|
||||
// If we have a mask at this site, use it
|
||||
if ( snpmaskPriority ){
|
||||
final Pair<GenomeLoc, String> mask = maskSnp(tracker, context);
|
||||
if ( mask != null )
|
||||
return mask;
|
||||
}
|
||||
|
||||
// Check to see if we have a called snp
|
||||
for ( final VariantContext vc : tracker.getValues(variantCollection.variants, ref.getLocus()) ) {
|
||||
if ( vc.isFiltered() )
|
||||
|
|
@ -155,17 +167,33 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker {
|
|||
}
|
||||
}
|
||||
|
||||
// if we don't have a called site, and we have a mask at this site, mask it
|
||||
for ( final VariantContext vc : tracker.getValues(snpmask) ) {
|
||||
if ( vc.isSNP()) {
|
||||
return new Pair<>(context.getLocation(), "N");
|
||||
}
|
||||
if ( !snpmaskPriority ){
|
||||
final Pair<GenomeLoc, String> mask = maskSnp(tracker, context);
|
||||
if ( mask != null )
|
||||
return mask;
|
||||
}
|
||||
|
||||
// if we got here then we're just ref
|
||||
return new Pair<>(context.getLocation(), refBase);
|
||||
}
|
||||
|
||||
/**
|
||||
* Mask a SNP (inserting N's in the sequence)
|
||||
*
|
||||
* @param tracker the Reference Metadata available at a particular site in the genome
|
||||
* @param context the locus context data
|
||||
* @return mask at the locus or null if no SNP at that locus
|
||||
*/
|
||||
private Pair<GenomeLoc, String> maskSnp(final RefMetaDataTracker tracker, final AlignmentContext context){
|
||||
for (final VariantContext vc : tracker.getValues(snpmask)) {
|
||||
if (vc.isSNP()) {
|
||||
return new Pair<>(context.getLocation(), "N");
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the IUPAC encoding for the given genotype or the reference base if not possible
|
||||
*
|
||||
|
|
|
|||
Loading…
Reference in New Issue