Mask snps with --snpmask
This commit is contained in:
parent
b0dea2ccca
commit
83a7012d69
|
|
@ -115,13 +115,33 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testIndelsAndSnpMask() {
|
public void testSnpMask() {
|
||||||
|
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + b36dbSNP129 + " --snpmask:vcf " + b36dbSNP129 + " -L 1:10,271,272-10,271,302 -o %s",
|
||||||
|
1,
|
||||||
|
Arrays.asList("01a0dffc62fc940c97e29276457f1ff0"));
|
||||||
|
executeTest("test snp mask", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSnpMaskPriority() {
|
||||||
|
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + b36dbSNP129 + " --snpmaskPriority --snpmask:vcf " + b36dbSNP129 + " -L 1:10,271,272-10,271,302 -o %s",
|
||||||
|
1,
|
||||||
|
Arrays.asList("0950493e5038f7d588034ce4dd21292a"));
|
||||||
|
executeTest("test snp mask priority", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIndelsAndSnpMask() {
|
||||||
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s",
|
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("375efb2feb017f01339f680fdffac6cd"));
|
Arrays.asList("375efb2feb017f01339f680fdffac6cd"));
|
||||||
executeTest("test indels", spec);
|
executeTest("test indels and snp mask", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
||||||
|
|
@ -102,12 +102,17 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker {
|
||||||
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Snps from this file are used as a mask (inserting N's in the sequence) when constructing the alternate reference
|
* SNPs from this file are used as a mask (inserting N's in the sequence) when constructing the alternate reference
|
||||||
* (regardless of whether they overlap a variant site).
|
|
||||||
*/
|
*/
|
||||||
@Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=false)
|
@Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=false)
|
||||||
protected RodBinding<VariantContext> snpmask;
|
protected RodBinding<VariantContext> snpmask;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gives priority to a SNP mask over an input VCF for a site. Only has an effect if the --snpmask argument is used.
|
||||||
|
*/
|
||||||
|
@Argument(fullName="snpmaskPriority", shortName = "snpmaskPriority", doc="SNP mask priority", required=false)
|
||||||
|
protected Boolean snpmaskPriority = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This option will generate an error if the specified sample does not exist in the VCF.
|
* This option will generate an error if the specified sample does not exist in the VCF.
|
||||||
* Non-diploid (or non-called) genotypes are ignored.
|
* Non-diploid (or non-called) genotypes are ignored.
|
||||||
|
|
@ -138,6 +143,13 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker {
|
||||||
|
|
||||||
final String refBase = String.valueOf((char)ref.getBase());
|
final String refBase = String.valueOf((char)ref.getBase());
|
||||||
|
|
||||||
|
// If we have a mask at this site, use it
|
||||||
|
if ( snpmaskPriority ){
|
||||||
|
final Pair<GenomeLoc, String> mask = maskSnp(tracker, context);
|
||||||
|
if ( mask != null )
|
||||||
|
return mask;
|
||||||
|
}
|
||||||
|
|
||||||
// Check to see if we have a called snp
|
// Check to see if we have a called snp
|
||||||
for ( final VariantContext vc : tracker.getValues(variantCollection.variants, ref.getLocus()) ) {
|
for ( final VariantContext vc : tracker.getValues(variantCollection.variants, ref.getLocus()) ) {
|
||||||
if ( vc.isFiltered() )
|
if ( vc.isFiltered() )
|
||||||
|
|
@ -155,17 +167,33 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// if we don't have a called site, and we have a mask at this site, mask it
|
if ( !snpmaskPriority ){
|
||||||
for ( final VariantContext vc : tracker.getValues(snpmask) ) {
|
final Pair<GenomeLoc, String> mask = maskSnp(tracker, context);
|
||||||
if ( vc.isSNP()) {
|
if ( mask != null )
|
||||||
return new Pair<>(context.getLocation(), "N");
|
return mask;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// if we got here then we're just ref
|
// if we got here then we're just ref
|
||||||
return new Pair<>(context.getLocation(), refBase);
|
return new Pair<>(context.getLocation(), refBase);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mask a SNP (inserting N's in the sequence)
|
||||||
|
*
|
||||||
|
* @param tracker the Reference Metadata available at a particular site in the genome
|
||||||
|
* @param context the locus context data
|
||||||
|
* @return mask at the locus or null if no SNP at that locus
|
||||||
|
*/
|
||||||
|
private Pair<GenomeLoc, String> maskSnp(final RefMetaDataTracker tracker, final AlignmentContext context){
|
||||||
|
for (final VariantContext vc : tracker.getValues(snpmask)) {
|
||||||
|
if (vc.isSNP()) {
|
||||||
|
return new Pair<>(context.getLocation(), "N");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the IUPAC encoding for the given genotype or the reference base if not possible
|
* Returns the IUPAC encoding for the given genotype or the reference base if not possible
|
||||||
*
|
*
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue