Added option to mask out SNP sites with "N"s in the new reference.

This is useful when producing Sequenom input files for validating indels...


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1414 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-08-12 15:17:45 +00:00
parent 43f63b7530
commit 7f1159b6a9
1 changed file with 9 additions and 1 deletion

View File

@@ -5,15 +5,20 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.util.Iterator;
// create a fasta sequence file from a reference, intervals, and rod(s) of variants
// if there are multiple variants at a site, we take the first one seen
@WalkerName("FastaAlternateReferenceMaker")
@Requires(value={DataSource.REFERENCE})
public class FastaAlternateReferenceWalker extends RefWalker<Pair<GenomeLoc, String>, Pair<GenomeLoc, String>> {
@Argument(fullName="maskSNPs", shortName="mask", doc="print 'N' at SNP sites instead of the alternate allele", required=false)
private Boolean MASK_SNPS = false;
private StringBuffer sb = new StringBuffer();
int deletionBasesRemaining = 0;
@@ -38,7 +43,10 @@ public class FastaAlternateReferenceWalker extends RefWalker<Pair<GenomeLoc, Str
} else if ( variant.isInsertion() ) {
return new Pair<GenomeLoc, String>(context.getLocation(), String.valueOf(ref.getBase()).concat(variant.getAltBasesFWD()));
} else if ( variant.isSNP() ) {
return new Pair<GenomeLoc, String>(context.getLocation(), variant.getAltBasesFWD());
if ( MASK_SNPS )
return new Pair<GenomeLoc, String>(context.getLocation(), "N");
else
return new Pair<GenomeLoc, String>(context.getLocation(), variant.getAltBasesFWD());
}
}