Do not slurp the whole set of snp mask sites into memory (gets pretty heavy on full dbSNP!); instantiate a privare ROD iterator instead and drag it across the sites we are designing probes for.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2694 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
asivache 2010-01-26 22:39:46 +00:00
parent 47440bc029
commit 1f64c5d41a
1 changed files with 33 additions and 11 deletions

View File

@ -23,19 +23,20 @@ public class PickSequenomProbes extends RefWalker<String, String> {
@Argument(required=false, shortName="snp_mask", doc="positions to be masked with N's")
protected String SNP_MASK = null;
private Object[] mask_array = null;
private byte [] maskFlags = new byte[401];
private SeekableRODIterator<TabularROD> snpMaskIterator=null;
public void initialize() {
if ( SNP_MASK != null ) {
logger.info("Loading SNP mask... ");
mask_array = GenomeLocParser.parseIntervals(Arrays.asList(SNP_MASK), GenomeAnalysisEngine.instance.getArguments().intervalMerging).toArray();
ReferenceOrderedData<TabularROD> snp_mask = new ReferenceOrderedData<TabularROD>("snp_mask",
new java.io.File(SNP_MASK), TabularROD.class);
snpMaskIterator = snp_mask.iterator();
}
}
private boolean in_mask(GenomeLoc loc) {
return mask_array == null ? false : (Arrays.binarySearch(mask_array, loc) >= 0);
}
public String map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) {
logger.debug("Probing " + ref.getLocus() + " " + ref.getWindow());
@ -57,16 +58,37 @@ public class PickSequenomProbes extends RefWalker<String, String> {
if ( variant == null )
return "";
String contig = context.getLocation().getContig();
long offset = context.getLocation().getStart();
long true_offset = offset - 200;
// we have variant; let's load all the snps falling into the current window and prepare the mask array:
if ( snpMaskIterator != null ) {
RODRecordList<TabularROD> snpList = snpMaskIterator.seekForward(GenomeLocParser.createGenomeLoc(contig,offset-200,offset+200));
if ( snpList != null && snpList.size() != 0 ) {
Iterator<TabularROD> snpsInWindow = snpList.iterator();
int i = 0;
while ( snpsInWindow.hasNext() ) {
GenomeLoc snp = snpsInWindow.next().getLocation();
int offsetInWindow = (int)(snp.getStart() - true_offset);
for ( ; i < offsetInWindow; i++ ) maskFlags[i] = 0;
maskFlags[i++] = 1;
}
for ( ; i < 401; i++ ) maskFlags[i] = 0;
} else {
// we got no snps, don't forget to clear the mask
for ( int i = 0 ; i < 401; i++ ) maskFlags[i] = 0;
}
}
char[] context_bases = ref.getBases();
long true_offset = offset - 200;
for (long i = 0; i < 401; i++) {
for (int i = 0; i < 401; i++) {
GenomeLoc loc = GenomeLocParser.parseGenomeLoc(contig + ":" + true_offset + "-" + true_offset);
if ( in_mask(loc) )
context_bases[(int)i] = 'N';
true_offset += 1;
if ( maskFlags[i]==1 ) {
context_bases[i] = 'N';
}
true_offset += 1;
}
String leading_bases = new String(Arrays.copyOfRange(context_bases, 0, 200));
String trailing_bases = new String(Arrays.copyOfRange(context_bases, 201, 401));