Can design assays when multiple (distinct) events occur at the same locus (one assay per event)
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4110 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
dc9e4098b2
commit
23dbaa68e6
|
|
@ -19,8 +19,8 @@
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.sequenom;
|
package org.broadinstitute.sting.gatk.walkers.sequenom;
|
||||||
|
|
@ -45,9 +45,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.*;
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -72,6 +70,8 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
||||||
boolean useNamingConvention = false;
|
boolean useNamingConvention = false;
|
||||||
@Argument(required = false, fullName="noMaskWindow",shortName="nmw",doc="Do not mask bases within X bases of an event when designing probes")
|
@Argument(required = false, fullName="noMaskWindow",shortName="nmw",doc="Do not mask bases within X bases of an event when designing probes")
|
||||||
int noMaskWindow = 0;
|
int noMaskWindow = 0;
|
||||||
|
@Argument(required = false, shortName="counter", doc = "If specified, unique count id (ordinal number) is added to the end of each assay name")
|
||||||
|
boolean addCounter = false;
|
||||||
|
|
||||||
private byte [] maskFlags = new byte[401];
|
private byte [] maskFlags = new byte[401];
|
||||||
|
|
||||||
|
|
@ -79,6 +79,10 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
||||||
|
|
||||||
private GenomeLoc positionOfLastVariant = null;
|
private GenomeLoc positionOfLastVariant = null;
|
||||||
|
|
||||||
|
private int cnt = 0;
|
||||||
|
|
||||||
|
private List<GenomeLoc> processedVariantsInScope = new LinkedList<GenomeLoc>();
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
if ( SNP_MASK != null ) {
|
if ( SNP_MASK != null ) {
|
||||||
logger.info("Loading SNP mask... ");
|
logger.info("Loading SNP mask... ");
|
||||||
|
|
@ -97,6 +101,7 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public String map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public String map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
if ( tracker == null )
|
if ( tracker == null )
|
||||||
return "";
|
return "";
|
||||||
|
|
@ -104,87 +109,127 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
||||||
logger.debug("Probing " + ref.getLocus() + " " + ref.getWindow());
|
logger.debug("Probing " + ref.getLocus() + " " + ref.getWindow());
|
||||||
|
|
||||||
Collection<VariantContext> VCs = tracker.getAllVariantContexts(ref);
|
Collection<VariantContext> VCs = tracker.getAllVariantContexts(ref);
|
||||||
if ( VCs.size() == 0 )
|
if ( VCs.size() == 0 ) {
|
||||||
|
logger.debug(" Context empty");
|
||||||
return "";
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
// if there are multiple variants at this position, just take the first one
|
if ( VCs.size() > 1 ) {
|
||||||
VariantContext vc = VCs.iterator().next();
|
logger.debug(" "+VCs.size()+ " variants at the locus");
|
||||||
|
}
|
||||||
|
|
||||||
// we can only deal with biallelic sites for now
|
// little optimization: since we may have a few events at the current site on the reference,
|
||||||
if ( !vc.isBiallelic() )
|
// we are going to make sure we compute the mask and ref bases only once for each location and only if we need to
|
||||||
return "";
|
boolean haveMaskForWindow = false;
|
||||||
|
boolean haveBasesForWindow = false;
|
||||||
|
String leading_bases = null;
|
||||||
|
String trailing_bases = null;
|
||||||
|
|
||||||
// we don't want to see the same multi-base deletion multiple times
|
StringBuilder assaysForLocus = new StringBuilder(""); // all assays for current locus will be collected here (will be multi-line if multiple events are assayed)
|
||||||
if ( positionOfLastVariant != null &&
|
|
||||||
positionOfLastVariant.size() > 1 &&
|
|
||||||
positionOfLastVariant.equals(VariantContextUtils.getLocation(vc)) )
|
|
||||||
return "";
|
|
||||||
positionOfLastVariant = VariantContextUtils.getLocation(vc);
|
|
||||||
|
|
||||||
String contig = context.getLocation().getContig();
|
// get all variant contexts!!!!
|
||||||
long offset = context.getLocation().getStart();
|
for ( VariantContext vc : VCs ) {
|
||||||
long true_offset = offset - 200;
|
|
||||||
|
|
||||||
// we have variant; let's load all the snps falling into the current window and prepare the mask array:
|
// we can only deal with biallelic sites for now
|
||||||
if ( snpMaskIterator != null ) {
|
if ( !vc.isBiallelic() ) {
|
||||||
// clear the mask
|
logger.debug(" Not biallelic; skipped");
|
||||||
for ( int i = 0 ; i < 401; i++ )
|
continue;
|
||||||
maskFlags[i] = 0;
|
}
|
||||||
|
|
||||||
|
// we don't want to see the same multi-base event (deletion, DNP etc) multiple times.
|
||||||
|
// All the vcs we are currently seeing are clearly on the same contig as the current reference
|
||||||
|
// position (or we would not see them at all!). All we need to check is if the vc starts at the
|
||||||
|
// current reference position (i.e. it is the first time we see it) or not (i.e. we saw it already).
|
||||||
|
if ( ref.getLocus().getStart() != vc.getStart() )
|
||||||
|
continue;
|
||||||
|
|
||||||
RODRecordList snpList = snpMaskIterator.seekForward(GenomeLocParser.createGenomeLoc(contig,offset-200,offset+200));
|
if ( ! haveMaskForWindow ) {
|
||||||
if ( snpList != null && snpList.size() != 0 ) {
|
String contig = context.getLocation().getContig();
|
||||||
Iterator<GATKFeature> snpsInWindow = snpList.iterator();
|
long offset = context.getLocation().getStart();
|
||||||
while ( snpsInWindow.hasNext() ) {
|
long true_offset = offset - 200;
|
||||||
GenomeLoc snp = snpsInWindow.next().getLocation();
|
|
||||||
// we don't really want to mask out multi-base indels
|
// we have variant; let's load all the snps falling into the current window and prepare the mask array.
|
||||||
if ( snp.size() > 1 )
|
// we need to do it only once per window, regardless of how many vcs we may have at this location!
|
||||||
continue;
|
if ( snpMaskIterator != null ) {
|
||||||
int offsetInWindow = (int)(snp.getStart() - true_offset);
|
// clear the mask
|
||||||
maskFlags[offsetInWindow] = 1;
|
for ( int i = 0 ; i < 401; i++ )
|
||||||
|
maskFlags[i] = 0;
|
||||||
|
|
||||||
|
RODRecordList snpList = snpMaskIterator.seekForward(GenomeLocParser.createGenomeLoc(contig,offset-200,offset+200));
|
||||||
|
if ( snpList != null && snpList.size() != 0 ) {
|
||||||
|
Iterator<GATKFeature> snpsInWindow = snpList.iterator();
|
||||||
|
int i = 0;
|
||||||
|
while ( snpsInWindow.hasNext() ) {
|
||||||
|
GenomeLoc snp = snpsInWindow.next().getLocation();
|
||||||
|
// we don't really want to mask out multi-base indels
|
||||||
|
if ( snp.size() > 1 )
|
||||||
|
continue;
|
||||||
|
logger.debug(" SNP at "+snp.getStart());
|
||||||
|
int offsetInWindow = (int)(snp.getStart() - true_offset);
|
||||||
|
maskFlags[offsetInWindow] = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
haveMaskForWindow = true; // if we use masking, we will probably need to recompute the window...
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
byte[] context_bases = ref.getBases();
|
if ( ! haveBasesForWindow ) {
|
||||||
for (int i = 0; i < 401; i++) {
|
byte[] context_bases = ref.getBases();
|
||||||
if ( maskFlags[i] == 1 && ( i < 200 - noMaskWindow || i > 200 + getNoMaskWindowRightEnd(vc,noMaskWindow) ) ) {
|
for (int i = 0; i < 401; i++) {
|
||||||
context_bases[i] = 'N';
|
if ( maskFlags[i] == 1 && ( i < 200 - noMaskWindow || i > 200 + getNoMaskWindowRightEnd(vc,noMaskWindow) ) ) {
|
||||||
|
context_bases[i] = 'N';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
leading_bases = new String(Arrays.copyOfRange(context_bases, 0, 200));
|
||||||
|
trailing_bases = new String(Arrays.copyOfRange(context_bases, 201, 401));
|
||||||
|
// masked bases are not going to change for the current window, unless we use windowed masking;
|
||||||
|
// in the latter case the bases (N's) will depend on the event we are currently looking at,
|
||||||
|
// so we better recompute..
|
||||||
|
if ( noMaskWindow == 0 ) haveBasesForWindow = true;
|
||||||
}
|
}
|
||||||
true_offset += 1;
|
|
||||||
}
|
|
||||||
String leading_bases = new String(Arrays.copyOfRange(context_bases, 0, 200));
|
|
||||||
String trailing_bases = new String(Arrays.copyOfRange(context_bases, 201, 401));
|
|
||||||
|
|
||||||
String assay_sequence;
|
|
||||||
if ( vc.isSNP() )
|
|
||||||
assay_sequence = leading_bases + "[" + (char)ref.getBase() + "/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases;
|
|
||||||
else if ( vc.isInsertion() )
|
|
||||||
assay_sequence = leading_bases + "[-/" + vc.getAlternateAllele(0).toString() + "]" + (char)ref.getBase() + trailing_bases;
|
|
||||||
else if ( vc.isDeletion() )
|
|
||||||
assay_sequence = leading_bases + "[" + vc.getReference().getBaseString() + "/-]" + trailing_bases.substring(vc.getReference().length()-1);
|
|
||||||
else
|
|
||||||
return "";
|
|
||||||
|
|
||||||
StringBuilder assay_id = new StringBuilder();
|
// below, build single assay line for the current VC:
|
||||||
if ( project_id != null ) {
|
|
||||||
assay_id.append(project_id);
|
|
||||||
assay_id.append('|');
|
|
||||||
}
|
|
||||||
if ( useNamingConvention ) {
|
|
||||||
assay_id.append('c');
|
|
||||||
assay_id.append(context.getLocation().toString().replace(":","_p"));
|
|
||||||
} else {
|
|
||||||
assay_id.append(context.getLocation().toString().replace(':','_'));
|
|
||||||
}
|
|
||||||
if ( vc.isInsertion() ) assay_id.append("_gI");
|
|
||||||
else if ( vc.isDeletion()) assay_id.append("_gD");
|
|
||||||
|
|
||||||
if ( ! omitWindow ) {
|
String assay_sequence;
|
||||||
assay_id.append("_");
|
if ( vc.isSNP() )
|
||||||
assay_id.append(ref.getWindow().toString().replace(':', '_'));
|
assay_sequence = leading_bases + "[" + (char)ref.getBase() + "/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases;
|
||||||
|
else if ( vc.getType() == VariantContext.Type.MNP )
|
||||||
|
assay_sequence = leading_bases + "[" + new String(vc.getReference().getBases()) + "/" + new String(vc.getAlternateAllele(0).getBases())+"]"+trailing_bases.substring(vc.getReference().length()-1);
|
||||||
|
else if ( vc.isInsertion() )
|
||||||
|
assay_sequence = leading_bases + "[-/" + vc.getAlternateAllele(0).toString() + "]" + (char)ref.getBase() + trailing_bases;
|
||||||
|
else if ( vc.isDeletion() )
|
||||||
|
assay_sequence = leading_bases + "[" + new String(vc.getReference().getBases()) + "/-]" + trailing_bases.substring(vc.getReference().length()-1);
|
||||||
|
else
|
||||||
|
continue;
|
||||||
|
|
||||||
|
StringBuilder assay_id = new StringBuilder();
|
||||||
|
if ( project_id != null ) {
|
||||||
|
assay_id.append(project_id);
|
||||||
|
assay_id.append('|');
|
||||||
|
}
|
||||||
|
if ( useNamingConvention ) {
|
||||||
|
assay_id.append('c');
|
||||||
|
assay_id.append(context.getLocation().toString().replace(":","_p"));
|
||||||
|
} else {
|
||||||
|
assay_id.append(context.getLocation().toString().replace(':','_'));
|
||||||
|
}
|
||||||
|
if ( vc.isInsertion() ) assay_id.append("_gI");
|
||||||
|
else if ( vc.isDeletion()) assay_id.append("_gD");
|
||||||
|
|
||||||
|
if ( ! omitWindow ) {
|
||||||
|
assay_id.append("_");
|
||||||
|
assay_id.append(ref.getWindow().toString().replace(':', '_'));
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( addCounter ) assay_id.append("_"+(++cnt));
|
||||||
|
|
||||||
|
assaysForLocus.append(assay_id);
|
||||||
|
assaysForLocus.append('\t');
|
||||||
|
assaysForLocus.append(assay_sequence);
|
||||||
|
assaysForLocus.append('\n');
|
||||||
}
|
}
|
||||||
|
return assaysForLocus.toString();
|
||||||
return assay_id.toString() + "\t" + assay_sequence + "\n";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public String reduceInit() {
|
public String reduceInit() {
|
||||||
|
|
@ -201,9 +246,9 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( vc.isInsertion() ) {
|
if ( vc.isInsertion() ) {
|
||||||
return window-1;
|
return window-1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int max = 0;
|
int max = 0;
|
||||||
for (Allele a : vc.getAlleles() ) {
|
for (Allele a : vc.getAlleles() ) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue