Merge both versions of the Sequenom assay design maker: use Jared's base code and add in indels. [Jared, this still emits the same output for SNPs as your original version)
Remove all sequenom stuff from the FastaAlternateReferenceMaker so it can just concentrate on making alternate references... git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1831 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
49af5269e5
commit
0c95d6906f
|
|
@ -70,7 +70,7 @@ public class SequenomROD extends TabularROD implements Variation {
|
|||
*/
|
||||
@Override
|
||||
public char getAlternativeBaseForSNP() {
|
||||
return 'N';
|
||||
return getAltBasesFWD().charAt(0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -80,7 +80,7 @@ public class SequenomROD extends TabularROD implements Variation {
|
|||
*/
|
||||
@Override
|
||||
public char getReferenceForSNP() {
|
||||
return getAltBasesFWD().charAt(0);
|
||||
return 'N';
|
||||
}
|
||||
|
||||
public boolean isBiallelic() { return true; }
|
||||
|
|
@ -127,4 +127,4 @@ public class SequenomROD extends TabularROD implements Variation {
|
|||
sb.append(getLocation().getContig() + "\t" + getLocation().getStart() + "\t" + getFWDAlleles().get(0));
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,8 +16,6 @@ import java.util.Iterator;
|
|||
@Requires(value={DataSource.REFERENCE})
|
||||
public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
|
||||
|
||||
@Argument(fullName="outputSequenomFormat", shortName="sequenom", doc="output results in sequenom format (overrides 'maskSNPs' argument)", required=false)
|
||||
private Boolean SEQUENOM = false;
|
||||
@Argument(fullName="outputIndelPositions", shortName="indelPositions", doc="output the positions of the indels in the new reference", required=false)
|
||||
String indelsFile = null;
|
||||
|
||||
|
|
@ -41,7 +39,7 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
|
|||
|
||||
if (deletionBasesRemaining > 0) {
|
||||
deletionBasesRemaining--;
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), (SEQUENOM ? (deletionBasesRemaining == 0 ? refBase.concat("/-]") : refBase) : ""));
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), "");
|
||||
}
|
||||
|
||||
Iterator<ReferenceOrderedDatum> rods = rodData.getAllRods().iterator();
|
||||
|
|
@ -57,16 +55,16 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
|
|||
if (indelsWriter != null)
|
||||
indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlternateBases().length()));
|
||||
// delete the next n bases, not this one
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), (SEQUENOM ? refBase.concat("[") : refBase));
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
|
||||
} else if (!rod.getName().startsWith("snpmask") && variant.isInsertion()) {
|
||||
basesSeen++;
|
||||
if (indelsWriter != null)
|
||||
indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlternateBases().length()));
|
||||
basesSeen += variant.getAlternateBases().length();
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), (SEQUENOM ? refBase.concat("[-/" + variant.getAlternateBases() + "]") : refBase.concat(variant.getAlternateBases())));
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(variant.getAlternateBases()));
|
||||
} else if (variant.isSNP()) {
|
||||
basesSeen++;
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), (rod.getName().startsWith("snpmask") ? "N" : (SEQUENOM ? "[" + refBase + "/" + variant.getAlternativeBaseForSNP() + "]" : String.valueOf(variant.getAlternativeBaseForSNP()))));
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), (rod.getName().startsWith("snpmask") ? "N" : String.valueOf(variant.getAlternativeBaseForSNP())));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,93 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.fasta;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
@WalkerName("PickSequenomProbes")
|
||||
@Requires(value={DataSource.REFERENCE})
|
||||
@Reference(window=@Window(start=-200,stop=200))
|
||||
public class PickSequenomProbes extends RefWalker<String, String> {
|
||||
@Argument(required=false, shortName="snp_mask", doc="positions to be masked with N's")
|
||||
protected String SNP_MASK = null;
|
||||
|
||||
private Object[] mask_array = null;
|
||||
|
||||
public void initialize() {
|
||||
if ( SNP_MASK != null ) {
|
||||
logger.info("Loading SNP mask... ");
|
||||
mask_array = GenomeLocParser.parseIntervals(Arrays.asList(SNP_MASK)).toArray();
|
||||
}
|
||||
}
|
||||
|
||||
private boolean in_mask(GenomeLoc loc) {
|
||||
return mask_array == null ? false : (Arrays.binarySearch(mask_array, loc) >= 0);
|
||||
}
|
||||
|
||||
public String map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) {
|
||||
|
||||
logger.debug("Probing " + ref.getLocus() + " " + ref.getWindow());
|
||||
|
||||
String refBase = String.valueOf(ref.getBase());
|
||||
|
||||
Iterator<ReferenceOrderedDatum> rods = rodData.getAllRods().iterator();
|
||||
Variation variant = null;
|
||||
while (rods.hasNext()) {
|
||||
ReferenceOrderedDatum rod = rods.next();
|
||||
|
||||
// if we have multiple variants at a locus, just take the first one we see
|
||||
if ( rod instanceof Variation ) {
|
||||
variant = (Variation)rod;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( variant == null )
|
||||
return "";
|
||||
|
||||
String contig = context.getLocation().getContig();
|
||||
long offset = context.getLocation().getStart();
|
||||
|
||||
char[] context_bases = ref.getBases();
|
||||
long true_offset = offset - 200;
|
||||
for (long i = 0; i < 401; i++) {
|
||||
GenomeLoc loc = GenomeLocParser.parseGenomeLoc(contig + ":" + true_offset + "-" + true_offset);
|
||||
if ( in_mask(loc) )
|
||||
context_bases[(int)i] = 'N';
|
||||
true_offset += 1;
|
||||
}
|
||||
String leading_bases = new String(Arrays.copyOfRange(context_bases, 0, 200));
|
||||
String trailing_bases = new String(Arrays.copyOfRange(context_bases, 201, 401));
|
||||
|
||||
String assay_sequence;
|
||||
if ( variant.isSNP() )
|
||||
assay_sequence = leading_bases + "[" + refBase + "/" + variant.getAlternativeBaseForSNP() + "]" + trailing_bases;
|
||||
else if ( variant.isInsertion() )
|
||||
assay_sequence = leading_bases + refBase + "[-/" + variant.getAlternateBases() + "]" + trailing_bases;
|
||||
else if ( variant.isDeletion() )
|
||||
assay_sequence = leading_bases + refBase + "[" + variant.getAlternateBases() + "/-]" + trailing_bases.substring(variant.getAlternateBases().length());
|
||||
else
|
||||
return "";
|
||||
|
||||
String assay_id = new String(context.getLocation().toString() + "_" + ref.getWindow().toString()).replace(':', '_');
|
||||
return assay_id + "\t" + assay_sequence + "\n";
|
||||
}
|
||||
|
||||
public String reduceInit() {
|
||||
return "";
|
||||
}
|
||||
|
||||
public String reduce(String data, String sum) {
|
||||
out.print(data);
|
||||
return "";
|
||||
}
|
||||
|
||||
public void onTraversalDone(String sum) {}
|
||||
}
|
||||
|
|
@ -1,108 +0,0 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
@WalkerName("PickSequenomProbes")
|
||||
@Requires(value={DataSource.REFERENCE})
|
||||
@Reference(window=@Window(start=-200,stop=200))
|
||||
public class PickSequenomProbes extends RefWalker<String, String>
|
||||
{
|
||||
@Argument(required=false, shortName="snp_mask", doc="positions to be masked with N's") public String SNP_MASK = null;
|
||||
|
||||
ArrayList<GenomeLoc> mask = null;
|
||||
Object[] mask_array = null;
|
||||
public void initialize()
|
||||
{
|
||||
System.out.printf("Loading SNP mask... ");
|
||||
if (SNP_MASK != null)
|
||||
{
|
||||
mask = new ArrayList<GenomeLoc>();
|
||||
mask.addAll(GenomeLocParser.intervalFileToList(SNP_MASK));
|
||||
|
||||
Object[] temp_array = mask.toArray();
|
||||
mask_array = new GenomeLoc[temp_array.length];
|
||||
for (int i = 0; i < temp_array.length; i++) { mask_array[i] = (GenomeLoc)temp_array[i]; }
|
||||
Arrays.sort(mask_array);
|
||||
}
|
||||
System.out.printf("Done.\n");
|
||||
}
|
||||
|
||||
private boolean in_mask(GenomeLoc loc)
|
||||
{
|
||||
return (Arrays.binarySearch(mask_array, loc) >= 0);
|
||||
}
|
||||
|
||||
public String map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context)
|
||||
{
|
||||
String refBase = String.valueOf(ref.getBase());
|
||||
|
||||
System.out.printf("Probing " + ref.getLocus() + " " + ref.getWindow() + "\n");
|
||||
|
||||
Iterator<ReferenceOrderedDatum> rods = rodData.getAllRods().iterator();
|
||||
|
||||
Variation snp = null;
|
||||
while (rods.hasNext())
|
||||
{
|
||||
ReferenceOrderedDatum rod = rods.next();
|
||||
if (!(rod instanceof Variation))
|
||||
continue;
|
||||
|
||||
Variation variant = (Variation) rod;
|
||||
if (variant.isSNP())
|
||||
{
|
||||
snp = variant;
|
||||
}
|
||||
}
|
||||
|
||||
String contig = context.getLocation().getContig();
|
||||
long offset = context.getLocation().getStart();
|
||||
|
||||
char[] context_bases = ref.getBases();
|
||||
long true_offset = offset - 200;
|
||||
for (long i = 0; i < 401; i++)
|
||||
{
|
||||
GenomeLoc loc = GenomeLocParser.parseGenomeLoc(context.getLocation().getContig() + ":" + true_offset + "-" + true_offset);
|
||||
if (in_mask(loc)) { context_bases[(int)i] = 'N'; }
|
||||
true_offset += 1;
|
||||
}
|
||||
char[] leading_bases = Arrays.copyOfRange(context_bases, 0, 200);
|
||||
char[] trailing_bases = Arrays.copyOfRange(context_bases, 201, 401);
|
||||
|
||||
if (snp != null)
|
||||
{
|
||||
String assay_sequence = new String(leading_bases) + new String("[" + refBase + "/" + snp.getAlternativeBaseForSNP() + "]") + new String(trailing_bases);
|
||||
String assay_id = new String(context.getLocation().toString() + "_" + ref.getWindow().toString()).replace(':', '_');
|
||||
return assay_id + "\t" + assay_sequence + "\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
public String reduceInit()
|
||||
{
|
||||
return "";
|
||||
}
|
||||
|
||||
public String reduce(String data, String sum)
|
||||
{
|
||||
out.print(data);
|
||||
return "";
|
||||
}
|
||||
|
||||
public void onTraversalDone(String sum)
|
||||
{
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue