Merge both versions of the Sequenom assay design maker: use Jared's base code and add in indels. [Jared, this still emits the same output for SNPs as your original version)

Remove all sequenom stuff from the FastaAlternateReferenceMaker so it can just concentrate on making alternate references...


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1831 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-10-14 17:11:45 +00:00
parent 49af5269e5
commit 0c95d6906f
4 changed files with 100 additions and 117 deletions

View File

@ -70,7 +70,7 @@ public class SequenomROD extends TabularROD implements Variation {
*/
@Override
public char getAlternativeBaseForSNP() {
return 'N';
return getAltBasesFWD().charAt(0);
}
/**
@ -80,7 +80,7 @@ public class SequenomROD extends TabularROD implements Variation {
*/
@Override
public char getReferenceForSNP() {
return getAltBasesFWD().charAt(0);
return 'N';
}
public boolean isBiallelic() { return true; }
@ -127,4 +127,4 @@ public class SequenomROD extends TabularROD implements Variation {
sb.append(getLocation().getContig() + "\t" + getLocation().getStart() + "\t" + getFWDAlleles().get(0));
return sb.toString();
}
}
}

View File

@ -16,8 +16,6 @@ import java.util.Iterator;
@Requires(value={DataSource.REFERENCE})
public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
@Argument(fullName="outputSequenomFormat", shortName="sequenom", doc="output results in sequenom format (overrides 'maskSNPs' argument)", required=false)
private Boolean SEQUENOM = false;
@Argument(fullName="outputIndelPositions", shortName="indelPositions", doc="output the positions of the indels in the new reference", required=false)
String indelsFile = null;
@ -41,7 +39,7 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
if (deletionBasesRemaining > 0) {
deletionBasesRemaining--;
return new Pair<GenomeLoc, String>(context.getLocation(), (SEQUENOM ? (deletionBasesRemaining == 0 ? refBase.concat("/-]") : refBase) : ""));
return new Pair<GenomeLoc, String>(context.getLocation(), "");
}
Iterator<ReferenceOrderedDatum> rods = rodData.getAllRods().iterator();
@ -57,16 +55,16 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
if (indelsWriter != null)
indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlternateBases().length()));
// delete the next n bases, not this one
return new Pair<GenomeLoc, String>(context.getLocation(), (SEQUENOM ? refBase.concat("[") : refBase));
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
} else if (!rod.getName().startsWith("snpmask") && variant.isInsertion()) {
basesSeen++;
if (indelsWriter != null)
indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlternateBases().length()));
basesSeen += variant.getAlternateBases().length();
return new Pair<GenomeLoc, String>(context.getLocation(), (SEQUENOM ? refBase.concat("[-/" + variant.getAlternateBases() + "]") : refBase.concat(variant.getAlternateBases())));
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(variant.getAlternateBases()));
} else if (variant.isSNP()) {
basesSeen++;
return new Pair<GenomeLoc, String>(context.getLocation(), (rod.getName().startsWith("snpmask") ? "N" : (SEQUENOM ? "[" + refBase + "/" + variant.getAlternativeBaseForSNP() + "]" : String.valueOf(variant.getAlternativeBaseForSNP()))));
return new Pair<GenomeLoc, String>(context.getLocation(), (rod.getName().startsWith("snpmask") ? "N" : String.valueOf(variant.getAlternativeBaseForSNP())));
}
}

View File

@ -0,0 +1,93 @@
package org.broadinstitute.sting.gatk.walkers.fasta;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.*;
@WalkerName("PickSequenomProbes")
@Requires(value={DataSource.REFERENCE})
@Reference(window=@Window(start=-200,stop=200))
public class PickSequenomProbes extends RefWalker<String, String> {
@Argument(required=false, shortName="snp_mask", doc="positions to be masked with N's")
protected String SNP_MASK = null;
private Object[] mask_array = null;
public void initialize() {
if ( SNP_MASK != null ) {
logger.info("Loading SNP mask... ");
mask_array = GenomeLocParser.parseIntervals(Arrays.asList(SNP_MASK)).toArray();
}
}
private boolean in_mask(GenomeLoc loc) {
return mask_array == null ? false : (Arrays.binarySearch(mask_array, loc) >= 0);
}
public String map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) {
logger.debug("Probing " + ref.getLocus() + " " + ref.getWindow());
String refBase = String.valueOf(ref.getBase());
Iterator<ReferenceOrderedDatum> rods = rodData.getAllRods().iterator();
Variation variant = null;
while (rods.hasNext()) {
ReferenceOrderedDatum rod = rods.next();
// if we have multiple variants at a locus, just take the first one we see
if ( rod instanceof Variation ) {
variant = (Variation)rod;
break;
}
}
if ( variant == null )
return "";
String contig = context.getLocation().getContig();
long offset = context.getLocation().getStart();
char[] context_bases = ref.getBases();
long true_offset = offset - 200;
for (long i = 0; i < 401; i++) {
GenomeLoc loc = GenomeLocParser.parseGenomeLoc(contig + ":" + true_offset + "-" + true_offset);
if ( in_mask(loc) )
context_bases[(int)i] = 'N';
true_offset += 1;
}
String leading_bases = new String(Arrays.copyOfRange(context_bases, 0, 200));
String trailing_bases = new String(Arrays.copyOfRange(context_bases, 201, 401));
String assay_sequence;
if ( variant.isSNP() )
assay_sequence = leading_bases + "[" + refBase + "/" + variant.getAlternativeBaseForSNP() + "]" + trailing_bases;
else if ( variant.isInsertion() )
assay_sequence = leading_bases + refBase + "[-/" + variant.getAlternateBases() + "]" + trailing_bases;
else if ( variant.isDeletion() )
assay_sequence = leading_bases + refBase + "[" + variant.getAlternateBases() + "/-]" + trailing_bases.substring(variant.getAlternateBases().length());
else
return "";
String assay_id = new String(context.getLocation().toString() + "_" + ref.getWindow().toString()).replace(':', '_');
return assay_id + "\t" + assay_sequence + "\n";
}
public String reduceInit() {
return "";
}
public String reduce(String data, String sum) {
out.print(data);
return "";
}
public void onTraversalDone(String sum) {}
}

View File

@ -1,108 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.io.*;
import java.util.*;
@WalkerName("PickSequenomProbes")
@Requires(value={DataSource.REFERENCE})
@Reference(window=@Window(start=-200,stop=200))
public class PickSequenomProbes extends RefWalker<String, String>
{
@Argument(required=false, shortName="snp_mask", doc="positions to be masked with N's") public String SNP_MASK = null;
ArrayList<GenomeLoc> mask = null;
Object[] mask_array = null;
public void initialize()
{
System.out.printf("Loading SNP mask... ");
if (SNP_MASK != null)
{
mask = new ArrayList<GenomeLoc>();
mask.addAll(GenomeLocParser.intervalFileToList(SNP_MASK));
Object[] temp_array = mask.toArray();
mask_array = new GenomeLoc[temp_array.length];
for (int i = 0; i < temp_array.length; i++) { mask_array[i] = (GenomeLoc)temp_array[i]; }
Arrays.sort(mask_array);
}
System.out.printf("Done.\n");
}
private boolean in_mask(GenomeLoc loc)
{
return (Arrays.binarySearch(mask_array, loc) >= 0);
}
public String map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context)
{
String refBase = String.valueOf(ref.getBase());
System.out.printf("Probing " + ref.getLocus() + " " + ref.getWindow() + "\n");
Iterator<ReferenceOrderedDatum> rods = rodData.getAllRods().iterator();
Variation snp = null;
while (rods.hasNext())
{
ReferenceOrderedDatum rod = rods.next();
if (!(rod instanceof Variation))
continue;
Variation variant = (Variation) rod;
if (variant.isSNP())
{
snp = variant;
}
}
String contig = context.getLocation().getContig();
long offset = context.getLocation().getStart();
char[] context_bases = ref.getBases();
long true_offset = offset - 200;
for (long i = 0; i < 401; i++)
{
GenomeLoc loc = GenomeLocParser.parseGenomeLoc(context.getLocation().getContig() + ":" + true_offset + "-" + true_offset);
if (in_mask(loc)) { context_bases[(int)i] = 'N'; }
true_offset += 1;
}
char[] leading_bases = Arrays.copyOfRange(context_bases, 0, 200);
char[] trailing_bases = Arrays.copyOfRange(context_bases, 201, 401);
if (snp != null)
{
String assay_sequence = new String(leading_bases) + new String("[" + refBase + "/" + snp.getAlternativeBaseForSNP() + "]") + new String(trailing_bases);
String assay_id = new String(context.getLocation().toString() + "_" + ref.getWindow().toString()).replace(':', '_');
return assay_id + "\t" + assay_sequence + "\n";
}
else
{
return "";
}
}
public String reduceInit()
{
return "";
}
public String reduce(String data, String sum)
{
out.print(data);
return "";
}
public void onTraversalDone(String sum)
{
}
}