Merge both versions of the Sequenom assay design maker: use Jared's base code and add in indels. [Jared, this still emits the same output for SNPs as your original version)
Remove all sequenom stuff from the FastaAlternateReferenceMaker so it can just concentrate on making alternate references... git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1831 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
49af5269e5
commit
0c95d6906f
|
|
@ -70,7 +70,7 @@ public class SequenomROD extends TabularROD implements Variation {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public char getAlternativeBaseForSNP() {
|
public char getAlternativeBaseForSNP() {
|
||||||
return 'N';
|
return getAltBasesFWD().charAt(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -80,7 +80,7 @@ public class SequenomROD extends TabularROD implements Variation {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public char getReferenceForSNP() {
|
public char getReferenceForSNP() {
|
||||||
return getAltBasesFWD().charAt(0);
|
return 'N';
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isBiallelic() { return true; }
|
public boolean isBiallelic() { return true; }
|
||||||
|
|
@ -127,4 +127,4 @@ public class SequenomROD extends TabularROD implements Variation {
|
||||||
sb.append(getLocation().getContig() + "\t" + getLocation().getStart() + "\t" + getFWDAlleles().get(0));
|
sb.append(getLocation().getContig() + "\t" + getLocation().getStart() + "\t" + getFWDAlleles().get(0));
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -16,8 +16,6 @@ import java.util.Iterator;
|
||||||
@Requires(value={DataSource.REFERENCE})
|
@Requires(value={DataSource.REFERENCE})
|
||||||
public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
|
public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
|
||||||
|
|
||||||
@Argument(fullName="outputSequenomFormat", shortName="sequenom", doc="output results in sequenom format (overrides 'maskSNPs' argument)", required=false)
|
|
||||||
private Boolean SEQUENOM = false;
|
|
||||||
@Argument(fullName="outputIndelPositions", shortName="indelPositions", doc="output the positions of the indels in the new reference", required=false)
|
@Argument(fullName="outputIndelPositions", shortName="indelPositions", doc="output the positions of the indels in the new reference", required=false)
|
||||||
String indelsFile = null;
|
String indelsFile = null;
|
||||||
|
|
||||||
|
|
@ -41,7 +39,7 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
|
||||||
|
|
||||||
if (deletionBasesRemaining > 0) {
|
if (deletionBasesRemaining > 0) {
|
||||||
deletionBasesRemaining--;
|
deletionBasesRemaining--;
|
||||||
return new Pair<GenomeLoc, String>(context.getLocation(), (SEQUENOM ? (deletionBasesRemaining == 0 ? refBase.concat("/-]") : refBase) : ""));
|
return new Pair<GenomeLoc, String>(context.getLocation(), "");
|
||||||
}
|
}
|
||||||
|
|
||||||
Iterator<ReferenceOrderedDatum> rods = rodData.getAllRods().iterator();
|
Iterator<ReferenceOrderedDatum> rods = rodData.getAllRods().iterator();
|
||||||
|
|
@ -57,16 +55,16 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
|
||||||
if (indelsWriter != null)
|
if (indelsWriter != null)
|
||||||
indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlternateBases().length()));
|
indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlternateBases().length()));
|
||||||
// delete the next n bases, not this one
|
// delete the next n bases, not this one
|
||||||
return new Pair<GenomeLoc, String>(context.getLocation(), (SEQUENOM ? refBase.concat("[") : refBase));
|
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
|
||||||
} else if (!rod.getName().startsWith("snpmask") && variant.isInsertion()) {
|
} else if (!rod.getName().startsWith("snpmask") && variant.isInsertion()) {
|
||||||
basesSeen++;
|
basesSeen++;
|
||||||
if (indelsWriter != null)
|
if (indelsWriter != null)
|
||||||
indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlternateBases().length()));
|
indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlternateBases().length()));
|
||||||
basesSeen += variant.getAlternateBases().length();
|
basesSeen += variant.getAlternateBases().length();
|
||||||
return new Pair<GenomeLoc, String>(context.getLocation(), (SEQUENOM ? refBase.concat("[-/" + variant.getAlternateBases() + "]") : refBase.concat(variant.getAlternateBases())));
|
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(variant.getAlternateBases()));
|
||||||
} else if (variant.isSNP()) {
|
} else if (variant.isSNP()) {
|
||||||
basesSeen++;
|
basesSeen++;
|
||||||
return new Pair<GenomeLoc, String>(context.getLocation(), (rod.getName().startsWith("snpmask") ? "N" : (SEQUENOM ? "[" + refBase + "/" + variant.getAlternativeBaseForSNP() + "]" : String.valueOf(variant.getAlternativeBaseForSNP()))));
|
return new Pair<GenomeLoc, String>(context.getLocation(), (rod.getName().startsWith("snpmask") ? "N" : String.valueOf(variant.getAlternativeBaseForSNP())));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,93 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.fasta;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
|
import org.broadinstitute.sting.utils.*;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
|
@WalkerName("PickSequenomProbes")
|
||||||
|
@Requires(value={DataSource.REFERENCE})
|
||||||
|
@Reference(window=@Window(start=-200,stop=200))
|
||||||
|
public class PickSequenomProbes extends RefWalker<String, String> {
|
||||||
|
@Argument(required=false, shortName="snp_mask", doc="positions to be masked with N's")
|
||||||
|
protected String SNP_MASK = null;
|
||||||
|
|
||||||
|
private Object[] mask_array = null;
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
if ( SNP_MASK != null ) {
|
||||||
|
logger.info("Loading SNP mask... ");
|
||||||
|
mask_array = GenomeLocParser.parseIntervals(Arrays.asList(SNP_MASK)).toArray();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean in_mask(GenomeLoc loc) {
|
||||||
|
return mask_array == null ? false : (Arrays.binarySearch(mask_array, loc) >= 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
|
||||||
|
logger.debug("Probing " + ref.getLocus() + " " + ref.getWindow());
|
||||||
|
|
||||||
|
String refBase = String.valueOf(ref.getBase());
|
||||||
|
|
||||||
|
Iterator<ReferenceOrderedDatum> rods = rodData.getAllRods().iterator();
|
||||||
|
Variation variant = null;
|
||||||
|
while (rods.hasNext()) {
|
||||||
|
ReferenceOrderedDatum rod = rods.next();
|
||||||
|
|
||||||
|
// if we have multiple variants at a locus, just take the first one we see
|
||||||
|
if ( rod instanceof Variation ) {
|
||||||
|
variant = (Variation)rod;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( variant == null )
|
||||||
|
return "";
|
||||||
|
|
||||||
|
String contig = context.getLocation().getContig();
|
||||||
|
long offset = context.getLocation().getStart();
|
||||||
|
|
||||||
|
char[] context_bases = ref.getBases();
|
||||||
|
long true_offset = offset - 200;
|
||||||
|
for (long i = 0; i < 401; i++) {
|
||||||
|
GenomeLoc loc = GenomeLocParser.parseGenomeLoc(contig + ":" + true_offset + "-" + true_offset);
|
||||||
|
if ( in_mask(loc) )
|
||||||
|
context_bases[(int)i] = 'N';
|
||||||
|
true_offset += 1;
|
||||||
|
}
|
||||||
|
String leading_bases = new String(Arrays.copyOfRange(context_bases, 0, 200));
|
||||||
|
String trailing_bases = new String(Arrays.copyOfRange(context_bases, 201, 401));
|
||||||
|
|
||||||
|
String assay_sequence;
|
||||||
|
if ( variant.isSNP() )
|
||||||
|
assay_sequence = leading_bases + "[" + refBase + "/" + variant.getAlternativeBaseForSNP() + "]" + trailing_bases;
|
||||||
|
else if ( variant.isInsertion() )
|
||||||
|
assay_sequence = leading_bases + refBase + "[-/" + variant.getAlternateBases() + "]" + trailing_bases;
|
||||||
|
else if ( variant.isDeletion() )
|
||||||
|
assay_sequence = leading_bases + refBase + "[" + variant.getAlternateBases() + "/-]" + trailing_bases.substring(variant.getAlternateBases().length());
|
||||||
|
else
|
||||||
|
return "";
|
||||||
|
|
||||||
|
String assay_id = new String(context.getLocation().toString() + "_" + ref.getWindow().toString()).replace(':', '_');
|
||||||
|
return assay_id + "\t" + assay_sequence + "\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String reduceInit() {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String reduce(String data, String sum) {
|
||||||
|
out.print(data);
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
public void onTraversalDone(String sum) {}
|
||||||
|
}
|
||||||
|
|
@ -1,108 +0,0 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.*;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
|
||||||
|
|
||||||
import java.io.*;
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
|
|
||||||
@WalkerName("PickSequenomProbes")
|
|
||||||
@Requires(value={DataSource.REFERENCE})
|
|
||||||
@Reference(window=@Window(start=-200,stop=200))
|
|
||||||
public class PickSequenomProbes extends RefWalker<String, String>
|
|
||||||
{
|
|
||||||
@Argument(required=false, shortName="snp_mask", doc="positions to be masked with N's") public String SNP_MASK = null;
|
|
||||||
|
|
||||||
ArrayList<GenomeLoc> mask = null;
|
|
||||||
Object[] mask_array = null;
|
|
||||||
public void initialize()
|
|
||||||
{
|
|
||||||
System.out.printf("Loading SNP mask... ");
|
|
||||||
if (SNP_MASK != null)
|
|
||||||
{
|
|
||||||
mask = new ArrayList<GenomeLoc>();
|
|
||||||
mask.addAll(GenomeLocParser.intervalFileToList(SNP_MASK));
|
|
||||||
|
|
||||||
Object[] temp_array = mask.toArray();
|
|
||||||
mask_array = new GenomeLoc[temp_array.length];
|
|
||||||
for (int i = 0; i < temp_array.length; i++) { mask_array[i] = (GenomeLoc)temp_array[i]; }
|
|
||||||
Arrays.sort(mask_array);
|
|
||||||
}
|
|
||||||
System.out.printf("Done.\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean in_mask(GenomeLoc loc)
|
|
||||||
{
|
|
||||||
return (Arrays.binarySearch(mask_array, loc) >= 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
public String map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context)
|
|
||||||
{
|
|
||||||
String refBase = String.valueOf(ref.getBase());
|
|
||||||
|
|
||||||
System.out.printf("Probing " + ref.getLocus() + " " + ref.getWindow() + "\n");
|
|
||||||
|
|
||||||
Iterator<ReferenceOrderedDatum> rods = rodData.getAllRods().iterator();
|
|
||||||
|
|
||||||
Variation snp = null;
|
|
||||||
while (rods.hasNext())
|
|
||||||
{
|
|
||||||
ReferenceOrderedDatum rod = rods.next();
|
|
||||||
if (!(rod instanceof Variation))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Variation variant = (Variation) rod;
|
|
||||||
if (variant.isSNP())
|
|
||||||
{
|
|
||||||
snp = variant;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
String contig = context.getLocation().getContig();
|
|
||||||
long offset = context.getLocation().getStart();
|
|
||||||
|
|
||||||
char[] context_bases = ref.getBases();
|
|
||||||
long true_offset = offset - 200;
|
|
||||||
for (long i = 0; i < 401; i++)
|
|
||||||
{
|
|
||||||
GenomeLoc loc = GenomeLocParser.parseGenomeLoc(context.getLocation().getContig() + ":" + true_offset + "-" + true_offset);
|
|
||||||
if (in_mask(loc)) { context_bases[(int)i] = 'N'; }
|
|
||||||
true_offset += 1;
|
|
||||||
}
|
|
||||||
char[] leading_bases = Arrays.copyOfRange(context_bases, 0, 200);
|
|
||||||
char[] trailing_bases = Arrays.copyOfRange(context_bases, 201, 401);
|
|
||||||
|
|
||||||
if (snp != null)
|
|
||||||
{
|
|
||||||
String assay_sequence = new String(leading_bases) + new String("[" + refBase + "/" + snp.getAlternativeBaseForSNP() + "]") + new String(trailing_bases);
|
|
||||||
String assay_id = new String(context.getLocation().toString() + "_" + ref.getWindow().toString()).replace(':', '_');
|
|
||||||
return assay_id + "\t" + assay_sequence + "\n";
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public String reduceInit()
|
|
||||||
{
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String reduce(String data, String sum)
|
|
||||||
{
|
|
||||||
out.print(data);
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
public void onTraversalDone(String sum)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
Loading…
Reference in New Issue