diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/SequenomROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/SequenomROD.java index 953f2f11b..b838920c1 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/SequenomROD.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/SequenomROD.java @@ -70,7 +70,7 @@ public class SequenomROD extends TabularROD implements Variation { */ @Override public char getAlternativeBaseForSNP() { - return 'N'; + return getAltBasesFWD().charAt(0); } /** @@ -80,7 +80,7 @@ public class SequenomROD extends TabularROD implements Variation { */ @Override public char getReferenceForSNP() { - return getAltBasesFWD().charAt(0); + return 'N'; } public boolean isBiallelic() { return true; } @@ -127,4 +127,4 @@ public class SequenomROD extends TabularROD implements Variation { sb.append(getLocation().getContig() + "\t" + getLocation().getStart() + "\t" + getFWDAlleles().get(0)); return sb.toString(); } - } \ No newline at end of file + } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java index 84e4a751a..820553e7c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java @@ -16,8 +16,6 @@ import java.util.Iterator; @Requires(value={DataSource.REFERENCE}) public class FastaAlternateReferenceWalker extends FastaReferenceWalker { - @Argument(fullName="outputSequenomFormat", shortName="sequenom", doc="output results in sequenom format (overrides 'maskSNPs' argument)", required=false) - private Boolean SEQUENOM = false; @Argument(fullName="outputIndelPositions", shortName="indelPositions", doc="output the positions of the indels in the new reference", required=false) String indelsFile = null; @@ -41,7 +39,7 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker { if (deletionBasesRemaining > 0) { deletionBasesRemaining--; - return new Pair(context.getLocation(), (SEQUENOM ? (deletionBasesRemaining == 0 ? refBase.concat("/-]") : refBase) : "")); + return new Pair(context.getLocation(), ""); } Iterator rods = rodData.getAllRods().iterator(); @@ -57,16 +55,16 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker { if (indelsWriter != null) indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlternateBases().length())); // delete the next n bases, not this one - return new Pair(context.getLocation(), (SEQUENOM ? refBase.concat("[") : refBase)); + return new Pair(context.getLocation(), refBase); } else if (!rod.getName().startsWith("snpmask") && variant.isInsertion()) { basesSeen++; if (indelsWriter != null) indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlternateBases().length())); basesSeen += variant.getAlternateBases().length(); - return new Pair(context.getLocation(), (SEQUENOM ? refBase.concat("[-/" + variant.getAlternateBases() + "]") : refBase.concat(variant.getAlternateBases()))); + return new Pair(context.getLocation(), refBase.concat(variant.getAlternateBases())); } else if (variant.isSNP()) { basesSeen++; - return new Pair(context.getLocation(), (rod.getName().startsWith("snpmask") ? "N" : (SEQUENOM ? "[" + refBase + "/" + variant.getAlternativeBaseForSNP() + "]" : String.valueOf(variant.getAlternativeBaseForSNP())))); + return new Pair(context.getLocation(), (rod.getName().startsWith("snpmask") ? "N" : String.valueOf(variant.getAlternativeBaseForSNP()))); } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/fasta/PickSequenomProbes.java b/java/src/org/broadinstitute/sting/gatk/walkers/fasta/PickSequenomProbes.java new file mode 100755 index 000000000..81c370d0a --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/fasta/PickSequenomProbes.java @@ -0,0 +1,93 @@ +package org.broadinstitute.sting.gatk.walkers.fasta; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.genotype.Variation; + +import java.util.*; + + +@WalkerName("PickSequenomProbes") +@Requires(value={DataSource.REFERENCE}) +@Reference(window=@Window(start=-200,stop=200)) +public class PickSequenomProbes extends RefWalker { + @Argument(required=false, shortName="snp_mask", doc="positions to be masked with N's") + protected String SNP_MASK = null; + + private Object[] mask_array = null; + + public void initialize() { + if ( SNP_MASK != null ) { + logger.info("Loading SNP mask... "); + mask_array = GenomeLocParser.parseIntervals(Arrays.asList(SNP_MASK)).toArray(); + } + } + + private boolean in_mask(GenomeLoc loc) { + return mask_array == null ? false : (Arrays.binarySearch(mask_array, loc) >= 0); + } + + public String map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) { + + logger.debug("Probing " + ref.getLocus() + " " + ref.getWindow()); + + String refBase = String.valueOf(ref.getBase()); + + Iterator rods = rodData.getAllRods().iterator(); + Variation variant = null; + while (rods.hasNext()) { + ReferenceOrderedDatum rod = rods.next(); + + // if we have multiple variants at a locus, just take the first one we see + if ( rod instanceof Variation ) { + variant = (Variation)rod; + break; + } + } + + if ( variant == null ) + return ""; + + String contig = context.getLocation().getContig(); + long offset = context.getLocation().getStart(); + + char[] context_bases = ref.getBases(); + long true_offset = offset - 200; + for (long i = 0; i < 401; i++) { + GenomeLoc loc = GenomeLocParser.parseGenomeLoc(contig + ":" + true_offset + "-" + true_offset); + if ( in_mask(loc) ) + context_bases[(int)i] = 'N'; + true_offset += 1; + } + String leading_bases = new String(Arrays.copyOfRange(context_bases, 0, 200)); + String trailing_bases = new String(Arrays.copyOfRange(context_bases, 201, 401)); + + String assay_sequence; + if ( variant.isSNP() ) + assay_sequence = leading_bases + "[" + refBase + "/" + variant.getAlternativeBaseForSNP() + "]" + trailing_bases; + else if ( variant.isInsertion() ) + assay_sequence = leading_bases + refBase + "[-/" + variant.getAlternateBases() + "]" + trailing_bases; + else if ( variant.isDeletion() ) + assay_sequence = leading_bases + refBase + "[" + variant.getAlternateBases() + "/-]" + trailing_bases.substring(variant.getAlternateBases().length()); + else + return ""; + + String assay_id = new String(context.getLocation().toString() + "_" + ref.getWindow().toString()).replace(':', '_'); + return assay_id + "\t" + assay_sequence + "\n"; + } + + public String reduceInit() { + return ""; + } + + public String reduce(String data, String sum) { + out.print(data); + return ""; + } + + public void onTraversalDone(String sum) {} +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/PickSequenomProbes.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/PickSequenomProbes.java deleted file mode 100755 index c2e95a9b6..000000000 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/PickSequenomProbes.java +++ /dev/null @@ -1,108 +0,0 @@ -package org.broadinstitute.sting.playground.gatk.walkers; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.genotype.Variation; - -import java.io.*; -import java.util.*; - - -@WalkerName("PickSequenomProbes") -@Requires(value={DataSource.REFERENCE}) -@Reference(window=@Window(start=-200,stop=200)) -public class PickSequenomProbes extends RefWalker -{ - @Argument(required=false, shortName="snp_mask", doc="positions to be masked with N's") public String SNP_MASK = null; - - ArrayList mask = null; - Object[] mask_array = null; - public void initialize() - { - System.out.printf("Loading SNP mask... "); - if (SNP_MASK != null) - { - mask = new ArrayList(); - mask.addAll(GenomeLocParser.intervalFileToList(SNP_MASK)); - - Object[] temp_array = mask.toArray(); - mask_array = new GenomeLoc[temp_array.length]; - for (int i = 0; i < temp_array.length; i++) { mask_array[i] = (GenomeLoc)temp_array[i]; } - Arrays.sort(mask_array); - } - System.out.printf("Done.\n"); - } - - private boolean in_mask(GenomeLoc loc) - { - return (Arrays.binarySearch(mask_array, loc) >= 0); - } - - public String map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) - { - String refBase = String.valueOf(ref.getBase()); - - System.out.printf("Probing " + ref.getLocus() + " " + ref.getWindow() + "\n"); - - Iterator rods = rodData.getAllRods().iterator(); - - Variation snp = null; - while (rods.hasNext()) - { - ReferenceOrderedDatum rod = rods.next(); - if (!(rod instanceof Variation)) - continue; - - Variation variant = (Variation) rod; - if (variant.isSNP()) - { - snp = variant; - } - } - - String contig = context.getLocation().getContig(); - long offset = context.getLocation().getStart(); - - char[] context_bases = ref.getBases(); - long true_offset = offset - 200; - for (long i = 0; i < 401; i++) - { - GenomeLoc loc = GenomeLocParser.parseGenomeLoc(context.getLocation().getContig() + ":" + true_offset + "-" + true_offset); - if (in_mask(loc)) { context_bases[(int)i] = 'N'; } - true_offset += 1; - } - char[] leading_bases = Arrays.copyOfRange(context_bases, 0, 200); - char[] trailing_bases = Arrays.copyOfRange(context_bases, 201, 401); - - if (snp != null) - { - String assay_sequence = new String(leading_bases) + new String("[" + refBase + "/" + snp.getAlternativeBaseForSNP() + "]") + new String(trailing_bases); - String assay_id = new String(context.getLocation().toString() + "_" + ref.getWindow().toString()).replace(':', '_'); - return assay_id + "\t" + assay_sequence + "\n"; - } - else - { - return ""; - } - } - - public String reduceInit() - { - return ""; - } - - public String reduce(String data, String sum) - { - out.print(data); - return ""; - } - - public void onTraversalDone(String sum) - { - } - -}