diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaAlternateReferenceWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaAlternateReferenceWalker.java index f6d1dee5d..0dd494a5a 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaAlternateReferenceWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaAlternateReferenceWalker.java @@ -14,14 +14,13 @@ import java.util.Iterator; @WalkerName("FastaAlternateReferenceMaker") @Requires(value={DataSource.REFERENCE}) -public class FastaAlternateReferenceWalker extends RefWalker, Pair> { +public class FastaAlternateReferenceWalker extends FastaReferenceWalker { @Argument(fullName="maskSNPs", shortName="mask", doc="print 'N' at SNP sites instead of the alternate allele", required=false) private Boolean MASK_SNPS = false; @Argument(fullName="outputSequenomFormat", shortName="sequenom", doc="output results in sequenom format (overrides 'maskSNPs' argument)", required=false) private Boolean SEQUENOM = false; - private StringBuffer sb = new StringBuffer(); int deletionBasesRemaining = 0; public Pair map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) { @@ -54,45 +53,4 @@ public class FastaAlternateReferenceWalker extends RefWalker(context.getLocation(), refBase); } - - public Pair reduceInit() { - return new Pair(null, ""); - } - - public Pair reduce(Pair value, Pair sum) { - // if there is no interval to the left, then this is the first one - if ( sum.first == null ) { - sum.first = value.first; - sum.second = value.second; - } - // if the intervals don't overlap, print out the leftmost one and start a new one - // (end of contig or new interval) - else if ( value.first.getStart() != sum.first.getStop() + 1 ) { - printFasta(sum.first, sum.second); - sum.first = value.first; - sum.second = value.second; - } - // otherwise, merge them - else { - sum.first = GenomeLocParser.setStop(sum.first,value.first.getStop()); - sum.second = sum.second.concat(value.second); - } - return sum; - } - - public void onTraversalDone(Pair sum) { - if (sum.second != null) - printFasta(sum.first, sum.second); - } - - private void printFasta(GenomeLoc loc, String s) { - out.println(">" + loc); - int lines = s.length() / 60; - int currentStart = 0; - for (int i=0; i < lines; i++) { - out.println(s.substring(currentStart, currentStart+60)); - currentStart += 60; - } - out.println(s.substring(currentStart)); - } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaReferenceWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaReferenceWalker.java index 454e89fbf..be1c40d5c 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaReferenceWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaReferenceWalker.java @@ -12,50 +12,44 @@ import org.broadinstitute.sting.utils.Pair; // create a fasta sequence file from a reference and intervals @WalkerName("FastaReferenceMaker") -public class FastaReferenceWalker extends RefWalker, Pair> { +public class FastaReferenceWalker extends RefWalker, GenomeLoc> { - public Pair map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) { - return new Pair(context.getLocation(), ref.getBase()); - } + protected FastaSequence fasta; - public Pair reduceInit() { - return new Pair(null, ""); + public void initialize() { + fasta = new FastaSequence(out); } - public Pair reduce(Pair value, Pair sum) { + public Pair map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) { + return new Pair(context.getLocation(), String.valueOf(ref.getBase())); + } + + public GenomeLoc reduceInit() { + return null; + } + + public GenomeLoc reduce(Pair value, GenomeLoc sum) { // if there is no interval to the left, then this is the first one - if ( sum.first == null ) { - sum.first = value.first; - sum.second = value.second.toString(); + if ( sum == null ) { + sum = value.first; + fasta.append(value.second.toString()); } // if the intervals don't overlap, print out the leftmost one and start a new one // (end of contig or new interval) - else if ( value.first.getStart() != sum.first.getStop() + 1 ) { - printFasta(sum.first, sum.second); - sum.first = value.first; - sum.second = value.second.toString(); + else if ( value.first.getStart() != sum.getStop() + 1 ) { + fasta.flush(); + sum = value.first; + fasta.append(value.second.toString()); } // otherwise, merge them else { - sum.first = GenomeLocParser.setStop(sum.first,value.first.getStop()); - sum.second = new String(sum.second + value.second); + sum = GenomeLocParser.setStop(sum, value.first.getStop()); + fasta.append(value.second.toString()); } return sum; } - public void onTraversalDone(Pair sum) { - if (sum.second != null) - printFasta(sum.first, sum.second); - } - - private void printFasta(GenomeLoc loc, String s) { - out.println(">" + loc); - int lines = s.length() / 60; - int currentStart = 0; - for (int i=0; i < lines; i++) { - out.println(s.substring(currentStart, currentStart+60)); - currentStart += 60; - } - out.println(s.substring(currentStart)); + public void onTraversalDone(GenomeLoc sum) { + fasta.flush(); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaSequence.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaSequence.java new file mode 100755 index 000000000..b82cbdc4c --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaSequence.java @@ -0,0 +1,50 @@ +package org.broadinstitute.sting.playground.gatk.walkers.fasta; + +import org.broadinstitute.sting.utils.GenomeLoc; + +import java.io.PrintStream; + +// fasta sequence holder class + +public class FastaSequence { + + private PrintStream out; + private StringBuffer sb = new StringBuffer(); + private long sequenceCounter = 1; + private boolean printedHeader = false; + + public FastaSequence(PrintStream out) { + this.out = out; + } + + public void append(String s) { + sb.append(s); + printFasta(false); + } + + public void flush() { + printFasta(true); + printedHeader = false; + } + + private void printFasta(boolean printAll) { + if ( sb.length() == 0 || (!printAll && sb.length() < 60) ) + return; + if ( !printedHeader ) { + out.println(">" + sequenceCounter++); + printedHeader = true; + } + int lines = sb.length() / 60; + int currentStart = 0; + for (int i=0; i < lines; i++) { + out.println(sb.substring(currentStart, currentStart+60)); + currentStart += 60; + } + if ( printAll ) { + out.println(sb.substring(currentStart)); + sb.setLength(0); + } else { + sb.delete(0, currentStart); + } + } +} \ No newline at end of file