Refactoring the code; also, now it prints continuously instead of potentially storing one long string.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1421 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
2a01e71277
commit
0ec581080c
|
|
@ -14,14 +14,13 @@ import java.util.Iterator;
|
||||||
|
|
||||||
@WalkerName("FastaAlternateReferenceMaker")
|
@WalkerName("FastaAlternateReferenceMaker")
|
||||||
@Requires(value={DataSource.REFERENCE})
|
@Requires(value={DataSource.REFERENCE})
|
||||||
public class FastaAlternateReferenceWalker extends RefWalker<Pair<GenomeLoc, String>, Pair<GenomeLoc, String>> {
|
public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
|
||||||
|
|
||||||
@Argument(fullName="maskSNPs", shortName="mask", doc="print 'N' at SNP sites instead of the alternate allele", required=false)
|
@Argument(fullName="maskSNPs", shortName="mask", doc="print 'N' at SNP sites instead of the alternate allele", required=false)
|
||||||
private Boolean MASK_SNPS = false;
|
private Boolean MASK_SNPS = false;
|
||||||
@Argument(fullName="outputSequenomFormat", shortName="sequenom", doc="output results in sequenom format (overrides 'maskSNPs' argument)", required=false)
|
@Argument(fullName="outputSequenomFormat", shortName="sequenom", doc="output results in sequenom format (overrides 'maskSNPs' argument)", required=false)
|
||||||
private Boolean SEQUENOM = false;
|
private Boolean SEQUENOM = false;
|
||||||
|
|
||||||
private StringBuffer sb = new StringBuffer();
|
|
||||||
int deletionBasesRemaining = 0;
|
int deletionBasesRemaining = 0;
|
||||||
|
|
||||||
public Pair<GenomeLoc, String> map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) {
|
public Pair<GenomeLoc, String> map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
|
@ -54,45 +53,4 @@ public class FastaAlternateReferenceWalker extends RefWalker<Pair<GenomeLoc, Str
|
||||||
// if we got here then we're just ref
|
// if we got here then we're just ref
|
||||||
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
|
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Pair<GenomeLoc, String> reduceInit() {
|
|
||||||
return new Pair<GenomeLoc, String>(null, "");
|
|
||||||
}
|
|
||||||
|
|
||||||
public Pair<GenomeLoc, String> reduce(Pair<GenomeLoc, String> value, Pair<GenomeLoc, String> sum) {
|
|
||||||
// if there is no interval to the left, then this is the first one
|
|
||||||
if ( sum.first == null ) {
|
|
||||||
sum.first = value.first;
|
|
||||||
sum.second = value.second;
|
|
||||||
}
|
|
||||||
// if the intervals don't overlap, print out the leftmost one and start a new one
|
|
||||||
// (end of contig or new interval)
|
|
||||||
else if ( value.first.getStart() != sum.first.getStop() + 1 ) {
|
|
||||||
printFasta(sum.first, sum.second);
|
|
||||||
sum.first = value.first;
|
|
||||||
sum.second = value.second;
|
|
||||||
}
|
|
||||||
// otherwise, merge them
|
|
||||||
else {
|
|
||||||
sum.first = GenomeLocParser.setStop(sum.first,value.first.getStop());
|
|
||||||
sum.second = sum.second.concat(value.second);
|
|
||||||
}
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void onTraversalDone(Pair<GenomeLoc, String> sum) {
|
|
||||||
if (sum.second != null)
|
|
||||||
printFasta(sum.first, sum.second);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void printFasta(GenomeLoc loc, String s) {
|
|
||||||
out.println(">" + loc);
|
|
||||||
int lines = s.length() / 60;
|
|
||||||
int currentStart = 0;
|
|
||||||
for (int i=0; i < lines; i++) {
|
|
||||||
out.println(s.substring(currentStart, currentStart+60));
|
|
||||||
currentStart += 60;
|
|
||||||
}
|
|
||||||
out.println(s.substring(currentStart));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
@ -12,50 +12,44 @@ import org.broadinstitute.sting.utils.Pair;
|
||||||
// create a fasta sequence file from a reference and intervals
|
// create a fasta sequence file from a reference and intervals
|
||||||
|
|
||||||
@WalkerName("FastaReferenceMaker")
|
@WalkerName("FastaReferenceMaker")
|
||||||
public class FastaReferenceWalker extends RefWalker<Pair<GenomeLoc, Character>, Pair<GenomeLoc, String>> {
|
public class FastaReferenceWalker extends RefWalker<Pair<GenomeLoc, String>, GenomeLoc> {
|
||||||
|
|
||||||
public Pair<GenomeLoc, Character> map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) {
|
protected FastaSequence fasta;
|
||||||
return new Pair<GenomeLoc, Character>(context.getLocation(), ref.getBase());
|
|
||||||
|
public void initialize() {
|
||||||
|
fasta = new FastaSequence(out);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Pair<GenomeLoc, String> reduceInit() {
|
public Pair<GenomeLoc, String> map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) {
|
||||||
return new Pair<GenomeLoc, String>(null, "");
|
return new Pair<GenomeLoc, String>(context.getLocation(), String.valueOf(ref.getBase()));
|
||||||
}
|
}
|
||||||
|
|
||||||
public Pair<GenomeLoc, String> reduce(Pair<GenomeLoc, Character> value, Pair<GenomeLoc, String> sum) {
|
public GenomeLoc reduceInit() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public GenomeLoc reduce(Pair<GenomeLoc, String> value, GenomeLoc sum) {
|
||||||
// if there is no interval to the left, then this is the first one
|
// if there is no interval to the left, then this is the first one
|
||||||
if ( sum.first == null ) {
|
if ( sum == null ) {
|
||||||
sum.first = value.first;
|
sum = value.first;
|
||||||
sum.second = value.second.toString();
|
fasta.append(value.second.toString());
|
||||||
}
|
}
|
||||||
// if the intervals don't overlap, print out the leftmost one and start a new one
|
// if the intervals don't overlap, print out the leftmost one and start a new one
|
||||||
// (end of contig or new interval)
|
// (end of contig or new interval)
|
||||||
else if ( value.first.getStart() != sum.first.getStop() + 1 ) {
|
else if ( value.first.getStart() != sum.getStop() + 1 ) {
|
||||||
printFasta(sum.first, sum.second);
|
fasta.flush();
|
||||||
sum.first = value.first;
|
sum = value.first;
|
||||||
sum.second = value.second.toString();
|
fasta.append(value.second.toString());
|
||||||
}
|
}
|
||||||
// otherwise, merge them
|
// otherwise, merge them
|
||||||
else {
|
else {
|
||||||
sum.first = GenomeLocParser.setStop(sum.first,value.first.getStop());
|
sum = GenomeLocParser.setStop(sum, value.first.getStop());
|
||||||
sum.second = new String(sum.second + value.second);
|
fasta.append(value.second.toString());
|
||||||
}
|
}
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void onTraversalDone(Pair<GenomeLoc, String> sum) {
|
public void onTraversalDone(GenomeLoc sum) {
|
||||||
if (sum.second != null)
|
fasta.flush();
|
||||||
printFasta(sum.first, sum.second);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void printFasta(GenomeLoc loc, String s) {
|
|
||||||
out.println(">" + loc);
|
|
||||||
int lines = s.length() / 60;
|
|
||||||
int currentStart = 0;
|
|
||||||
for (int i=0; i < lines; i++) {
|
|
||||||
out.println(s.substring(currentStart, currentStart+60));
|
|
||||||
currentStart += 60;
|
|
||||||
}
|
|
||||||
out.println(s.substring(currentStart));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -0,0 +1,50 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.fasta;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
|
import java.io.PrintStream;
|
||||||
|
|
||||||
|
// fasta sequence holder class
|
||||||
|
|
||||||
|
/**
 * Writes sequence data to a {@link PrintStream} in FASTA layout, emitting
 * complete lines as soon as enough bases have been appended instead of
 * holding a whole sequence in memory.
 *
 * <p>Each record starts with a header line of the form {@code >N}, where
 * {@code N} is a counter that starts at 1 and increases by one for every
 * record that actually produces output. The header is written lazily, just
 * before the first line of bases of the record.
 *
 * <p>Typical use: call {@link #append(String)} repeatedly while the bases
 * belong to the same record, then {@link #flush()} to end the record.
 */
public class FastaSequence {

    /** Number of bases written on every full output line. */
    private static final int LINE_WIDTH = 60;

    private final PrintStream out;

    // Holds the bases that have been appended but not yet printed.
    // StringBuilder rather than StringBuffer: StringBuffer's per-call locking
    // does not make the compound append-then-print steps below atomic.
    private final StringBuilder sb = new StringBuilder();

    // Number used in the header of the next record to be printed.
    private long sequenceCounter = 1;

    // True once the header of the current record has been written.
    private boolean printedHeader = false;

    /**
     * @param out stream that receives the header and sequence lines
     */
    public FastaSequence(PrintStream out) {
        this.out = out;
    }

    /**
     * Adds bases to the current record and prints every complete line of
     * {@value #LINE_WIDTH} bases that is now available. Any shorter remainder
     * stays buffered until more bases arrive or {@link #flush()} is called.
     *
     * @param s bases to add; {@code null} is ignored so that the literal text
     *          "null" can never end up in the sequence
     */
    public void append(String s) {
        if ( s == null )
            return;
        sb.append(s);
        printFasta(false);
    }

    /**
     * Ends the current record: prints whatever bases are still buffered (as a
     * final, possibly short, line) and arranges for the next output to start
     * a new record with a new header. Does not print anything, and does not
     * consume a record number, when nothing is buffered.
     */
    public void flush() {
        printFasta(true);
        printedHeader = false;
    }

    /**
     * Prints buffered bases, preceded by the record header if it has not been
     * written yet.
     *
     * @param printAll when {@code false}, only complete lines are printed and
     *                 the remainder is kept; when {@code true}, the remainder
     *                 is printed as well and the buffer ends up empty
     */
    private void printFasta(boolean printAll) {
        // nothing to do: no bases at all, or not enough for a complete line yet
        if ( sb.length() == 0 || (!printAll && sb.length() < LINE_WIDTH) )
            return;

        if ( !printedHeader ) {
            out.println(">" + sequenceCounter++);
            printedHeader = true;
        }

        int currentStart = 0;
        while ( sb.length() - currentStart >= LINE_WIDTH ) {
            out.println(sb.substring(currentStart, currentStart + LINE_WIDTH));
            currentStart += LINE_WIDTH;
        }

        // only print a trailing line when bases are actually left over, so a
        // buffer holding an exact multiple of LINE_WIDTH never yields a blank line
        if ( printAll && currentStart < sb.length() ) {
            out.println(sb.substring(currentStart));
            currentStart = sb.length();
        }

        // drop everything that has been printed, keep the unprinted remainder
        sb.delete(0, currentStart);
    }
}
|
||||||
Loading…
Reference in New Issue