Added option to FastaReferenceWalker to change output FASTA file format's line width and to remove header lines; allows dumping raw sequence using intervals

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1628 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
andrewk 2009-09-15 18:00:30 +00:00
parent b69eb208a6
commit 00dfe014b7
2 changed files with 16 additions and 8 deletions

View File

@ -8,16 +8,20 @@ import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.cmdLine.Argument;
// create a fasta sequence file from a reference and intervals
@WalkerName("FastaReferenceMaker")
public class FastaReferenceWalker extends RefWalker<Pair<GenomeLoc, String>, GenomeLoc> {
@Argument(fullName="lineWidth", shortName="lw", doc="Maximum length of sequence to write per line", required=false) public int fastaLineWidth=60;
@Argument(fullName="rawOnelineSeq", shortName="raw", doc="Print sequences with no FASTA header lines, one line per interval (i.e. lineWidth = infinity) - CAUTION: adjacent intervals will automatically be merged", required=false) public boolean fastaRawSeqs=false;
protected FastaSequence fasta;
/**
 * Sets up the FASTA writer used by this walker.
 * When fastaRawSeqs is set, fastaLineWidth is overridden with Integer.MAX_VALUE
 * before the writer is constructed (the argument doc describes this as "lineWidth = infinity").
 */
public void initialize() {
// NOTE(review): fasta is assigned here and again below; this one-argument call looks like
// the pre-change line of the diff shown next to its replacement - confirm before relying on it.
fasta = new FastaSequence(out);
// raw mode: replace any user-supplied width with the largest int value
if (fastaRawSeqs) fastaLineWidth = Integer.MAX_VALUE;
// pass the effective line width and the raw flag (received as noHeader) to the writer
fasta = new FastaSequence(out, fastaLineWidth, fastaRawSeqs);
}
public Pair<GenomeLoc, String> map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) {

View File

@ -13,9 +13,13 @@ public class FastaSequence {
private long sequenceCounter = 1;
private boolean printedHeader = false;
private String name = null;
private int lineWidth = -1;
private boolean noHeader = false;
// NOTE(review): two constructor signatures appear back to back below; the one-argument form
// looks like the pre-change line of the diff and the three-argument form its replacement - confirm.
/**
 * Creates a FASTA writer that prints to the given stream.
 *
 * @param out       stream stored in this.out
 * @param lineWidth stored in this.lineWidth; printFasta uses it as the number of sequence
 *                  characters per printed line and as the divisor when counting full lines
 * @param noHeader  stored in this.noHeader; when true, printFasta skips the ">" header line
 */
public FastaSequence(PrintStream out) {
public FastaSequence(PrintStream out, int lineWidth, boolean noHeader) {
this.out = out;
this.lineWidth = lineWidth;
this.noHeader = noHeader;
}
public void setName(String name) {
@ -45,18 +49,18 @@ public class FastaSequence {
}
private void printFasta(boolean printAll) {
if ( sb.length() == 0 || (!printAll && sb.length() < 60) )
if ( sb.length() == 0 || (!printAll && sb.length() < lineWidth) )
return;
if ( !printedHeader ) {
if ( !printedHeader && !noHeader) {
if ( name == null ) out.println(">" + sequenceCounter);
else out.println(">" + name);
else out.println(">" + name);
printedHeader = true;
}
int lines = sb.length() / 60;
int lines = sb.length() / lineWidth;
int currentStart = 0;
for (int i=0; i < lines; i++) {
out.println(sb.substring(currentStart, currentStart+60));
currentStart += 60;
out.println(sb.substring(currentStart, currentStart+lineWidth));
currentStart += lineWidth;
}
if ( printAll ) {
out.println(sb.substring(currentStart));