Added options to FastaReferenceWalker to change the line width of the output FASTA file and to omit header lines; together these allow dumping raw sequence for the requested intervals

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1628 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
andrewk 2009-09-15 18:00:30 +00:00
parent b69eb208a6
commit 00dfe014b7
2 changed files with 16 additions and 8 deletions

View File

@ -8,16 +8,20 @@ import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Pair; import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.cmdLine.Argument;
// create a fasta sequence file from a reference and intervals // create a fasta sequence file from a reference and intervals
@WalkerName("FastaReferenceMaker") @WalkerName("FastaReferenceMaker")
public class FastaReferenceWalker extends RefWalker<Pair<GenomeLoc, String>, GenomeLoc> { public class FastaReferenceWalker extends RefWalker<Pair<GenomeLoc, String>, GenomeLoc> {
@Argument(fullName="lineWidth", shortName="lw", doc="Maximum length of sequence to write per line", required=false) public int fastaLineWidth=60;
@Argument(fullName="rawOnelineSeq", shortName="raw", doc="Print sequences with no FASTA header lines, one line per interval (i.e. lineWidth = infinity) - CAUTION: adjacent intervals will automatically be merged", required=false) public boolean fastaRawSeqs=false;
protected FastaSequence fasta; protected FastaSequence fasta;
public void initialize() { public void initialize() {
fasta = new FastaSequence(out); if (fastaRawSeqs) fastaLineWidth = Integer.MAX_VALUE;
fasta = new FastaSequence(out, fastaLineWidth, fastaRawSeqs);
} }
public Pair<GenomeLoc, String> map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) { public Pair<GenomeLoc, String> map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) {

View File

@ -13,9 +13,13 @@ public class FastaSequence {
private long sequenceCounter = 1; private long sequenceCounter = 1;
private boolean printedHeader = false; private boolean printedHeader = false;
private String name = null; private String name = null;
private int lineWidth = -1;
private boolean noHeader = false;
public FastaSequence(PrintStream out) { public FastaSequence(PrintStream out, int lineWidth, boolean noHeader) {
this.out = out; this.out = out;
this.lineWidth = lineWidth;
this.noHeader = noHeader;
} }
public void setName(String name) { public void setName(String name) {
@ -45,18 +49,18 @@ public class FastaSequence {
} }
private void printFasta(boolean printAll) { private void printFasta(boolean printAll) {
if ( sb.length() == 0 || (!printAll && sb.length() < 60) ) if ( sb.length() == 0 || (!printAll && sb.length() < lineWidth) )
return; return;
if ( !printedHeader ) { if ( !printedHeader && !noHeader) {
if ( name == null ) out.println(">" + sequenceCounter); if ( name == null ) out.println(">" + sequenceCounter);
else out.println(">" + name); else out.println(">" + name);
printedHeader = true; printedHeader = true;
} }
int lines = sb.length() / 60; int lines = sb.length() / lineWidth;
int currentStart = 0; int currentStart = 0;
for (int i=0; i < lines; i++) { for (int i=0; i < lines; i++) {
out.println(sb.substring(currentStart, currentStart+60)); out.println(sb.substring(currentStart, currentStart+lineWidth));
currentStart += 60; currentStart += lineWidth;
} }
if ( printAll ) { if ( printAll ) {
out.println(sb.substring(currentStart)); out.println(sb.substring(currentStart));