From d4808433a1e602050a59883adee6481d2acf64ac Mon Sep 17 00:00:00 2001 From: ebanks Date: Sun, 16 Aug 2009 03:46:36 +0000 Subject: [PATCH] Added option to output the locations of indels in the alternate reference git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1424 348d0f76-0448-11de-a6fe-93d51630548a --- .../fasta/FastaAlternateReferenceWalker.java | 35 ++++++++++++++++++- .../gatk/walkers/fasta/FastaSequence.java | 7 +++- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaAlternateReferenceWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaAlternateReferenceWalker.java index 0dd494a5a..7c71a4bda 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaAlternateReferenceWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaAlternateReferenceWalker.java @@ -7,8 +7,10 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.cmdLine.Argument; +import java.io.*; import java.util.Iterator; + // create a fasta sequence file from a reference, intervals, and rod(s) of variants // if there are multiple variants at a site, we take the first one seen @@ -20,8 +22,23 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker { private Boolean MASK_SNPS = false; @Argument(fullName="outputSequenomFormat", shortName="sequenom", doc="output results in sequenom format (overrides 'maskSNPs' argument)", required=false) private Boolean SEQUENOM = false; + @Argument(fullName="outputIndelPositions", shortName="indels", doc="output the positions of the indels in the new reference", required=false) + String indelsFile = null; - int deletionBasesRemaining = 0; + private int deletionBasesRemaining = 0; + private long basesSeen = 0; + private PrintWriter indelsWriter = null; + + public void initialize() { + super.initialize(); + if ( indelsFile != null ) { + try { + indelsWriter = new PrintWriter(indelsFile); + } catch (IOException e) { + throw new RuntimeException("Unable to open indel positions output file: " + indelsFile); + } + } + } public Pair map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) { String refBase = String.valueOf(ref.getBase()); @@ -41,16 +58,32 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker { AllelicVariant variant = (AllelicVariant)rod; if ( variant.isDeletion() ) { deletionBasesRemaining = variant.length(); + basesSeen++; + if ( indelsWriter != null ) + indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.length())); // delete the next n bases, not this one return new Pair(context.getLocation(), (SEQUENOM ? refBase.concat("[-/") : refBase)); } else if ( variant.isInsertion() ) { + basesSeen++; + if ( indelsWriter != null ) + indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.length())); + basesSeen += variant.length(); return new Pair(context.getLocation(), (SEQUENOM ? refBase.concat("[+/"+variant.getAltBasesFWD()+"]") : refBase.concat(variant.getAltBasesFWD()))); } else if ( variant.isSNP() ) { + basesSeen++; return new Pair(context.getLocation(), (SEQUENOM || MASK_SNPS ? "N" : variant.getAltBasesFWD())); } } // if we got here then we're just ref + basesSeen++; return new Pair(context.getLocation(), refBase); } + + public void onTraversalDone(GenomeLoc sum) { + super.onTraversalDone(sum); + if ( indelsWriter != null ) + indelsWriter.close(); + } + } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaSequence.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaSequence.java index b82cbdc4c..2c3c72d49 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaSequence.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaSequence.java @@ -25,13 +25,18 @@ public class FastaSequence { public void flush() { printFasta(true); printedHeader = false; + sequenceCounter++; + } + + public String getCurrentID() { + return String.valueOf(sequenceCounter); } private void printFasta(boolean printAll) { if ( sb.length() == 0 || (!printAll && sb.length() < 60) ) return; if ( !printedHeader ) { - out.println(">" + sequenceCounter++); + out.println(">" + sequenceCounter); printedHeader = true; } int lines = sb.length() / 60;