From 5fab934f4e308cb8ae3f87e45900a418b48ab8c0 Mon Sep 17 00:00:00 2001
From: ebanks
Date: Tue, 11 Aug 2009 18:01:06 +0000
Subject: [PATCH] - moved the reference maker to its own directory - added
 first version of a more complicated reference maker which takes in RODs and
 creates an alternative reference based on the variants (indels and/or SNPs)

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1409 348d0f76-0448-11de-a6fe-93d51630548a
---
 .../fasta/FastaAlternateReferenceWalker.java  | 128 +++++++++++++++++++
 .../{ => fasta}/FastaReferenceWalker.java     |   3 +-
 2 files changed, 130 insertions(+), 1 deletion(-)
 create mode 100755 java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaAlternateReferenceWalker.java
 rename java/src/org/broadinstitute/sting/playground/gatk/walkers/{ => fasta}/FastaReferenceWalker.java (95%)

diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaAlternateReferenceWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaAlternateReferenceWalker.java
new file mode 100755
index 000000000..30bb3ee12
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaAlternateReferenceWalker.java
@@ -0,0 +1,128 @@
+package org.broadinstitute.sting.playground.gatk.walkers.fasta;
+
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.*;
+import org.broadinstitute.sting.gatk.walkers.*;
+import org.broadinstitute.sting.utils.*;
+
+import java.util.Iterator;
+
+/**
+ * Creates a fasta sequence file from a reference, intervals, and rod(s) of variants.
+ *
+ * Each locus contributes the reference base unless an AllelicVariant rod is present there:
+ * a SNP contributes the alternate bases, an insertion contributes the reference base followed
+ * by the inserted bases, and a deletion keeps the current base but drops the following ones.
+ * Runs of adjacent loci are merged and printed as a single fasta record.
+ */
+@WalkerName("FastaAlternateReferenceMaker")
+@Requires(value={DataSource.REFERENCE})
+public class FastaAlternateReferenceWalker extends RefWalker<Pair<GenomeLoc, String>, Pair<GenomeLoc, String>> {
+
+    // number of bases printed per line of sequence
+    private static final int FASTA_LINE_WIDTH = 60;
+
+    // number of upcoming loci that must be skipped because they lie inside a deletion
+    int deletionBasesRemaining = 0;
+
+    /**
+     * Computes the bases that the alternate reference contains at the current locus.
+     *
+     * @param rodData  the rods overlapping this locus
+     * @param ref      the reference context, used for the reference base at this locus
+     * @param context  the alignment context, used only for its location
+     * @return the locus paired with its (possibly empty) alternate-reference bases
+     */
+    public Pair<GenomeLoc, String> map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) {
+        // this locus lies inside a deletion seen at an earlier locus, so it contributes no bases
+        if ( deletionBasesRemaining > 0 ) {
+            deletionBasesRemaining--;
+            return new Pair<GenomeLoc, String>(context.getLocation(), "");
+        }
+
+        Iterator<ReferenceOrderedDatum> rods = rodData.getAllRods().iterator();
+        while ( rods.hasNext() ) {
+            ReferenceOrderedDatum rod = rods.next();
+            if ( !(rod instanceof AllelicVariant) )
+                continue;
+
+            // if we have multiple variants at a locus, just take the first one we see for now
+            AllelicVariant variant = (AllelicVariant)rod;
+            if ( variant.isDeletion() ) {
+                deletionBasesRemaining = variant.length();
+                // delete the next n bases, not this one
+                return new Pair<GenomeLoc, String>(context.getLocation(), String.valueOf(ref.getBase()));
+            } else if ( variant.isInsertion() ) {
+                return new Pair<GenomeLoc, String>(context.getLocation(), String.valueOf(ref.getBase()).concat(variant.getAltBasesFWD()));
+            } else if ( variant.isSNP() ) {
+                return new Pair<GenomeLoc, String>(context.getLocation(), variant.getAltBasesFWD());
+            }
+        }
+
+        // if we got here then we're just ref
+        return new Pair<GenomeLoc, String>(context.getLocation(), String.valueOf(ref.getBase()));
+    }
+
+    /**
+     * @return the initial accumulator: no location yet and an empty sequence
+     */
+    public Pair<GenomeLoc, String> reduceInit() {
+        return new Pair<GenomeLoc, String>(null, "");
+    }
+
+    /**
+     * Appends the bases of one locus to the record being accumulated, printing the accumulated
+     * record first when the new locus is not directly adjacent to it.
+     *
+     * @param value  the locus and bases produced by map()
+     * @param sum    the record accumulated so far; updated in place
+     * @return the updated accumulator
+     */
+    public Pair<GenomeLoc, String> reduce(Pair<GenomeLoc, String> value, Pair<GenomeLoc, String> sum) {
+        // if there is no interval to the left, then this is the first one
+        if ( sum.first == null ) {
+            sum.first = value.first;
+            sum.second = value.second;
+        }
+        // if the intervals don't overlap, print out the leftmost one and start a new one
+        // (end of contig or new interval)
+        else if ( value.first.getStart() != sum.first.getStop() + 1 ) {
+            printFasta(sum.first, sum.second);
+            sum.first = value.first;
+            sum.second = value.second;
+        }
+        // otherwise, merge them
+        else {
+            sum.first = GenomeLocParser.setStop(sum.first,value.first.getStop());
+            sum.second = sum.second.concat(value.second);
+        }
+        return sum;
+    }
+
+    /**
+     * Prints the last accumulated record, if any locus was traversed at all.
+     *
+     * @param sum  the final accumulator
+     */
+    public void onTraversalDone(Pair<GenomeLoc, String> sum) {
+        // reduceInit() seeds the sequence with "", so only a null location means nothing was reduced
+        if ( sum.first != null && sum.second != null )
+            printFasta(sum.first, sum.second);
+    }
+
+    /**
+     * Prints one fasta record: a ">location" header followed by the sequence wrapped at
+     * FASTA_LINE_WIDTH bases per line, without a trailing empty line.
+     *
+     * @param loc  the location used as the record name
+     * @param s    the sequence to print
+     */
+    private void printFasta(GenomeLoc loc, String s) {
+        out.println(">" + loc);
+        for ( int start = 0; start < s.length(); start += FASTA_LINE_WIDTH ) {
+            int end = Math.min(start + FASTA_LINE_WIDTH, s.length());
+            out.println(s.substring(start, end));
+        }
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/FastaReferenceWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaReferenceWalker.java
similarity index 95%
rename from java/src/org/broadinstitute/sting/playground/gatk/walkers/FastaReferenceWalker.java
rename to java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaReferenceWalker.java
index 6adfa91d5..454e89fbf 100755
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/FastaReferenceWalker.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/fasta/FastaReferenceWalker.java
@@ -1,4 +1,4 @@
-package org.broadinstitute.sting.playground.gatk.walkers;
+package org.broadinstitute.sting.playground.gatk.walkers.fasta;
 
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@@ -29,6 +29,7 @@ public class FastaReferenceWalker extends RefWalker,
             sum.second = value.second.toString();
         }
         // if the intervals don't overlap, print out the leftmost one and start a new one
+        // (end of contig or new interval)
         else if ( value.first.getStart() != sum.first.getStop() + 1 ) {
             printFasta(sum.first, sum.second);
             sum.first = value.first;