- moved the reference maker to its own directory
- added first version of a more complicated reference maker which takes in RODs and creates an alternative reference based on the variants (indels and/or SNPs) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1409 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d69ae60b69
commit
5fab934f4e
|
|
@ -0,0 +1,89 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.fasta;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
|
import org.broadinstitute.sting.utils.*;
|
||||||
|
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
// create a fasta sequence file from a reference, intervals, and rod(s) of variants
|
||||||
|
|
||||||
|
@WalkerName("FastaAlternateReferenceMaker")
|
||||||
|
@Requires(value={DataSource.REFERENCE})
|
||||||
|
public class FastaAlternateReferenceWalker extends RefWalker<Pair<GenomeLoc, String>, Pair<GenomeLoc, String>> {
|
||||||
|
|
||||||
|
private StringBuffer sb = new StringBuffer();
|
||||||
|
int deletionBasesRemaining = 0;
|
||||||
|
|
||||||
|
public Pair<GenomeLoc, String> map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
if ( deletionBasesRemaining > 0 ) {
|
||||||
|
deletionBasesRemaining--;
|
||||||
|
return new Pair<GenomeLoc, String>(context.getLocation(), "");
|
||||||
|
}
|
||||||
|
|
||||||
|
Iterator<ReferenceOrderedDatum> rods = rodData.getAllRods().iterator();
|
||||||
|
while ( rods.hasNext() ) {
|
||||||
|
ReferenceOrderedDatum rod = rods.next();
|
||||||
|
if ( !(rod instanceof AllelicVariant) )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// if we have multiple variants at a locus, just take the first damn one we see for now
|
||||||
|
AllelicVariant variant = (AllelicVariant)rod;
|
||||||
|
if ( variant.isDeletion() ) {
|
||||||
|
deletionBasesRemaining = variant.length();
|
||||||
|
// delete the next n bases, not this one
|
||||||
|
return new Pair<GenomeLoc, String>(context.getLocation(), String.valueOf(ref.getBase()));
|
||||||
|
} else if ( variant.isInsertion() ) {
|
||||||
|
return new Pair<GenomeLoc, String>(context.getLocation(), String.valueOf(ref.getBase()).concat(variant.getAltBasesFWD()));
|
||||||
|
} else if ( variant.isSNP() ) {
|
||||||
|
return new Pair<GenomeLoc, String>(context.getLocation(), variant.getAltBasesFWD());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// if we got here then we're just ref
|
||||||
|
return new Pair<GenomeLoc, String>(context.getLocation(), String.valueOf(ref.getBase()));
|
||||||
|
}
|
||||||
|
|
||||||
|
public Pair<GenomeLoc, String> reduceInit() {
|
||||||
|
return new Pair<GenomeLoc, String>(null, "");
|
||||||
|
}
|
||||||
|
|
||||||
|
public Pair<GenomeLoc, String> reduce(Pair<GenomeLoc, String> value, Pair<GenomeLoc, String> sum) {
|
||||||
|
// if there is no interval to the left, then this is the first one
|
||||||
|
if ( sum.first == null ) {
|
||||||
|
sum.first = value.first;
|
||||||
|
sum.second = value.second;
|
||||||
|
}
|
||||||
|
// if the intervals don't overlap, print out the leftmost one and start a new one
|
||||||
|
// (end of contig or new interval)
|
||||||
|
else if ( value.first.getStart() != sum.first.getStop() + 1 ) {
|
||||||
|
printFasta(sum.first, sum.second);
|
||||||
|
sum.first = value.first;
|
||||||
|
sum.second = value.second;
|
||||||
|
}
|
||||||
|
// otherwise, merge them
|
||||||
|
else {
|
||||||
|
sum.first = GenomeLocParser.setStop(sum.first,value.first.getStop());
|
||||||
|
sum.second = sum.second.concat(value.second);
|
||||||
|
}
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void onTraversalDone(Pair<GenomeLoc, String> sum) {
|
||||||
|
if (sum.second != null)
|
||||||
|
printFasta(sum.first, sum.second);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void printFasta(GenomeLoc loc, String s) {
|
||||||
|
out.println(">" + loc);
|
||||||
|
int lines = s.length() / 60;
|
||||||
|
int currentStart = 0;
|
||||||
|
for (int i=0; i < lines; i++) {
|
||||||
|
out.println(s.substring(currentStart, currentStart+60));
|
||||||
|
currentStart += 60;
|
||||||
|
}
|
||||||
|
out.println(s.substring(currentStart));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers;
|
package org.broadinstitute.sting.playground.gatk.walkers.fasta;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
|
@ -29,6 +29,7 @@ public class FastaReferenceWalker extends RefWalker<Pair<GenomeLoc, Character>,
|
||||||
sum.second = value.second.toString();
|
sum.second = value.second.toString();
|
||||||
}
|
}
|
||||||
// if the intervals don't overlap, print out the leftmost one and start a new one
|
// if the intervals don't overlap, print out the leftmost one and start a new one
|
||||||
|
// (end of contig or new interval)
|
||||||
else if ( value.first.getStart() != sum.first.getStop() + 1 ) {
|
else if ( value.first.getStart() != sum.first.getStop() + 1 ) {
|
||||||
printFasta(sum.first, sum.second);
|
printFasta(sum.first, sum.second);
|
||||||
sum.first = value.first;
|
sum.first = value.first;
|
||||||
Loading…
Reference in New Issue