- first pass at a basic indel filter (for now, based on size and homopolymer runs)
- fix simple indel ROD printout

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1431 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
78439f7305
commit
b2a18a9d61
|
|
@ -66,8 +66,7 @@ public class SimpleIndelROD extends TabularROD implements Genotype, AllelicVaria
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuffer sb = new StringBuffer();
|
StringBuffer sb = new StringBuffer();
|
||||||
sb.append(getLocation().getContig() + "\t" + getLocation().getStart() + "\t");
|
sb.append(getLocation().getContig() + "\t" + getLocation().getStart() + "\t");
|
||||||
String indel = getFWDAlleles().get(0);
|
sb.append(length() + "\t" + (isInsertion() ? "I" : "D") + "\t" + getFWDAlleles().get(0));
|
||||||
sb.append((indel.length()-1) + "\t" + (isInsertion() ? "I" : "D") + "\t" + indel.substring(1));
|
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,82 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.indels;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* filter an indel callset based on given criteria
|
||||||
|
*/
|
||||||
|
@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="indels",type=AllelicVariant.class)})
|
||||||
|
@Reference(window=@Window(start=-20,stop=20))
|
||||||
|
public class IndelFilterWalker extends RefWalker<Integer, Integer> {
|
||||||
|
@Argument(fullName="homopolymerRunMax", shortName="homopolMax", doc="filter indels within homopolymer runs greater than the given length (max 20)", required=false)
|
||||||
|
Integer HOMOPOLYMER_MAX = 20;
|
||||||
|
@Argument(fullName="homopolymerRunMin", shortName="homopolMin", doc="filter indels within homopolymer runs less than the given length", required=false)
|
||||||
|
Integer HOMOPOLYMER_MIN = 0;
|
||||||
|
@Argument(fullName="sizeMax", shortName="sizeMax", doc="filter indels greater than a certain size", required=false)
|
||||||
|
Integer SIZE_MAX = 100;
|
||||||
|
@Argument(fullName="sizeMin", shortName="sizeMin", doc="filter indels less than a certain size", required=false)
|
||||||
|
Integer SIZE_MIN = 0;
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
if ( HOMOPOLYMER_MAX > 20 )
|
||||||
|
HOMOPOLYMER_MAX = 20;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduceInit() { return 0; }
|
||||||
|
|
||||||
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
AllelicVariant indel = (AllelicVariant)tracker.lookup("indels", null);
|
||||||
|
|
||||||
|
if ( indel == null || !indel.isIndel() )
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if ( indel.length() < SIZE_MIN || indel.length() > SIZE_MAX )
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
int homopol = homopolymerRunSize(ref, indel);
|
||||||
|
if ( homopol < HOMOPOLYMER_MIN || homopol > HOMOPOLYMER_MAX )
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
out.println(indel);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
|
return sum + value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void onTraversalDone(Integer result) {
|
||||||
|
out.printf("output %d indels.\n", result);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int homopolymerRunSize(ReferenceContext ref, AllelicVariant indel) {
|
||||||
|
char[] bases = ref.getBases();
|
||||||
|
GenomeLoc window = ref.getWindow();
|
||||||
|
GenomeLoc locus = ref.getLocus();
|
||||||
|
|
||||||
|
int refBasePos = (int)(locus.getStart() - window.getStart());
|
||||||
|
char indelBase = indel.isDeletion() ? bases[refBasePos+1] : indel.getAltBasesFWD().charAt(0);
|
||||||
|
int leftRun = 0;
|
||||||
|
for ( int i = refBasePos; i >= 0; i--) {
|
||||||
|
if ( bases[i] != indelBase )
|
||||||
|
break;
|
||||||
|
leftRun++;
|
||||||
|
}
|
||||||
|
|
||||||
|
indelBase = indel.isDeletion() ? bases[refBasePos+indel.length()] : indel.getAltBasesFWD().charAt(indel.getAltBasesFWD().length()-1);
|
||||||
|
int rightRun = 0;
|
||||||
|
for ( int i = refBasePos + (indel.isDeletion() ? 1+indel.length() : 1); i < bases.length; i++) {
|
||||||
|
if ( bases[i] != indelBase )
|
||||||
|
break;
|
||||||
|
rightRun++;
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.println(String.valueOf(bases) + ": " + leftRun + " / " + rightRun);
|
||||||
|
return Math.max(leftRun, rightRun);
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue