Fixes for HRun annotation in case of indels:

a) In the case of a deletion, the value was completely broken: we'd report 0 or -1.
b) For indels, we now report the maximum of the forward and backward values. Empirically, I've seen many sites which are not strand-biased but which seem to be artifacts, and where the homopolymer run is always to the right only (because we left-align by convention).




git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5260 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
delangel 2011-02-17 18:57:21 +00:00
parent fb9f92d09c
commit f1d708f4d4
1 changed files with 10 additions and 6 deletions

View File

@ -27,7 +27,7 @@ public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation {
int run; int run;
if ( vc.isSNP() ) { if ( vc.isSNP() ) {
run = computeHomopolymerRun(vc.getAlternateAllele(0).getBases()[0], ref); run = computeHomopolymerRun(vc.getAlternateAllele(0).getBases()[0], ref, true);
} else if ( vc.isIndel() && ANNOTATE_INDELS ) { } else if ( vc.isIndel() && ANNOTATE_INDELS ) {
run = computeIndelHomopolymerRun(vc,ref); run = computeIndelHomopolymerRun(vc,ref);
} else { } else {
@ -45,7 +45,7 @@ public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation {
public boolean useZeroQualityReads() { return false; } public boolean useZeroQualityReads() { return false; }
private static int computeHomopolymerRun(byte altAllele, ReferenceContext ref) { private static int computeHomopolymerRun(byte altAllele, ReferenceContext ref, boolean domin) {
// TODO -- this needs to be computed in a more accurate manner // TODO -- this needs to be computed in a more accurate manner
// We currently look only at direct runs of the alternate allele adjacent to this position // We currently look only at direct runs of the alternate allele adjacent to this position
@ -70,7 +70,11 @@ public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation {
rightRun++; rightRun++;
} }
return Math.max(leftRun, rightRun); if (domin)
return Math.min(leftRun, rightRun);
else
return Math.max(leftRun, rightRun);
} }
private static int computeIndelHomopolymerRun(VariantContext vc, ReferenceContext ref) { private static int computeIndelHomopolymerRun(VariantContext vc, ReferenceContext ref) {
@ -81,13 +85,13 @@ public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation {
if ( vc.isDeletion() ) { if ( vc.isDeletion() ) {
// check that deleted bases are the same // check that deleted bases are the same
byte dBase = bases[refBasePos]; byte dBase = bases[refBasePos];
for ( int i = 0; i < vc.getAlternateAllele(0).length(); i ++ ) { for ( int i = 0; i < vc.getReference().length(); i ++ ) {
if ( bases[refBasePos+i] != dBase ) { if ( bases[refBasePos+i] != dBase ) {
return 0; return 0;
} }
} }
return computeHomopolymerRun(dBase,ref)-1; // remove the extra match from the base itself return computeHomopolymerRun(dBase, ref, false); // do max in both directions
} else { } else {
// check that inserted bases are the same // check that inserted bases are the same
byte insBase = vc.getAlternateAllele(0).getBases()[0]; byte insBase = vc.getAlternateAllele(0).getBases()[0];
@ -97,7 +101,7 @@ public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation {
} }
} }
return computeHomopolymerRun(insBase,ref); return computeHomopolymerRun(insBase,ref, false);
} }
} }
} }