First pass updates to annotations to work with indels. HomopolymerRun indel behavior is currently turned off by a global boolean until it's ready to go live.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3368 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-05-17 21:02:13 +00:00
parent 0791beab8f
commit ebd0fabf86
2 changed files with 88 additions and 5 deletions

View File

@ -5,8 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.*;
import java.util.*;
@ -15,9 +14,20 @@ public class DepthPerAlleleBySample implements GenotypeAnnotation, ExperimentalA
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, StratifiedAlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
// for now, we don't support indels
if ( g == null || !g.isCalled() || vc.getType() != VariantContext.Type.SNP )
if ( g == null || !g.isCalled() )
return null;
if ( vc.isSNP() ) {
return annotateSNP(tracker,ref,stratifiedContext,vc,g);
} else if ( vc.isIndel() ) {
return annotateIndel(tracker,ref,stratifiedContext,vc,g);
} else {
return null;
}
}
public Map<String,Object> annotateSNP(RefMetaDataTracker tracker, ReferenceContext ref, StratifiedAlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
Set<Allele> altAlleles = vc.getAlternateAlleles();
if ( altAlleles.size() == 0 )
return null;
@ -47,6 +57,41 @@ public class DepthPerAlleleBySample implements GenotypeAnnotation, ExperimentalA
return map;
}
public Map<String,Object> annotateIndel(RefMetaDataTracker tracker, ReferenceContext ref, StratifiedAlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
ReadBackedExtendedEventPileup pileup = stratifiedContext.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getExtendedEventPileup();
if ( pileup == null ) {
return null;
}
// get identities and lengths for indel events
// TODO -- Insertions and deletions represented at the same locus
HashMap<Integer,Integer> countsBySize = new HashMap<Integer,Integer>();
for ( Allele al : vc.getAlternateAlleles() ) {
countsBySize.put(al.length(),0);
}
for ( ExtendedEventPileupElement e : pileup ) {
if ( countsBySize.keySet().contains(e.getEventLength()) ) { // if proper length
if ( e.isDeletion() && vc.isDeletion() || e.isInsertion() && vc.isInsertion() ) {
countsBySize.put(e.getEventLength(),countsBySize.get(e.getEventLength())+1);
}
}
}
StringBuffer sb = new StringBuffer();
char type = vc.isDeletion() ? 'D' : 'I';
for ( int len : countsBySize.keySet() ) {
if ( sb.length() > 0 ) {
sb.append(',');
}
sb.append(String.format("%d%s%d",len,type,countsBySize.get(len)));
}
Map<String,Object> map = new HashMap<String,Object>();
map.put(getKeyName(),sb.toString());
return map;
}
public String getKeyName() { return "AD"; }
public VCFFormatHeaderLine getDescription() { return new VCFFormatHeaderLine(getKeyName(), 1, VCFFormatHeaderLine.FORMAT_TYPE.Integer, "Depth in genotypes for each ALT allele, in the same order as listed"); }

View File

@ -15,12 +15,22 @@ import java.util.HashMap;
public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation {
private boolean ANNOTATE_INDELS = false;
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
if ( !vc.isBiallelic() || !vc.isSNP() )
if ( !vc.isBiallelic() )
return null;
int run = computeHomopolymerRun(vc.getAlternateAllele(0).toString().charAt(0), ref);
int run;
if ( vc.isSNP() ) {
run = computeHomopolymerRun(vc.getAlternateAllele(0).toString().charAt(0), ref);
} else if ( vc.isIndel() && ANNOTATE_INDELS ) {
run = computeIndelHomopolymerRun(vc,ref);
} else {
return null;
}
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyName(), String.format("%d", run));
return map;
@ -59,4 +69,32 @@ public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation {
return Math.max(leftRun, rightRun);
}
private static int computeIndelHomopolymerRun(VariantContext vc, ReferenceContext ref) {
char[] bases = ref.getBases();
GenomeLoc locus = ref.getLocus();
GenomeLoc window = ref.getWindow();
int refBasePos = (int) (locus.getStart() - window.getStart())+1;
if ( vc.isDeletion() ) {
// check that deleted bases are the same
char dBase = bases[refBasePos];
for ( int i = 0; i < vc.getAlternateAllele(0).length(); i ++ ) {
if ( bases[refBasePos+i] != dBase ) {
return 0;
}
}
return computeHomopolymerRun(dBase,ref);
} else {
// check that inserted bases are the same
char insBase = (char) vc.getAlternateAllele(0).getBases()[0];
for ( byte b : vc.getAlternateAllele(0).getBases() ) {
if ( insBase != (char) b ) {
return 0;
}
}
return computeHomopolymerRun(insBase,ref);
}
}
}