From ebd0fabf86bb2c18cc9a17e913bb9898f105118a Mon Sep 17 00:00:00 2001 From: chartl Date: Mon, 17 May 2010 21:02:13 +0000 Subject: [PATCH] First pass updates to annotations to work with indels. HomopolymerRun indel behavior is currently turned off by a global boolean until it's ready to go live. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3368 348d0f76-0448-11de-a6fe-93d51630548a --- .../annotator/DepthPerAlleleBySample.java | 51 +++++++++++++++++-- .../walkers/annotator/HomopolymerRun.java | 42 ++++++++++++++- 2 files changed, 88 insertions(+), 5 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 6c9519554..f3fef4497 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -5,8 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.*; import java.util.*; @@ -15,9 +14,20 @@ public class DepthPerAlleleBySample implements GenotypeAnnotation, ExperimentalA public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, StratifiedAlignmentContext stratifiedContext, VariantContext vc, Genotype g) { // for now, we don't support indels - if ( g == null || !g.isCalled() || vc.getType() != VariantContext.Type.SNP ) + if ( g == null || !g.isCalled() ) return null; + if ( vc.isSNP() ) { + return annotateSNP(tracker,ref,stratifiedContext,vc,g); + } else if ( vc.isIndel() ) { + return annotateIndel(tracker,ref,stratifiedContext,vc,g); + } else { + return null; + } + } + + public Map annotateSNP(RefMetaDataTracker tracker, ReferenceContext ref, StratifiedAlignmentContext stratifiedContext, VariantContext vc, Genotype g) { + Set altAlleles = vc.getAlternateAlleles(); if ( altAlleles.size() == 0 ) return null; @@ -47,6 +57,41 @@ public class DepthPerAlleleBySample implements GenotypeAnnotation, ExperimentalA return map; } + public Map annotateIndel(RefMetaDataTracker tracker, ReferenceContext ref, StratifiedAlignmentContext stratifiedContext, VariantContext vc, Genotype g) { + ReadBackedExtendedEventPileup pileup = stratifiedContext.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getExtendedEventPileup(); + if ( pileup == null ) { + return null; + } + // get identities and lengths for indel events + // TODO -- Insertions and deletions represented at the same locus + HashMap countsBySize = new HashMap(); + for ( Allele al : vc.getAlternateAlleles() ) { + countsBySize.put(al.length(),0); + } + + for ( ExtendedEventPileupElement e : pileup ) { + if ( countsBySize.keySet().contains(e.getEventLength()) ) { // if proper length + if ( e.isDeletion() && vc.isDeletion() || e.isInsertion() && vc.isInsertion() ) { + countsBySize.put(e.getEventLength(),countsBySize.get(e.getEventLength())+1); + } + } + } + + StringBuffer sb = new StringBuffer(); + char type = vc.isDeletion() ? 'D' : 'I'; + + for ( int len : countsBySize.keySet() ) { + if ( sb.length() > 0 ) { + sb.append(','); + } + sb.append(String.format("%d%s%d",len,type,countsBySize.get(len))); + } + + Map map = new HashMap(); + map.put(getKeyName(),sb.toString()); + return map; + } + public String getKeyName() { return "AD"; } public VCFFormatHeaderLine getDescription() { return new VCFFormatHeaderLine(getKeyName(), 1, VCFFormatHeaderLine.FORMAT_TYPE.Integer, "Depth in genotypes for each ALT allele, in the same order as listed"); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index 3d1a75cc8..508fb2a57 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -15,12 +15,22 @@ import java.util.HashMap; public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation { + private boolean ANNOTATE_INDELS = false; + public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { - if ( !vc.isBiallelic() || !vc.isSNP() ) + if ( !vc.isBiallelic() ) return null; - int run = computeHomopolymerRun(vc.getAlternateAllele(0).toString().charAt(0), ref); + int run; + if ( vc.isSNP() ) { + run = computeHomopolymerRun(vc.getAlternateAllele(0).toString().charAt(0), ref); + } else if ( vc.isIndel() && ANNOTATE_INDELS ) { + run = computeIndelHomopolymerRun(vc,ref); + } else { + return null; + } + Map map = new HashMap(); map.put(getKeyName(), String.format("%d", run)); return map; @@ -59,4 +69,32 @@ public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation { return Math.max(leftRun, rightRun); } + + private static int computeIndelHomopolymerRun(VariantContext vc, ReferenceContext ref) { + char[] bases = ref.getBases(); + GenomeLoc locus = ref.getLocus(); + GenomeLoc window = ref.getWindow(); + int refBasePos = (int) (locus.getStart() - window.getStart())+1; + if ( vc.isDeletion() ) { + // check that deleted bases are the same + char dBase = bases[refBasePos]; + for ( int i = 0; i < vc.getAlternateAllele(0).length(); i ++ ) { + if ( bases[refBasePos+i] != dBase ) { + return 0; + } + } + + return computeHomopolymerRun(dBase,ref); + } else { + // check that inserted bases are the same + char insBase = (char) vc.getAlternateAllele(0).getBases()[0]; + for ( byte b : vc.getAlternateAllele(0).getBases() ) { + if ( insBase != (char) b ) { + return 0; + } + } + + return computeHomopolymerRun(insBase,ref); + } + } } \ No newline at end of file