First pass updates to annotations to work with indels. HomopolymerRun indel behavior is currently turned off by a global boolean until it's ready to go live.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3368 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0791beab8f
commit
ebd0fabf86
|
|
@ -5,8 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.*;
|
|||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -15,9 +14,20 @@ public class DepthPerAlleleBySample implements GenotypeAnnotation, ExperimentalA
|
|||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, StratifiedAlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||
// for now, we don't support indels
|
||||
if ( g == null || !g.isCalled() || vc.getType() != VariantContext.Type.SNP )
|
||||
if ( g == null || !g.isCalled() )
|
||||
return null;
|
||||
|
||||
if ( vc.isSNP() ) {
|
||||
return annotateSNP(tracker,ref,stratifiedContext,vc,g);
|
||||
} else if ( vc.isIndel() ) {
|
||||
return annotateIndel(tracker,ref,stratifiedContext,vc,g);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public Map<String,Object> annotateSNP(RefMetaDataTracker tracker, ReferenceContext ref, StratifiedAlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||
|
||||
Set<Allele> altAlleles = vc.getAlternateAlleles();
|
||||
if ( altAlleles.size() == 0 )
|
||||
return null;
|
||||
|
|
@ -47,6 +57,41 @@ public class DepthPerAlleleBySample implements GenotypeAnnotation, ExperimentalA
|
|||
return map;
|
||||
}
|
||||
|
||||
public Map<String,Object> annotateIndel(RefMetaDataTracker tracker, ReferenceContext ref, StratifiedAlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||
ReadBackedExtendedEventPileup pileup = stratifiedContext.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getExtendedEventPileup();
|
||||
if ( pileup == null ) {
|
||||
return null;
|
||||
}
|
||||
// get identities and lengths for indel events
|
||||
// TODO -- Insertions and deletions represented at the same locus
|
||||
HashMap<Integer,Integer> countsBySize = new HashMap<Integer,Integer>();
|
||||
for ( Allele al : vc.getAlternateAlleles() ) {
|
||||
countsBySize.put(al.length(),0);
|
||||
}
|
||||
|
||||
for ( ExtendedEventPileupElement e : pileup ) {
|
||||
if ( countsBySize.keySet().contains(e.getEventLength()) ) { // if proper length
|
||||
if ( e.isDeletion() && vc.isDeletion() || e.isInsertion() && vc.isInsertion() ) {
|
||||
countsBySize.put(e.getEventLength(),countsBySize.get(e.getEventLength())+1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
StringBuffer sb = new StringBuffer();
|
||||
char type = vc.isDeletion() ? 'D' : 'I';
|
||||
|
||||
for ( int len : countsBySize.keySet() ) {
|
||||
if ( sb.length() > 0 ) {
|
||||
sb.append(',');
|
||||
}
|
||||
sb.append(String.format("%d%s%d",len,type,countsBySize.get(len)));
|
||||
}
|
||||
|
||||
Map<String,Object> map = new HashMap<String,Object>();
|
||||
map.put(getKeyName(),sb.toString());
|
||||
return map;
|
||||
}
|
||||
|
||||
public String getKeyName() { return "AD"; }
|
||||
|
||||
public VCFFormatHeaderLine getDescription() { return new VCFFormatHeaderLine(getKeyName(), 1, VCFFormatHeaderLine.FORMAT_TYPE.Integer, "Depth in genotypes for each ALT allele, in the same order as listed"); }
|
||||
|
|
|
|||
|
|
@ -15,12 +15,22 @@ import java.util.HashMap;
|
|||
|
||||
public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation {
|
||||
|
||||
private boolean ANNOTATE_INDELS = false;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
|
||||
if ( !vc.isBiallelic() || !vc.isSNP() )
|
||||
if ( !vc.isBiallelic() )
|
||||
return null;
|
||||
|
||||
int run = computeHomopolymerRun(vc.getAlternateAllele(0).toString().charAt(0), ref);
|
||||
int run;
|
||||
if ( vc.isSNP() ) {
|
||||
run = computeHomopolymerRun(vc.getAlternateAllele(0).toString().charAt(0), ref);
|
||||
} else if ( vc.isIndel() && ANNOTATE_INDELS ) {
|
||||
run = computeIndelHomopolymerRun(vc,ref);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyName(), String.format("%d", run));
|
||||
return map;
|
||||
|
|
@ -59,4 +69,32 @@ public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation {
|
|||
|
||||
return Math.max(leftRun, rightRun);
|
||||
}
|
||||
|
||||
private static int computeIndelHomopolymerRun(VariantContext vc, ReferenceContext ref) {
|
||||
char[] bases = ref.getBases();
|
||||
GenomeLoc locus = ref.getLocus();
|
||||
GenomeLoc window = ref.getWindow();
|
||||
int refBasePos = (int) (locus.getStart() - window.getStart())+1;
|
||||
if ( vc.isDeletion() ) {
|
||||
// check that deleted bases are the same
|
||||
char dBase = bases[refBasePos];
|
||||
for ( int i = 0; i < vc.getAlternateAllele(0).length(); i ++ ) {
|
||||
if ( bases[refBasePos+i] != dBase ) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return computeHomopolymerRun(dBase,ref);
|
||||
} else {
|
||||
// check that inserted bases are the same
|
||||
char insBase = (char) vc.getAlternateAllele(0).getBases()[0];
|
||||
for ( byte b : vc.getAlternateAllele(0).getBases() ) {
|
||||
if ( insBase != (char) b ) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return computeHomopolymerRun(insBase,ref);
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue