diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java new file mode 100644 index 000000000..aeb01bf3e --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java @@ -0,0 +1,82 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.variantcontext.Allele; + +import java.util.*; + +/** + * Created with IntelliJ IDEA. + * User: rpoplin + * Date: 6/28/12 + */ + +public class ClippingRankSumTest extends RankSumTest { + + public List getKeyNames() { return Arrays.asList("ClippingRankSum"); } + + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("ClippingRankSum", 1, VCFHeaderLineType.Float, "Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases")); } + + protected void fillQualsFromPileup(byte ref, List alts, ReadBackedPileup pileup, List refQuals, List altQuals) { + for ( final PileupElement p : pileup ) { + if ( isUsableBase(p) ) { + if ( p.getBase() == ref ) { + refQuals.add((double)AlignmentUtils.getNumHardClippedBases(p.getRead())); + } else if ( alts.contains(p.getBase()) ) { + altQuals.add((double)AlignmentUtils.getNumHardClippedBases(p.getRead())); + } + } + } + } + + protected void fillQualsFromPileup(final Allele ref, final List alts, final int refLoc, final Map> stratifiedContext, final List refQuals, final List altQuals) { + for ( final Map.Entry> alleleBin : stratifiedContext.entrySet() ) { + final boolean matchesRef = ref.equals(alleleBin.getKey()); + final boolean matchesAlt = alts.contains(alleleBin.getKey()); + if ( !matchesRef && !matchesAlt ) + continue; + + for ( final GATKSAMRecord read : alleleBin.getValue() ) { + if ( matchesRef ) + refQuals.add((double)AlignmentUtils.getNumHardClippedBases(read)); + else + altQuals.add((double)AlignmentUtils.getNumHardClippedBases(read)); + } + } + } + + protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List refQuals, List altQuals) { + // equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ? + HashMap> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(); + for (final PileupElement p: pileup) { + if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() != 0 && p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE) { + // retrieve likelihood information corresponding to this read + LinkedHashMap el = indelLikelihoodMap.get(p); + // by design, first element in LinkedHashMap was ref allele + double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY; + + for (Allele a : el.keySet()) { + + if (a.isReference()) + refLikelihood =el.get(a); + else { + double like = el.get(a); + if (like >= altLikelihood) + altLikelihood = like; + } + } + if (refLikelihood > altLikelihood + INDEL_LIKELIHOOD_THRESH) + refQuals.add((double)AlignmentUtils.getNumHardClippedBases(p.getRead())); + else if (altLikelihood > refLikelihood + INDEL_LIKELIHOOD_THRESH) + altQuals.add((double)AlignmentUtils.getNumHardClippedBases(p.getRead())); + } + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index cecbd1bc4..e5e747c2d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -354,6 +354,18 @@ public class AlignmentUtils { return n; } + public static int getNumHardClippedBases(final SAMRecord r) { + int n = 0; + final Cigar cigar = r.getCigar(); + if (cigar == null) return 0; + + for (final CigarElement e : cigar.getCigarElements()) + if (e.getOperator() == CigarOperator.H) + n += e.getLength(); + + return n; + } + public static byte[] alignmentToByteArray(final Cigar cigar, final byte[] read, final byte[] ref) { final byte[] alignment = new byte[read.length];