Adding the Clipping rank sum test: If alternate-supporting reads have more hard clipping than reference-supporting reads this is evidence for error.
This commit is contained in:
parent
d12ec92a55
commit
05791ebf80
|
|
@ -0,0 +1,82 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: 6/28/12
|
||||
*/
|
||||
|
||||
public class ClippingRankSumTest extends RankSumTest {
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("ClippingRankSum"); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("ClippingRankSum", 1, VCFHeaderLineType.Float, "Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases")); }
|
||||
|
||||
protected void fillQualsFromPileup(byte ref, List<Byte> alts, ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
||||
for ( final PileupElement p : pileup ) {
|
||||
if ( isUsableBase(p) ) {
|
||||
if ( p.getBase() == ref ) {
|
||||
refQuals.add((double)AlignmentUtils.getNumHardClippedBases(p.getRead()));
|
||||
} else if ( alts.contains(p.getBase()) ) {
|
||||
altQuals.add((double)AlignmentUtils.getNumHardClippedBases(p.getRead()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final int refLoc, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, final List<Double> altQuals) {
|
||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : stratifiedContext.entrySet() ) {
|
||||
final boolean matchesRef = ref.equals(alleleBin.getKey());
|
||||
final boolean matchesAlt = alts.contains(alleleBin.getKey());
|
||||
if ( !matchesRef && !matchesAlt )
|
||||
continue;
|
||||
|
||||
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
|
||||
if ( matchesRef )
|
||||
refQuals.add((double)AlignmentUtils.getNumHardClippedBases(read));
|
||||
else
|
||||
altQuals.add((double)AlignmentUtils.getNumHardClippedBases(read));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
||||
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ?
|
||||
HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
||||
for (final PileupElement p: pileup) {
|
||||
if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() != 0 && p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE) {
|
||||
// retrieve likelihood information corresponding to this read
|
||||
LinkedHashMap<Allele,Double> el = indelLikelihoodMap.get(p);
|
||||
// by design, first element in LinkedHashMap was ref allele
|
||||
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
|
||||
|
||||
for (Allele a : el.keySet()) {
|
||||
|
||||
if (a.isReference())
|
||||
refLikelihood =el.get(a);
|
||||
else {
|
||||
double like = el.get(a);
|
||||
if (like >= altLikelihood)
|
||||
altLikelihood = like;
|
||||
}
|
||||
}
|
||||
if (refLikelihood > altLikelihood + INDEL_LIKELIHOOD_THRESH)
|
||||
refQuals.add((double)AlignmentUtils.getNumHardClippedBases(p.getRead()));
|
||||
else if (altLikelihood > refLikelihood + INDEL_LIKELIHOOD_THRESH)
|
||||
altQuals.add((double)AlignmentUtils.getNumHardClippedBases(p.getRead()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -354,6 +354,18 @@ public class AlignmentUtils {
|
|||
return n;
|
||||
}
|
||||
|
||||
public static int getNumHardClippedBases(final SAMRecord r) {
|
||||
int n = 0;
|
||||
final Cigar cigar = r.getCigar();
|
||||
if (cigar == null) return 0;
|
||||
|
||||
for (final CigarElement e : cigar.getCigarElements())
|
||||
if (e.getOperator() == CigarOperator.H)
|
||||
n += e.getLength();
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
public static byte[] alignmentToByteArray(final Cigar cigar, final byte[] read, final byte[] ref) {
|
||||
|
||||
final byte[] alignment = new byte[read.length];
|
||||
|
|
|
|||
Loading…
Reference in New Issue