Second step of interface cleanup for the variant annotator: several bug fixes; stop hashing pileup elements into Maps, because PileupElement does not implement hashCode() and strange things can happen when it is used as a map key. Several things still to do, not done yet.

This commit is contained in:
Guillermo del Angel 2012-08-19 21:18:18 -04:00
parent d9641e3d57
commit 963ad03f8b
13 changed files with 90 additions and 78 deletions
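
The hashCode() pitfall called out in the commit message is easy to reproduce outside GATK. Below is a minimal, self-contained sketch (a hypothetical Element class, not the real PileupElement): when equals() is overridden but hashCode() is not, Java falls back to identity hashing, so a logically equal key silently misses in a HashMap. Keying the likelihood maps on GATKSAMRecord presumably sidesteps this, since reads are looked up by the same object instance that was inserted.

import java.util.HashMap;
import java.util.Map;

// Hypothetical stand-in for PileupElement: equals() is value-based,
// but hashCode() is NOT overridden, so Object's identity hash is used.
class Element {
    final String readName;
    final int offset;

    Element(String readName, int offset) {
        this.readName = readName;
        this.offset = offset;
    }

    @Override
    public boolean equals(Object o) {
        if (!(o instanceof Element)) return false;
        final Element e = (Element) o;
        return readName.equals(e.readName) && offset == e.offset;
    }
    // hashCode() deliberately missing -- mirrors the bug described above.
}

public class HashCodeDemo {
    public static void main(String[] args) {
        final Map<Element, Double> likelihoods = new HashMap<Element, Double>();
        likelihoods.put(new Element("read1", 42), -1.5);

        // Logically the same key, but it lands in a different hash bucket:
        // the lookup "strangely" fails even though equals() would return true.
        System.out.println(likelihoods.get(new Element("read1", 42))); // typically prints null
    }
}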

View File

@@ -53,13 +53,14 @@ public class ErrorModel {
PairHMMIndelErrorModel pairModel = null;
LinkedHashMap<Allele, Haplotype> haplotypeMap = null;
- HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap = null;
double[][] perReadLikelihoods = null;
double[] model = new double[maxQualityScore+1];
Arrays.fill(model,Double.NEGATIVE_INFINITY);
boolean hasCalledAlleles = false;
+ final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap();
if (refSampleVC != null) {
for (Allele allele : refSampleVC.getAlleles()) {
@@ -72,7 +73,6 @@ public class ErrorModel {
if (refSampleVC.isIndel()) {
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
- indelLikelihoodMap = new HashMap<PileupElement, LinkedHashMap<Allele, Double>>();
IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(refSampleVC.getAlleles(), refContext, refContext.getLocus(), haplotypeMap); // will update haplotypeMap adding elements
}
}
@@ -92,12 +92,12 @@ public class ErrorModel {
Allele refAllele = refSampleVC.getReference();
- if (refSampleVC.isIndel()) {
+ if ( refSampleVC.isIndel()) {
final int readCounts[] = new int[refSamplePileup.getNumberOfElements()];
//perReadLikelihoods = new double[readCounts.length][refSampleVC.getAlleles().size()];
final int eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(refSampleVC.getAlleles());
if (!haplotypeMap.isEmpty())
- perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts);
+ perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, perReadAlleleLikelihoodMap, readCounts);
}
int idx = 0;
for (PileupElement refPileupElement : refSamplePileup) {

View File

@@ -49,7 +49,8 @@ public class GeneralPloidySNPGenotypeLikelihoodsCalculationModel extends General
final HashMap<String, ErrorModel> perLaneErrorModels,
final boolean useBQAedPileup,
final ReferenceContext ref,
- final boolean ignoreLaneInformation) {
+ final boolean ignoreLaneInformation,
+ final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap){
return new GeneralPloidySNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
}

View File

@@ -32,7 +32,7 @@ public class BaseQualityRankSumTest extends RankSumTest implements StandardAnnot
// use fast SNP-based version if we don't have per-read allele likelihoods
for ( final PileupElement p : pileup ) {
if ( isUsableBase(p) ) {
- if ( allAlleles.get(0).equals(Allele.create(p.getBase())) ) {
+ if ( allAlleles.get(0).equals(Allele.create(p.getBase(),true)) ) {
refQuals.add((double)p.getQual());
} else if ( allAlleles.contains(Allele.create(p.getBase()))) {
altQuals.add((double)p.getQual());
@@ -42,17 +42,14 @@ public class BaseQualityRankSumTest extends RankSumTest implements StandardAnnot
return;
}
- for (Map.Entry<PileupElement,Map<Allele,Double>> el : alleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
- if (!isUsableBase(el.getKey()))
- continue;
- final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
+ for (Map<Allele,Double> el : alleleLikelihoodMap.getLikelihoodMapValues()) {
+ final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el);
if (a.isNoCall())
continue; // read is non-informative
if (a.isReference())
- refQuals.add(-10.0*(double)el.getValue().get(a));
+ refQuals.add(-10.0*(double)el.get(a));
else if (allAlleles.contains(a))
- altQuals.add(-10.0*(double)el.getValue().get(a));
+ altQuals.add(-10.0*(double)el.get(a));
}
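
The -10.0 scaling above suggests the stored values are log10 likelihoods. For orientation, getMostLikelyAllele can be pictured as an argmax with an informativeness margin; the following is only a guess at its contract (the real implementation lives in PerReadAlleleLikelihoodMap and may use a different threshold than the INDEL_LIKELIHOOD_THRESH constant visible further down in this commit):

// Hedged sketch, not the actual GATK implementation. Assumes log10-scaled
// likelihoods; returns NO_CALL when no allele clearly beats the runner-up.
static Allele getMostLikelyAlleleSketch(final Map<Allele, Double> alleleMap) {
    double best = Double.NEGATIVE_INFINITY, secondBest = Double.NEGATIVE_INFINITY;
    Allele bestAllele = Allele.NO_CALL;
    for (final Map.Entry<Allele, Double> entry : alleleMap.entrySet()) {
        if (entry.getValue() > best) {
            secondBest = best;
            best = entry.getValue();
            bestAllele = entry.getKey();
        } else if (entry.getValue() > secondBest) {
            secondBest = entry.getValue();
        }
    }
    // A read is informative only if the best allele wins by a margin.
    return (best - secondBest > INDEL_LIKELIHOOD_THRESH) ? bestAllele : Allele.NO_CALL;
}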

View File

@@ -31,18 +31,18 @@ public class ClippingRankSumTest extends RankSumTest {
final ReadBackedPileup pileup,
final PerReadAlleleLikelihoodMap likelihoodMap, final List<Double> refQuals, final List<Double> altQuals) {
// todo - only support non-pileup case for now, e.g. active-region based version
- if (pileup != null)
+ if (pileup != null || likelihoodMap == null)
return;
- for (Map.Entry<PileupElement,Map<Allele,Double>> el : likelihoodMap.getLikelihoodReadMap().entrySet()) {
+ for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : likelihoodMap.getLikelihoodReadMap().entrySet()) {
final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
if (a.isNoCall())
continue; // read is non-informative
if (a.isReference())
- refQuals.add((double)AlignmentUtils.getNumHardClippedBases(el.getKey().getRead()));
+ refQuals.add((double)AlignmentUtils.getNumHardClippedBases(el.getKey()));
else if (allAlleles.contains(a))
- altQuals.add((double)AlignmentUtils.getNumHardClippedBases(el.getKey().getRead()));
+ altQuals.add((double)AlignmentUtils.getNumHardClippedBases(el.getKey()));
}
}
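
AlignmentUtils.getNumHardClippedBases(read) is presumably a CIGAR walk; a sketch of what it is assumed to compute, written against the samtools API of this era (package names per the 2012-vintage library):

import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;

// Sketch: sum the lengths of all hard-clip (H) CIGAR elements on the read.
static int numHardClippedBases(final SAMRecord read) {
    final Cigar cigar = read.getCigar();
    if (cigar == null)
        return 0;
    int clipped = 0;
    for (final CigarElement element : cigar.getCigarElements()) {
        if (element.getOperator() == CigarOperator.HARD_CLIP)
            clipped += element.getLength();
    }
    return clipped;
}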

View File

@@ -59,7 +59,7 @@ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnno
return null;
for ( Map.Entry<String, PerReadAlleleLikelihoodMap> sample : perReadAlleleLikelihoodMap.entrySet() )
- depth += sample.getValue().getLikelihoodReadMap().size();
+ depth += sample.getValue().getNumberOfStoredElements();
}
else
return null;

View File

@@ -66,12 +66,13 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
int[][] table;
- if (stratifiedPerReadAlleleLikelihoodMap != null) {
- table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount());
- }
- else if (vc.isSNP() && stratifiedContexts != null) {
+ if (vc.isSNP() && stratifiedContexts != null) {
table = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount());
}
+ else if (stratifiedPerReadAlleleLikelihoodMap != null) {
+ // either SNP with no alignment context, or indels: per-read likelihood map needed
+ table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount());
+ }
else
// for non-snp variants, we need per-read likelihoods.
// for snps, we can get same result from simple pileup
@@ -216,14 +217,16 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
int[][] table = new int[2][2];
for (PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
- for (Map.Entry<PileupElement,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
- final boolean matchesRef = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()).equals(ref);
- final boolean matchesAlt = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()).equals(alt);
+ for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
+ if ( el.getKey().isReducedRead() ) // ignore reduced reads
+ continue;
+ final boolean matchesRef = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()).equals(ref,true);
+ final boolean matchesAlt = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()).equals(alt,true);
if ( !matchesRef && !matchesAlt )
continue;
- boolean isFW = el.getKey().getRead().getReadNegativeStrandFlag();
+ boolean isFW = el.getKey().getReadNegativeStrandFlag();
int row = matchesRef ? 0 : 1;
int column = isFW ? 0 : 1;
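
For reference, the 2x2 table filled here splits reads by best-supported allele (rows) and by strand (columns, via getReadNegativeStrandFlag()); Fisher's exact test on the finished table yields the strand-bias p-value. A sketch of the tally step in isolation, matching the row/column logic above:

// Sketch of the table the loop above fills:
//   row 0 = read best supports ref, row 1 = read best supports alt;
//   columns split the reads by strand orientation.
static void tally(final int[][] table, final boolean matchesRef, final boolean isFW) {
    final int row = matchesRef ? 0 : 1;
    final int column = isFW ? 0 : 1;
    table[row][column]++;
}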

View File

@@ -359,13 +359,13 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
if (perReadAlleleLikelihoodMap.isEmpty())
return null;
- for (Map.Entry<PileupElement,Map<Allele,Double>> el : perReadAlleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
+ for (Map<Allele,Double> el : perReadAlleleLikelihoodMap.getLikelihoodMapValues()) {
// retrieve likelihood information corresponding to this read
// Score all the reads in the pileup, even the filtered ones
- final double[] scores = new double[el.getValue().size()];
+ final double[] scores = new double[el.size()];
int i = 0;
- for (Map.Entry<Allele, Double> a : el.getValue().entrySet()) {
+ for (Map.Entry<Allele, Double> a : el.entrySet()) {
scores[i++] = -a.getValue();
if (DEBUG) {
System.out.printf(" vs. haplotype %d = %f%n", i - 1, scores[i - 1]);

View File

@@ -35,7 +35,7 @@ public class MappingQualityRankSumTest extends RankSumTest implements StandardAn
// no per-read likelihoods available:
for ( final PileupElement p : pileup ) {
if ( isUsableBase(p) ) {
- if ( allAlleles.get(0).equals(Allele.create(p.getBase())) ) {
+ if ( allAlleles.get(0).equals(Allele.create(p.getBase(), true)) ) {
refQuals.add((double)p.getMappingQual());
} else if ( allAlleles.contains(Allele.create(p.getBase()))) {
altQuals.add((double)p.getMappingQual());
@@ -44,17 +44,14 @@ public class MappingQualityRankSumTest extends RankSumTest implements StandardAn
}
return;
}
- for (Map.Entry<PileupElement,Map<Allele,Double>> el : likelihoodMap.getLikelihoodReadMap().entrySet()) {
- if (!isUsableBase(el.getKey()))
- continue;
+ for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : likelihoodMap.getLikelihoodReadMap().entrySet()) {
final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
if (a.isNoCall())
continue; // read is non-informative
if (a.isReference())
- refQuals.add((double)el.getKey().getMappingQual());
+ refQuals.add((double)el.getKey().getMappingQuality());
else if (allAlleles.contains(a))
- altQuals.add((double)el.getKey().getMappingQual());
+ altQuals.add((double)el.getKey().getMappingQuality());
}

View File

@@ -42,11 +42,6 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
if ( genotypes == null || genotypes.size() == 0 )
return null;
- if (stratifiedContexts != null && stratifiedContexts.size() == 0)
- return null;
- if (perReadAlleleLikelihoodMap != null && perReadAlleleLikelihoodMap.size() == 0)
- return null;
int depth = 0;
for ( final Genotype genotype : genotypes ) {
@@ -67,7 +62,7 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
if (perReadAlleleLikelihoods == null || perReadAlleleLikelihoods.isEmpty())
continue;
- depth += perReadAlleleLikelihoods.getLikelihoodReadMap().size();
+ depth += perReadAlleleLikelihoods.getNumberOfStoredElements();
}
}
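
Once the informative depth is accumulated, QualByDepth reduces to dividing the site quality by it. A hedged sketch of that final step, assuming QUAL is recovered from the VariantContext's log10 error probability (the real method has additional special cases not shown here):

// Sketch: QD = phred-scaled site quality / informative depth.
static Double qualByDepth(final VariantContext vc, final int depth) {
    if (depth == 0)
        return null;
    final double qual = -10.0 * vc.getLog10PError(); // phred-scaled QUAL
    return qual / depth;
}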

View File

@@ -47,7 +47,7 @@ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAn
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
AlignmentContext context = sample.getValue();
for (PileupElement p : context.getBasePileup() )
- index = fillMappingQualitiesFromPileupAndUpdateIndex(p, index, qualities);
+ index = fillMappingQualitiesFromPileupAndUpdateIndex(p.getRead(), index, qualities);
}
}
else if (perReadAlleleLikelihoodMap != null) {
else if (perReadAlleleLikelihoodMap != null) {
@@ -59,8 +59,8 @@ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAn
qualities = new int[totalSize];
for ( PerReadAlleleLikelihoodMap perReadLikelihoods : perReadAlleleLikelihoodMap.values() ) {
- for (PileupElement p : perReadLikelihoods.getStoredPileupElements())
- index = fillMappingQualitiesFromPileupAndUpdateIndex(p, index, qualities);
+ for (GATKSAMRecord read : perReadLikelihoods.getStoredElements())
+ index = fillMappingQualitiesFromPileupAndUpdateIndex(read, index, qualities);
}
@@ -76,10 +76,10 @@ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAn
return map;
}
- private static int fillMappingQualitiesFromPileupAndUpdateIndex(final PileupElement p, final int inputIdx, final int[] qualities) {
+ private static int fillMappingQualitiesFromPileupAndUpdateIndex(final GATKSAMRecord read, final int inputIdx, final int[] qualities) {
int outputIdx = inputIdx;
- if ( p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE )
- qualities[outputIdx++] = p.getMappingQual();
+ if ( read.getMappingQuality() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE )
+ qualities[outputIdx++] = read.getMappingQuality();
return outputIdx;
}
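
With qualities[] filled and index holding the count of usable entries, the annotation itself is a root-mean-square. A minimal sketch, assuming index > 0:

// Sketch: root-mean-square over the first 'index' mapping qualities.
static double rms(final int[] qualities, final int index) {
    double sumOfSquares = 0.0;
    for (int i = 0; i < index; i++)
        sumOfSquares += (double) qualities[i] * qualities[i];
    return Math.sqrt(sumOfSquares / index);
}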

View File

@@ -49,19 +49,22 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR
for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) {
PerReadAlleleLikelihoodMap indelLikelihoodMap = null;
ReadBackedPileup pileup = null;
- if (stratifiedPerReadAlleleLikelihoodMap != null && !stratifiedPerReadAlleleLikelihoodMap.isEmpty()) {
- indelLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName());
- if (indelLikelihoodMap == null)
- continue;
- if (indelLikelihoodMap.isEmpty())
- continue;
- }
- else if (stratifiedContexts != null) {
+ if (stratifiedContexts != null) {
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
- if ( context == null )
- continue;
- pileup = context.getBasePileup();
+ if ( context != null )
+ pileup = context.getBasePileup();
}
+ if (stratifiedPerReadAlleleLikelihoodMap != null )
+ indelLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName());
+ if (indelLikelihoodMap != null && indelLikelihoodMap.isEmpty())
+ indelLikelihoodMap = null;
+ // treat an empty likelihood map as a null reference - will simplify contract with fillQualsFromPileup
+ if (indelLikelihoodMap == null && pileup == null)
+ continue;
fillQualsFromPileup(vc.getAlleles(), vc.getStart(), pileup, indelLikelihoodMap, refQuals, altQuals );
}
final MannWhitneyU mannWhitneyU = new MannWhitneyU();
@@ -92,7 +95,7 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR
return map;
}
- protected abstract void fillQualsFromPileup(final List<Allele> alleles,
+ protected abstract void fillQualsFromPileup(final List<Allele> alleles,
final int refLoc,
final ReadBackedPileup readBackedPileup,
final PerReadAlleleLikelihoodMap alleleLikelihoodMap,
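
The refQuals/altQuals lists collected by fillQualsFromPileup feed the MannWhitneyU object created above. Its exact API is not shown in this diff, so here is a self-contained stand-in for the statistic it computes: the rank-sum z-score under the normal approximation (uses java.util only; ties are ignored for brevity):

// Not the GATK MannWhitneyU class -- an illustrative equivalent.
static double rankSumZ(final List<Double> refQuals, final List<Double> altQuals) {
    final List<double[]> tagged = new ArrayList<double[]>();
    for (final double v : refQuals) tagged.add(new double[]{v, 0});
    for (final double v : altQuals) tagged.add(new double[]{v, 1});
    Collections.sort(tagged, new Comparator<double[]>() {
        public int compare(final double[] a, final double[] b) { return Double.compare(a[0], b[0]); }
    });
    double altRankSum = 0.0;
    for (int i = 0; i < tagged.size(); i++)
        if (tagged.get(i)[1] == 1.0)
            altRankSum += i + 1;                               // ranks are 1-based
    final double n1 = altQuals.size(), n2 = refQuals.size();
    final double u = altRankSum - n1 * (n1 + 1) / 2;           // Mann-Whitney U for the alt sample
    final double mean = n1 * n2 / 2;
    final double sd = Math.sqrt(n1 * n2 * (n1 + n2 + 1) / 12); // normal approximation, no tie correction
    return (u - mean) / sd;
}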

View File

@@ -47,7 +47,7 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
readPos = getFinalReadPosition(p.getRead(),readPos);
- if ( allAlleles.get(0).equals(Allele.create(p.getBase())) ) {
+ if ( allAlleles.get(0).equals(Allele.create(p.getBase(), true)) ) {
refQuals.add((double)readPos);
} else if ( allAlleles.contains(Allele.create(p.getBase()))) {
altQuals.add((double)readPos);
@@ -57,9 +57,18 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
return;
}
- for (Map.Entry<PileupElement,Map<Allele,Double>> el : alleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
- int readPos = getOffsetFromClippedReadStart(el.getKey().getRead(), el.getKey().getOffset());
- readPos = getFinalReadPosition(el.getKey().getRead(),readPos);
+ for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : alleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
+ final GATKSAMRecord read = el.getKey();
+ final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate( read.getSoftStart(), read.getCigar(), refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true );
+ if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED )
+ continue;
+ int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, false, 0, 0 );
+ final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read );
+ if (readPos > numAlignedBases / 2)
+ readPos = numAlignedBases - (readPos + 1);
+ // int readPos = getOffsetFromClippedReadStart(el.getKey(), el.getKey().getOffset());
+ // readPos = getFinalReadPosition(el.getKey().getRead(),readPos);
final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
if (a.isNoCall())

View File

@@ -37,21 +37,21 @@ public class PerReadAlleleLikelihoodMap {
public static final double INDEL_LIKELIHOOD_THRESH = 0.1;
private List<Allele> alleles;
- private Map<PileupElement,Map<Allele,Double>> likelihoodReadMap;
+ private Map<GATKSAMRecord,Map<Allele,Double>> likelihoodReadMap;
public PerReadAlleleLikelihoodMap() {
- likelihoodReadMap = new LinkedHashMap<PileupElement,Map<Allele,Double>>();
+ likelihoodReadMap = new LinkedHashMap<GATKSAMRecord,Map<Allele,Double>>();
alleles = new ArrayList<Allele>();
}
- public void add(PileupElement p, Allele a, Double likelihood) {
+ public void add(GATKSAMRecord read, Allele a, Double likelihood) {
Map<Allele,Double> likelihoodMap;
- if (likelihoodReadMap.containsKey(p)){
+ if (likelihoodReadMap.containsKey(read)){
// seen pileup element before
- likelihoodMap = likelihoodReadMap.get(p);
+ likelihoodMap = likelihoodReadMap.get(read);
}
else {
likelihoodMap = new HashMap<Allele, Double>();
- likelihoodReadMap.put(p,likelihoodMap);
+ likelihoodReadMap.put(read,likelihoodMap);
}
likelihoodMap.put(a,likelihood);
@@ -64,20 +64,19 @@ public class PerReadAlleleLikelihoodMap {
return likelihoodReadMap.size();
}
- public void add(GATKSAMRecord read, Allele a, Double likelihood) {
- PileupElement p = new PileupElement(read,-1,false,false,false,false,false,false);
- add(p,a,likelihood);
+ public void add(PileupElement p, Allele a, Double likelihood) {
+ add(p.getRead(),a,likelihood);
}
public boolean containsPileupElement(PileupElement p) {
- return likelihoodReadMap.containsKey(p);
+ return likelihoodReadMap.containsKey(p.getRead());
}
public boolean isEmpty() {
return likelihoodReadMap.isEmpty();
}
- public Map<PileupElement,Map<Allele,Double>> getLikelihoodReadMap() {
+ public Map<GATKSAMRecord,Map<Allele,Double>> getLikelihoodReadMap() {
return likelihoodReadMap;
}
public void clear() {
public void clear() {
@@ -85,9 +84,17 @@ public class PerReadAlleleLikelihoodMap {
likelihoodReadMap.clear();
}
- public Set<PileupElement> getStoredPileupElements() {
+ public Set<GATKSAMRecord> getStoredElements() {
return likelihoodReadMap.keySet();
}
+ public Collection<Map<Allele,Double>> getLikelihoodMapValues() {
+ return likelihoodReadMap.values();
+ }
+ public int getNumberOfStoredElements() {
+ return likelihoodReadMap.size();
+ }
/**
* Returns list of reads greedily associated with a particular allele.
* Needs to loop for each read, and assign to each allele
@@ -100,10 +107,10 @@ public class PerReadAlleleLikelihoodMap {
}
public Map<Allele,Double> getLikelihoodsAssociatedWithPileupElement(PileupElement p) {
- if (!likelihoodReadMap.containsKey(p))
+ if (!likelihoodReadMap.containsKey(p.getRead()))
return null;
- return likelihoodReadMap.get(p);
+ return likelihoodReadMap.get(p.getRead());
}
public static Allele getMostLikelyAllele(Map<Allele,Double> alleleMap) {
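
Taken together, the reworked class is used roughly as follows. This is hypothetical driver code: read, refAllele, and altAllele are assumed to exist, and the stored likelihoods are assumed to be log10-scaled.

// Hypothetical usage of the read-keyed map after this commit:
final PerReadAlleleLikelihoodMap map = new PerReadAlleleLikelihoodMap();

// Likelihoods are registered per read; the PileupElement overload
// now simply delegates to p.getRead().
map.add(read, refAllele, -0.05);   // log10 L(read | ref), assumed scale
map.add(read, altAllele, -2.30);   // log10 L(read | alt)

// Annotations iterate the stored likelihood maps directly:
for (final Map<Allele,Double> likelihoods : map.getLikelihoodMapValues()) {
    final Allele best = PerReadAlleleLikelihoodMap.getMostLikelyAllele(likelihoods);
    if (!best.isNoCall())
        System.out.println("read supports " + best);
}
System.out.println("stored reads: " + map.getNumberOfStoredElements());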