Refactoring so that ConsensusAlleleCounter can use regular pileups and can operate correctly. This involved adding utility functions to ReadBackedPileup to count # of insertions/deletions right after current position. Added unit test for IndelGenotypeLikelihoods, esp. ConsensusAlleleCounter logic

This commit is contained in:
Guillermo del Angel 2012-03-28 21:01:31 -04:00
parent 62ee31afba
commit e0ab4e4b30
3 changed files with 75 additions and 15 deletions

View File

@ -109,15 +109,10 @@ public class ConsensusAlleleCounter {
delCount += indelPileup.getNumberOfDeletions(); delCount += indelPileup.getNumberOfDeletions();
} }
else { else {
// todo - this should be version to be used when extended events are removed final ReadBackedPileup indelPileup = context.getBasePileup();
// todo - maybe we should create utility functions in ReadBackedPileup definition to do the equivalent thing? insCount += indelPileup.getNumberOfInsertionsAfterThisElement();
for (PileupElement p: context.getBasePileup()) { delCount += indelPileup.getNumberOfDeletionsAfterThisElement();
if (p.isBeforeDeletion()) }
delCount++;
else if (p.isBeforeInsertion())
insCount++;
}
}
} }
if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping) if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping)
@ -127,10 +122,20 @@ public class ConsensusAlleleCounter {
// todo -- warning, can be duplicating expensive partition here // todo -- warning, can be duplicating expensive partition here
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType); AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup(); final ReadBackedPileup indelPileup;
final int nIndelReads = indelPileup.getNumberOfInsertions() + indelPileup.getNumberOfDeletions(); final int nIndelReads, nReadsOverall;
final int nReadsOverall = indelPileup.getNumberOfElements();
if (context.hasExtendedEventPileup()) {
indelPileup = context.getExtendedEventPileup();
nIndelReads = ((ReadBackedExtendedEventPileup)indelPileup).getNumberOfInsertions() + indelPileup.getNumberOfDeletions();
nReadsOverall = indelPileup.getNumberOfElements();
}
else {
indelPileup = context.getBasePileup();
nIndelReads = indelPileup.getNumberOfInsertionsAfterThisElement() + indelPileup.getNumberOfDeletionsAfterThisElement();
nReadsOverall = indelPileup.getNumberOfElements();
}
if ( nIndelReads == 0 || (nIndelReads / (1.0 * nReadsOverall)) < minFractionInOneSample) { if ( nIndelReads == 0 || (nIndelReads / (1.0 * nReadsOverall)) < minFractionInOneSample) {
// if ( nIndelReads > 0 ) // if ( nIndelReads > 0 )
// logger.info("Skipping sample " + sample.getKey() + " with nIndelReads " + nIndelReads + " nReads " + nReadsOverall); // logger.info("Skipping sample " + sample.getKey() + " with nIndelReads " + nIndelReads + " nReads " + nReadsOverall);
@ -139,7 +144,8 @@ public class ConsensusAlleleCounter {
// logger.info("### Keeping sample " + sample.getKey() + " with nIndelReads " + nIndelReads + " nReads " + nReadsOverall); // logger.info("### Keeping sample " + sample.getKey() + " with nIndelReads " + nIndelReads + " nReads " + nReadsOverall);
} }
for (ExtendedEventPileupElement p : indelPileup.toExtendedIterable()) {
for (PileupElement p : indelPileup) {
final GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead()); final GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
if (read == null) if (read == null)
continue; continue;
@ -154,7 +160,8 @@ public class ConsensusAlleleCounter {
} }
*/ */
String indelString = p.getEventBases(); String indelString = p.getEventBases();
if (p.isInsertion()) {
if (isInsertion(p)) {
boolean foundKey = false; boolean foundKey = false;
// copy of hashmap into temp arrayList // copy of hashmap into temp arrayList
ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>(); ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
@ -222,7 +229,7 @@ public class ConsensusAlleleCounter {
} }
} }
else if (p.isDeletion()) { else if (isDeletion(p)) {
indelString = String.format("D%d",p.getEventLength()); indelString = String.format("D%d",p.getEventLength());
int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0; int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
consensusIndelStrings.put(indelString,cnt+1); consensusIndelStrings.put(indelString,cnt+1);
@ -234,6 +241,25 @@ public class ConsensusAlleleCounter {
return consensusIndelStrings; return consensusIndelStrings;
} }
// todo - helper routines to check for extended pileup elements, to remove when extended events are removed
private static final boolean isInsertion(final PileupElement p) {
if (p instanceof ExtendedEventPileupElement)
return ((ExtendedEventPileupElement) p).isInsertion();
else
return p.isBeforeInsertion();
}
private static boolean isDeletion(final PileupElement p) {
if (p instanceof ExtendedEventPileupElement)
return p.isDeletion();
else
return p.isBeforeDeletion();
}
private List<Allele> consensusCountsToAlleles(final ReferenceContext ref, private List<Allele> consensusCountsToAlleles(final ReferenceContext ref,
final Map<String, Integer> consensusIndelStrings) { final Map<String, Integer> consensusIndelStrings) {
final GenomeLoc loc = ref.getLocus(); final GenomeLoc loc = ref.getLocus();

View File

@ -873,6 +873,26 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
return reads; return reads;
} }
@Override
public int getNumberOfDeletionsAfterThisElement() {
int count = 0;
for (PileupElement p: this) {
if (p.isBeforeDeletion())
count++;
}
return count;
}
@Override
public int getNumberOfInsertionsAfterThisElement() {
int count = 0;
for (PileupElement p: this) {
if (p.isBeforeInsertion())
count++;
}
return count;
}
/** /**
* Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
* *

View File

@ -174,6 +174,20 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
*/ */
public int getNumberOfDeletions(); public int getNumberOfDeletions();
/**
* Simple useful routine to count the number of deletion bases in at the next position this pileup
*
* @return
*/
public int getNumberOfDeletionsAfterThisElement();
/**
* Simple useful routine to count the number of insertions right after this pileup
*
* @return
*/
public int getNumberOfInsertionsAfterThisElement();
public int getNumberOfMappingQualityZeroReads(); public int getNumberOfMappingQualityZeroReads();
/** /**