update mapping quality score and edit distance attribute for reads when they are cleaned

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@763 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-05-20 17:51:31 +00:00
parent 57918de753
commit 34f9820299
1 changed files with 40 additions and 3 deletions

View File

@ -106,6 +106,32 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
}
}
private static int numMismatches(SAMRecord r, String refSeq, int refIdx) {
int readIdx = 0;
int mismatches = 0;
String readSeq = r.getReadString();
Cigar c = r.getCigar();
for (int i = 0 ; i < c.numCigarElements() ; i++) {
CigarElement ce = c.getCigarElement(i);
switch ( ce.getOperator() ) {
case M:
for (int j = 0 ; j < ce.getLength() ; j++, refIdx++, readIdx++ ) {
if ( Character.toUpperCase(readSeq.charAt(readIdx)) != Character.toUpperCase(refSeq.charAt(refIdx)) )
mismatches++;
}
break;
case I:
readIdx += ce.getLength();
break;
case D:
refIdx += ce.getLength();
break;
}
}
return mismatches;
}
private static int mismatchQualitySum(AlignedRead aRead, String ref, int refIndex) {
String read = aRead.getReadString();
String quals = aRead.getBaseQualityString();
@ -224,7 +250,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
}
}
// if the best alternate consensus has a smaller sum of quality score mismatches, then clean!
// if the best alternate consensus has a smaller sum of quality score mismatches (more than the LOD threshold), then clean!
if ( bestConsensus != null && ((double)(totalMismatchSum - bestConsensus.mismatchSum))/10.0 >= LOD_THRESHOLD ) {
logger.info("CLEAN: " + bestConsensus.str );
if ( indelOutput != null && bestConsensus.cigar.numCigarElements() > 1 ) {
@ -239,9 +265,20 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
} catch (Exception e) {}
}
// We need to update the mapping quality score of the cleaned reads;
// however we don't have enough info to use the proper MAQ scoring system.
// For now, we'll use a heuristic:
// the mapping quality score is improved by the LOD difference in mismatching
// bases between the reference and alternate consensus
int improvement = (totalMismatchSum - bestConsensus.mismatchSum) / 10;
// clean the appropriate reads
for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes )
updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.getSecond(), altReads.get(indexPair.getFirst()), (int)leftmostIndex);
for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes ) {
AlignedRead aRead = altReads.get(indexPair.getFirst());
updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.getSecond(), aRead, (int)leftmostIndex);
aRead.getRead().setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + improvement, 255));
aRead.getRead().setAttribute("NM", numMismatches(aRead.getRead(), reference, aRead.getRead().getAlignmentStart()-(int)leftmostIndex));
}
}
// write them out