Update the mapping quality score and the edit distance (NM) attribute of reads when they are cleaned.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@763 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
57918de753
commit
34f9820299
|
|
@ -106,6 +106,32 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static int numMismatches(SAMRecord r, String refSeq, int refIdx) {
|
||||||
|
int readIdx = 0;
|
||||||
|
int mismatches = 0;
|
||||||
|
String readSeq = r.getReadString();
|
||||||
|
Cigar c = r.getCigar();
|
||||||
|
for (int i = 0 ; i < c.numCigarElements() ; i++) {
|
||||||
|
CigarElement ce = c.getCigarElement(i);
|
||||||
|
switch ( ce.getOperator() ) {
|
||||||
|
case M:
|
||||||
|
for (int j = 0 ; j < ce.getLength() ; j++, refIdx++, readIdx++ ) {
|
||||||
|
if ( Character.toUpperCase(readSeq.charAt(readIdx)) != Character.toUpperCase(refSeq.charAt(refIdx)) )
|
||||||
|
mismatches++;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case I:
|
||||||
|
readIdx += ce.getLength();
|
||||||
|
break;
|
||||||
|
case D:
|
||||||
|
refIdx += ce.getLength();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return mismatches;
|
||||||
|
}
|
||||||
|
|
||||||
private static int mismatchQualitySum(AlignedRead aRead, String ref, int refIndex) {
|
private static int mismatchQualitySum(AlignedRead aRead, String ref, int refIndex) {
|
||||||
String read = aRead.getReadString();
|
String read = aRead.getReadString();
|
||||||
String quals = aRead.getBaseQualityString();
|
String quals = aRead.getBaseQualityString();
|
||||||
|
|
@ -224,7 +250,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// if the best alternate consensus has a smaller sum of quality score mismatches, then clean!
|
// if the best alternate consensus has a smaller sum of quality score mismatches (more than the LOD threshold), then clean!
|
||||||
if ( bestConsensus != null && ((double)(totalMismatchSum - bestConsensus.mismatchSum))/10.0 >= LOD_THRESHOLD ) {
|
if ( bestConsensus != null && ((double)(totalMismatchSum - bestConsensus.mismatchSum))/10.0 >= LOD_THRESHOLD ) {
|
||||||
logger.info("CLEAN: " + bestConsensus.str );
|
logger.info("CLEAN: " + bestConsensus.str );
|
||||||
if ( indelOutput != null && bestConsensus.cigar.numCigarElements() > 1 ) {
|
if ( indelOutput != null && bestConsensus.cigar.numCigarElements() > 1 ) {
|
||||||
|
|
@ -239,9 +265,20 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
} catch (Exception e) {}
|
} catch (Exception e) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We need to update the mapping quality score of the cleaned reads;
|
||||||
|
// however we don't have enough info to use the proper MAQ scoring system.
|
||||||
|
// For now, we'll use a heuristic:
|
||||||
|
// the mapping quality score is improved by the LOD difference in mismatching
|
||||||
|
// bases between the reference and alternate consensus
|
||||||
|
int improvement = (totalMismatchSum - bestConsensus.mismatchSum) / 10;
|
||||||
|
|
||||||
// clean the appropriate reads
|
// clean the appropriate reads
|
||||||
for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes )
|
for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes ) {
|
||||||
updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.getSecond(), altReads.get(indexPair.getFirst()), (int)leftmostIndex);
|
AlignedRead aRead = altReads.get(indexPair.getFirst());
|
||||||
|
updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.getSecond(), aRead, (int)leftmostIndex);
|
||||||
|
aRead.getRead().setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + improvement, 255));
|
||||||
|
aRead.getRead().setAttribute("NM", numMismatches(aRead.getRead(), reference, aRead.getRead().getAlignmentStart()-(int)leftmostIndex));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// write them out
|
// write them out
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue