use knowledge from other reads to find a consensus

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@932 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-06-07 21:22:17 +00:00
parent 596773e6c6
commit 3a8219a469
1 changed files with 39 additions and 10 deletions

View File

@ -184,7 +184,8 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
// decide which reads potentially need to be cleaned // decide which reads potentially need to be cleaned
for ( SAMRecord read : reads ) { for ( SAMRecord read : reads ) {
// first, move existing indels (for 1 indel reads only) to leftmost position within identical sequence // first, move existing indels (for 1 indel reads only) to leftmost position within identical sequence
if ( AlignmentUtils.getNumAlignmentBlocks(read) == 2 ) int numBlocks = AlignmentUtils.getNumAlignmentBlocks(read);
if ( numBlocks == 2 )
read.setCigar(indelRealignment(read.getCigar(), reference, read.getReadString(), read.getAlignmentStart()-(int)leftmostIndex, 0)); read.setCigar(indelRealignment(read.getCigar(), reference, read.getReadString(), read.getAlignmentStart()-(int)leftmostIndex, 0));
AlignedRead aRead = new AlignedRead(read); AlignedRead aRead = new AlignedRead(read);
@ -197,6 +198,12 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
totalMismatchSum += mismatchScore; totalMismatchSum += mismatchScore;
aRead.setMismatchScoreToReference(mismatchScore); aRead.setMismatchScoreToReference(mismatchScore);
} }
// otherwise, if it has an indel, let's see if that's the best consensus
else if ( numBlocks == 2 ) {
aRead.doNotRealign();
altReads.add(aRead);
altAlignmentsToTest.add(true);
}
// otherwise, we can emit it as is // otherwise, we can emit it as is
else { else {
refReads.add(read); refReads.add(read);
@ -211,19 +218,27 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
// do a pairwise alignment against the reference // do a pairwise alignment against the reference
AlignedRead aRead = altReads.get(index); AlignedRead aRead = altReads.get(index);
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getReadString()); int indexOnRef;
int refIdx = swConsensus.getAlignmentStart2wrt1(); Cigar c;
if ( refIdx < 0 ) if ( aRead.isRealignable() ) {
continue; SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getReadString());
indexOnRef = swConsensus.getAlignmentStart2wrt1();
c = swConsensus.getCigar();
} else {
indexOnRef = aRead.getAlignmentStart() - (int)leftmostIndex;
c = aRead.getCigar();
}
if ( indexOnRef < 0 )
continue;
// create the new consensus // create the new consensus
StringBuffer sb = new StringBuffer(); StringBuffer sb = new StringBuffer();
sb.append(reference.substring(0, refIdx)); sb.append(reference.substring(0, indexOnRef));
Cigar c = swConsensus.getCigar();
logger.debug("CIGAR = " + cigarToString(c)); logger.debug("CIGAR = " + cigarToString(c));
int indelCount = 0; int indelCount = 0;
int altIdx = 0; int altIdx = 0;
int refIdx = indexOnRef;
boolean ok_flag = true; boolean ok_flag = true;
for ( int i = 0 ; i < c.numCigarElements() ; i++ ) { for ( int i = 0 ; i < c.numCigarElements() ; i++ ) {
CigarElement ce = c.getCigarElement(i); CigarElement ce = c.getCigarElement(i);
@ -255,9 +270,11 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
String altConsensus = sb.toString(); String altConsensus = sb.toString();
// for each imperfect match to the reference, score it against this alternative // for each imperfect match to the reference, score it against this alternative
Consensus consensus = new Consensus(altConsensus, c, swConsensus.getAlignmentStart2wrt1()); Consensus consensus = new Consensus(altConsensus, c, indexOnRef);
for ( int j = 0; j < altReads.size(); j++ ) { for ( int j = 0; j < altReads.size(); j++ ) {
AlignedRead toTest = altReads.get(j); AlignedRead toTest = altReads.get(j);
if ( !toTest.isRealignable() )
continue;
Pair<Integer, Integer> altAlignment = findBestOffset(altConsensus, toTest); Pair<Integer, Integer> altAlignment = findBestOffset(altConsensus, toTest);
// the mismatch score is the min of its alignment vs. the reference and vs. the alternate // the mismatch score is the min of its alignment vs. the reference and vs. the alternate
@ -289,7 +306,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
bestConsensus.cigar = indelRealignment(bestConsensus.cigar, reference, bestConsensus.str, bestConsensus.positionOnReference, bestConsensus.positionOnReference); bestConsensus.cigar = indelRealignment(bestConsensus.cigar, reference, bestConsensus.str, bestConsensus.positionOnReference, bestConsensus.positionOnReference);
// clean the appropriate reads // start cleaning the appropriate reads
for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes ) { for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes ) {
AlignedRead aRead = altReads.get(indexPair.first); AlignedRead aRead = altReads.get(indexPair.first);
updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.second, aRead, (int)leftmostIndex); updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.second, aRead, (int)leftmostIndex);
@ -344,7 +361,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
// the mapping quality score is improved by the LOD difference in mismatching // the mapping quality score is improved by the LOD difference in mismatching
// bases between the reference and alternate consensus // bases between the reference and alternate consensus
// clean the appropriate reads // finish cleaning the appropriate reads
for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes ) { for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes ) {
AlignedRead aRead = altReads.get(indexPair.first); AlignedRead aRead = altReads.get(indexPair.first);
aRead.finalizeUpdate(); aRead.finalizeUpdate();
@ -642,9 +659,13 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
private int newStart = -1; private int newStart = -1;
private int mismatchScoreToReference; private int mismatchScoreToReference;
// used for perfectly matching reads with indels which we want to try as the consensus
private boolean doNotRealign;
public AlignedRead(SAMRecord read) { public AlignedRead(SAMRecord read) {
this.read = read; this.read = read;
mismatchScoreToReference = 0; mismatchScoreToReference = 0;
doNotRealign = false;
} }
public SAMRecord getRead() { public SAMRecord getRead() {
@ -663,6 +684,14 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
return (newCigar != null ? newCigar : read.getCigar()); return (newCigar != null ? newCigar : read.getCigar());
} }
public void doNotRealign() {
doNotRealign = true;
}
public boolean isRealignable() {
return !doNotRealign;
}
// tentatively sets the new Cigar, but it needs to be confirmed later // tentatively sets the new Cigar, but it needs to be confirmed later
public void setCigar(Cigar cigar) { public void setCigar(Cigar cigar) {
newCigar = cigar; newCigar = cigar;