Some alt consenses may have cigar string starting with an insertion. Not a bug, strictly speaking, since the cleaner had been detecting this and crashing deliberately. Now it knows how to deal with this special case though. Also, uppercase the ref before using it in SW aligner!

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2722 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
asivache 2010-01-28 18:53:02 +00:00
parent 956b570c8e
commit db429e1096
1 changed files with 74 additions and 18 deletions

View File

@ -214,6 +214,8 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
private void clean(List<SAMRecord> reads, String reference, GenomeLoc interval) {
reference = reference.toUpperCase();
long leftmostIndex = interval.getStart();
ArrayList<SAMRecord> refReads = new ArrayList<SAMRecord>(); // reads that perfectly match ref
ArrayList<AlignedRead> altReads = new ArrayList<AlignedRead>(); // reads that don't perfectly match
@ -222,18 +224,26 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
Set<Consensus> altConsenses = new LinkedHashSet<Consensus>(); // list of alt consenses
int totalMismatchSum = 0;
//boolean DEBUG = false;
//boolean FULL_DEBUG = false;
// for ( SAMRecord read : reads ) { if ( read.getReadName().equals("HWUSI-EAS667_1:1:17:1784:105#0")) { FULL_DEBUG = true; break; } }
// decide which reads potentially need to be cleaned
for ( SAMRecord read : reads ) {
// if ( debugOn ) {
// System.out.println(read.getReadName()+" "+read.getCigarString()+" "+read.getAlignmentStart()+"-"+read.getAlignmentEnd());
// System.out.println(reference.substring((int)(read.getAlignmentStart()-leftmostIndex),(int)(read.getAlignmentEnd()-leftmostIndex)));
// System.out.println(read.getReadString());
// }
// if ( read.getReadName().equals("HWUSI-EAS667_1:1:17:1784:105#0")) DEBUG = true;
// else DEBUG=false;
// if ( DEBUG ) {
// System.out.println("Staring with alignment:");
// System.out.println(read.getReadName()+" "+read.getCigarString()+" "+read.getAlignmentStart()+"-"+read.getAlignmentEnd());
// System.out.println(reference.substring((int)(read.getAlignmentStart()-leftmostIndex),(int)(read.getAlignmentEnd()-leftmostIndex)));
// System.out.println(read.getReadString());
// }
// we currently can not deal with clipped reads correctly (or screwy record)
if ( read.getCigar().numCigarElements() == 0 || readIsClipped(read) ) {
// if ( DEBUG ) System.out.println("MATCHES REF; skipped.");
refReads.add(read);
continue;
}
@ -243,7 +253,11 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
// first, move existing indels (for 1 indel reads only) to leftmost position within identical sequence
int numBlocks = AlignmentUtils.getNumAlignmentBlocks(read);
if ( numBlocks == 2 ) {
// if ( FULL_DEBUG ) System.out.println("STARTING WITH "+read.getCigar()+" for "+ aRead.getRead().getReadName()+" at "+aRead.getRead().getAlignmentStart());
Cigar newCigar = indelRealignment(read.getCigar(), reference, read.getReadString(), read.getAlignmentStart()-(int)leftmostIndex, 0);
// if ( FULL_DEBUG ) System.out.println("INDEL REALIGNED TO "+newCigar.toString()+" for "+ aRead.getRead().getReadName());
if ( aRead.setCigar(newCigar) ) {
leftMovedIndels.add(aRead);
}
@ -261,17 +275,21 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
// if it has an indel, let's see if that's the best consensus
if ( numBlocks == 2 ) {
Consensus c = createAlternateConsensus(aRead.getAlignmentStart() - (int)leftmostIndex, aRead.getCigar(), reference, aRead.getRead().getReadBases());
// if ( FULL_DEBUG ) System.out.println("ALT CONSENSUS CREATED FROM INDEL. "+(c==null?"<NULL>":c.cigar.toString())+" for "+ aRead.getRead().getReadName()+
// " at "+aRead.getRead().getAlignmentStart());
if ( c==null) {} //System.out.println("ERROR: Failed to create alt consensus for read "+aRead.getRead().getReadName());
else altConsenses.add(c);
}
else {
// if ( debugOn ) System.out.println("Going to test...");
altAlignmentsToTest.add(aRead);
// if ( DEBUG ) System.out.println("SCORE > 0; saved for testing.");
}
}
// otherwise, we can emit it as is
else {
// if ( debugOn ) System.out.println("Emitting as is...");
// if ( DEBUG ) System.out.println("SCORE 0; added to REF.");
refReads.add(read);
}
}
@ -285,6 +303,10 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
if ( c != null) {
// if ( debugOn ) System.out.println("NEW consensus generated by SW: "+c.str ) ;
altConsenses.add(c);
// if ( FULL_DEBUG == true && c.cigar.getCigarElement(0).getOperator() == CigarOperator.I ) {
// System.out.println("ALT CONSENSUS FROM SW: "+c.cigar.toString()+" from read "+aRead.getRead().getReadName());
// System.out.println(c.str + " at "+c.positionOnReference);
// }
} else {
// if ( debugOn ) System.out.println("FAILED to create Alt consensus from SW");
}
@ -298,8 +320,13 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
// do a pairwise alignment against the reference
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(StringUtil.stringToBytes(reference), aRead.getRead().getReadBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);
Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getRead().getReadBases());
if ( c != null)
if ( c != null) {
altConsenses.add(c);
// if ( FULL_DEBUG == true ) {
// System.out.println("ALT CONSENSUS FROM SW: "+c.cigar.toString()+" from read "+aRead.getRead().getReadName());
// System.out.println(c.str + " at "+c.positionOnReference);
// }
}
}
}
@ -311,6 +338,10 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
while ( iter.hasNext() ) {
Consensus consensus = iter.next();
// if ( FULL_DEBUG ) {
// System.out.println("CHECKING CONSENSUS: "+consensus.cigar.toString());
// System.out.println(consensus.str + " at "+consensus.positionOnReference);
// }
// if ( debugOn ) System.out.println("Consensus: "+consensus.str);
for ( int j = 0; j < altReads.size(); j++ ) {
@ -343,8 +374,18 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
double improvement = (bestConsensus == null ? -1 : ((double)(totalMismatchSum - bestConsensus.mismatchSum))/10.0);
if ( improvement >= LOD_THRESHOLD ) {
// if ( FULL_DEBUG ) {
// System.out.println("BEST CONSENSUS: "+bestConsensus.cigar.toString());
// System.out.println(bestConsensus.str + " at "+bestConsensus.positionOnReference);
// }
bestConsensus.cigar = indelRealignment(bestConsensus.cigar, reference, bestConsensus.str, bestConsensus.positionOnReference, bestConsensus.positionOnReference);
// if ( FULL_DEBUG ) {
// System.out.println("BEST CONSENSUS REALIGNED TO: "+bestConsensus.cigar.toString());
// System.out.println(bestConsensus.str + " at "+bestConsensus.positionOnReference);
// }
// start cleaning the appropriate reads
for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes ) {
AlignedRead aRead = altReads.get(indexPair.first);
@ -519,8 +560,25 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
CigarElement altCE1 = altCigar.getCigarElement(0);
CigarElement altCE2 = altCigar.getCigarElement(1);
int leadingMatchingBlockLength = 0; // length of the leading M element or 0 if the leading element is I
CigarElement indelCE = null;
if ( altCE1.getOperator() == CigarOperator.I ) {
indelCE=altCE1;
if ( altCE2.getOperator() != CigarOperator.M )
throw new StingException("When first element of the alt consensus is I, the second one must be M. Actual: "+altCigar.toString());
}
else {
if ( altCE1.getOperator() != CigarOperator.M )
throw new StingException("First element of the alt consensus cigar must be M or I. Actual: "+altCigar.toString());
if ( altCE2.getOperator() == CigarOperator.I || altCE2.getOperator() == CigarOperator.D ) indelCE=altCE2;
else
throw new StingException("When first element of the alt consensus is M, the second one must be I or D. Actual: "+altCigar.toString());
leadingMatchingBlockLength = altCE1.getLength();
}
// the easiest thing to do is to take each case separately
int endOfFirstBlock = altPosOnRef + altCE1.getLength();
int endOfFirstBlock = altPosOnRef + leadingMatchingBlockLength;
boolean sawAlignmentStart = false;
// for reads starting before the indel
@ -539,30 +597,28 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
int indelOffsetOnRef = 0, indelOffsetOnRead = 0;
// forward along the indel
if ( altCE2.getOperator() == CigarOperator.I ) {
if ( indelCE.getOperator() == CigarOperator.I ) {
// for reads that end in an insertion
if ( myPosOnAlt + aRead.getReadLength() < endOfFirstBlock + altCE2.getLength() ) {
if ( myPosOnAlt + aRead.getReadLength() < endOfFirstBlock + indelCE.getLength() ) {
readCigar.add(new CigarElement(myPosOnAlt + aRead.getReadLength() - endOfFirstBlock, CigarOperator.I));
aRead.setCigar(readCigar);
return;
}
// for reads that start in an insertion
if ( !sawAlignmentStart && myPosOnAlt < endOfFirstBlock + altCE2.getLength() ) {
if ( !sawAlignmentStart && myPosOnAlt < endOfFirstBlock + indelCE.getLength() ) {
aRead.setAlignmentStart(leftmostIndex + endOfFirstBlock);
readCigar.add(new CigarElement(altCE2.getLength() - (myPosOnAlt - endOfFirstBlock), CigarOperator.I));
readCigar.add(new CigarElement(indelCE.getLength() - (myPosOnAlt - endOfFirstBlock), CigarOperator.I));
indelOffsetOnRead = myPosOnAlt - endOfFirstBlock;
sawAlignmentStart = true;
} else if ( sawAlignmentStart ) {
readCigar.add(altCE2);
indelOffsetOnRead = altCE2.getLength();
readCigar.add(indelCE);
indelOffsetOnRead = indelCE.getLength();
}
} else if ( altCE2.getOperator() == CigarOperator.D ) {
} else if ( indelCE.getOperator() == CigarOperator.D ) {
if ( sawAlignmentStart )
readCigar.add(altCE2);
indelOffsetOnRef = altCE2.getLength();
} else {
throw new RuntimeException("Operator of middle block is not I or D: " + altCE2.getOperator());
readCigar.add(indelCE);
indelOffsetOnRef = indelCE.getLength();
}
// for reads that start after the indel