Massive change to the indel realigner code. We now properly deal with soft-clipped reads. Also, improved left-alignment code.
Small change for Ryan to get hard-clipped reads working for the recalibrator. PLEASE DO NOT RELEASE THIS WEEK. I still have some more testing to do and need Mark to run WG jobs. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3430 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f3e2aae570
commit
772f558ae0
|
|
@ -315,9 +315,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
read.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START ) {
|
read.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START ) {
|
||||||
readsNotToClean.add(read);
|
readsNotToClean.add(read);
|
||||||
} else {
|
} else {
|
||||||
readsToClean.add(read, ref.getBasesAsChars());
|
readsToClean.add(read, ref.getBases());
|
||||||
// add the rods to the list of known variants
|
// add the rods to the list of known variants
|
||||||
populateKnownIndels(metaDataTracker, null); // todo -- fixme!
|
populateKnownIndels(metaDataTracker, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( readsToClean.size() + readsNotToClean.size() >= MAX_READS ) {
|
if ( readsToClean.size() + readsNotToClean.size() >= MAX_READS ) {
|
||||||
|
|
@ -420,8 +420,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int mismatchQualitySumIgnoreCigar(final AlignedRead aRead, final byte[] refSeq, int refIndex, int quitAboveThisValue) {
|
private static int mismatchQualitySumIgnoreCigar(final AlignedRead aRead, final byte[] refSeq, int refIndex, int quitAboveThisValue) {
|
||||||
final byte[] readSeq = aRead.getRead().getReadBases();
|
final byte[] readSeq = aRead.getReadBases();
|
||||||
final byte[] quals = aRead.getRead().getBaseQualities();
|
final byte[] quals = aRead.getBaseQualities();
|
||||||
int sum = 0;
|
int sum = 0;
|
||||||
for (int readIndex = 0 ; readIndex < readSeq.length ; refIndex++, readIndex++ ) {
|
for (int readIndex = 0 ; readIndex < readSeq.length ; refIndex++, readIndex++ ) {
|
||||||
if ( refIndex >= refSeq.length ) {
|
if ( refIndex >= refSeq.length ) {
|
||||||
|
|
@ -432,7 +432,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
} else {
|
} else {
|
||||||
byte refChr = refSeq[refIndex];
|
byte refChr = refSeq[refIndex];
|
||||||
byte readChr = readSeq[readIndex];
|
byte readChr = readSeq[readIndex];
|
||||||
if ( !BaseUtils.isRegularBase((char)readChr) || !BaseUtils.isRegularBase((char)refChr) )
|
if ( !BaseUtils.isRegularBase(readChr) || !BaseUtils.isRegularBase(refChr) )
|
||||||
continue; // do not count Ns/Xs/etc ?
|
continue; // do not count Ns/Xs/etc ?
|
||||||
if ( readChr != refChr ) {
|
if ( readChr != refChr ) {
|
||||||
sum += (int)quals[readIndex];
|
sum += (int)quals[readIndex];
|
||||||
|
|
@ -445,14 +445,6 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean readIsClipped(final SAMRecord read) {
|
|
||||||
final Cigar c = read.getCigar();
|
|
||||||
final int n = c.numCigarElements();
|
|
||||||
if ( c.getCigarElement(n-1).getOperator() == CigarOperator.S ||
|
|
||||||
c.getCigarElement(0).getOperator() == CigarOperator.S) return true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void clean(ReadBin readsToClean) {
|
private void clean(ReadBin readsToClean) {
|
||||||
|
|
||||||
final List<SAMRecord> reads = readsToClean.getReads();
|
final List<SAMRecord> reads = readsToClean.getReads();
|
||||||
|
|
@ -466,7 +458,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
final ArrayList<AlignedRead> altReads = new ArrayList<AlignedRead>(); // reads that don't perfectly match
|
final ArrayList<AlignedRead> altReads = new ArrayList<AlignedRead>(); // reads that don't perfectly match
|
||||||
final LinkedList<AlignedRead> altAlignmentsToTest = new LinkedList<AlignedRead>(); // should we try to make an alt consensus from the read?
|
final LinkedList<AlignedRead> altAlignmentsToTest = new LinkedList<AlignedRead>(); // should we try to make an alt consensus from the read?
|
||||||
final Set<Consensus> altConsenses = new LinkedHashSet<Consensus>(); // list of alt consenses
|
final Set<Consensus> altConsenses = new LinkedHashSet<Consensus>(); // list of alt consenses
|
||||||
long totalAlignerMismatchSum = 0, totalRawMismatchSum = 0;
|
long totalRawMismatchSum = 0;
|
||||||
|
|
||||||
// if there are any known indels for this region, get them
|
// if there are any known indels for this region, get them
|
||||||
for ( VariantContext knownIndel : knownIndelsToTry.values() ) {
|
for ( VariantContext knownIndel : knownIndelsToTry.values() ) {
|
||||||
|
|
@ -487,8 +479,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
// System.out.println(read.getReadString());
|
// System.out.println(read.getReadString());
|
||||||
// }
|
// }
|
||||||
|
|
||||||
// we currently can not deal with clipped reads correctly (or a screwy record)
|
// we can not deal with screwy records
|
||||||
if ( read.getCigar().numCigarElements() == 0 || readIsClipped(read) ) {
|
if ( read.getCigar().numCigarElements() == 0 ) {
|
||||||
refReads.add(read);
|
refReads.add(read);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
@ -498,12 +490,11 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
// first, move existing indels (for 1 indel reads only) to leftmost position within identical sequence
|
// first, move existing indels (for 1 indel reads only) to leftmost position within identical sequence
|
||||||
int numBlocks = AlignmentUtils.getNumAlignmentBlocks(read);
|
int numBlocks = AlignmentUtils.getNumAlignmentBlocks(read);
|
||||||
if ( numBlocks == 2 ) {
|
if ( numBlocks == 2 ) {
|
||||||
Cigar newCigar = AlignmentUtils.leftAlignIndel(read.getCigar(), reference, read.getReadBases(), read.getAlignmentStart()-(int)leftmostIndex, 0);
|
Cigar newCigar = AlignmentUtils.leftAlignIndel(unclipCigar(read.getCigar()), reference, read.getReadBases(), read.getAlignmentStart()-(int)leftmostIndex, 0);
|
||||||
aRead.setCigar(newCigar);
|
aRead.setCigar(newCigar, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
final int startOnRef = read.getAlignmentStart()-(int)leftmostIndex;
|
final int startOnRef = read.getAlignmentStart()-(int)leftmostIndex;
|
||||||
totalAlignerMismatchSum += AlignmentUtils.mismatchingQualities(aRead.getRead(), reference, startOnRef);
|
|
||||||
final int rawMismatchScore = mismatchQualitySumIgnoreCigar(aRead, reference, startOnRef, Integer.MAX_VALUE);
|
final int rawMismatchScore = mismatchQualitySumIgnoreCigar(aRead, reference, startOnRef, Integer.MAX_VALUE);
|
||||||
// if ( debugOn ) System.out.println("mismatchScore="+mismatchScore);
|
// if ( debugOn ) System.out.println("mismatchScore="+mismatchScore);
|
||||||
|
|
||||||
|
|
@ -511,14 +502,16 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
if ( rawMismatchScore > 0 ) {
|
if ( rawMismatchScore > 0 ) {
|
||||||
altReads.add(aRead);
|
altReads.add(aRead);
|
||||||
//logger.debug("Adding " + aRead.getRead().getReadName() + " with raw mismatch score " + rawMismatchScore + " to non-ref reads");
|
//logger.debug("Adding " + aRead.getRead().getReadName() + " with raw mismatch score " + rawMismatchScore + " to non-ref reads");
|
||||||
|
|
||||||
if ( !read.getDuplicateReadFlag() )
|
if ( !read.getDuplicateReadFlag() )
|
||||||
totalRawMismatchSum += rawMismatchScore;
|
totalRawMismatchSum += rawMismatchScore;
|
||||||
aRead.setMismatchScoreToReference(rawMismatchScore);
|
aRead.setMismatchScoreToReference(rawMismatchScore);
|
||||||
|
aRead.setAlignerMismatchScore(AlignmentUtils.mismatchingQualities(aRead.getRead(), reference, startOnRef));
|
||||||
|
|
||||||
// if it has an indel, let's see if that's the best consensus
|
// if it has an indel, let's see if that's the best consensus
|
||||||
if ( numBlocks == 2 ) {
|
if ( numBlocks == 2 ) {
|
||||||
Consensus c = createAlternateConsensus(startOnRef, aRead.getCigar(), reference, aRead.getRead().getReadBases());
|
Consensus c = createAlternateConsensus(startOnRef, aRead.getCigar(), reference, aRead.getReadBases());
|
||||||
if ( c == null ) {} //System.out.println("ERROR: Failed to create alt consensus for read "+aRead.getRead().getReadName());
|
if ( c != null )
|
||||||
else
|
|
||||||
altConsenses.add(c);
|
altConsenses.add(c);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -539,8 +532,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
if ( altAlignmentsToTest.size() <= MAX_READS_FOR_CONSENSUSES ) {
|
if ( altAlignmentsToTest.size() <= MAX_READS_FOR_CONSENSUSES ) {
|
||||||
for ( AlignedRead aRead : altAlignmentsToTest ) {
|
for ( AlignedRead aRead : altAlignmentsToTest ) {
|
||||||
// do a pairwise alignment against the reference
|
// do a pairwise alignment against the reference
|
||||||
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getRead().getReadBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);
|
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getReadBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);
|
||||||
Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getRead().getReadBases());
|
Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getReadBases());
|
||||||
if ( c != null ) {
|
if ( c != null ) {
|
||||||
// if ( debugOn ) System.out.println("NEW consensus generated by SW: "+c.str ) ;
|
// if ( debugOn ) System.out.println("NEW consensus generated by SW: "+c.str ) ;
|
||||||
altConsenses.add(c);
|
altConsenses.add(c);
|
||||||
|
|
@ -555,8 +548,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
int index = generator.nextInt(altAlignmentsToTest.size());
|
int index = generator.nextInt(altAlignmentsToTest.size());
|
||||||
AlignedRead aRead = altAlignmentsToTest.remove(index);
|
AlignedRead aRead = altAlignmentsToTest.remove(index);
|
||||||
// do a pairwise alignment against the reference
|
// do a pairwise alignment against the reference
|
||||||
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getRead().getReadBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);
|
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getReadBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);
|
||||||
Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getRead().getReadBases());
|
Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getReadBases());
|
||||||
if ( c != null )
|
if ( c != null )
|
||||||
altConsenses.add(c);
|
altConsenses.add(c);
|
||||||
}
|
}
|
||||||
|
|
@ -569,7 +562,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
|
|
||||||
while ( iter.hasNext() ) {
|
while ( iter.hasNext() ) {
|
||||||
Consensus consensus = iter.next();
|
Consensus consensus = iter.next();
|
||||||
//logger.debug("Trying new consensus: " + AlignmentUtils.cigarToString(consensus.cigar) + " " + new String(consensus.str));
|
//logger.debug("Trying new consensus: " + consensus.cigar + " " + new String(consensus.str));
|
||||||
|
|
||||||
// if ( DEBUG ) {
|
// if ( DEBUG ) {
|
||||||
// System.out.println("Checking consensus with alignment at "+consensus.positionOnReference+" cigar "+consensus.cigar);
|
// System.out.println("Checking consensus with alignment at "+consensus.positionOnReference+" cigar "+consensus.cigar);
|
||||||
|
|
@ -590,14 +583,14 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
// the mismatch score is the min of its alignment vs. the reference and vs. the alternate
|
// the mismatch score is the min of its alignment vs. the reference and vs. the alternate
|
||||||
int myScore = altAlignment.second;
|
int myScore = altAlignment.second;
|
||||||
|
|
||||||
if ( myScore >= toTest.getMismatchScoreToReference() )
|
if ( myScore > toTest.getAlignerMismatchScore() || myScore >= toTest.getMismatchScoreToReference() )
|
||||||
myScore = toTest.getMismatchScoreToReference();
|
myScore = toTest.getMismatchScoreToReference();
|
||||||
// keep track of reads that align better to the alternate consensus.
|
// keep track of reads that align better to the alternate consensus.
|
||||||
// By pushing alignments with equal scores to the alternate, it means we'll over-call (het -> hom non ref) but are less likely to under-call (het -> ref, het non ref -> het)
|
// By pushing alignments with equal scores to the alternate, it means we'll over-call (het -> hom non ref) but are less likely to under-call (het -> ref, het non ref -> het)
|
||||||
else
|
else
|
||||||
consensus.readIndexes.add(new Pair<Integer, Integer>(j, altAlignment.first));
|
consensus.readIndexes.add(new Pair<Integer, Integer>(j, altAlignment.first));
|
||||||
|
|
||||||
//logger.debug(AlignmentUtils.cigarToString(consensus.cigar) + " vs. " + toTest.getRead().getReadName() + "-" + toTest.getRead().getReadString() + " => " + myScore + " vs. " + altAlignment.first);
|
//logger.debug(consensus.cigar + " vs. " + toTest.getRead().getReadName() + "-" + toTest.getRead().getReadString() + " => " + myScore + " vs. " + toTest.getMismatchScoreToReference());
|
||||||
if ( !toTest.getRead().getDuplicateReadFlag() )
|
if ( !toTest.getRead().getDuplicateReadFlag() )
|
||||||
consensus.mismatchSum += myScore;
|
consensus.mismatchSum += myScore;
|
||||||
|
|
||||||
|
|
@ -613,7 +606,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
if ( bestConsensus != null )
|
if ( bestConsensus != null )
|
||||||
bestConsensus.readIndexes.clear();
|
bestConsensus.readIndexes.clear();
|
||||||
bestConsensus = consensus;
|
bestConsensus = consensus;
|
||||||
//logger.debug("New consensus " + AlignmentUtils.cigarToString(bestConsensus.cigar) + " is now best consensus");
|
//logger.debug("New consensus " + bestConsensus.cigar + " is now best consensus");
|
||||||
} else {
|
} else {
|
||||||
// we do not need this alt consensus, release memory right away!!
|
// we do not need this alt consensus, release memory right away!!
|
||||||
consensus.readIndexes.clear();
|
consensus.readIndexes.clear();
|
||||||
|
|
@ -626,7 +619,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
// 3) didn't just move around the mismatching columns (i.e. it actually reduces entropy),
|
// 3) didn't just move around the mismatching columns (i.e. it actually reduces entropy),
|
||||||
// then clean!
|
// then clean!
|
||||||
final double improvement = (bestConsensus == null ? -1 : ((double)(totalRawMismatchSum - bestConsensus.mismatchSum))/10.0);
|
final double improvement = (bestConsensus == null ? -1 : ((double)(totalRawMismatchSum - bestConsensus.mismatchSum))/10.0);
|
||||||
if ( improvement >= LOD_THRESHOLD && bestConsensus.mismatchSum <= totalAlignerMismatchSum ) {
|
if ( improvement >= LOD_THRESHOLD ) {
|
||||||
|
|
||||||
bestConsensus.cigar = AlignmentUtils.leftAlignIndel(bestConsensus.cigar, reference, bestConsensus.str, bestConsensus.positionOnReference, bestConsensus.positionOnReference);
|
bestConsensus.cigar = AlignmentUtils.leftAlignIndel(bestConsensus.cigar, reference, bestConsensus.str, bestConsensus.positionOnReference, bestConsensus.positionOnReference);
|
||||||
|
|
||||||
|
|
@ -646,8 +639,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
} catch (Exception e) {}
|
} catch (Exception e) {}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
//System.out.println("CLEAN: " + AlignmentUtils.cigarToString(bestConsensus.cigar) + " " + bestConsensus.str.toString() + " " + bestConsensus.cigar.numCigarElements() );
|
//logger.debug("CLEAN: " + bestConsensus.cigar + " " + bestConsensus.str.toString() + " " + bestConsensus.cigar.numCigarElements() );
|
||||||
//logger.debug("CLEAN: " + AlignmentUtils.cigarToString(bestConsensus.cigar) + " " + bestConsensus.str );
|
|
||||||
if ( indelOutput != null && bestConsensus.cigar.numCigarElements() > 1 ) {
|
if ( indelOutput != null && bestConsensus.cigar.numCigarElements() > 1 ) {
|
||||||
// NOTE: indels are printed out in the format specified for the low-coverage pilot1
|
// NOTE: indels are printed out in the format specified for the low-coverage pilot1
|
||||||
// indel calls (tab-delimited): chr position size type sequence
|
// indel calls (tab-delimited): chr position size type sequence
|
||||||
|
|
@ -705,7 +697,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
} else if ( statsOutput != null ) {
|
} else if ( statsOutput != null ) {
|
||||||
try {
|
try {
|
||||||
statsOutput.write(String.format("%s\tFAIL\t%.1f\t%d%n",
|
statsOutput.write(String.format("%s\tFAIL\t%.1f\t%d%n",
|
||||||
readsToClean.getLocation().toString(), improvement, bestConsensus.mismatchSum - totalAlignerMismatchSum));
|
readsToClean.getLocation().toString(), improvement));
|
||||||
statsOutput.flush();
|
statsOutput.flush();
|
||||||
} catch (Exception e) {}
|
} catch (Exception e) {}
|
||||||
}
|
}
|
||||||
|
|
@ -720,7 +712,6 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
for (int i = 0; i < indexOnRef; i++)
|
for (int i = 0; i < indexOnRef; i++)
|
||||||
sb.append((char)reference[i]);
|
sb.append((char)reference[i]);
|
||||||
//logger.debug("CIGAR = " + AlignmentUtils.cigarToString(c));
|
|
||||||
|
|
||||||
int indelCount = 0;
|
int indelCount = 0;
|
||||||
int altIdx = 0;
|
int altIdx = 0;
|
||||||
|
|
@ -735,6 +726,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
refIdx += elementLength;
|
refIdx += elementLength;
|
||||||
break;
|
break;
|
||||||
case M:
|
case M:
|
||||||
|
altIdx += elementLength;
|
||||||
|
case N:
|
||||||
if ( reference.length < refIdx + elementLength )
|
if ( reference.length < refIdx + elementLength )
|
||||||
ok_flag = false;
|
ok_flag = false;
|
||||||
else {
|
else {
|
||||||
|
|
@ -742,7 +735,6 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
sb.append((char)reference[refIdx+j]);
|
sb.append((char)reference[refIdx+j]);
|
||||||
}
|
}
|
||||||
refIdx += elementLength;
|
refIdx += elementLength;
|
||||||
altIdx += elementLength;
|
|
||||||
break;
|
break;
|
||||||
case I:
|
case I:
|
||||||
for (int j = 0; j < elementLength; j++) {
|
for (int j = 0; j < elementLength; j++) {
|
||||||
|
|
@ -756,6 +748,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
altIdx += elementLength;
|
altIdx += elementLength;
|
||||||
indelCount++;
|
indelCount++;
|
||||||
break;
|
break;
|
||||||
|
case S:
|
||||||
|
default:
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// make sure that there is at most only a single indel and it aligns appropriately!
|
// make sure that there is at most only a single indel and it aligns appropriately!
|
||||||
|
|
@ -867,7 +862,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
else {
|
else {
|
||||||
if ( altCE1.getOperator() != CigarOperator.M )
|
if ( altCE1.getOperator() != CigarOperator.M )
|
||||||
throw new StingException("First element of the alt consensus cigar must be M or I. Actual: "+altCigar.toString());
|
throw new StingException("First element of the alt consensus cigar must be M or I. Actual: "+altCigar.toString());
|
||||||
if ( altCE2.getOperator() == CigarOperator.I || altCE2.getOperator() == CigarOperator.D ) indelCE=altCE2;
|
if ( altCE2.getOperator() == CigarOperator.I || altCE2.getOperator() == CigarOperator.D )
|
||||||
|
indelCE=altCE2;
|
||||||
else
|
else
|
||||||
throw new StingException("When first element of the alt consensus is M, the second one must be I or D. Actual: "+altCigar.toString());
|
throw new StingException("When first element of the alt consensus is M, the second one must be I or D. Actual: "+altCigar.toString());
|
||||||
leadingMatchingBlockLength = altCE1.getLength();
|
leadingMatchingBlockLength = altCE1.getLength();
|
||||||
|
|
@ -925,7 +921,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int readRemaining = aRead.getReadLength();
|
int readRemaining = aRead.getReadBases().length;
|
||||||
for ( CigarElement ce : readCigar.getCigarElements() ) {
|
for ( CigarElement ce : readCigar.getCigarElements() ) {
|
||||||
if ( ce.getOperator() != CigarOperator.D )
|
if ( ce.getOperator() != CigarOperator.D )
|
||||||
readRemaining -= ce.getLength();
|
readRemaining -= ce.getLength();
|
||||||
|
|
@ -951,8 +947,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
int refIdx = read.getOriginalAlignmentStart() - (int)leftmostIndex;
|
int refIdx = read.getOriginalAlignmentStart() - (int)leftmostIndex;
|
||||||
final byte[] readStr = read.getRead().getReadBases();
|
final byte[] readStr = read.getReadBases();
|
||||||
final byte[] quals = read.getRead().getBaseQualities();
|
final byte[] quals = read.getBaseQualities();
|
||||||
|
|
||||||
for (int j=0; j < readStr.length; j++, refIdx++ ) {
|
for (int j=0; j < readStr.length; j++, refIdx++ ) {
|
||||||
if ( refIdx < 0 || refIdx >= reference.length ) {
|
if ( refIdx < 0 || refIdx >= reference.length ) {
|
||||||
|
|
@ -988,8 +984,10 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
case D:
|
case D:
|
||||||
refIdx += elementLength;
|
refIdx += elementLength;
|
||||||
break;
|
break;
|
||||||
|
case S:
|
||||||
|
default:
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1033,11 +1031,27 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
return reduces;
|
return reduces;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static Cigar unclipCigar(Cigar cigar) {
|
||||||
|
ArrayList<CigarElement> elements = new ArrayList<CigarElement>(cigar.numCigarElements());
|
||||||
|
for ( CigarElement ce : cigar.getCigarElements() ) {
|
||||||
|
if ( !isClipOperator(ce.getOperator()) )
|
||||||
|
elements.add(ce);
|
||||||
|
}
|
||||||
|
return new Cigar(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isClipOperator(CigarOperator op) {
|
||||||
|
return op == CigarOperator.S || op == CigarOperator.H || op == CigarOperator.P;
|
||||||
|
}
|
||||||
|
|
||||||
private class AlignedRead {
|
private class AlignedRead {
|
||||||
private final SAMRecord read;
|
private final SAMRecord read;
|
||||||
|
private byte[] readBases = null;
|
||||||
|
private byte[] baseQuals = null;
|
||||||
private Cigar newCigar = null;
|
private Cigar newCigar = null;
|
||||||
private int newStart = -1;
|
private int newStart = -1;
|
||||||
private int mismatchScoreToReference;
|
private int mismatchScoreToReference = 0;
|
||||||
|
private long alignerMismatchScore = 0;
|
||||||
|
|
||||||
public AlignedRead(SAMRecord read) {
|
public AlignedRead(SAMRecord read) {
|
||||||
this.read = read;
|
this.read = read;
|
||||||
|
|
@ -1049,25 +1063,101 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getReadLength() {
|
public int getReadLength() {
|
||||||
return read.getReadLength();
|
return readBases != null ? readBases.length : read.getReadLength();
|
||||||
|
}
|
||||||
|
|
||||||
|
public byte[] getReadBases() {
|
||||||
|
if ( readBases == null )
|
||||||
|
getUnclippedBases();
|
||||||
|
return readBases;
|
||||||
|
}
|
||||||
|
|
||||||
|
public byte[] getBaseQualities() {
|
||||||
|
if ( baseQuals == null )
|
||||||
|
getUnclippedBases();
|
||||||
|
return baseQuals;
|
||||||
|
}
|
||||||
|
|
||||||
|
// pull out the bases that aren't clipped out
|
||||||
|
private void getUnclippedBases() {
|
||||||
|
readBases = new byte[getReadLength()];
|
||||||
|
baseQuals = new byte[getReadLength()];
|
||||||
|
byte[] actualReadBases = read.getReadBases();
|
||||||
|
byte[] actualBaseQuals = read.getBaseQualities();
|
||||||
|
int fromIndex = 0, toIndex = 0;
|
||||||
|
|
||||||
|
for ( CigarElement ce : read.getCigar().getCigarElements() ) {
|
||||||
|
int elementLength = ce.getLength();
|
||||||
|
switch ( ce.getOperator() ) {
|
||||||
|
case S:
|
||||||
|
fromIndex += elementLength;
|
||||||
|
break;
|
||||||
|
case M:
|
||||||
|
case I:
|
||||||
|
System.arraycopy(actualReadBases, fromIndex, readBases, toIndex, elementLength);
|
||||||
|
System.arraycopy(actualBaseQuals, fromIndex, baseQuals, toIndex, elementLength);
|
||||||
|
fromIndex += elementLength;
|
||||||
|
toIndex += elementLength;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// if we got clipped, trim the array
|
||||||
|
if ( fromIndex != toIndex ) {
|
||||||
|
byte[] trimmedRB = new byte[toIndex];
|
||||||
|
byte[] trimmedBQ = new byte[toIndex];
|
||||||
|
System.arraycopy(readBases, 0, trimmedRB, 0, toIndex);
|
||||||
|
System.arraycopy(baseQuals, 0, trimmedBQ, 0, toIndex);
|
||||||
|
readBases = trimmedRB;
|
||||||
|
baseQuals = trimmedBQ;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public Cigar getCigar() {
|
public Cigar getCigar() {
|
||||||
return (newCigar != null ? newCigar : read.getCigar());
|
return (newCigar != null ? newCigar : read.getCigar());
|
||||||
}
|
}
|
||||||
|
|
||||||
// tentatively sets the new Cigar, but it needs to be confirmed later
|
public void setCigar(Cigar cigar) {
|
||||||
// returns true if the new cigar is a valid change (i.e. not same as original and doesn't remove indel)
|
setCigar(cigar, true);
|
||||||
public boolean setCigar(Cigar cigar) {
|
}
|
||||||
if ( getCigar().equals(cigar) )
|
|
||||||
return false;
|
|
||||||
|
|
||||||
String str = AlignmentUtils.cigarToString(cigar);
|
// tentatively sets the new Cigar, but it needs to be confirmed later
|
||||||
|
public void setCigar(Cigar cigar, boolean fixClippedCigar) {
|
||||||
|
if ( fixClippedCigar && getReadBases().length < read.getReadLength() )
|
||||||
|
cigar = reclipCigar(cigar);
|
||||||
|
|
||||||
|
// no change?
|
||||||
|
if ( getCigar().equals(cigar) )
|
||||||
|
return;
|
||||||
|
|
||||||
|
// no indel?
|
||||||
|
String str = cigar.toString();
|
||||||
if ( !str.contains("D") && !str.contains("I") )
|
if ( !str.contains("D") && !str.contains("I") )
|
||||||
return false;
|
return;
|
||||||
|
|
||||||
newCigar = cigar;
|
newCigar = cigar;
|
||||||
return true;
|
}
|
||||||
|
|
||||||
|
// pull out the bases that aren't clipped out
|
||||||
|
private Cigar reclipCigar(Cigar cigar) {
|
||||||
|
ArrayList<CigarElement> elements = new ArrayList<CigarElement>();
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
int n = read.getCigar().numCigarElements();
|
||||||
|
while ( i < n && isClipOperator(read.getCigar().getCigarElement(i).getOperator()) )
|
||||||
|
elements.add(read.getCigar().getCigarElement(i++));
|
||||||
|
|
||||||
|
elements.addAll(cigar.getCigarElements());
|
||||||
|
|
||||||
|
i++;
|
||||||
|
while ( i < n && !isClipOperator(read.getCigar().getCigarElement(i).getOperator()) )
|
||||||
|
i++;
|
||||||
|
|
||||||
|
while ( i < n && isClipOperator(read.getCigar().getCigarElement(i).getOperator()) )
|
||||||
|
elements.add(read.getCigar().getCigarElement(i++));
|
||||||
|
|
||||||
|
return new Cigar(elements);
|
||||||
}
|
}
|
||||||
|
|
||||||
// tentatively sets the new start, but it needs to be confirmed later
|
// tentatively sets the new start, but it needs to be confirmed later
|
||||||
|
|
@ -1120,6 +1210,14 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
public int getMismatchScoreToReference() {
|
public int getMismatchScoreToReference() {
|
||||||
return mismatchScoreToReference;
|
return mismatchScoreToReference;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setAlignerMismatchScore(long score) {
|
||||||
|
alignerMismatchScore = score;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getAlignerMismatchScore() {
|
||||||
|
return alignerMismatchScore;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class Consensus {
|
private class Consensus {
|
||||||
|
|
@ -1155,12 +1253,12 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
private class ReadBin {
|
private class ReadBin {
|
||||||
|
|
||||||
private final ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
|
private final ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||||
private char[] reference = null;
|
private byte[] reference = null;
|
||||||
private GenomeLoc loc = null;
|
private GenomeLoc loc = null;
|
||||||
|
|
||||||
public ReadBin() { }
|
public ReadBin() { }
|
||||||
|
|
||||||
public void add(SAMRecord read, char[] ref) {
|
public void add(SAMRecord read, byte[] ref) {
|
||||||
reads.add(read);
|
reads.add(read);
|
||||||
|
|
||||||
// set up the reference
|
// set up the reference
|
||||||
|
|
@ -1173,7 +1271,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
if ( neededBases > ref.length )
|
if ( neededBases > ref.length )
|
||||||
throw new StingException("Read " + read.getReadName() + " does not overlap the previous read in this interval; please ensure that you are using the same input bam that was used in the RealignerTargetCreator step");
|
throw new StingException("Read " + read.getReadName() + " does not overlap the previous read in this interval; please ensure that you are using the same input bam that was used in the RealignerTargetCreator step");
|
||||||
if ( neededBases > 0 ) {
|
if ( neededBases > 0 ) {
|
||||||
char[] newReference = new char[reference.length + neededBases];
|
byte[] newReference = new byte[reference.length + neededBases];
|
||||||
System.arraycopy(reference, 0, newReference, 0, reference.length);
|
System.arraycopy(reference, 0, newReference, 0, reference.length);
|
||||||
System.arraycopy(ref, ref.length-neededBases, newReference, reference.length, neededBases);
|
System.arraycopy(ref, ref.length-neededBases, newReference, reference.length, neededBases);
|
||||||
reference = newReference;
|
reference = newReference;
|
||||||
|
|
@ -1185,14 +1283,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
public List<SAMRecord> getReads() { return reads; }
|
public List<SAMRecord> getReads() { return reads; }
|
||||||
|
|
||||||
public byte[] getRereference() {
|
public byte[] getRereference() {
|
||||||
// upper case it
|
return reference;
|
||||||
for ( int i = 0; i < reference.length; i++ )
|
|
||||||
reference[i] = Character.toUpperCase(reference[i]);
|
|
||||||
|
|
||||||
// convert it to a byte array
|
|
||||||
byte[] refArray = new byte[reference.length];
|
|
||||||
StringUtil.charsToBytes(reference, 0, reference.length, refArray, 0);
|
|
||||||
return refArray;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public GenomeLoc getLocation() { return loc; }
|
public GenomeLoc getLocation() { return loc; }
|
||||||
|
|
|
||||||
|
|
@ -33,9 +33,11 @@ import net.sf.samtools.util.StringUtil;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.utils.pileup.*;
|
import org.broadinstitute.sting.utils.pileup.*;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
|
||||||
public class AlignmentUtils {
|
public class AlignmentUtils {
|
||||||
|
|
||||||
|
|
@ -230,58 +232,6 @@ public class AlignmentUtils {
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String alignmentToString(final Cigar cigar,final String seq, final String ref, final int posOnRef ) {
|
|
||||||
return alignmentToString( cigar, seq, ref, posOnRef, 0 );
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String cigarToString(Cigar cig) {
|
|
||||||
return cig.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String alignmentToString(final Cigar cigar,final String seq, final String ref, final int posOnRef, final int posOnRead ) {
|
|
||||||
int readPos = posOnRead;
|
|
||||||
int refPos = posOnRef;
|
|
||||||
|
|
||||||
StringBuilder refLine = new StringBuilder();
|
|
||||||
StringBuilder readLine = new StringBuilder();
|
|
||||||
|
|
||||||
for ( int i = 0 ; i < posOnRead ; i++ ) {
|
|
||||||
refLine.append( ref.charAt( refPos - readPos + i ) );
|
|
||||||
readLine.append( seq.charAt(i) ) ;
|
|
||||||
}
|
|
||||||
|
|
||||||
for ( int i = 0 ; i < cigar.numCigarElements() ; i++ ) {
|
|
||||||
|
|
||||||
final CigarElement ce = cigar.getCigarElement(i);
|
|
||||||
|
|
||||||
switch(ce.getOperator()) {
|
|
||||||
case I:
|
|
||||||
for ( int j = 0 ; j < ce.getLength(); j++ ) {
|
|
||||||
refLine.append('+');
|
|
||||||
readLine.append( seq.charAt( readPos++ ) );
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case D:
|
|
||||||
for ( int j = 0 ; j < ce.getLength(); j++ ) {
|
|
||||||
readLine.append('*');
|
|
||||||
refLine.append( ref.charAt( refPos++ ) );
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case M:
|
|
||||||
for ( int j = 0 ; j < ce.getLength(); j++ ) {
|
|
||||||
refLine.append(ref.charAt( refPos++ ) );
|
|
||||||
readLine.append( seq.charAt( readPos++ ) );
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default: throw new StingException("Unsupported cigar operator: "+ce.getOperator() );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
refLine.append('\n');
|
|
||||||
refLine.append(readLine);
|
|
||||||
refLine.append('\n');
|
|
||||||
return refLine.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
public static char[] alignmentToCharArray( final Cigar cigar, final char[] read, final char[] ref ) {
|
public static char[] alignmentToCharArray( final Cigar cigar, final char[] read, final char[] ref ) {
|
||||||
|
|
||||||
final char[] alignment = new char[read.length];
|
final char[] alignment = new char[read.length];
|
||||||
|
|
@ -291,11 +241,12 @@ public class AlignmentUtils {
|
||||||
for ( int iii = 0 ; iii < cigar.numCigarElements() ; iii++ ) {
|
for ( int iii = 0 ; iii < cigar.numCigarElements() ; iii++ ) {
|
||||||
|
|
||||||
final CigarElement ce = cigar.getCigarElement(iii);
|
final CigarElement ce = cigar.getCigarElement(iii);
|
||||||
|
final int elementLength = ce.getLength();
|
||||||
|
|
||||||
switch( ce.getOperator() ) {
|
switch( ce.getOperator() ) {
|
||||||
case I:
|
case I:
|
||||||
case S:
|
case S:
|
||||||
for ( int jjj = 0 ; jjj < ce.getLength(); jjj++ ) {
|
for ( int jjj = 0 ; jjj < elementLength; jjj++ ) {
|
||||||
alignment[alignPos++] = '+';
|
alignment[alignPos++] = '+';
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
@ -304,12 +255,15 @@ public class AlignmentUtils {
|
||||||
refPos++;
|
refPos++;
|
||||||
break;
|
break;
|
||||||
case M:
|
case M:
|
||||||
for ( int jjj = 0 ; jjj < ce.getLength(); jjj++ ) {
|
for ( int jjj = 0 ; jjj < elementLength; jjj++ ) {
|
||||||
alignment[alignPos] = ref[refPos];
|
alignment[alignPos] = ref[refPos];
|
||||||
alignPos++;
|
alignPos++;
|
||||||
refPos++;
|
refPos++;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case H:
|
||||||
|
case P:
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
throw new StingException( "Unsupported cigar operator: " + ce.getOperator() );
|
throw new StingException( "Unsupported cigar operator: " + ce.getOperator() );
|
||||||
}
|
}
|
||||||
|
|
@ -372,6 +326,152 @@ public class AlignmentUtils {
|
||||||
* @return a cigar, in which indel is guaranteed to be placed at the leftmost possible position across a repeat (if any)
|
* @return a cigar, in which indel is guaranteed to be placed at the leftmost possible position across a repeat (if any)
|
||||||
*/
|
*/
|
||||||
public static Cigar leftAlignIndel(Cigar cigar, final byte[] refSeq, final byte[] readSeq, final int refIndex, final int readIndex) {
|
public static Cigar leftAlignIndel(Cigar cigar, final byte[] refSeq, final byte[] readSeq, final int refIndex, final int readIndex) {
|
||||||
|
|
||||||
|
int indexOfIndel = -1;
|
||||||
|
for ( int i = 0; i < cigar.numCigarElements(); i++ ) {
|
||||||
|
CigarElement ce = cigar.getCigarElement(i);
|
||||||
|
if ( ce.getOperator() == CigarOperator.D || ce.getOperator() == CigarOperator.I ) {
|
||||||
|
// if there is more than 1 indel, don't left align
|
||||||
|
if ( indexOfIndel != -1 )
|
||||||
|
return cigar;
|
||||||
|
indexOfIndel = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// if there is no indel or if the alignment starts with an insertion (so that there
|
||||||
|
// is no place on the read to move that insertion further left), we are done
|
||||||
|
if ( indexOfIndel < 1 ) return cigar;
|
||||||
|
|
||||||
|
final int indelLength = cigar.getCigarElement(indexOfIndel).getLength();
|
||||||
|
|
||||||
|
byte[] altString = createIndelString(cigar, indexOfIndel, refSeq, readSeq, refIndex, readIndex);
|
||||||
|
|
||||||
|
Cigar newCigar = cigar;
|
||||||
|
for ( int i = 0; i < indelLength; i++ ) {
|
||||||
|
newCigar = moveCigarLeft(newCigar, indexOfIndel);
|
||||||
|
byte[] newAltString = createIndelString(newCigar, indexOfIndel, refSeq, readSeq, refIndex, readIndex);
|
||||||
|
|
||||||
|
// check to make sure we haven't run off the end of the read
|
||||||
|
boolean reachedEndOfRead = cigarHasZeroSizeElement(newCigar);
|
||||||
|
|
||||||
|
if ( Arrays.equals(altString, newAltString) ) {
|
||||||
|
cigar = newCigar;
|
||||||
|
i = -1;
|
||||||
|
if ( reachedEndOfRead )
|
||||||
|
cigar = cleanUpCigar(cigar);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( reachedEndOfRead )
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return cigar;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean cigarHasZeroSizeElement(Cigar c) {
|
||||||
|
for ( CigarElement ce : c.getCigarElements() ) {
|
||||||
|
if ( ce.getLength() == 0 )
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Cigar cleanUpCigar(Cigar c) {
|
||||||
|
ArrayList<CigarElement> elements = new ArrayList<CigarElement>(c.numCigarElements()-1);
|
||||||
|
for ( CigarElement ce : c.getCigarElements() ) {
|
||||||
|
if ( ce.getLength() != 0 &&
|
||||||
|
(elements.size() != 0 || ce.getOperator() != CigarOperator.D) ) {
|
||||||
|
elements.add(ce);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new Cigar(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Cigar moveCigarLeft(Cigar cigar, int indexOfIndel) {
|
||||||
|
// get the first few elements
|
||||||
|
ArrayList<CigarElement> elements = new ArrayList<CigarElement>(cigar.numCigarElements());
|
||||||
|
for ( int i = 0; i < indexOfIndel - 1; i++)
|
||||||
|
elements.add(cigar.getCigarElement(i));
|
||||||
|
|
||||||
|
// get the indel element and move it left one base
|
||||||
|
CigarElement ce = cigar.getCigarElement(indexOfIndel-1);
|
||||||
|
elements.add(new CigarElement(ce.getLength()-1, ce.getOperator()));
|
||||||
|
elements.add(cigar.getCigarElement(indexOfIndel));
|
||||||
|
ce = cigar.getCigarElement(indexOfIndel+1);
|
||||||
|
elements.add(new CigarElement(ce.getLength()+1, ce.getOperator()));
|
||||||
|
|
||||||
|
// get the last few elements
|
||||||
|
for ( int i = indexOfIndel + 2; i < cigar.numCigarElements(); i++)
|
||||||
|
elements.add(cigar.getCigarElement(i));
|
||||||
|
return new Cigar(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static byte[] createIndelString(final Cigar cigar, final int indexOfIndel, final byte[] refSeq, final byte[] readSeq, int refIndex, int readIndex) {
|
||||||
|
CigarElement indel = cigar.getCigarElement(indexOfIndel);
|
||||||
|
int indelLength = indel.getLength();
|
||||||
|
|
||||||
|
// the indel-based reference string
|
||||||
|
byte[] alt = new byte[refSeq.length + (indelLength * (indel.getOperator() == CigarOperator.D ? -1 : 1))];
|
||||||
|
|
||||||
|
for ( int i = 0; i < indexOfIndel; i++ ) {
|
||||||
|
CigarElement ce = cigar.getCigarElement(i);
|
||||||
|
int length = ce.getLength();
|
||||||
|
|
||||||
|
switch( ce.getOperator() ) {
|
||||||
|
case M:
|
||||||
|
readIndex += length;
|
||||||
|
refIndex += length;
|
||||||
|
break;
|
||||||
|
case S:
|
||||||
|
readIndex += length;
|
||||||
|
break;
|
||||||
|
case N:
|
||||||
|
refIndex += length;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// add the bases before the indel
|
||||||
|
System.arraycopy(refSeq, 0, alt, 0, refIndex);
|
||||||
|
int currentPos = refIndex;
|
||||||
|
|
||||||
|
// take care of the indel
|
||||||
|
if ( indel.getOperator() == CigarOperator.D ) {
|
||||||
|
refIndex += indelLength;
|
||||||
|
} else {
|
||||||
|
System.arraycopy(readSeq, readIndex, alt, currentPos, indelLength);
|
||||||
|
currentPos += indelLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
// add the bases after the indel
|
||||||
|
System.arraycopy(refSeq, refIndex, alt, currentPos, refSeq.length - refIndex);
|
||||||
|
|
||||||
|
return alt;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Takes the alignment of the read sequence <code>readSeq</code> to the reference sequence <code>refSeq</code>
|
||||||
|
* starting at 0-based position <code>refIndex</code> on the <code>refSeq</code> and specified by its <code>cigar</code>.
|
||||||
|
* The last argument <code>readIndex</code> specifies 0-based position on the read where the alignment described by the
|
||||||
|
* <code>cigar</code> starts. Usually cigars specify alignments of the whole read to the ref, so that readIndex is normally 0.
|
||||||
|
* Use non-zero readIndex only when the alignment cigar represents alignment of a part of the read. The refIndex in this case
|
||||||
|
* should be the position where the alignment of that part of the read starts at. In other words, both refIndex and readIndex are
|
||||||
|
* always the positions where the cigar starts on the ref and on the read, respectively.
|
||||||
|
*
|
||||||
|
* If the alignment has an indel, then this method attempts moving this indel left across a stretch of repetitive bases. For instance, if the original cigar
|
||||||
|
* specifies that (any) one AT is deleted from a repeat sequence TATATATA, the output cigar will always mark the leftmost AT
|
||||||
|
* as deleted. If there is no indel in the original cigar, or the indel position is determined unambiguously (i.e. inserted/deleted sequence
|
||||||
|
* is not repeated), the original cigar is returned.
|
||||||
|
* @param cigar structure of the original alignment
|
||||||
|
* @param refSeq reference sequence the read is aligned to
|
||||||
|
* @param readSeq read sequence
|
||||||
|
* @param refIndex 0-based alignment start position on ref
|
||||||
|
* @param readIndex 0-based alignment start position on read
|
||||||
|
* @return a cigar, in which indel is guaranteed to be placed at the leftmost possible position across a repeat (if any)
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
public static Cigar leftAlignIndelOld(Cigar cigar, final byte[] refSeq, final byte[] readSeq, final int refIndex, final int readIndex) {
|
||||||
if ( cigar.numCigarElements() < 2 ) return cigar; // no indels, nothing to do
|
if ( cigar.numCigarElements() < 2 ) return cigar; // no indels, nothing to do
|
||||||
|
|
||||||
final CigarElement ce1 = cigar.getCigarElement(0);
|
final CigarElement ce1 = cigar.getCigarElement(0);
|
||||||
|
|
@ -512,4 +612,5 @@ public class AlignmentUtils {
|
||||||
}
|
}
|
||||||
return cigar;
|
return cigar;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue