From a0231f073f9d53146eaf0e401eeed6a53c2d0ccf Mon Sep 17 00:00:00 2001 From: ebanks Date: Sun, 12 Sep 2010 05:06:37 +0000 Subject: [PATCH] Damnit. Enabling the Picard code to recalculate all of the relevant SAMRecord attribute tags means that I need to have reference bases over all read bases even after realignment (and there are some big indels in dbsnp). Fortunately, I have my trusty IndexedFastaSequenceFile reader handy! Re-enabling the previously broken performance test. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4255 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/indels/IndelRealigner.java | 43 ++++++++++++------- .../indels/IndelRealignerPerformanceTest.java | 3 +- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index d3ba2d9b5..603c154d4 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -430,8 +430,8 @@ public class IndelRealigner extends ReadWalker { if ( reads.size() == 0 ) return; - final byte[] reference = readsToClean.getReference(referenceReader); - final long leftmostIndex = readsToClean.getLocation().getStart(); + byte[] reference = readsToClean.getReference(referenceReader); + int leftmostIndex = (int)readsToClean.getLocation().getStart(); final ArrayList refReads = new ArrayList(); // reads that perfectly match ref final ArrayList altReads = new ArrayList(); // reads that don't perfectly match @@ -472,7 +472,7 @@ public class IndelRealigner extends ReadWalker { for ( int j = 0; j < altReads.size(); j++ ) { AlignedRead toTest = altReads.get(j); - Pair altAlignment = findBestOffset(consensus.str, toTest, (int)leftmostIndex); + Pair altAlignment = findBestOffset(consensus.str, toTest, leftmostIndex); // the mismatch score is the min of its alignment vs. the reference and vs. the alternate int myScore = altAlignment.second; @@ -520,7 +520,7 @@ public class IndelRealigner extends ReadWalker { // start cleaning the appropriate reads for ( Pair indexPair : bestConsensus.readIndexes ) { AlignedRead aRead = altReads.get(indexPair.first); - if ( !updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.second, aRead, (int)leftmostIndex) ) + if ( !updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.second, aRead, leftmostIndex) ) return; } if ( !USE_KNOWN_INDELS_ONLY && !alternateReducesEntropy(altReads, reference, leftmostIndex) ) { @@ -588,11 +588,22 @@ public class IndelRealigner extends ReadWalker { SAMRecord read = aRead.getRead(); read.setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + 10, 254)); - // fix the attribute tags + // before we fix the attribute tags we first need make sure we have enough of the reference sequence + int neededBasesToLeft = Math.max(leftmostIndex - read.getAlignmentStart(), 0); + int neededBasesToRight = Math.max(read.getAlignmentEnd() - leftmostIndex - reference.length + 1, 0); + int neededBases = Math.max(neededBasesToLeft, neededBasesToRight); + if ( neededBases > 0 ) { + int padLeft = Math.max(leftmostIndex-neededBases, 1); + int padRight = Math.min(leftmostIndex+reference.length+neededBases, referenceReader.getSequenceDictionary().getSequence(currentInterval.getContig()).getSequenceLength()); + reference = referenceReader.getSubsequenceAt(currentInterval.getContig(), padLeft, padRight).getBases(); + leftmostIndex = padLeft; + } + + // now, fix the attribute tags if ( read.getAttribute(SAMTag.NM.name()) != null ) - read.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(read, reference, (int)leftmostIndex-1)); + read.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(read, reference, leftmostIndex-1)); if ( read.getAttribute(SAMTag.UQ.name()) != null ) - read.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(read, reference, (int)leftmostIndex-1)); + read.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(read, reference, leftmostIndex-1)); // TODO -- this is only temporary until Tim adds code to recalculate this value if ( read.getAttribute(SAMTag.MD.name()) != null ) read.setAttribute(SAMTag.MD.name(), null); @@ -613,12 +624,12 @@ public class IndelRealigner extends ReadWalker { } } - private void generateAlternateConsensesFromKnownIndels(final Set altConsensesToPopulate, final long leftmostIndex, final byte[] reference) { + private void generateAlternateConsensesFromKnownIndels(final Set altConsensesToPopulate, final int leftmostIndex, final byte[] reference) { for ( VariantContext knownIndel : knownIndelsToTry.values() ) { if ( knownIndel == null || !knownIndel.isIndel() ) continue; byte[] indelStr = knownIndel.isInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length()); - int start = (int)(knownIndel.getStart() - leftmostIndex) + 1; + int start = knownIndel.getStart() - leftmostIndex + 1; Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel.isDeletion()); if ( c != null ) altConsensesToPopulate.add(c); @@ -630,7 +641,7 @@ public class IndelRealigner extends ReadWalker { final ArrayList altReadsToPopulate, final LinkedList altAlignmentsToTest, final Set altConsenses, - final long leftmostIndex, + final int leftmostIndex, final byte[] reference) { long totalRawMismatchSum = 0L; @@ -648,11 +659,11 @@ public class IndelRealigner extends ReadWalker { // first, move existing indels (for 1 indel reads only) to leftmost position within identical sequence int numBlocks = AlignmentUtils.getNumAlignmentBlocks(read); if ( numBlocks == 2 ) { - Cigar newCigar = AlignmentUtils.leftAlignIndel(unclipCigar(read.getCigar()), reference, read.getReadBases(), read.getAlignmentStart()-(int)leftmostIndex, 0); + Cigar newCigar = AlignmentUtils.leftAlignIndel(unclipCigar(read.getCigar()), reference, read.getReadBases(), read.getAlignmentStart()-leftmostIndex, 0); aRead.setCigar(newCigar, false); } - final int startOnRef = read.getAlignmentStart()-(int)leftmostIndex; + final int startOnRef = read.getAlignmentStart()-leftmostIndex; final int rawMismatchScore = mismatchQualitySumIgnoreCigar(aRead, reference, startOnRef, Integer.MAX_VALUE); // if this doesn't match perfectly to the reference, let's try to clean it @@ -952,7 +963,7 @@ public class IndelRealigner extends ReadWalker { return true; } - private boolean alternateReducesEntropy(final List reads, final byte[] reference, final long leftmostIndex) { + private boolean alternateReducesEntropy(final List reads, final byte[] reference, final int leftmostIndex) { final int[] originalMismatchBases = new int[reference.length]; final int[] cleanedMismatchBases = new int[reference.length]; final int[] totalOriginalBases = new int[reference.length]; @@ -967,7 +978,7 @@ public class IndelRealigner extends ReadWalker { if ( read.getRead().getAlignmentBlocks().size() > 1 ) continue; - int refIdx = read.getOriginalAlignmentStart() - (int)leftmostIndex; + int refIdx = read.getOriginalAlignmentStart() - leftmostIndex; final byte[] readStr = read.getReadBases(); final byte[] quals = read.getBaseQualities(); @@ -983,7 +994,7 @@ public class IndelRealigner extends ReadWalker { } // reset and now do the calculation based on the cleaning - refIdx = read.getAlignmentStart() - (int)leftmostIndex; + refIdx = read.getAlignmentStart() - leftmostIndex; int altIdx = 0; Cigar c = read.getCigar(); for (int j = 0 ; j < c.numCigarElements() ; j++) { @@ -1031,7 +1042,7 @@ public class IndelRealigner extends ReadWalker { if ( snpsOutput != null ) { if ( didMismatch ) { sb.append(reads.get(0).getRead().getReferenceName() + ":"); - sb.append(((int)leftmostIndex + i)); + sb.append((leftmostIndex + i)); if ( stillMismatches ) sb.append(" SAME_SNP\n"); else diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java index 866b20df0..44ff3b0f9 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java @@ -37,7 +37,6 @@ public class IndelRealignerPerformanceTest extends WalkerTest { " -targetIntervals " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.realigner.intervals", 0, new ArrayList(0)); - // executeTest("testIndelRealignerWholeExome", spec2); - // TODO: Fix me Eric + executeTest("testIndelRealignerWholeExome", spec2); } } \ No newline at end of file