From 75543a3f2217b6aa27e70ee181d4250d1a359d58 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 13 Jul 2012 18:50:27 -0400 Subject: [PATCH] ReadClipper.clipRead's claim that it doesn't modify the original read was false. Ultimately, GATKSAMRecord.clone (as documented) creates a soft copy of the read - so modifying e.g. the bases of the cloned read means that you modify the bases of the original read too. Because of this, when the BQSRv2 Context covariate was writing Ns over the low quality tails of the reads they got propagated out to the output BAM file (very bad). I've updated the ReadClipper docs and cleaned up the code (no reason to use a clone of the read anymore given that we are already modifying the original). For now, the simplest thing is to have the Context covariate store the original bases, overwrite low quality Ns, compute covariates, and rewrite the original bases; we can update later if needed. --- .../gatk/walkers/bqsr/ContextCovariate.java | 5 ++ .../sting/utils/clipping/ReadClipper.java | 50 +++++++++---------- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java index 7d55c620b..17da89f29 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java @@ -82,6 +82,8 @@ public class ContextCovariate implements StandardCovariate { @Override public void recordValues(final GATKSAMRecord read, final ReadCovariates values) { + // store the original bases and then write Ns over low quality ones + final byte[] originalBases = read.getReadBases(); final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag(); @@ -99,6 +101,9 @@ public class ContextCovariate implements StandardCovariate { final int indelKey = indelKeys.get(i); values.addCovariate(mismatchKeys.get(i), indelKey, indelKey, (negativeStrand ? readLength - i - 1 : i)); } + + // put the original bases back in + read.setReadBases(originalBases); } // Used to get the covariate's value from input csv file during on-the-fly recalibration diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java index d438de900..ba9267222 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java @@ -110,37 +110,32 @@ public class ReadClipper { } /** - * Creates a new read that's been clipped according to ops and the chosen algorithm. - * The original read is unmodified. + * Clips a read according to ops and the chosen algorithm. * * @param algorithm What mode of clipping do you want to apply for the stacked operations. - * @return a new read with the clipping applied. + * @return the read with the clipping applied. */ public GATKSAMRecord clipRead(ClippingRepresentation algorithm) { if (ops == null) return getRead(); - else { - try { - GATKSAMRecord clippedRead = (GATKSAMRecord) read.clone(); - for (ClippingOp op : getOps()) { - //check if the clipped read can still be clipped in the range requested - if (op.start < clippedRead.getReadLength()) { - ClippingOp fixedOperation = op; - if (op.stop >= clippedRead.getReadLength()) - fixedOperation = new ClippingOp(op.start, clippedRead.getReadLength() - 1); - clippedRead = fixedOperation.apply(algorithm, clippedRead); - } - } - wasClipped = true; - ops.clear(); - if ( clippedRead.isEmpty() ) - return GATKSAMRecord.emptyRead(clippedRead); - return clippedRead; - } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); // this should never happen + GATKSAMRecord clippedRead = read; + for (ClippingOp op : getOps()) { + final int readLength = clippedRead.getReadLength(); + //check if the clipped read can still be clipped in the range requested + if (op.start < readLength) { + ClippingOp fixedOperation = op; + if (op.stop >= readLength) + fixedOperation = new ClippingOp(op.start, readLength - 1); + + clippedRead = fixedOperation.apply(algorithm, clippedRead); } } + wasClipped = true; + ops.clear(); + if ( clippedRead.isEmpty() ) + return GATKSAMRecord.emptyRead(clippedRead); + return clippedRead; } @@ -241,20 +236,21 @@ public class ReadClipper { if (read.isEmpty()) return read; - byte [] quals = read.getBaseQualities(); + final byte [] quals = read.getBaseQualities(); + final int readLength = read.getReadLength(); int leftClipIndex = 0; - int rightClipIndex = read.getReadLength() - 1; + int rightClipIndex = readLength - 1; // check how far we can clip both sides while (rightClipIndex >= 0 && quals[rightClipIndex] <= lowQual) rightClipIndex--; - while (leftClipIndex < read.getReadLength() && quals[leftClipIndex] <= lowQual) leftClipIndex++; + while (leftClipIndex < readLength && quals[leftClipIndex] <= lowQual) leftClipIndex++; // if the entire read should be clipped, then return an empty read. if (leftClipIndex > rightClipIndex) return GATKSAMRecord.emptyRead(read); - if (rightClipIndex < read.getReadLength() - 1) { - this.addOp(new ClippingOp(rightClipIndex + 1, read.getReadLength() - 1)); + if (rightClipIndex < readLength - 1) { + this.addOp(new ClippingOp(rightClipIndex + 1, readLength - 1)); } if (leftClipIndex > 0 ) { this.addOp(new ClippingOp(0, leftClipIndex - 1));