From c6ded4d23c69247700504ac5314fb56e27dfb2e1 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 5 Mar 2012 17:54:42 -0500 Subject: [PATCH] Bug fix for hard clipping reads when base insertion and base deletion qualities are present in the read. Updating HaplotypeCaller integration tests to reflect all the recent changes. --- .../sting/utils/clipping/ClippingOp.java | 10 ++++++ .../sting/utils/fragments/FragmentUtils.java | 36 +++++++++++-------- .../sting/utils/sam/GATKSAMRecord.java | 4 +++ 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java index fb133d902..62a67a1f2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java @@ -4,6 +4,7 @@ import com.google.java.contract.Requires; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDataManager; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -314,6 +315,15 @@ public class ClippingOp { if (start == 0) hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar)); + if (read.hasBaseIndelQualities()) { + byte[] newBaseInsertionQuals = new byte[newLength]; + byte[] newBaseDeletionQuals = new byte[newLength]; + System.arraycopy(read.getBaseInsertionQualities(), copyStart, newBaseInsertionQuals, 0, newLength); + System.arraycopy(read.getBaseDeletionQualities(), copyStart, newBaseDeletionQuals, 0, newLength); + hardClippedRead.setBaseQualities(newBaseInsertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION); + hardClippedRead.setBaseQualities(newBaseDeletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION); + } + return hardClippedRead; } diff --git a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java index 7104b1edd..eea45567f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java @@ -151,23 +151,16 @@ public class FragmentUtils { final int numBases = firstReadStop + secondRead.getReadLength(); final byte[] bases = new byte[numBases]; final byte[] quals = new byte[numBases]; - // BUGBUG: too verbose, clean this up. final byte[] insertionQuals = new byte[numBases]; final byte[] deletionQuals = new byte[numBases]; final byte[] firstReadBases = firstRead.getReadBases(); final byte[] firstReadQuals = firstRead.getBaseQualities(); - final byte[] firstReadInsertionQuals = firstRead.getBaseInsertionQualities(); - final byte[] firstReadDeletionQuals = firstRead.getBaseDeletionQualities(); final byte[] secondReadBases = secondRead.getReadBases(); final byte[] secondReadQuals = secondRead.getBaseQualities(); - final byte[] secondReadInsertionQuals = secondRead.getBaseInsertionQualities(); - final byte[] secondReadDeletionQuals = secondRead.getBaseDeletionQualities(); for(int iii = 0; iii < firstReadStop; iii++) { bases[iii] = firstReadBases[iii]; quals[iii] = firstReadQuals[iii]; - insertionQuals[iii] = firstReadInsertionQuals[iii]; - deletionQuals[iii] = firstReadDeletionQuals[iii]; } for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) { if( firstReadQuals[iii] > MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] > MIN_QUAL_BAD_OVERLAP && firstReadBases[iii] != secondReadBases[iii-firstReadStop] ) { @@ -175,22 +168,16 @@ public class FragmentUtils { } bases[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadBases[iii] : secondReadBases[iii-firstReadStop] ); quals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadQuals[iii] : secondReadQuals[iii-firstReadStop] ); - insertionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadInsertionQuals[iii] : secondReadInsertionQuals[iii-firstReadStop] ); // Purposefully checking the highest base quality score - deletionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadDeletionQuals[iii] : secondReadDeletionQuals[iii-firstReadStop] ); // Purposefully checking the highest base quality score } for(int iii = firstRead.getReadLength(); iii < numBases; iii++) { bases[iii] = secondReadBases[iii-firstReadStop]; quals[iii] = secondReadQuals[iii-firstReadStop]; - insertionQuals[iii] = secondReadInsertionQuals[iii-firstReadStop]; - deletionQuals[iii] = secondReadDeletionQuals[iii-firstReadStop]; } final GATKSAMRecord returnRead = new GATKSAMRecord(firstRead.getHeader()); returnRead.setAlignmentStart(firstRead.getUnclippedStart()); returnRead.setReadBases( bases ); - returnRead.setBaseQualities( quals, RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION ); - returnRead.setBaseQualities( insertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION ); - returnRead.setBaseQualities( deletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION ); + returnRead.setBaseQualities( quals ); returnRead.setReadGroup( firstRead.getReadGroup() ); returnRead.setReferenceName( firstRead.getReferenceName() ); final CigarElement c = new CigarElement(bases.length, CigarOperator.M); @@ -199,6 +186,27 @@ public class FragmentUtils { returnRead.setCigar( new Cigar( cList )); returnRead.setMappingQuality( firstRead.getMappingQuality() ); + if( firstRead.hasBaseIndelQualities() || secondRead.hasBaseIndelQualities() ) { + final byte[] firstReadInsertionQuals = firstRead.getBaseInsertionQualities(); + final byte[] firstReadDeletionQuals = firstRead.getBaseDeletionQualities(); + final byte[] secondReadInsertionQuals = secondRead.getBaseInsertionQualities(); + final byte[] secondReadDeletionQuals = secondRead.getBaseDeletionQualities(); + for(int iii = 0; iii < firstReadStop; iii++) { + insertionQuals[iii] = firstReadInsertionQuals[iii]; + deletionQuals[iii] = firstReadDeletionQuals[iii]; + } + for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) { + insertionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadInsertionQuals[iii] : secondReadInsertionQuals[iii-firstReadStop] ); // Purposefully checking the highest *base* quality score + deletionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadDeletionQuals[iii] : secondReadDeletionQuals[iii-firstReadStop] ); // Purposefully checking the highest *base* quality score + } + for(int iii = firstRead.getReadLength(); iii < numBases; iii++) { + insertionQuals[iii] = secondReadInsertionQuals[iii-firstReadStop]; + deletionQuals[iii] = secondReadDeletionQuals[iii-firstReadStop]; + } + returnRead.setBaseQualities( insertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION ); + returnRead.setBaseQualities( deletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION ); + } + final ArrayList returnList = new ArrayList(); returnList.add(returnRead); return returnList; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index f5a9b2f45..648dafb81 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -194,6 +194,10 @@ public class GATKSAMRecord extends BAMRecord { } } + public boolean hasBaseIndelQualities() { + return getAttribute( BQSR_BASE_INSERTION_QUALITIES ) != null || getAttribute( BQSR_BASE_DELETION_QUALITIES ) != null; + } + public byte[] getBaseInsertionQualities() { byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_INSERTION_QUALITIES ) ); if( quals == null ) {