From 7d7ce6cf00db73c437cf709b05501b607a219b7e Mon Sep 17 00:00:00 2001 From: delangel Date: Thu, 5 May 2011 17:08:34 +0000 Subject: [PATCH] Two embarassing bug fixes: a) Forgot to convert from phred to log-prob when computing gap penalties from recal table. b) Forgot to uncomment code to correctly deal with hard-clipped bases in a read. But because of this, had to do a short term workaround to at least temporarily return class from hardClipAdaptorSequence to GATKSAMRecord. Otherwise, I get exceptions when casting because somehow some reads in HiSeq get to be SAMRecord (which GATKSAMRecord inherits from) but some reads get to be BAMRecords (which can't be cast into GATKSAMRecord), not sure why. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5771 348d0f76-0448-11de-a6fe-93d51630548a --- ...delGenotypeLikelihoodsCalculationModel.java | 3 ++- .../walkers/indels/PairHMMIndelErrorModel.java | 4 ++-- .../sting/utils/sam/ReadUtils.java | 18 +++++++++--------- .../UnifiedGenotyperIntegrationTest.java | 6 +++--- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 2a5ebf027..37ede7f2c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -43,6 +43,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broad.tribble.util.variantcontext.Allele; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import java.util.*; @@ -130,7 +131,7 @@ private HaplotypeIndelErrorModel model; for ( ExtendedEventPileupElement p : indelPileup.toExtendedIterable() ) { //SAMRecord read = p.getRead(); - SAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead()); + GATKSAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead()); if (read == null) continue; if(ReadUtils.is454Read(read)) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 4e328ded3..e84a05a7d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -751,7 +751,7 @@ public class PairHMMIndelErrorModel { } } for (SAMRecord pread : pileup.getReads()) { - SAMRecord read = ReadUtils.hardClipAdaptorSequence(pread); + GATKSAMRecord read = ReadUtils.hardClipAdaptorSequence(pread); if (read == null) continue; @@ -781,7 +781,7 @@ public class PairHMMIndelErrorModel { qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey); } - recalQuals[offset] = (double)qualityScore; + recalQuals[offset] = -((double)qualityScore)/10.0; } // for each read/haplotype combination, compute likelihoods, ie -10*log10(Pr(R | Hi)) diff --git a/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 02a1bdb5d..e30584113 100644 --- a/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -182,10 +182,10 @@ public class ReadUtils { * @param adaptorLength length of adaptor sequence * @return a new read with adaptor sequence hard-clipped out or null if read is fully clipped */ - public static SAMRecord hardClipAdaptorSequence(final SAMRecord rec, int adaptorLength) { + public static GATKSAMRecord hardClipAdaptorSequence(final SAMRecord rec, int adaptorLength) { Pair adaptorBoundaries = getAdaptorBoundaries(rec, adaptorLength); - SAMRecord result = rec; + GATKSAMRecord result = (GATKSAMRecord)rec; if ( adaptorBoundaries != null ) { if ( rec.getReadNegativeStrandFlag() && adaptorBoundaries.second >= rec.getAlignmentStart() && adaptorBoundaries.first < rec.getAlignmentEnd() ) @@ -198,7 +198,7 @@ public class ReadUtils { } // return true if the read needs to be completely clipped - private static SAMRecord hardClipStartOfRead(SAMRecord oldRec, int stopPosition) { + private static GATKSAMRecord hardClipStartOfRead(SAMRecord oldRec, int stopPosition) { if ( stopPosition >= oldRec.getAlignmentEnd() ) { // BAM representation issue -- we can't clip away all bases in a read, just leave it alone and let the filter deal with it @@ -206,9 +206,9 @@ public class ReadUtils { return null; } - SAMRecord rec; + GATKSAMRecord rec; try { - rec = (SAMRecord)oldRec.clone(); + rec = (GATKSAMRecord)oldRec.clone(); } catch (Exception e) { return null; } @@ -278,7 +278,7 @@ public class ReadUtils { return rec; } - private static SAMRecord hardClipEndOfRead(SAMRecord oldRec, int startPosition) { + private static GATKSAMRecord hardClipEndOfRead(SAMRecord oldRec, int startPosition) { if ( startPosition <= oldRec.getAlignmentStart() ) { // BAM representation issue -- we can't clip away all bases in a read, just leave it alone and let the filter deal with it @@ -286,9 +286,9 @@ public class ReadUtils { return null; } - SAMRecord rec; + GATKSAMRecord rec; try { - rec = (SAMRecord)oldRec.clone(); + rec = (GATKSAMRecord)oldRec.clone(); } catch (Exception e) { return null; } @@ -430,7 +430,7 @@ public class ReadUtils { * @param rec original SAM record * @return a new read with adaptor sequence hard-clipped out or null if read is fully clipped */ - public static SAMRecord hardClipAdaptorSequence(final SAMRecord rec) { + public static GATKSAMRecord hardClipAdaptorSequence(final SAMRecord rec) { return hardClipAdaptorSequence(rec, DEFAULT_ADAPTOR_SIZE); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 49c2459d8..773c1bca9 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -34,7 +34,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testMultiSamplePilot2AndRecallingWithAlleles() { - String md5 = "87a99063152ca935a1bec87ef19e0dad"; + String md5 = "93d2571e686740c5c775b1fb862b62ec"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1, @@ -248,7 +248,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("84bc209f38d60f325f1a8b6292a82c82")); + Arrays.asList("3a6ba2d9b9a5c606389d3353bb27bbe8")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -276,7 +276,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("e9b4fef2cdfa4c4657f0df53309131b6")); + Arrays.asList("592a214b8ca1b62733f9627adb631f16")); executeTest(String.format("test indel calling, multiple technologies"), spec); }