Make the reference model calculation work with reduced reads.

It's just a matter of using PileupElement.getRepresentativeCount() instead of '++'.
2013-11-21 10:21:25 -05:00 · 2013-11-21 10:21:25 -05:00 · 0fac4fb3b6
parent adb77b406f
commit 0fac4fb3b6
2 changed files with 23 additions and 4 deletions
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ReferenceConfidenceModel.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ReferenceConfidenceModel.java
@ -296,9 +296,9 @@ public class ReferenceConfidenceModel {
                    if( hqSoftClips != null && p.isNextToSoftClip() ) {
                        hqSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(p.getRead(), (byte) 28));
                    }
-                    result.AD_Ref_Any[1]++;
+                    result.AD_Ref_Any[1] += p.getRepresentativeCount();
                } else {
-                    result.AD_Ref_Any[0]++;
+                    result.AD_Ref_Any[0] += p.getRepresentativeCount();
                }
                result.genotypeLikelihoods[AA] += p.getRepresentativeCount() * QualityUtils.qualToProbLog10(qual);
                result.genotypeLikelihoods[AB] += p.getRepresentativeCount() * MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + MathUtils.LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + MathUtils.LOG_ONE_THIRD + MathUtils.LOG_ONE_HALF );
@ -483,7 +483,7 @@ public class ReferenceConfidenceModel {

            // todo -- this code really should handle CIGARs directly instead of relying on the above tests
            if ( isReadInformativeAboutIndelsOfSize(read.getReadBases(), read.getBaseQualities(), offset, ref, pileupOffsetIntoRef, maxIndelSize) ) {
-                nInformative++;
+                nInformative += p.getRepresentativeCount();
                if( nInformative > MAX_N_INDEL_INFORMATIVE_READS ) {
                    return MAX_N_INDEL_INFORMATIVE_READS;
                }
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ReferenceConfidenceModelUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ReferenceConfidenceModelUnitTest.java
@ -47,7 +47,6 @@
 package org.broadinstitute.sting.gatk.walkers.haplotypecaller;

 import net.sf.samtools.SAMFileHeader;
-import org.apache.commons.lang.ArrayUtils;
 import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
@ -164,6 +163,26 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
        }
    }

+    @Test
+    public void testCalcNIndelInformativeReducedReads() {
+        final String bases = "ACGGGTTTGGAC";
+        final byte[] quals = Utils.dupBytes((byte)30, bases.length());
+        final int count = 10;
+        final int[] counts = new int[bases.length()];
+        for ( int i = 0; i < counts.length; i++ )
+            counts[i] = count;
+        final int position = 100;
+
+        final GATKSAMRecord read = ArtificialSAMUtils.createArtificialReducedRead(header, "foo", 0, position, counts.length, counts);
+        read.setReadString(bases);
+        read.setBaseQualities(quals);
+        read.setCigarString(bases.length() + "M");
+        final GenomeLoc loc = new UnvalidatingGenomeLoc("20", 0, position, position);
+        final ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, Collections.singletonList(read), 0);
+        final int actual = model.calcNIndelInformativeReads(pileup, 0, bases.getBytes(), 3);
+        Assert.assertEquals(actual, count);
+    }
+
    @Test
    public void testClose() {
        model.close();