Make the reference model calculation work with reduced reads.

It's just a matter of using PileupElement.getRepresentativeCount() instead of '++'.
This commit is contained in:
Eric Banks 2013-11-21 10:21:25 -05:00
parent adb77b406f
commit 0fac4fb3b6
2 changed files with 23 additions and 4 deletions

View File

@ -296,9 +296,9 @@ public class ReferenceConfidenceModel {
if( hqSoftClips != null && p.isNextToSoftClip() ) {
hqSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(p.getRead(), (byte) 28));
}
result.AD_Ref_Any[1]++;
result.AD_Ref_Any[1] += p.getRepresentativeCount();
} else {
result.AD_Ref_Any[0]++;
result.AD_Ref_Any[0] += p.getRepresentativeCount();
}
result.genotypeLikelihoods[AA] += p.getRepresentativeCount() * QualityUtils.qualToProbLog10(qual);
result.genotypeLikelihoods[AB] += p.getRepresentativeCount() * MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + MathUtils.LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + MathUtils.LOG_ONE_THIRD + MathUtils.LOG_ONE_HALF );
@ -483,7 +483,7 @@ public class ReferenceConfidenceModel {
// todo -- this code really should handle CIGARs directly instead of relying on the above tests
if ( isReadInformativeAboutIndelsOfSize(read.getReadBases(), read.getBaseQualities(), offset, ref, pileupOffsetIntoRef, maxIndelSize) ) {
nInformative++;
nInformative += p.getRepresentativeCount();
if( nInformative > MAX_N_INDEL_INFORMATIVE_READS ) {
return MAX_N_INDEL_INFORMATIVE_READS;
}

View File

@ -47,7 +47,6 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import net.sf.samtools.SAMFileHeader;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -164,6 +163,26 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
}
}
@Test
public void testCalcNIndelInformativeReducedReads() {
final String bases = "ACGGGTTTGGAC";
final byte[] quals = Utils.dupBytes((byte)30, bases.length());
final int count = 10;
final int[] counts = new int[bases.length()];
for ( int i = 0; i < counts.length; i++ )
counts[i] = count;
final int position = 100;
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialReducedRead(header, "foo", 0, position, counts.length, counts);
read.setReadString(bases);
read.setBaseQualities(quals);
read.setCigarString(bases.length() + "M");
final GenomeLoc loc = new UnvalidatingGenomeLoc("20", 0, position, position);
final ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, Collections.singletonList(read), 0);
final int actual = model.calcNIndelInformativeReads(pileup, 0, bases.getBytes(), 3);
Assert.assertEquals(actual, count);
}
@Test
public void testClose() {
model.close();