Count Reads should use a Long instead of an Integer for counts to prevent overflows. Added unit test.

This commit is contained in:
Eric Banks 2013-05-07 12:23:24 -04:00
parent 1f3624d204
commit 58f4b81222
3 changed files with 61 additions and 7 deletions

View File

@ -66,11 +66,16 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
*/
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS, DataSource.REFERENCE})
// NOTE: this is a diff hunk shown without +/- markers, so the pre-change and
// post-change versions of some lines appear one after the other.
// Pre-change declaration: the second type argument (the reduce/accumulator type) is Integer.
public class CountReads extends ReadWalker<Integer, Integer> implements NanoSchedulable {
// Post-change declaration: the accumulator type is widened to Long, per the commit's stated goal.
public class CountReads extends ReadWalker<Integer, Long> implements NanoSchedulable {
// Every read maps to the constant 1, so summing the map values yields the number of reads.
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
return 1;
}
// Pre-change accumulator: Integer + Integer is evaluated as int arithmetic,
// which silently wraps around once the sum passes Integer.MAX_VALUE.
@Override public Integer reduceInit() { return 0; }
@Override public Integer reduce(Integer value, Integer sum) { return value + sum; }
// Post-change accumulator: starts at 0L, and the (long) cast on value makes the
// addition a 64-bit operation before the result is boxed back into a Long.
@Override public Long reduceInit() { return 0L; }
public Long reduce(Integer value, Long sum) { return (long) value + sum; }
// Logs the final accumulated count when the traversal finishes.
public void onTraversalDone(Long result) {
logger.info("CountReads counted " + result + " reads in the traversal");
}
}

View File

@ -153,11 +153,11 @@ public class TraverseReadsUnitTest extends BaseTest {
countReadWalker.onTraversalDone(accumulator);
if (!(accumulator instanceof Integer)) {
fail("Count read walker should return an interger.");
if (!(accumulator instanceof Long)) {
fail("Count read walker should return a Long.");
}
if (((Integer) accumulator) != 10000) {
fail("there should be 10000 mapped reads in the index file, there was " + ((Integer) accumulator));
if (!accumulator.equals(new Long(10000))) {
fail("there should be 10000 mapped reads in the index file, there was " + (accumulator));
}
}

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.qc;
import org.testng.Assert;
import org.testng.annotations.Test;
/**
 * Unit test for {@link CountReads}.
 *
 * Verifies that the walker accumulates its count as a {@code Long}, so the
 * total keeps growing correctly after it passes {@code Integer.MAX_VALUE}
 * instead of wrapping around.
 */
public class CountReadsUnitTest {

    /** Number of reductions performed on each side of the int boundary. */
    private static final int STEPS_AROUND_BOUNDARY = 5;

    /**
     * Drives map/reduce across the {@code Integer.MAX_VALUE} boundary and
     * checks that the accumulated count is exact on the far side.
     */
    @Test
    public void testReadsDoNotOverflowInt() {
        final CountReads walker = new CountReads();

        // A fresh traversal must start counting from zero.
        Assert.assertEquals(walker.reduceInit().longValue(), 0L);

        // Seed the accumulator just below the boundary rather than counting up
        // from zero: more than two billion boxed reduce calls would otherwise
        // be needed to reach the only region where overflow can occur.
        final long justBelowMaxInt = ((long) Integer.MAX_VALUE) - STEPS_AROUND_BOUNDARY;
        final long moreThanMaxInt = ((long) Integer.MAX_VALUE) + STEPS_AROUND_BOUNDARY;

        Long sum = justBelowMaxInt;
        for ( long i = justBelowMaxInt; i < moreThanMaxInt; i++ ) {
            // The walker ignores its arguments and emits 1 per read.
            final Integer x = walker.map(null, null, null);
            sum = walker.reduce(x, sum);
        }

        Assert.assertEquals(sum.longValue(), moreThanMaxInt);
    }
}