Fixes for running HaplotypeCaller with reduced reads: a) minor refactoring — pulled out the code that computes the mean representative count into ReadUtils; b) don't use the min representative count over a kmer when constructing the de Bruijn graph — this creates many paths with multiplicity=1 and makes us lose a lot of SNPs at the edge of capture targets. Use the mean instead.

This commit is contained in:
Guillermo del Angel 2012-08-06 20:22:12 -04:00
parent 44f160f29f
commit 238d55cb61
3 changed files with 15 additions and 12 deletions

View File

@ -30,6 +30,7 @@ import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -92,7 +93,7 @@ public class LikelihoodCalculationEngine {
final int[][] readCounts = new int[numHaplotypes][numReads];
for( int iii = 0; iii < numReads; iii++ ) {
final GATKSAMRecord read = reads.get(iii);
final int readCount = getRepresentativeReadCount(read);
final int readCount = ReadUtils.getMeanRepresentativeReadCount(read);
final byte[] overallGCP = new byte[read.getReadLength()];
Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data?
@ -123,15 +124,6 @@ public class LikelihoodCalculationEngine {
}
}
/**
 * Computes a single representative count for a read: 1 for a normal read,
 * or the mean of the per-base reduced read counts for a reduced read.
 *
 * NOTE(review): this version uses truncating integer division; the
 * ReadUtils.getMeanRepresentativeReadCount replacement rounds to the
 * nearest integer instead, so the two can differ by 1 for the same read.
 * NOTE(review): assumes getReducedReadCounts() is non-empty for reduced
 * reads — an empty array would divide by zero. TODO confirm upstream.
 */
private static int getRepresentativeReadCount(GATKSAMRecord read) {
if (!read.isReducedRead())
return 1;
// compute mean representative read counts
final byte[] counts = read.getReducedReadCounts();
return MathUtils.sum(counts)/counts.length;
}
private static int computeFirstDifferingPosition( final byte[] b1, final byte[] b2 ) {
for( int iii = 0; iii < b1.length && iii < b2.length; iii++ ){
if( b1[iii] != b2[iii] ) {

View File

@ -198,8 +198,10 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
}
int countNumber = 1;
if (read.isReducedRead()) {
// compute min (?) number of reduced read counts in current kmer span
countNumber = MathUtils.arrayMin(Arrays.copyOfRange(reducedReadCounts,iii,iii+KMER_LENGTH+1));
// compute mean number of reduced read counts in current kmer span
final byte[] counts = Arrays.copyOfRange(reducedReadCounts,iii,iii+KMER_LENGTH+1);
// precise rounding can make a difference with low consensus counts
countNumber = (int)Math.round((double)MathUtils.sum(counts)/counts.length);
}
if( !badKmer ) {

View File

@ -56,6 +56,15 @@ public class ReadUtils {
private static int DEFAULT_ADAPTOR_SIZE = 100;
public static int CLIPPING_GOAL_NOT_REACHED = -1;
/**
 * Returns the mean representative read count for a read: 1 for a normal
 * (non-reduced) read; for a reduced read, the per-base reduced read
 * counts averaged and rounded to the nearest integer.
 *
 * NOTE(review): assumes getReducedReadCounts() is non-empty for reduced
 * reads — an empty array would divide by zero. TODO confirm upstream.
 *
 * @param read the read to summarize; may be a reduced or normal read
 * @return the rounded mean of the reduced read counts, or 1 if not reduced
 */
public static int getMeanRepresentativeReadCount(GATKSAMRecord read) {
if (!read.isReducedRead())
return 1;
// compute mean representative read counts; cast to double before
// dividing so the mean is rounded rather than truncated
final byte[] counts = read.getReducedReadCounts();
return (int)Math.round((double)MathUtils.sum(counts)/counts.length);
}
/**
* A marker to tell which end of the read has been clipped
*/