Fixes for running HaplotypeCaller with reduced reads: a) minor refactoring, pulled out code to compute mean representative count to ReadUtils, b) Don't use min representative count over kmer when constructing de Bruijn graph - this creates many paths with multiplicity=1 and makes us lose a lot of SNP's at edge of capture targets. Use mean instead
This commit is contained in:
parent
44f160f29f
commit
238d55cb61
|
|
@ -30,6 +30,7 @@ import com.google.java.contract.Requires;
|
|||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
|
|
@ -92,7 +93,7 @@ public class LikelihoodCalculationEngine {
|
|||
final int[][] readCounts = new int[numHaplotypes][numReads];
|
||||
for( int iii = 0; iii < numReads; iii++ ) {
|
||||
final GATKSAMRecord read = reads.get(iii);
|
||||
final int readCount = getRepresentativeReadCount(read);
|
||||
final int readCount = ReadUtils.getMeanRepresentativeReadCount(read);
|
||||
|
||||
final byte[] overallGCP = new byte[read.getReadLength()];
|
||||
Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data?
|
||||
|
|
@ -123,15 +124,6 @@ public class LikelihoodCalculationEngine {
|
|||
}
|
||||
}
|
||||
|
||||
private static int getRepresentativeReadCount(GATKSAMRecord read) {
|
||||
if (!read.isReducedRead())
|
||||
return 1;
|
||||
|
||||
// compute mean representative read counts
|
||||
final byte[] counts = read.getReducedReadCounts();
|
||||
return MathUtils.sum(counts)/counts.length;
|
||||
}
|
||||
|
||||
private static int computeFirstDifferingPosition( final byte[] b1, final byte[] b2 ) {
|
||||
for( int iii = 0; iii < b1.length && iii < b2.length; iii++ ){
|
||||
if( b1[iii] != b2[iii] ) {
|
||||
|
|
|
|||
|
|
@ -198,8 +198,10 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
}
|
||||
int countNumber = 1;
|
||||
if (read.isReducedRead()) {
|
||||
// compute min (?) number of reduced read counts in current kmer span
|
||||
countNumber = MathUtils.arrayMin(Arrays.copyOfRange(reducedReadCounts,iii,iii+KMER_LENGTH+1));
|
||||
// compute mean number of reduced read counts in current kmer span
|
||||
final byte[] counts = Arrays.copyOfRange(reducedReadCounts,iii,iii+KMER_LENGTH+1);
|
||||
// precise rounding can make a difference with low consensus counts
|
||||
countNumber = (int)Math.round((double)MathUtils.sum(counts)/counts.length);
|
||||
}
|
||||
|
||||
if( !badKmer ) {
|
||||
|
|
|
|||
|
|
@ -56,6 +56,15 @@ public class ReadUtils {
|
|||
private static int DEFAULT_ADAPTOR_SIZE = 100;
|
||||
public static int CLIPPING_GOAL_NOT_REACHED = -1;
|
||||
|
||||
public static int getMeanRepresentativeReadCount(GATKSAMRecord read) {
|
||||
if (!read.isReducedRead())
|
||||
return 1;
|
||||
|
||||
// compute mean representative read counts
|
||||
final byte[] counts = read.getReducedReadCounts();
|
||||
return (int)Math.round((double)MathUtils.sum(counts)/counts.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* A marker to tell which end of the read has been clipped
|
||||
*/
|
||||
|
|
|
|||
Loading…
Reference in New Issue