Fixes for running HaplotypeCaller with reduced reads: a) minor refactoring: moved the code that computes the mean representative read count into ReadUtils; b) don't use the min representative count over the kmer span when constructing the de Bruijn graph - this creates many paths with multiplicity=1 and makes us lose a lot of SNPs at the edges of capture targets. Use the rounded mean instead.
This commit is contained in:
parent
44f160f29f
commit
238d55cb61
|
|
@ -30,6 +30,7 @@ import com.google.java.contract.Requires;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
|
@ -92,7 +93,7 @@ public class LikelihoodCalculationEngine {
|
||||||
final int[][] readCounts = new int[numHaplotypes][numReads];
|
final int[][] readCounts = new int[numHaplotypes][numReads];
|
||||||
for( int iii = 0; iii < numReads; iii++ ) {
|
for( int iii = 0; iii < numReads; iii++ ) {
|
||||||
final GATKSAMRecord read = reads.get(iii);
|
final GATKSAMRecord read = reads.get(iii);
|
||||||
final int readCount = getRepresentativeReadCount(read);
|
final int readCount = ReadUtils.getMeanRepresentativeReadCount(read);
|
||||||
|
|
||||||
final byte[] overallGCP = new byte[read.getReadLength()];
|
final byte[] overallGCP = new byte[read.getReadLength()];
|
||||||
Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data?
|
Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data?
|
||||||
|
|
@ -123,15 +124,6 @@ public class LikelihoodCalculationEngine {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int getRepresentativeReadCount(GATKSAMRecord read) {
|
|
||||||
if (!read.isReducedRead())
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
// compute mean representative read counts
|
|
||||||
final byte[] counts = read.getReducedReadCounts();
|
|
||||||
return MathUtils.sum(counts)/counts.length;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int computeFirstDifferingPosition( final byte[] b1, final byte[] b2 ) {
|
private static int computeFirstDifferingPosition( final byte[] b1, final byte[] b2 ) {
|
||||||
for( int iii = 0; iii < b1.length && iii < b2.length; iii++ ){
|
for( int iii = 0; iii < b1.length && iii < b2.length; iii++ ){
|
||||||
if( b1[iii] != b2[iii] ) {
|
if( b1[iii] != b2[iii] ) {
|
||||||
|
|
|
||||||
|
|
@ -198,8 +198,10 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
}
|
}
|
||||||
int countNumber = 1;
|
int countNumber = 1;
|
||||||
if (read.isReducedRead()) {
|
if (read.isReducedRead()) {
|
||||||
// compute min (?) number of reduced read counts in current kmer span
|
// compute mean number of reduced read counts in current kmer span
|
||||||
countNumber = MathUtils.arrayMin(Arrays.copyOfRange(reducedReadCounts,iii,iii+KMER_LENGTH+1));
|
final byte[] counts = Arrays.copyOfRange(reducedReadCounts,iii,iii+KMER_LENGTH+1);
|
||||||
|
// precise rounding can make a difference with low consensus counts
|
||||||
|
countNumber = (int)Math.round((double)MathUtils.sum(counts)/counts.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
if( !badKmer ) {
|
if( !badKmer ) {
|
||||||
|
|
|
||||||
|
|
@ -56,6 +56,15 @@ public class ReadUtils {
|
||||||
private static int DEFAULT_ADAPTOR_SIZE = 100;
|
private static int DEFAULT_ADAPTOR_SIZE = 100;
|
||||||
public static int CLIPPING_GOAL_NOT_REACHED = -1;
|
public static int CLIPPING_GOAL_NOT_REACHED = -1;
|
||||||
|
|
||||||
|
public static int getMeanRepresentativeReadCount(GATKSAMRecord read) {
|
||||||
|
if (!read.isReducedRead())
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
// compute mean representative read counts
|
||||||
|
final byte[] counts = read.getReducedReadCounts();
|
||||||
|
return (int)Math.round((double)MathUtils.sum(counts)/counts.length);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A marker to tell which end of the read has been clipped
|
* A marker to tell which end of the read has been clipped
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue