Bug fix in HaplotypeCaller for non-regular bases in the reference or reads: events whose alleles would contain such bases are no longer created. Bug fix for the advanced GenotypeFullActiveRegion mode: the custom variant annotations created by the HC don't make sense in this mode, so they are no longer calculated.
This commit is contained in:
parent
97b191f578
commit
c67d708c51
|
|
@ -56,8 +56,12 @@ public class GenotypingEngine {
|
||||||
|
|
||||||
// This function is the streamlined approach, currently not being used
|
// This function is the streamlined approach, currently not being used
|
||||||
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
|
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
|
||||||
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallHaplotypeEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList<Haplotype> haplotypes, final byte[] ref, final GenomeLoc refLoc,
|
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallHaplotypeEvents( final UnifiedGenotyperEngine UG_engine,
|
||||||
final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser ) {
|
final ArrayList<Haplotype> haplotypes,
|
||||||
|
final byte[] ref,
|
||||||
|
final GenomeLoc refLoc,
|
||||||
|
final GenomeLoc activeRegionWindow,
|
||||||
|
final GenomeLocParser genomeLocParser ) {
|
||||||
// Prepare the list of haplotype indices to genotype
|
// Prepare the list of haplotype indices to genotype
|
||||||
final ArrayList<Allele> allelesToGenotype = new ArrayList<Allele>();
|
final ArrayList<Allele> allelesToGenotype = new ArrayList<Allele>();
|
||||||
|
|
||||||
|
|
@ -224,7 +228,6 @@ public class GenotypingEngine {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Walk along each position in the key set and create each event to be outputted
|
// Walk along each position in the key set and create each event to be outputted
|
||||||
for( final int loc : startPosKeySet ) {
|
for( final int loc : startPosKeySet ) {
|
||||||
if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) {
|
if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) {
|
||||||
|
|
@ -533,24 +536,36 @@ public class GenotypingEngine {
|
||||||
final int elementLength = ce.getLength();
|
final int elementLength = ce.getLength();
|
||||||
switch( ce.getOperator() ) {
|
switch( ce.getOperator() ) {
|
||||||
case I:
|
case I:
|
||||||
|
{
|
||||||
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
|
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
|
||||||
final int insertionStart = refLoc.getStart() + refPos - 1;
|
final int insertionStart = refLoc.getStart() + refPos - 1;
|
||||||
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
|
final byte refByte = ref[refPos-1];
|
||||||
|
if( BaseUtils.isRegularBase(refByte) ) {
|
||||||
|
insertionAlleles.add( Allele.create(refByte, true) );
|
||||||
|
}
|
||||||
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
|
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
|
||||||
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
||||||
} else {
|
} else {
|
||||||
byte[] insertionBases = new byte[]{};
|
byte[] insertionBases = new byte[]{};
|
||||||
insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base
|
insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base
|
||||||
insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ));
|
insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ));
|
||||||
|
if( BaseUtils.isAllRegularBases(insertionBases) ) {
|
||||||
insertionAlleles.add( Allele.create(insertionBases, false) );
|
insertionAlleles.add( Allele.create(insertionBases, false) );
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
if( insertionAlleles.size() == 2 ) { // found a proper ref and alt allele
|
||||||
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
|
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
|
||||||
|
}
|
||||||
alignmentPos += elementLength;
|
alignmentPos += elementLength;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
case S:
|
case S:
|
||||||
|
{
|
||||||
alignmentPos += elementLength;
|
alignmentPos += elementLength;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
case D:
|
case D:
|
||||||
|
{
|
||||||
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base
|
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base
|
||||||
final ArrayList<Allele> deletionAlleles = new ArrayList<Allele>();
|
final ArrayList<Allele> deletionAlleles = new ArrayList<Allele>();
|
||||||
final int deletionStart = refLoc.getStart() + refPos - 1;
|
final int deletionStart = refLoc.getStart() + refPos - 1;
|
||||||
|
|
@ -561,15 +576,20 @@ public class GenotypingEngine {
|
||||||
// deletionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
// deletionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
||||||
// vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart, deletionAlleles).make());
|
// vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart, deletionAlleles).make());
|
||||||
//} else {
|
//} else {
|
||||||
|
final byte refByte = ref[refPos-1];
|
||||||
|
if( BaseUtils.isRegularBase(refByte) && BaseUtils.isAllRegularBases(deletionBases) ) {
|
||||||
deletionAlleles.add( Allele.create(deletionBases, true) );
|
deletionAlleles.add( Allele.create(deletionBases, true) );
|
||||||
deletionAlleles.add( Allele.create(ref[refPos-1], false) );
|
deletionAlleles.add( Allele.create(refByte, false) );
|
||||||
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make());
|
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make());
|
||||||
|
}
|
||||||
//}
|
//}
|
||||||
refPos += elementLength;
|
refPos += elementLength;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
case M:
|
case M:
|
||||||
case EQ:
|
case EQ:
|
||||||
case X:
|
case X:
|
||||||
|
{
|
||||||
int numSinceMismatch = -1;
|
int numSinceMismatch = -1;
|
||||||
int stopOfMismatch = -1;
|
int stopOfMismatch = -1;
|
||||||
int startOfMismatch = -1;
|
int startOfMismatch = -1;
|
||||||
|
|
@ -592,11 +612,13 @@ public class GenotypingEngine {
|
||||||
if( numSinceMismatch > MNP_LOOK_AHEAD || (iii == elementLength - 1 && stopOfMismatch != -1) ) {
|
if( numSinceMismatch > MNP_LOOK_AHEAD || (iii == elementLength - 1 && stopOfMismatch != -1) ) {
|
||||||
final byte[] refBases = Arrays.copyOfRange( ref, refPosStartOfMismatch, refPosStartOfMismatch + (stopOfMismatch - startOfMismatch) + 1 );
|
final byte[] refBases = Arrays.copyOfRange( ref, refPosStartOfMismatch, refPosStartOfMismatch + (stopOfMismatch - startOfMismatch) + 1 );
|
||||||
final byte[] mismatchBases = Arrays.copyOfRange( alignment, startOfMismatch, stopOfMismatch + 1 );
|
final byte[] mismatchBases = Arrays.copyOfRange( alignment, startOfMismatch, stopOfMismatch + 1 );
|
||||||
|
if( BaseUtils.isAllRegularBases(refBases) && BaseUtils.isAllRegularBases(mismatchBases) ) {
|
||||||
final ArrayList<Allele> snpAlleles = new ArrayList<Allele>();
|
final ArrayList<Allele> snpAlleles = new ArrayList<Allele>();
|
||||||
snpAlleles.add( Allele.create( refBases, true ) );
|
snpAlleles.add( Allele.create( refBases, true ) );
|
||||||
snpAlleles.add( Allele.create( mismatchBases, false ) );
|
snpAlleles.add( Allele.create( mismatchBases, false ) );
|
||||||
final int snpStart = refLoc.getStart() + refPosStartOfMismatch;
|
final int snpStart = refLoc.getStart() + refPosStartOfMismatch;
|
||||||
vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make());
|
vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make());
|
||||||
|
}
|
||||||
numSinceMismatch = -1;
|
numSinceMismatch = -1;
|
||||||
stopOfMismatch = -1;
|
stopOfMismatch = -1;
|
||||||
startOfMismatch = -1;
|
startOfMismatch = -1;
|
||||||
|
|
@ -606,6 +628,7 @@ public class GenotypingEngine {
|
||||||
alignmentPos++;
|
alignmentPos++;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
case N:
|
case N:
|
||||||
case H:
|
case H:
|
||||||
case P:
|
case P:
|
||||||
|
|
|
||||||
|
|
@ -413,15 +413,17 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
|
|
||||||
for( final Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>> callResult :
|
for( final Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>> callResult :
|
||||||
( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
|
( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
|
||||||
? genotypingEngine.assignGenotypeLikelihoodsAndCallHaplotypeEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser() )
|
? genotypingEngine.assignGenotypeLikelihoodsAndCallHaplotypeEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getExtendedLoc(), getToolkit().getGenomeLocParser() )
|
||||||
: genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) ) {
|
: genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) ) {
|
||||||
if( DEBUG ) { System.out.println(callResult.getFirst().toStringWithoutGenotypes()); }
|
if( DEBUG ) { System.out.println(callResult.getFirst().toStringWithoutGenotypes()); }
|
||||||
|
|
||||||
final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult );
|
final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult );
|
||||||
final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst());
|
final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst());
|
||||||
|
|
||||||
// add some custom annotations to the calls
|
|
||||||
final Map<String, Object> myAttributes = new LinkedHashMap<String, Object>(annotatedCall.getAttributes());
|
final Map<String, Object> myAttributes = new LinkedHashMap<String, Object>(annotatedCall.getAttributes());
|
||||||
|
|
||||||
|
if( !GENOTYPE_FULL_ACTIVE_REGION ) {
|
||||||
|
// add some custom annotations to the calls
|
||||||
|
|
||||||
// Calculate the number of variants on the haplotype
|
// Calculate the number of variants on the haplotype
|
||||||
int maxNumVar = 0;
|
int maxNumVar = 0;
|
||||||
for( final Allele allele : callResult.getFirst().getAlleles() ) {
|
for( final Allele allele : callResult.getFirst().getAlleles() ) {
|
||||||
|
|
@ -453,6 +455,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
if( annotatedCall.hasAttribute("QD") ) {
|
if( annotatedCall.hasAttribute("QD") ) {
|
||||||
myAttributes.put("QDE", String.format("%.2f", Double.parseDouble((String)annotatedCall.getAttribute("QD")) / ((double)maxNumVar)) );
|
myAttributes.put("QDE", String.format("%.2f", Double.parseDouble((String)annotatedCall.getAttribute("QD")) / ((double)maxNumVar)) );
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
vcfWriter.add( new VariantContextBuilder(annotatedCall).attributes(myAttributes).make() );
|
vcfWriter.add( new VariantContextBuilder(annotatedCall).attributes(myAttributes).make() );
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -179,7 +179,6 @@ public class LikelihoodCalculationEngine {
|
||||||
final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample);
|
final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample);
|
||||||
for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) {
|
for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) {
|
||||||
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
|
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
|
||||||
// log10(10^(a*x1) + 10^(b*x2)) ???
|
|
||||||
// First term is approximated by Jacobian log with table lookup.
|
// First term is approximated by Jacobian log with table lookup.
|
||||||
haplotypeLikelihood += readCounts_iii[kkk] * ( MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF );
|
haplotypeLikelihood += readCounts_iii[kkk] * ( MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF );
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -181,7 +181,6 @@ public class UnifiedArgumentCollection {
|
||||||
Generalized ploidy argument (debug only): When building site error models, ignore lane information and build only
|
Generalized ploidy argument (debug only): When building site error models, ignore lane information and build only
|
||||||
sample-level error model
|
sample-level error model
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@Argument(fullName = "ignoreLaneInfo", shortName = "ignoreLane", doc = "Ignore lane when building error model, error model is then per-site", required = false)
|
@Argument(fullName = "ignoreLaneInfo", shortName = "ignoreLane", doc = "Ignore lane when building error model, error model is then per-site", required = false)
|
||||||
public boolean IGNORE_LANE_INFO = false;
|
public boolean IGNORE_LANE_INFO = false;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -227,14 +227,21 @@ public class BaseUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Deprecated
|
@Deprecated
|
||||||
static public boolean isRegularBase(char base) {
|
static public boolean isRegularBase( final char base ) {
|
||||||
return simpleBaseToBaseIndex(base) != -1;
|
return simpleBaseToBaseIndex(base) != -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static public boolean isRegularBase(byte base) {
|
static public boolean isRegularBase( final byte base ) {
|
||||||
return simpleBaseToBaseIndex(base) != -1;
|
return simpleBaseToBaseIndex(base) != -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static public boolean isAllRegularBases( final byte[] bases ) {
|
||||||
|
for( final byte base : bases) {
|
||||||
|
if( !isRegularBase(base) ) { return false; }
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static public boolean isNBase(byte base) {
|
static public boolean isNBase(byte base) {
|
||||||
return base == 'N' || base == 'n';
|
return base == 'N' || base == 'n';
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue