Adding Genotype Given Alleles mode to the HaplotypeCaller. It constructs the possible haplotypes via assembly and then injects the desired allele to be genotyped.
This commit is contained in:
parent
30085781cf
commit
78718b8d6a
|
|
@ -69,8 +69,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
for(int iii = prevLoc.getStart() + 1; iii < location.getStart(); iii++ ) {
|
for(int iii = prevLoc.getStart() + 1; iii < location.getStart(); iii++ ) {
|
||||||
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
|
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
|
||||||
if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
|
if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
|
||||||
final double isActiveProb = ( walker.presetActiveRegions == null ? walker.isActive( null, null, null )
|
final double isActiveProb = ( walker.presetActiveRegions == null ? 0.0 : ( walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ) );
|
||||||
: ( walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ) );
|
|
||||||
isActiveList.add( isActiveProb );
|
isActiveList.add( isActiveProb );
|
||||||
if( firstIsActiveStart == null ) {
|
if( firstIsActiveStart == null ) {
|
||||||
firstIsActiveStart = fakeLoc;
|
firstIsActiveStart = fakeLoc;
|
||||||
|
|
|
||||||
|
|
@ -24,11 +24,17 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
|
import net.sf.samtools.Cigar;
|
||||||
|
import net.sf.samtools.CigarElement;
|
||||||
|
import net.sf.samtools.CigarOperator;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Iterator;
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
@ -109,8 +115,52 @@ public class Haplotype {
|
||||||
return isReference;
|
return isReference;
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte[] insertAllele( final Allele a ) {
|
public byte[] insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation, final byte[] paddedRef, final int refStart,
|
||||||
return getBases();
|
final Cigar haplotypeCigar, final int numBasesAddedToStartOfHaplotype, final int refHaplotypeLength ) {
|
||||||
|
|
||||||
|
if( refAllele.length() != altAllele.length() ) { refInsertLocation++; }
|
||||||
|
int haplotypeInsertLocation = getHaplotypeCoordinateForReferenceCoordinate(refStart + numBasesAddedToStartOfHaplotype, haplotypeCigar, refInsertLocation);
|
||||||
|
if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype
|
||||||
|
return getBases().clone();
|
||||||
|
}
|
||||||
|
haplotypeInsertLocation += numBasesAddedToStartOfHaplotype;
|
||||||
|
final byte[] newHaplotype = getBases().clone();
|
||||||
|
|
||||||
|
try {
|
||||||
|
if( refAllele.length() == altAllele.length() ) { // SNP or MNP
|
||||||
|
for( int iii = 0; iii < altAllele.length(); iii++ ) {
|
||||||
|
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
|
||||||
|
}
|
||||||
|
} else if( refAllele.length() < altAllele.length() ) { // insertion
|
||||||
|
final int altAlleleLength = altAllele.length();
|
||||||
|
for( int iii = newHaplotype.length -1; iii > haplotypeInsertLocation + altAlleleLength; iii-- ) {
|
||||||
|
newHaplotype[iii] = newHaplotype[iii-altAlleleLength];
|
||||||
|
}
|
||||||
|
for( int iii = 0; iii < altAlleleLength; iii++ ) {
|
||||||
|
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
|
||||||
|
}
|
||||||
|
} else { // deletion
|
||||||
|
int refHaplotypeOffset = 0;
|
||||||
|
for( final CigarElement ce : haplotypeCigar.getCigarElements()) {
|
||||||
|
if(ce.getOperator() == CigarOperator.D) { refHaplotypeOffset += ce.getLength(); }
|
||||||
|
else if(ce.getOperator() == CigarOperator.I) { refHaplotypeOffset -= ce.getLength(); }
|
||||||
|
}
|
||||||
|
for( int iii = 0; iii < altAllele.length(); iii++ ) {
|
||||||
|
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
|
||||||
|
}
|
||||||
|
final int shift = refAllele.length() - altAllele.length();
|
||||||
|
for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length - shift; iii++ ) {
|
||||||
|
newHaplotype[iii] = newHaplotype[iii+shift];
|
||||||
|
}
|
||||||
|
for( int iii = 0; iii < shift; iii++ ) {
|
||||||
|
newHaplotype[iii+newHaplotype.length-shift] = paddedRef[refStart+refHaplotypeLength+refHaplotypeOffset+iii];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding
|
||||||
|
return getBases().clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
return newHaplotype;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<Allele> alleleList, int startPos, ReferenceContext ref,
|
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<Allele> alleleList, int startPos, ReferenceContext ref,
|
||||||
|
|
@ -169,4 +219,84 @@ public class Haplotype {
|
||||||
return haplotypeMap;
|
return haplotypeMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static Integer getHaplotypeCoordinateForReferenceCoordinate( final int haplotypeStart, final Cigar haplotypeCigar, final int refCoord ) {
|
||||||
|
int readBases = 0;
|
||||||
|
int refBases = 0;
|
||||||
|
boolean fallsInsideDeletion = false;
|
||||||
|
|
||||||
|
int goal = refCoord - haplotypeStart; // The goal is to move this many reference bases
|
||||||
|
boolean goalReached = refBases == goal;
|
||||||
|
|
||||||
|
Iterator<CigarElement> cigarElementIterator = haplotypeCigar.getCigarElements().iterator();
|
||||||
|
while (!goalReached && cigarElementIterator.hasNext()) {
|
||||||
|
CigarElement cigarElement = cigarElementIterator.next();
|
||||||
|
int shift = 0;
|
||||||
|
|
||||||
|
if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) {
|
||||||
|
if (refBases + cigarElement.getLength() < goal)
|
||||||
|
shift = cigarElement.getLength();
|
||||||
|
else
|
||||||
|
shift = goal - refBases;
|
||||||
|
|
||||||
|
refBases += shift;
|
||||||
|
}
|
||||||
|
goalReached = refBases == goal;
|
||||||
|
|
||||||
|
if (!goalReached && cigarElement.getOperator().consumesReadBases())
|
||||||
|
readBases += cigarElement.getLength();
|
||||||
|
|
||||||
|
if (goalReached) {
|
||||||
|
// Is this base's reference position within this cigar element? Or did we use it all?
|
||||||
|
boolean endsWithinCigar = shift < cigarElement.getLength();
|
||||||
|
|
||||||
|
// If it isn't, we need to check the next one. There should *ALWAYS* be a next one
|
||||||
|
// since we checked if the goal coordinate is within the read length, so this is just a sanity check.
|
||||||
|
if (!endsWithinCigar && !cigarElementIterator.hasNext())
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
CigarElement nextCigarElement;
|
||||||
|
|
||||||
|
// if we end inside the current cigar element, we just have to check if it is a deletion
|
||||||
|
if (endsWithinCigar)
|
||||||
|
fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION;
|
||||||
|
|
||||||
|
// if we end outside the current cigar element, we need to check if the next element is an insertion or deletion.
|
||||||
|
else {
|
||||||
|
nextCigarElement = cigarElementIterator.next();
|
||||||
|
|
||||||
|
// if it's an insertion, we need to clip the whole insertion before looking at the next element
|
||||||
|
if (nextCigarElement.getOperator() == CigarOperator.INSERTION) {
|
||||||
|
readBases += nextCigarElement.getLength();
|
||||||
|
if (!cigarElementIterator.hasNext())
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
nextCigarElement = cigarElementIterator.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
// if it's a deletion, we will pass the information on to be handled downstream.
|
||||||
|
fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we reached our goal outside a deletion, add the shift
|
||||||
|
if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases())
|
||||||
|
readBases += shift;
|
||||||
|
|
||||||
|
// If we reached our goal inside a deletion, but the deletion is the next cigar element then we need
|
||||||
|
// to add the shift of the current cigar element but go back to it's last element to return the last
|
||||||
|
// base before the deletion (see warning in function contracts)
|
||||||
|
else if (fallsInsideDeletion && !endsWithinCigar)
|
||||||
|
readBases += shift - 1;
|
||||||
|
|
||||||
|
// If we reached our goal inside a deletion then we must backtrack to the last base before the deletion
|
||||||
|
else if (fallsInsideDeletion && endsWithinCigar)
|
||||||
|
readBases--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!goalReached)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
return (fallsInsideDeletion ? -1 : readBases);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue