From 78718b8d6a88de69a7537769ca5b4c63bcac8169 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Sat, 18 Feb 2012 10:31:26 -0500 Subject: [PATCH] Adding Genotype Given Alleles mode to the HaplotypeCaller. It constructs the possible haplotypes via assembly and then injects the desired allele to be genotyped. --- .../traversals/TraverseActiveRegions.java | 3 +- .../broadinstitute/sting/utils/Haplotype.java | 134 +++++++++++++++++- 2 files changed, 133 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 92c508f85..3f24e6585 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -69,8 +69,7 @@ public class TraverseActiveRegions extends TraversalEngine haplotypeInsertLocation + altAlleleLength; iii-- ) { + newHaplotype[iii] = newHaplotype[iii-altAlleleLength]; + } + for( int iii = 0; iii < altAlleleLength; iii++ ) { + newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii]; + } + } else { // deletion + int refHaplotypeOffset = 0; + for( final CigarElement ce : haplotypeCigar.getCigarElements()) { + if(ce.getOperator() == CigarOperator.D) { refHaplotypeOffset += ce.getLength(); } + else if(ce.getOperator() == CigarOperator.I) { refHaplotypeOffset -= ce.getLength(); } + } + for( int iii = 0; iii < altAllele.length(); iii++ ) { + newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii]; + } + final int shift = refAllele.length() - altAllele.length(); + for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length - shift; iii++ ) { + newHaplotype[iii] = newHaplotype[iii+shift]; + } + for( int iii = 0; iii < shift; iii++ ) { + newHaplotype[iii+newHaplotype.length-shift] = paddedRef[refStart+refHaplotypeLength+refHaplotypeOffset+iii]; + } + } + } catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding + return getBases().clone(); + } + + return newHaplotype; } public static LinkedHashMap makeHaplotypeListFromAlleles(List alleleList, int startPos, ReferenceContext ref, @@ -169,4 +219,84 @@ public class Haplotype { return haplotypeMap; } + private static Integer getHaplotypeCoordinateForReferenceCoordinate( final int haplotypeStart, final Cigar haplotypeCigar, final int refCoord ) { + int readBases = 0; + int refBases = 0; + boolean fallsInsideDeletion = false; + + int goal = refCoord - haplotypeStart; // The goal is to move this many reference bases + boolean goalReached = refBases == goal; + + Iterator cigarElementIterator = haplotypeCigar.getCigarElements().iterator(); + while (!goalReached && cigarElementIterator.hasNext()) { + CigarElement cigarElement = cigarElementIterator.next(); + int shift = 0; + + if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) { + if (refBases + cigarElement.getLength() < goal) + shift = cigarElement.getLength(); + else + shift = goal - refBases; + + refBases += shift; + } + goalReached = refBases == goal; + + if (!goalReached && cigarElement.getOperator().consumesReadBases()) + readBases += cigarElement.getLength(); + + if (goalReached) { + // Is this base's reference position within this cigar element? Or did we use it all? + boolean endsWithinCigar = shift < cigarElement.getLength(); + + // If it isn't, we need to check the next one. There should *ALWAYS* be a next one + // since we checked if the goal coordinate is within the read length, so this is just a sanity check. + if (!endsWithinCigar && !cigarElementIterator.hasNext()) + return -1; + + CigarElement nextCigarElement; + + // if we end inside the current cigar element, we just have to check if it is a deletion + if (endsWithinCigar) + fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION; + + // if we end outside the current cigar element, we need to check if the next element is an insertion or deletion. + else { + nextCigarElement = cigarElementIterator.next(); + + // if it's an insertion, we need to clip the whole insertion before looking at the next element + if (nextCigarElement.getOperator() == CigarOperator.INSERTION) { + readBases += nextCigarElement.getLength(); + if (!cigarElementIterator.hasNext()) + return -1; + + nextCigarElement = cigarElementIterator.next(); + } + + // if it's a deletion, we will pass the information on to be handled downstream. + fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION; + } + + // If we reached our goal outside a deletion, add the shift + if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases()) + readBases += shift; + + // If we reached our goal inside a deletion, but the deletion is the next cigar element then we need + // to add the shift of the current cigar element but go back to it's last element to return the last + // base before the deletion (see warning in function contracts) + else if (fallsInsideDeletion && !endsWithinCigar) + readBases += shift - 1; + + // If we reached our goal inside a deletion then we must backtrack to the last base before the deletion + else if (fallsInsideDeletion && endsWithinCigar) + readBases--; + } + } + + if (!goalReached) + return -1; + + return (fallsInsideDeletion ? -1 : readBases); + } + }