From 0d282a7750df16b154f87cc83e391188c70e1dab Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 16 Jan 2013 00:12:02 -0500 Subject: [PATCH] Bam writing from HaplotypeCaller seems to be working on all my test cases. Note that it's a hidden debugging option for now. Please let me know if you notice any bad behavior with it. --- .../haplotypecaller/HaplotypeCaller.java | 60 +++++++++++++++++-- .../LikelihoodCalculationEngine.java | 4 -- .../broadinstitute/sting/utils/Haplotype.java | 8 --- 3 files changed, 54 insertions(+), 18 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 04da91f65..4da2e1179 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -484,18 +484,17 @@ public class HaplotypeCaller extends ActiveRegionWalker implem } if ( bamWriter != null ) { - // sort the haplotypes in coordinate order and then write them to the bam - Collections.sort( haplotypes, new Haplotype.HaplotypePositionComparator() ); + // write the haplotypes to the bam final GenomeLoc paddedRefLoc = getPaddedLoc(activeRegion); for ( Haplotype haplotype : haplotypes ) writeHaplotype(haplotype, paddedRefLoc, bestHaplotypes.contains(haplotype)); - // now, output the interesting reads for each sample + // next, output the interesting reads for each sample aligned against the appropriate haplotype for ( final PerReadAlleleLikelihoodMap readAlleleLikelihoodMap : stratifiedReadMap.values() ) { for ( Map.Entry> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) { final Allele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue()); if ( bestAllele != Allele.NO_CALL ) - writeReadAgainstHaplotype(entry.getKey(), (Haplotype)bestAllele); + writeReadAgainstHaplotype(entry.getKey(), (Haplotype) bestAllele, paddedRefLoc.getStart()); } } } @@ -607,8 +606,8 @@ public class HaplotypeCaller extends ActiveRegionWalker implem readGroups.add(rg); bamHeader.setReadGroups(readGroups); + bamWriter.setPresorted(false); bamWriter.writeHeader(bamHeader); - bamWriter.setPresorted(true); } private void writeHaplotype(final Haplotype haplotype, final GenomeLoc paddedRefLoc, final boolean isAmongBestHaplotypes) { @@ -626,11 +625,60 @@ public class HaplotypeCaller extends ActiveRegionWalker implem bamWriter.addAlignment(record); } - private void writeReadAgainstHaplotype(final GATKSAMRecord read, final Haplotype haplotype) { + private void writeReadAgainstHaplotype(final GATKSAMRecord read, final Haplotype haplotype, final int referenceStart) { + final SWPairwiseAlignment swPairwiseAlignment = new SWPairwiseAlignment(haplotype.getBases(), read.getReadBases(), 5.0, -10.0, -22.0, -1.2); + final int readStartOnHaplotype = swPairwiseAlignment.getAlignmentStart2wrt1(); + final int readStartOnReference = referenceStart + haplotype.getAlignmentStartHapwrtRef() + readStartOnHaplotype; + read.setAlignmentStart(readStartOnReference); + final Cigar cigar = generateReadCigarFromHaplotype(read, readStartOnHaplotype, haplotype.getCigar()); + read.setCigar(cigar); + bamWriter.addAlignment(read); + } + private Cigar generateReadCigarFromHaplotype(final GATKSAMRecord read, final int readStartOnHaplotype, final Cigar haplotypeCigar) { + + int currentReadPos = 0; + int currentHapPos = 0; + final List readCigarElements = new ArrayList(); + + for ( final CigarElement cigarElement : haplotypeCigar.getCigarElements() ) { + + if ( cigarElement.getOperator() == CigarOperator.D ) { + if ( currentReadPos > 0 ) + readCigarElements.add(cigarElement); + } else if ( cigarElement.getOperator() == CigarOperator.M || cigarElement.getOperator() == CigarOperator.I ) { + + final int elementLength = cigarElement.getLength(); + final int nextReadPos = currentReadPos + elementLength; + final int nextHapPos = currentHapPos + elementLength; + + // do we want this element? + if ( currentReadPos > 0 ) { + // do we want the entire element? + if ( nextReadPos < read.getReadLength() ) { + readCigarElements.add(cigarElement); + currentReadPos = nextReadPos; + } + // otherwise, we can finish up and return the cigar + else { + readCigarElements.add(new CigarElement(read.getReadLength() - currentReadPos, cigarElement.getOperator())); + return new Cigar(readCigarElements); + } + } + // do we want part of the element to start? + else if ( currentReadPos == 0 && nextHapPos > readStartOnHaplotype ) { + currentReadPos = Math.min(nextHapPos - readStartOnHaplotype, read.getReadLength()); + readCigarElements.add(new CigarElement(currentReadPos, cigarElement.getOperator())); + } + + currentHapPos = nextHapPos; + } + } + + return new Cigar(readCigarElements); } /* diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index e05ad85a9..aafdbf126 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -145,10 +145,6 @@ public class LikelihoodCalculationEngine { for( int jjj = 0; jjj < numHaplotypes; jjj++ ) { final Haplotype haplotype = haplotypes.get(jjj); - // TODO -- need to test against a reference/position with non-standard bases - //if ( !Allele.acceptableAlleleBases(haplotype.getBases(), false) ) - // continue; - final int haplotypeStart = ( previousHaplotypeSeen == null ? 0 : computeFirstDifferingPosition(haplotype.getBases(), previousHaplotypeSeen.getBases()) ); previousHaplotypeSeen = haplotype; diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java index 4830bf053..8c40b9972 100644 --- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java @@ -211,14 +211,6 @@ public class Haplotype extends Allele { } } - public static class HaplotypePositionComparator implements Comparator, Serializable { - @Override - public int compare( final Haplotype hap1, final Haplotype hap2 ) { - final int comp = hap1.getAlignmentStartHapwrtRef() - hap2.getAlignmentStartHapwrtRef(); - return comp == 0 ? HaplotypeBaseComparator.compareHaplotypeBases(hap1, hap2) : comp; - } - } - public static LinkedHashMap makeHaplotypeListFromAlleles(final List alleleList, final int startPos, final ReferenceContext ref,