diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SomaticMutationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SomaticMutationWalker.java index e00139c68..5fedc14bb 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SomaticMutationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SomaticMutationWalker.java @@ -1,20 +1,25 @@ package org.broadinstitute.sting.playground.gatk.walkers; +import net.sf.picard.reference.ReferenceSequence; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.util.StringUtil; import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.rodDbSNP; -import org.broadinstitute.sting.gatk.refdata.HapMapAlleleFrequenciesROD; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.gatk.walkers.By; -import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.playground.indels.SWPairwiseAlignment; import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods; -import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; -import java.util.List; -import java.util.HashMap; -import java.util.Map; +import java.io.IOException; +import java.util.*; @By(DataSource.REFERENCE) public class SomaticMutationWalker extends LocusWalker { @@ -78,10 +83,91 @@ public class SomaticMutationWalker extends LocusWalker { public static int MIN_QSCORE = 13; - public static double TUMOR_LOD = 6.3d; -// public static double TUMOR_LOD = 1.0d; +// public static double TUMOR_LOD = 6.3d; + public static double TUMOR_LOD = 4.0d; public static double NORMAL_LOD = 2.3d; + private String cachedContigName; + private byte[] cachedContig; + + private boolean outputFailures = true; + + private static class LocusReadPile { + private char refBase; + public List reads = new ArrayList(); + public List offsets = new ArrayList(); + public GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(); + public QualitySums qualitySums = new QualitySums(); + + public LocusReadPile(char refBase) { + this.refBase = refBase; + } + + public void add(SAMRecord read, int offset) { + char base = read.getReadString().charAt(offset); + byte qual = read.getBaseQualities()[offset]; + + if (base == 'N' || base == 'n') { return; } + + reads.add(read); + offsets.add(offset); + + + likelihoods.add(refBase, base, qual); + if (qual > MIN_QSCORE) qualitySums.incrementSum(base, qual); + + } + + public String getLocusBases() { + StringBuilder sb = new StringBuilder(); + for(int i=0; i midp = new HashMap(); @@ -109,14 +195,9 @@ public class SomaticMutationWalker extends LocusWalker { List reads = context.getReads(); - GenotypeLikelihoods tumorGL = new GenotypeLikelihoods(); - GenotypeLikelihoods normalGL = new GenotypeLikelihoods(); + LocusReadPile tumorReadPile = new LocusReadPile(ref); + LocusReadPile normalReadPile = new LocusReadPile(ref); - QualitySums tumorQualitySums = new QualitySums(); - QualitySums normalQualitySums = new QualitySums(); - - StringBuilder tumorBases = new StringBuilder(); - StringBuilder normalBases = new StringBuilder(); for ( int i = 0; i < reads.size(); i++ ) { SAMRecord read = reads.get(i); @@ -126,7 +207,7 @@ public class SomaticMutationWalker extends LocusWalker { read.getReadUnmappedFlag() || read.getMappingQuality() <= 0 -// || (read.getReadPairedFlag() && (!read.getProperPairFlag() || read.getInferredInsertSize() >= MAX_INSERT_SIZE)) + || (read.getReadPairedFlag() && (!read.getProperPairFlag() || read.getInferredInsertSize() >= MAX_INSERT_SIZE)) ) { continue; } @@ -135,20 +216,20 @@ public class SomaticMutationWalker extends LocusWalker { String sample = read.getHeader().getReadGroup(rg).getSample(); int offset = context.getOffsets().get(i); + char base = read.getReadString().charAt(offset); - byte qual = read.getBaseQualities()[offset]; - - if (base == 'N' || base == 'n') { continue; } - + + + // TODO: build a pile of reads and offsets, then pass that into a + // constructor for the normalGL class + // that way, we can build a different pile of reads later on and extract the genotype if (normalSampleName.equals(sample)) { - normalGL.add(ref, base, qual); - if (qual > MIN_QSCORE) normalQualitySums.incrementSum(base, qual); - normalBases.append(base); + normalReadPile.add(read, offset); + } else if (tumorSampleName.equals(sample)) { - tumorGL.add(ref, base, qual); - if (qual > MIN_QSCORE) tumorQualitySums.incrementSum(base, qual); - tumorBases.append(base); + tumorReadPile.add(read, offset); + int midDist = Math.abs((int)(read.getReadLength() / 2) - offset); if (midDist < midp.get(base)) { midp.put(base, midDist); } @@ -159,7 +240,7 @@ public class SomaticMutationWalker extends LocusWalker { } // pretest: if the sum of the quality scores for all non-ref alleles < 60, just quit looking now - if (tumorQualitySums.getOtherQualities(ref) < MIN_MUTANT_SUM_PRETEST) { + if (tumorReadPile.qualitySums.getOtherQualities(ref) < MIN_MUTANT_SUM_PRETEST) { return -1; } @@ -170,13 +251,18 @@ public class SomaticMutationWalker extends LocusWalker { if (altAllele == upRef) { continue; } - double[] tumorRefAlt = extractRefAlt(tumorGL, ref, altAllele); - double tumorLod = Math.log10(tumorRefAlt[1] + tumorRefAlt[2]) - tumorRefAlt[0]; - // (i) either an adjusted quality score sum in the tumor for the mutant base must be // at least 100 or the LOD score for mutant:ref + mutant:mutant vs ref:ref must // be at least 6.3; - if (tumorQualitySums.get(altAllele) < MIN_MUTANT_SUM && tumorLod < TUMOR_LOD) { + int mutantSum = tumorReadPile.qualitySums.get(altAllele); + int refSum = tumorReadPile.qualitySums.get(ref); + + if (tumorReadPile.getAltVsRef(altAllele) >= TUMOR_LOD +// || +// (mutantSum >= MIN_MUTANT_SUM && (float)mutantSum / (float) refSum >= 0.05f) + ) { + // yep -- just fall through... easier to think about this way! + } else { continue; } @@ -187,12 +273,64 @@ public class SomaticMutationWalker extends LocusWalker { // return 0; // } - double[] refAlt = extractRefAlt(normalGL, ref, altAllele); - double normalLod = refAlt[0] - Math.log10(refAlt[1] + refAlt[2]); + + // do a MSA to figure out if the alternate reads comes from a cluster of reads with more + // than one alternate allele (hints at being an alignment artifact) + ReferenceSequence refSeq; + // TODO: don't hardcode. Make this the max read length in the pile + long refStart = context.getLocation().getStart() - 100; + //tumorReadPile.offsets.get(0); + long refStop = context.getLocation().getStart() + 100; + try { + IndexedFastaSequenceFile seqFile = new IndexedFastaSequenceFile(getToolkit().getArguments().referenceFile); + refSeq = seqFile.getSubsequenceAt(context.getContig(),refStart, refStop); + + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } + +// System.out.println("TESTING " + context.getContig() + ":" + context.getPosition()); + LocusReadPile t2 = filterHighMismatchScoreReads(tumorReadPile, StringUtil.bytesToString(refSeq.getBases()), refStart); + + // TEST the LOD score again! + // TODO: extract this since we'll do it multiple times... + + // (i) either an adjusted quality score sum in the tumor for the mutant base must be + // at least 100 or the LOD score for mutant:ref + mutant:mutant vs ref:ref must + // be at least 6.3; + double tumorLod = t2.getAltVsRef(altAllele); + if (t2.qualitySums.get(altAllele) < MIN_MUTANT_SUM && tumorLod < TUMOR_LOD) { + if (outputFailures) { + System.out.println("FAILED " + context.getContig() + ":" + context.getPosition() + " due to MAX MM QSCORE TEST." + + " LOD was " + tumorReadPile.getAltVsRef(altAllele) + + " LOD is now " + t2.getAltVsRef(altAllele) + + " QSUM was " + tumorReadPile.qualitySums.get(altAllele) + + " QSUM is now " + t2.qualitySums.get(altAllele)); + } + continue; + } + + // TODO: using the original pile here since often these artifacts will be supported + // by those reads that get thrown out! Maybe that means we don't need the noise filter... + boolean shouldDisalign = + disaligner(context.getPosition(), tumorReadPile, StringUtil.bytesToString(refSeq.getBases()), refStart); + + if (shouldDisalign) { + if (outputFailures) { + System.out.println("FAILED " + context.getContig() + ":" + context.getPosition() + " due to DISALIGNMENT TEST."); + } + continue; + } + + + + + // (ii) the quality score sum for the mutant base in the normal must be < 50 and the // LOD score for ref:ref vs mutant:ref + mutant:mutant must be at least 2.3. - if ( normalQualitySums.get(altAllele) > 50 || normalLod < NORMAL_LOD ) { + double normalLod = normalReadPile.getRefVsAlt(altAllele); + if ( normalReadPile.qualitySums.get(altAllele) > 50 || normalLod < NORMAL_LOD ) { continue; } @@ -204,11 +342,11 @@ public class SomaticMutationWalker extends LocusWalker { context.getPosition() + "\t" + context.getPosition() + "\t" + "TScore:" + tumorLod + - "__TRefSum: " + tumorQualitySums.get(ref) + - "__TAltSum: " + tumorQualitySums.get(altAllele) + + "__TRefSum: " + tumorReadPile.qualitySums.get(ref) + + "__TAltSum: " + tumorReadPile.qualitySums.get(altAllele) + "__NScore:" + normalLod + - "__NRefSum: " + normalQualitySums.get(ref) + - "__NAltSum: " + normalQualitySums.get(altAllele) + + "__NRefSum: " + normalReadPile.qualitySums.get(ref) + + "__NAltSum: " + normalReadPile.qualitySums.get(altAllele) + "__MIDP: " + midp.get(altAllele) + (failedMidpointCheck?"__FAILED-MPCHECK":"") ); @@ -216,13 +354,13 @@ public class SomaticMutationWalker extends LocusWalker { } else { out.println(context.getLocation() + " " + upRef + " " + altAllele + " TScore:" + tumorLod + - " TRefSum: " + tumorQualitySums.get(ref) + - " TAltSum: " + tumorQualitySums.get(altAllele) + + " TRefSum: " + tumorReadPile.qualitySums.get(ref) + + " TAltSum: " + tumorReadPile.qualitySums.get(altAllele) + " NScore:" + normalLod + - " NRefSum: " + normalQualitySums.get(ref) + - " NAltSum: " + normalQualitySums.get(altAllele) + " " + - tumorBases.toString() + " " + - normalBases.toString() + " NRefSum: " + normalReadPile.qualitySums.get(ref) + + " NAltSum: " + normalReadPile.qualitySums.get(altAllele) + " " + + tumorReadPile.getLocusBases().toString() + " " + + normalReadPile.getLocusBases().toString() ); } @@ -235,34 +373,6 @@ public class SomaticMutationWalker extends LocusWalker { return -1; } - /** - * Extract the LOD comparing ref:ref to ref:alt and alt:alt - */ - private double[] extractRefAlt(GenotypeLikelihoods gl, char ref, char altAllele) { - double refRef = 0; - double altRef = 0; - double altAlt = 0; - for(int j=0; j<10; j++) { - String gt = gl.genotypes[j]; - double likelihood = gl.likelihoods[j]; - - // the ref:mutant theory - if ( (gt.charAt(0) == ref && gt.charAt(1) == altAllele) || - (gt.charAt(0) == altAllele && gt.charAt(1) == ref) ) { - altRef += Math.pow(10, likelihood); - } - - if ( gt.charAt(0) == altAllele && gt.charAt(1) == altAllele) { - altAlt += Math.pow(10, likelihood); - } - - if ( gt.charAt(0) == ref && gt.charAt(1) == ref) { - refRef = likelihood; - } - - } - return new double[]{refRef, altRef, altAlt}; - } // Given result of map function public Integer reduceInit() { @@ -279,4 +389,469 @@ public class SomaticMutationWalker extends LocusWalker { + int MAX_READ_MISMATCH_QUALITY_SCORE_SUM = 100; + private LocusReadPile filterHighMismatchScoreReads(LocusReadPile pile, String reference, long leftmostIndex) { + LocusReadPile newPile = new LocusReadPile(pile.refBase); + + for ( int i=0; i< pile.reads.size(); i++) { + SAMRecord read = pile.reads.get(i); + int offset = pile.offsets.get(i); + + AlignedRead aRead = new AlignedRead(read); + int mismatchScore = mismatchQualitySum(aRead, reference, read.getAlignmentStart()-(int)leftmostIndex); + + if (mismatchScore <= MAX_READ_MISMATCH_QUALITY_SCORE_SUM) { + newPile.add(read, offset); + } else { + char base = read.getReadString().charAt(offset); + + // TODO: use a logger to output this information for debugging/Picard folks + //System.out.println("MMQSUM: Filtering out " + read.getReadName() + " with locus base " + base + " and ref of " + pile.refBase); + } + } + return newPile; + } + + + // also pass in the variant position + private boolean disaligner(long mutationLocus, LocusReadPile pile, String reference, long leftmostIndex) { + int VARIANT_SITE_MIN_QSCORE = 40; // ie 2x20 + // build a 2d array of read # vs position +// Map>> matrix = new TreeMap>>(); + + int mutationLocusOffset = (int)(mutationLocus - leftmostIndex); + int[][] mmm = new int[pile.reads.size()][reference.length()]; + + // for every read which has an offset + + + for ( int i=0; i< pile.reads.size(); i++) { + SAMRecord read = pile.reads.get(i); + int offset = pile.offsets.get(i); + + AlignedRead aRead = new AlignedRead(read); + List mismatches = getMismatches(aRead, reference, read.getAlignmentStart()-(int)leftmostIndex); + + // now fill in with -1 where we had reference + // TODO: deal with other CIGAR strings? + for(long j=read.getAlignmentStart(); j <= read.getAlignmentEnd(); j++) { + mmm[i][(int)( j - leftmostIndex )] = -1; + } + + + for(Mismatch mm : mismatches) { + if (mm.mismatchBase == 'N') { continue; } + + mmm[i][(int)mm.position] = mm.qualityScore; + } + } + + // traverse in column-order + int[] altSums = new int[mmm[0].length]; + for(int j=0; j 0) altSums[j] += mmm[i][j]; + } + } + + // now check that we have at least 3 alignment artifacts, and that + // they are at least 5bp from eachother + // FIXME: what should these numbers be? + int varSites = 0; + int lastVarSite = -1; + for(int j=0; j= VARIANT_SITE_MIN_QSCORE) { + if (lastVarSite > -1) { + if (j - lastVarSite > 5) { + varSites++; + lastVarSite = j; + } + } + } + } + if (varSites < 3) { return false; } + + // now go through each read which has the alternate allele at the + // current site (the potential somatic mutation) and see what fraction of + // the "covered" variant bases were called as variant + Map fractionVariantSites = new HashMap(); + for(int i=0; i 0) { + float calledSites = 0; + float variantSites = 0; + for(int j=0; j VARIANT_SITE_MIN_QSCORE && j != mutationLocusOffset) { + if (mmm[i][j] != 0) { calledSites++; } + if (mmm[i][j] > 0) { variantSites++; } + } + } + fractionVariantSites.put(i, variantSites / calledSites); + } + } + + // FIXME: what's the right statistic to use here? + if (getMedian(fractionVariantSites.values()) >= 0.75) { + return true; + } + + return false; + + } + + protected float getMedian(Collection in) { + float[] data = new float[in.size()]; + int i=0; + for(float f : in) { data[i++] = f; } + + Arrays.sort(data); + + int middle = data.length/2; // subscript of middle element + if (data.length%2 == 1) { + return data[middle]; + } else { + // Even number -- return average of middle two + // Must cast the numbers to double before dividing. + return (data[middle-1] + data[middle]) / 2.0f; + } + } + + + private void clean(LocusReadPile pile, String reference, long leftmostIndex) { +// LocusReadPile newPile = new LocusReadPile(pile.refBase); + + ArrayList refReads = new ArrayList(); + ArrayList altReads = new ArrayList(); + ArrayList altAlignmentsToTest = new ArrayList(); + int totalMismatchSum = 0; + + int MIN_MISMATCH_QSCORE = 10; + // decide which reads potentially need to be cleaned + for ( int i=0; i< pile.reads.size(); i++) { + SAMRecord read = pile.reads.get(i); + int offset = pile.offsets.get(i); + + AlignedRead aRead = new AlignedRead(read); + int mismatchScore = mismatchQualitySum(aRead, reference, read.getAlignmentStart()-(int)leftmostIndex, MIN_MISMATCH_QSCORE); + + // if this doesn't match perfectly to the reference, let's try to clean it + if ( mismatchScore > 0 ) { + + if ( mismatchScore > 100) { + System.out.println(read.getReadName() + " has a sum of mismatch quality scores of " + mismatchScore); + + } else { + altReads.add(aRead); + altAlignmentsToTest.add(true); + totalMismatchSum += mismatchScore; + aRead.setMismatchScoreToReference(mismatchScore); + } + } + // otherwise, we can emit it as is + else { + refReads.add(read); + } + } + + // build a map of position -> + Consensus bestConsensus = null; + + // for each alternative consensus to test, align it to the reference and create an alternative consensus + for ( int index = 0; index < altAlignmentsToTest.size(); index++ ) { + if ( altAlignmentsToTest.get(index) ) { + + // do a pairwise alignment against the reference + AlignedRead aRead = altReads.get(index); + SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getReadString()); + int refIdx = swConsensus.getAlignmentStart2wrt1(); + if ( refIdx < 0 ) + continue; + + // create the new consensus + StringBuffer sb = new StringBuffer(); + sb.append(reference.substring(0, refIdx)); + Cigar c = swConsensus.getCigar(); + logger.debug("CIGAR = " + cigarToString(c)); + + int indelCount = 0; + int altIdx = 0; + boolean ok_flag = true; + for ( int i = 0 ; i < c.numCigarElements() ; i++ ) { + CigarElement ce = c.getCigarElement(i); + switch( ce.getOperator() ) { + case D: + indelCount++; + refIdx += ce.getLength(); + break; + case M: + if ( reference.length() < refIdx+ce.getLength() ) + ok_flag = false; + else + sb.append(reference.substring(refIdx, refIdx+ce.getLength())); + refIdx += ce.getLength(); + altIdx += ce.getLength(); + break; + case I: + sb.append(aRead.getReadString().substring(altIdx, altIdx+ce.getLength())); + altIdx += ce.getLength(); + indelCount++; + break; + } + } + // make sure that there is at most only a single indel and it aligns appropriately! + if ( !ok_flag || indelCount > 1 || reference.length() < refIdx ) + continue; + + sb.append(reference.substring(refIdx)); + String altConsensus = sb.toString(); + + // for each imperfect match to the reference, score it against this alternative + Consensus consensus = new Consensus(altConsensus, c, swConsensus.getAlignmentStart2wrt1()); + for ( int j = 0; j < altReads.size(); j++ ) { + AlignedRead toTest = altReads.get(j); + Pair altAlignment = findBestOffset(altConsensus, toTest); + + // the mismatch score is the min of its alignment vs. the reference and vs. the alternate + int myScore = altAlignment.getSecond(); + if ( myScore >= toTest.getMismatchScoreToReference() ) + myScore = toTest.getMismatchScoreToReference(); + // keep track of reads that align better to the alternate consensus + else + consensus.readIndexes.add(new Pair(j, altAlignment.getFirst())); + + logger.debug(aRead.getReadString() + " vs. " + toTest.getReadString() + " => " + myScore + " - " + altAlignment.getFirst()); + consensus.mismatchSum += myScore; + if ( myScore == 0 ) + // we already know that this is its consensus, so don't bother testing it later + altAlignmentsToTest.set(j, false); + } + logger.debug(aRead.getReadString() + " " + consensus.mismatchSum); + if ( bestConsensus == null || bestConsensus.mismatchSum > consensus.mismatchSum) { + bestConsensus = consensus; + logger.debug(aRead.getReadString() + " " + consensus.mismatchSum); + } + } + } + System.out.println("Done with MSA"); +// // if the best alternate consensus has a smaller sum of quality score mismatches (more than the LOD threshold), then clean! +// if ( bestConsensus != null && ((double)(totalMismatchSum - bestConsensus.mismatchSum))/10.0 >= LOD_THRESHOLD ) { +// logger.debug("CLEAN: " + bestConsensus.str ); +// if ( indelOutput != null && bestConsensus.cigar.numCigarElements() > 1 ) { +// StringBuffer str = new StringBuffer(); +// str.append(reads.get(0).getReferenceName()); +// int position = bestConsensus.positionOnReference + bestConsensus.cigar.getCigarElement(0).getLength(); +// str.append(":" + (leftmostIndex + position)); +// CigarElement ce = bestConsensus.cigar.getCigarElement(1); +// str.append("\t" + ce.getLength() + ce.getOperator()); +// str.append("\t" + (((double)(totalMismatchSum - bestConsensus.mismatchSum))/10.0) + "\n"); +// try { +// indelOutput.write(str.toString()); +// indelOutput.flush(); +// } catch (Exception e) {} +// } +// +// // We need to update the mapping quality score of the cleaned reads; +// // however we don't have enough info to use the proper MAQ scoring system. +// // For now, we'll use a heuristic: +// // the mapping quality score is improved by the LOD difference in mismatching +// // bases between the reference and alternate consensus +// int improvement = (totalMismatchSum - bestConsensus.mismatchSum) / 10; +// +// // clean the appropriate reads +// for ( Pair indexPair : bestConsensus.readIndexes ) { +// AlignedRead aRead = altReads.get(indexPair.getFirst()); +// updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.getSecond(), aRead, (int)leftmostIndex); +// aRead.getRead().setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + improvement, 255)); +// aRead.getRead().setAttribute("NM", numMismatches(aRead.getRead(), reference, aRead.getRead().getAlignmentStart()-(int)leftmostIndex)); +// } +// } +// +// // write them out +// for ( SAMRecord rec : refReads ) +// readsToWrite.add(new ComparableSAMRecord(rec)); +// for ( AlignedRead aRec : altReads ) +// readsToWrite.add(new ComparableSAMRecord(aRec.getRead())); + } + + + + // ----------------------------------- PRIVATE IN IntervalCleanerWalker + public static final int MAX_QUAL = 99; + + private static class Mismatch { + AlignedRead aRead; + int offset; + long position; + char mismatchBase; + int qualityScore; + + private Mismatch(AlignedRead aRead, int offset, long position, char mismatchBase, int qualityScore) { + this.aRead = aRead; + this.offset = offset; + this.position = position; + this.mismatchBase = mismatchBase; + this.qualityScore = qualityScore; + } + } + + private static int mismatchQualitySum(AlignedRead aRead, String refSeq, int refIndex) { + return mismatchQualitySum(aRead, refSeq, refIndex, 0); + } + + private static int mismatchQualitySum(AlignedRead aRead, String refSeq, int refIndex, int minMismatchQualityScore) { + List mismatches = getMismatches(aRead, refSeq, refIndex); + int sum = 0; + for(Mismatch mm : mismatches) { + if (mm.qualityScore >= minMismatchQualityScore) { + sum += mm.qualityScore; + } + } + return sum; + + } + + + + + /** + * Returns a list of position + * @param aRead + * @param refSeq + * @param refIndex + * @param minMismatchQualityScore + * @return + */ + private static List getMismatches(AlignedRead aRead, String refSeq, int refIndex) { + List mismatches = new ArrayList(); + + String readSeq = aRead.getReadString(); + String quals = aRead.getBaseQualityString(); + int readIndex = 0; + int sum = 0; + Cigar c = aRead.getCigar(); + for (int i = 0 ; i < c.numCigarElements() ; i++) { + CigarElement ce = c.getCigarElement(i); + switch ( ce.getOperator() ) { + case M: + for (int j = 0 ; j < ce.getLength() ; j++, refIndex++, readIndex++ ) { + // FIXME: what is this case???? + if ( refIndex >= refSeq.length() ) { + sum += MAX_QUAL; + } else { + char readBase = Character.toUpperCase(readSeq.charAt(readIndex)); + char refBase = Character.toUpperCase(refSeq.charAt(refIndex)); + + if ( readBase != refBase ) { + int qual = (int)quals.charAt(readIndex) - 33; + mismatches.add(new Mismatch(aRead, readIndex, refIndex, readBase, qual)); + } + } + } + break; + case I: + readIndex += ce.getLength(); + break; + case D: + refIndex += ce.getLength(); + break; + } + + } + return mismatches; + } + + private Pair findBestOffset(String ref, AlignedRead read) { + int attempts = ref.length() - read.getReadLength() + 1; + int bestScore = mismatchQualitySum(read, ref, 0); + int bestIndex = 0; + for ( int i = 1; i < attempts; i++ ) { + // we can't get better than 0! + if ( bestScore == 0 ) + return new Pair(bestIndex, 0); + int score = mismatchQualitySum(read, ref, i); + if ( score < bestScore ) { + bestScore = score; + bestIndex = i; + } + } + return new Pair(bestIndex, bestScore); + } + + + private class AlignedRead { + SAMRecord read; + int mismatchScoreToReference; + + public AlignedRead(SAMRecord read) { + this.read = read; + mismatchScoreToReference = 0; + } + + public SAMRecord getRead() { + return read; + } + + public String getReadString() { + return read.getReadString(); + } + + public int getReadLength() { + return read.getReadLength(); + } + + public Cigar getCigar() { + return read.getCigar(); + } + + public void setCigar(Cigar cigar) { + read.setCigar(cigar); + } + + public String getBaseQualityString() { + return read.getBaseQualityString(); + } + + public void setMismatchScoreToReference(int score) { + mismatchScoreToReference = score; + } + + public int getMismatchScoreToReference() { + return mismatchScoreToReference; + } + } + + private class Consensus { + public String str; + public int mismatchSum; + public int positionOnReference; + public Cigar cigar; + public ArrayList> readIndexes; + + public Consensus(String str, Cigar cigar, int positionOnReference) { + this.str = str; + this.cigar = cigar; + this.positionOnReference = positionOnReference; + mismatchSum = 0; + readIndexes = new ArrayList>(); + } + } + + public static String cigarToString(Cigar cig) { + StringBuilder b = new StringBuilder(); + + for ( int i = 0 ; i < cig.numCigarElements() ; i++ ) { + char c='?'; + switch ( cig.getCigarElement(i).getOperator() ) { + case M : c = 'M'; break; + case D : c = 'D'; break; + case I : c = 'I'; break; + } + b.append(cig.getCigarElement(i).getLength()); + b.append(c); + } + return b.toString(); + } + } \ No newline at end of file