From e1055bcc4c1c6907e02a71bf99a42898a56f0580 Mon Sep 17 00:00:00 2001 From: kcibul Date: Wed, 15 Jul 2009 20:46:08 +0000 Subject: [PATCH] moving to new external repository git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1261 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/cancer/LOHWalker.java | 164 ---- .../gatk/walkers/cancer/LocusReadPile.java | 191 ---- .../gatk/walkers/cancer/QualitySums.java | 58 -- .../walkers/cancer/SomaticCoverageWalker.java | 167 ---- .../walkers/cancer/SomaticMutationWalker.java | 926 ------------------ 5 files changed, 1506 deletions(-) delete mode 100644 java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/LOHWalker.java delete mode 100644 java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/LocusReadPile.java delete mode 100644 java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/QualitySums.java delete mode 100644 java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/SomaticCoverageWalker.java delete mode 100644 java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/SomaticMutationWalker.java diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/LOHWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/LOHWalker.java deleted file mode 100644 index f3a22c26e..000000000 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/LOHWalker.java +++ /dev/null @@ -1,164 +0,0 @@ -package org.broadinstitute.sting.playground.gatk.walkers.cancer; - -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.refdata.rodDbSNP; -import org.broadinstitute.sting.gatk.LocusContext; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods; - -import java.util.Map; -import java.util.HashMap; -import java.util.List; - -import net.sf.samtools.SAMRecord; - -/** - * Created by IntelliJ IDEA. - * User: kcibul - * Date: Jun 27, 2009 - * Time: 3:21:23 PM - * To change this template use File | Settings | File Templates. - */ -public class LOHWalker extends LocusWalker { - @Argument(fullName = "tumor_sample_name", shortName = "s1", required = true, doc="Name of the tumor sample") - public String tumorSampleName; - - @Argument(fullName = "normal_sample_name", shortName = "s2", required = true, doc="Name of the normal sample") - public String normalSampleName; - - public Integer reduceInit() { - return null; //To change body of implemented methods use File | Settings | File Templates. - } - - public Integer reduce(Integer value, Integer sum) { - return null; //To change body of implemented methods use File | Settings | File Templates. - } - - @Override - public void initialize() { - out.println("chrom\tposition\tnormal_btnb\tallele1\tallele2\tnormal_allele1_count\tnormal_allele2_count\tnormal_mar\ttumor_allele1_count\ttumor_allele2_count\ttumor_mar"); - - } - - public static int MAX_INSERT_SIZE = 10000; - - public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) { - char upRef = Character.toUpperCase(ref); - - // TODO: should we be able to call mutations at bases where ref is N? - if (upRef == 'N') { return -1; } - - List reads = context.getReads(); - - LocusReadPile tumorReadPile = new LocusReadPile(upRef); - LocusReadPile normalReadPile = new LocusReadPile(upRef); - - for ( int i = 0; i < reads.size(); i++ ) - { - SAMRecord read = reads.get(i); - - if (read.getNotPrimaryAlignmentFlag() || - read.getDuplicateReadFlag() || - read.getReadUnmappedFlag() || - read.getMappingQuality() <= 0 - -// || (read.getReadPairedFlag() && (!read.getProperPairFlag() || read.getInferredInsertSize() >= MAX_INSERT_SIZE)) - ) { - continue; - } - - String rg = (String) read.getAttribute("RG"); - String sample = read.getHeader().getReadGroup(rg).getSample(); - - int offset = context.getOffsets().get(i); - - char base = read.getReadString().charAt(offset); - if (base == 'N' || base == 'n') { continue; } - - - // TODO: build a pile of reads and offsets, then pass that into a - // constructor for the normalGL class - // that way, we can build a different pile of reads later on and extract the genotype - if (normalSampleName.equals(sample)) { - normalReadPile.add(read, offset); - - } else if (tumorSampleName.equals(sample)) { - tumorReadPile.add(read, offset); - } else { - throw new RuntimeException("Unknown Sample Name: " + sample); - } - } - - normalReadPile.likelihoods.ApplyPrior(upRef,' ', -1); // apply std priors - double normalBtnb = normalReadPile.likelihoods.LodVsNextBest(); - if (normalBtnb < 5.0) { return null; } - - String normalGt = normalReadPile.likelihoods.BestGenotype(); - char allele1 = normalGt.charAt(0); - char allele2 = normalGt.charAt(1); - if (allele2 == upRef) { - char oldAllele1 = allele1; - allele1 = allele2; - allele2 = oldAllele1; - } - - - if (allele1 == upRef && allele1 == allele2) { - return null; - } - - StringBuilder sb = new StringBuilder(); - sb.append(String.format("%s\t%d\t%1.2f\t%s\t%s\t", - context.getContig(), - context.getPosition(), - normalBtnb, - allele1, - allele2)); - - int normalAllele1Counts = normalReadPile.qualitySums.getCounts(allele1); - int normalAllele2Counts = normalReadPile.qualitySums.getCounts(allele2); - int tumorAllele1Counts = tumorReadPile.qualitySums.getCounts(allele1); - int tumorAllele2Counts = tumorReadPile.qualitySums.getCounts(allele2); - - // allele ratio in the normal - double normalAlleleRatio = - (double) normalAllele2Counts / (double) (normalAllele1Counts+normalAllele2Counts) ; - - // allele ratio in the tumor - double tumorAlleleRatio = - (double) tumorAllele2Counts / (double) (tumorAllele1Counts+tumorAllele2Counts) ; - - - sb.append( - String.format("%d\t%d\t%1.2f\t%d\t%d\t%1.2f", - normalAllele1Counts, - normalAllele2Counts, - normalAlleleRatio, - tumorAllele1Counts, - tumorAllele2Counts, - tumorAlleleRatio)); - - - -// // do the normal -// for (char allele : new char[]{'A','C','G','T'}) { -// QualitySums qs = normalReadPile.qualitySums; -// sb.append(" "); -// sb.append(qs.getCounts(allele)).append(" "); -//// sb.append(qs.getQualitySum(allele)); -// } -// -// // do the tumor -// for (char allele : new char[]{'A','C','G','T'}) { -// QualitySums qs = tumorReadPile.qualitySums; -// sb.append(" "); -// sb.append(qs.getCounts(allele)).append(" "); -//// sb.append(qs.getQualitySum(allele)); -// } - - out.println(sb); - return null; - } -} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/LocusReadPile.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/LocusReadPile.java deleted file mode 100644 index 5cdeccdc9..000000000 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/LocusReadPile.java +++ /dev/null @@ -1,191 +0,0 @@ -package org.broadinstitute.sting.playground.gatk.walkers.cancer; - -import net.sf.samtools.SAMRecord; - -import java.util.List; -import java.util.ArrayList; - -import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods; - -/** - * Created by IntelliJ IDEA. - * User: kcibul - * Date: Jun 27, 2009 - * Time: 3:41:59 PM - * To change this template use File | Settings | File Templates. - */ -public class LocusReadPile { - public List reads = new ArrayList(); - public List offsets = new ArrayList(); - public char refBase; - public int minQualityScore; - public GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(); - public QualitySums qualitySums = new QualitySums(); - - public LocusReadPile(char refBase) { - this(refBase, 0); - } - - public LocusReadPile(char refBase, int minQualityScore) { - this.refBase = refBase; - this.minQualityScore = minQualityScore; - } - - public void add(SAMRecord read, int offset) { - add(read, offset, false); - } - - public void add(SAMRecord read, int offset, boolean allowMapq0ForQualSum) { - char base = read.getReadString().charAt(offset); - byte qual = read.getBaseQualities()[offset]; - - if (base == 'N' || base == 'n') { return; } - - if (read.getMappingQuality() > 0) { - reads.add(read); - offsets.add(offset); - - likelihoods.add(refBase, base, qual); - } - - if (read.getMappingQuality() == 0 && !allowMapq0ForQualSum) { return; } - - if (qual > this.minQualityScore) qualitySums.incrementSum(base, qual); - } - - public String getLocusBases() { - return getLocusBases(0); - } - - public String getLocusBases(int locusOffset) { - StringBuilder sb = new StringBuilder(); - for(int i=0; i= 0 && offset < read.getReadString().length()) { - char base = read.getReadString().charAt(offset); - sb.append(base); - } - } - return sb.toString(); - } - - public double getAltVsRef(char altAllele) { - double[] tumorRefAlt = extractRefAlt(this.likelihoods, this.refBase, altAllele); - double tumorLod = Math.log10(tumorRefAlt[1] + tumorRefAlt[2]) - tumorRefAlt[0]; - return tumorLod; - } - - public double getRefVsAlt(char altAllele) { - double[] refAlt = extractRefAlt(this.likelihoods, this.refBase, altAllele); - double normalLod = refAlt[0] - Math.log10(refAlt[1] + refAlt[2]); - return normalLod; - } - - /** - * Extract the LOD comparing ref:ref to ref:alt and alt:alt - */ - private double[] extractRefAlt(GenotypeLikelihoods gl, char ref, char altAllele) { - double refRef = 0; - double altRef = 0; - double altAlt = 0; - for(int j=0; j<10; j++) { - String gt = gl.genotypes[j]; - double likelihood = gl.likelihoods[j]; - - // the ref:mutant theory - if ( (gt.charAt(0) == ref && gt.charAt(1) == altAllele) || - (gt.charAt(0) == altAllele && gt.charAt(1) == ref) ) { - altRef += Math.pow(10, likelihood); - } - - if ( gt.charAt(0) == altAllele && gt.charAt(1) == altAllele) { - altAlt += Math.pow(10, likelihood); - } - - if ( gt.charAt(0) == ref && gt.charAt(1) == ref) { - refRef = likelihood; - } - - } - return new double[]{refRef, altRef, altAlt}; - } - - private GenotypeLikelihoods getLikelihood(int locusOffset) { - GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(); - - - for(int i=0; i= 0 && offset < read.getReadString().length()) { - - char base = read.getReadString().charAt(offset); - byte qual = read.getBaseQualities()[offset]; - - likelihoods.add(refBase, base, qual); - } - - } - return likelihoods; - } - - public double[] getNormalizedProbs(int locusOffset, int qThreshold) { - GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(); - - - // FIXME: gaddy suggested this "correction" which evidently has a name... - //likelihoods.add(refBase, refBase, (byte) 20); - - for(int i=0; i= 0 && offset < read.getReadString().length()) { - - char base = read.getReadString().charAt(offset); - byte qual = read.getBaseQualities()[offset]; - - if (qual >= qThreshold) { - likelihoods.add(refBase, base, qual); -// if (locusOffset == -43) System.out.println("\tUSING " + base + " " + qual); - } else { -// if (locusOffset == -43) System.out.println("\tDropping " + base + " " + qual); - } - } - - } - - - double[] logLikelihood = likelihoods.likelihoods; - double[] nonLogLikelihood = new double[10]; - double sum = 0; - for(int i=0; i<10; i++) { - nonLogLikelihood[i] = Math.pow(10, logLikelihood[i]); - sum += nonLogLikelihood[i]; - } - - double[] normalizedProbs = new double[10]; - for(int i=0; i<10; i++) { - normalizedProbs[i] = nonLogLikelihood[i] / sum; - } - - - //quick sanity check -// sum=0; -// for(int i=0; i<10; i++) { -// sum += normalizedProbs[i]; -// } -// System.out.println("normalized probs = " + sum); - - - return normalizedProbs; - } - - -} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/QualitySums.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/QualitySums.java deleted file mode 100644 index 714389560..000000000 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/QualitySums.java +++ /dev/null @@ -1,58 +0,0 @@ -package org.broadinstitute.sting.playground.gatk.walkers.cancer; - -/** - * Created by IntelliJ IDEA. - * User: kcibul - * Date: Jun 27, 2009 - * Time: 3:43:29 PM - * To change this template use File | Settings | File Templates. - */ -public class QualitySums { - private int a = 0; - private int c = 0; - private int g = 0; - private int t = 0; - private int aCounts = 0; - private int cCounts = 0; - private int gCounts = 0; - private int tCounts = 0; - - public int getQualitySum(final char base) { - if (base == 'a' || base == 'A') { return a; } - if (base == 'c' || base == 'C') { return c; } - if (base == 'g' || base == 'G') { return g; } - if (base == 't' || base == 'T') { return t; } - throw new RuntimeException("Unknown base: " + base); - } - - public int getCounts(final char base) { - if (base == 'a' || base == 'A') { return aCounts; } - if (base == 'c' || base == 'C') { return cCounts; } - if (base == 'g' || base == 'G') { return gCounts; } - if (base == 't' || base == 'T') { return tCounts; } - throw new RuntimeException("Unknown base: " + base); - } - - public void incrementSum(final char base, final byte qual) { - if (base == 'a' || base == 'A') { a += qual; aCounts++;} - else if (base == 'c' || base == 'C') { c += qual; cCounts++;} - else if (base == 'g' || base == 'G') { g += qual; gCounts++; } - else if (base == 't' || base == 'T') { t += qual; tCounts++; } - else throw new RuntimeException("Unknown base: " + base); - } - - public int getOtherQualities(final char base) { - int total = a + c + g + t; - if (base == 'a' || base == 'A') { return total-a; } - else if (base == 'c' || base == 'C') { return total-c; } - else if (base == 'g' || base == 'G') { return total-g; } - else if (base == 't' || base == 'T') { return total-t; } - else throw new RuntimeException("Unknown base: " + base); - } - - public void reset() { - a = 0; c = 0; g = 0; t = 0; - aCounts = 0; cCounts = 0; gCounts = 0; tCounts = 0; - } -} - diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/SomaticCoverageWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/SomaticCoverageWalker.java deleted file mode 100644 index 005fe1229..000000000 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/SomaticCoverageWalker.java +++ /dev/null @@ -1,167 +0,0 @@ -package org.broadinstitute.sting.playground.gatk.walkers.cancer; - -import org.broadinstitute.sting.gatk.LocusContext; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.refdata.rodDbSNP; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.HapMapAlleleFrequenciesROD; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMReadGroupRecord; - -import java.util.List; -import java.util.Formatter; -import static java.lang.Math.log10; - -import edu.mit.broad.picard.genotype.DiploidGenotype; -import net.sf.picard.PicardException; - -public class SomaticCoverageWalker extends LocusWalker { - - @Argument(fullName = "tumor_sample_name", shortName = "s1", doc="tumor sample name", required = true) - public String tumorSampleName; - - @Argument(fullName = "normal_sample_name", shortName = "s2", doc="normal sample name", required = true) - public String normalSampleName; - - @Argument(fullName = "extended", shortName="ext", doc="extended output", required=false) - public boolean extendedOutput = false; - - - // --normal_sample_name TCGA-06-0188-10B-01W --tumor_sample_name TCGA-06-0188-01A-01W - public void initialize() { - out.println("track type=wiggle_0 name=SomaticCoverage viewLimits=0:1"); - } - - public String walkerType() { return "ByLocus"; } - - // Do we actually want to operate on the context? - public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context) { - return true; // We are keeping all the reads - } - - - - // Map over the org.broadinstitute.sting.gatk.LocusContext - int MAPPING_QUALITY_THRESHOLD = 1; - int totalSites; - int tumorCovered; - int normalCovered; - int somaticCovered; - long start = 0; - - int lastContigIndex = -1; - long lastPosition = -1; - - public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) { - if (start ==0) { start = System.currentTimeMillis(); } - - List reads = context.getReads(); - int tumorDepth = 0; - int normalDepth = 0; - StringBuilder readNames = new StringBuilder(); - for ( int i = 0; i < reads.size(); i++ ) - { - SAMRecord read = reads.get(i); - - // TODO: could this be done better elsewhere? - // only process primary, non duplicate alignments - // that come from fully mapped pairs with a mappign quality threshold >= x - if (read.getNotPrimaryAlignmentFlag() || - read.getDuplicateReadFlag() || - read.getReadUnmappedFlag() || - read.getMappingQuality() < MAPPING_QUALITY_THRESHOLD -// || -// read.getMateUnmappedFlag() || - ) { - continue; - } - - String rg = (String) read.getAttribute("RG"); - SAMFileHeader header = read.getHeader(); - SAMReadGroupRecord readGroup = header.getReadGroup(rg); - - if (readGroup == null) { - err.println("WARNING: read " + read.getReadName() + " belongs to read group " + rg + " which isn't in the header!"); - continue; - } - String sample = readGroup.getSample(); - - - - if (normalSampleName.equals(sample)) { - normalDepth++; - } else if (tumorSampleName.equals(sample)) { - tumorDepth++; - } else { - throw new RuntimeException("Unknown Sample Name: " + sample); - } - readNames.append(read.getReadName()).append("+").append(read.getAlignmentStart()).append("+").append(read.getCigarString()); - - } - - - boolean isTumorCovered = tumorDepth >= 14; - boolean isNormalCovered = normalDepth >= 8; - - if (isTumorCovered) { tumorCovered++; } - if (isNormalCovered) { normalCovered++; } - if (isTumorCovered && isNormalCovered) {somaticCovered++; } - totalSites++; - -// if (totalSites % 100000 == 0) { -// long now = System.currentTimeMillis(); -// out.println(String.format("%s:%d %d %d %d %d %dms", context.getContig(), context.getPosition(), totalSites, tumorCovered, normalCovered, somaticCovered, (now-start))); -// } - - // if the contig index has changed OR if it's the same contig index but we jumped positions - // output a wiggle header - StringBuilder sb = new StringBuilder(); - if (lastContigIndex != context.getLocation().getContigIndex() || - lastPosition + 1 != context.getPosition()) { - lastContigIndex = context.getLocation().getContigIndex(); - sb.append("fixedStep").append(" ") - .append("chrom=").append(context.getContig()).append(" ") - .append("start=").append(context.getPosition()).append(" ") - .append("step=1") - .append("\n"); - } - lastPosition = context.getPosition(); - - if (extendedOutput) { - sb.append(context.getPosition()).append(" "); - } - sb.append((isTumorCovered && isNormalCovered)?"1":"0"); - if (extendedOutput) { - sb.append(" ").append(tumorDepth).append(" ").append(normalDepth).append(" ").append(readNames); - } - -// sb.append(context.getContig()).append(" "); -// sb.append(context.getPosition()).append(" "); -// sb.append(tumorDepth).append(" "); -// sb.append(normalDepth).append(" "); -// sb.append((isTumorCovered && isNormalCovered)?"Y":"N"); - - out.println(sb.toString()); - return 1; - } - - // Given result of map function - public Integer reduceInit() { - return 0; - } - public Integer reduce(Integer value, Integer sum) { - return value + sum; - } - - @Override - public void onTraversalDone(Integer result) { -// out.println(String.format("FINAL - %d %d %d %d", totalSites, tumorCovered, normalCovered, somaticCovered)); - } - - - -} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/SomaticMutationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/SomaticMutationWalker.java deleted file mode 100644 index 5e854c66f..000000000 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/SomaticMutationWalker.java +++ /dev/null @@ -1,926 +0,0 @@ -package org.broadinstitute.sting.playground.gatk.walkers.cancer; - -import net.sf.picard.reference.ReferenceSequence; -import net.sf.picard.PicardException; -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarElement; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.util.StringUtil; -import org.broadinstitute.sting.gatk.LocusContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.refdata.rodDbSNP; -import org.broadinstitute.sting.gatk.walkers.By; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.playground.indels.SWPairwiseAlignment; -import org.broadinstitute.sting.utils.Pair; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; - -import java.io.IOException; -import java.io.File; -import java.io.FileWriter; -import java.io.BufferedWriter; -import java.util.*; - -@By(DataSource.REFERENCE) -public class SomaticMutationWalker extends LocusWalker { - protected enum MutantFailureReason { - StrandImbalance, - Misalignment - } - @Argument(fullName = "tumor_sample_name", shortName = "s1", required = true, doc="Name of the tumor sample") - public String tumorSampleName; - - @Argument(fullName = "normal_sample_name", shortName = "s2", required = true, doc="Name of the normal sample") - public String normalSampleName; - - @Argument(fullName = "tumor_lod", required = false, doc = "LOD threshold for calling tumor variant") - public float TUMOR_LOD_THRESHOLD = 6.3f; - - @Argument(fullName = "normal_lod", required = false, doc = "LOD threshold for calling normal non-variant") - public float NORMAL_LOD_THRESHOLD = 2.3f; - - @Argument(fullName = "min_mutant_sum", required = false, doc = "threshold for sum of mutant allele quality scores") - public int MIN_MUTANT_SUM = 100; - - public float SKEW_LOD_THRESHOLD = 1.5f; - - @Argument(fullName = "output_failures", required = false, doc="produce output for failed sites") - public boolean OUTPUT_FAILURES = false; - - @Argument(fullName="maf_file", shortName="maf", doc="write out calls in MAF format to this file", required=false) - public String MAF_FILE = null; - - public boolean USE_MAPQ0_IN_NORMAL_QSCORE = true; - - private FileWriter mafWriter = null; - - - public void initialize() { - if ( MAF_FILE != null ) { - try { - mafWriter = new FileWriter(new File(MAF_FILE)); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } - } - - } - - public String walkerType() { return "ByLocus"; } - - // Do we actually want to operate on the context? - public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context) { - return true; // We are keeping all the reads - } - - public static int MAX_INSERT_SIZE = 10000; - public static int MIN_MUTANT_SUM_PRETEST = 60; - - public static int MIN_QSCORE = 13; - - - public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) { - Map midp = new HashMap(); - midp.put('A', Integer.MAX_VALUE); - midp.put('C', Integer.MAX_VALUE); - midp.put('G', Integer.MAX_VALUE); - midp.put('T', Integer.MAX_VALUE); - - // First, exclude DBSNP Sites - for ( ReferenceOrderedDatum datum : tracker.getAllRods() ) - { - if ( datum != null ) - { - if ( datum instanceof rodDbSNP) { - // we're at a dbsnp site... move along... nothing to see here... - return -1; - } - } - } - - char upRef = Character.toUpperCase(ref); - - // TODO: should we be able to call mutations at bases where ref is N? - if (upRef == 'N') { return -1; } - - List reads = context.getReads(); - - LocusReadPile tumorReadPile = new LocusReadPile(upRef,MIN_QSCORE); - LocusReadPile normalReadPile = new LocusReadPile(upRef,MIN_QSCORE); - - for ( int i = 0; i < reads.size(); i++ ) - { - SAMRecord read = reads.get(i); - - if (read.getNotPrimaryAlignmentFlag() || - read.getDuplicateReadFlag() || - read.getReadUnmappedFlag() - || (read.getReadPairedFlag() && (!read.getProperPairFlag() || read.getInferredInsertSize() >= MAX_INSERT_SIZE)) - ) { - continue; - } - - String rg = (String) read.getAttribute("RG"); - String sample = read.getHeader().getReadGroup(rg).getSample(); - - int offset = context.getOffsets().get(i); - - char base = read.getReadString().charAt(offset); - if (base == 'N' || base == 'n') { continue; } - - - // TODO: build a pile of reads and offsets, then pass that into a - // constructor for the normalGL class - // that way, we can build a different pile of reads later on and extract the genotype - if (normalSampleName.equals(sample)) { - normalReadPile.add(read, offset, USE_MAPQ0_IN_NORMAL_QSCORE); - - } else if (tumorSampleName.equals(sample)) { - if (read.getMappingQuality() > 0) { - tumorReadPile.add(read, offset); - } - - int midDist = Math.abs((int)(read.getReadLength() / 2) - offset); - if (midDist < midp.get(base)) { midp.put(base, midDist); } - - } else { - throw new RuntimeException("Unknown Sample Name: " + sample); - } - } - - // pretest: if the sum of the quality scores for all non-ref alleles < 60, just quit looking now - if (tumorReadPile.qualitySums.getOtherQualities(upRef) < MIN_MUTANT_SUM_PRETEST) { - return -1; - } - - - // Test each of the poosible alternate alleles - - for (char altAllele : new char[]{'A','C','G','T'}) { - if (altAllele == upRef) { continue; } - - - // (i) either an adjusted quality score sum in the tumor for the mutant base must be - // at least 100 or the LOD score for mutant:ref + mutant:mutant vs ref:ref must - // be at least 6.3; - int mutantSum = tumorReadPile.qualitySums.getQualitySum(altAllele); - int refSum = tumorReadPile.qualitySums.getQualitySum(upRef); - - if (tumorReadPile.getAltVsRef(altAllele) >= TUMOR_LOD_THRESHOLD -// || -// (mutantSum >= MIN_MUTANT_SUM && (float)mutantSum / (float) refSum >= 0.05f) - ) { - // yep -- just fall through... easier to think about this way! - } else { - continue; - } - - // (ii) the quality score sum for the mutant base in the normal must be < 50 and the - // LOD score for ref:ref vs mutant:ref + mutant:mutant must be at least 2.3. - double normalLod = normalReadPile.getRefVsAlt(altAllele); - QualitySums normQs = normalReadPile.qualitySums; - - if ( (normQs.getCounts(altAllele) >= 2 && normQs.getQualitySum(altAllele) > 20) - || normalLod < NORMAL_LOD_THRESHOLD) { - continue; - } - - // make sure we've seen at least 1 obs of the alternate allele within 20bp of the read-middle - boolean failedMidpointCheck = midp.get(altAllele) > 20; -// if (failedMidpointCheck) { -// out.println("Rejecting due to midpoint check!"); -// return 0; -// } - - - // do a MSA to figure out if the alternate reads comes from a cluster of reads with more - // than one alternate allele (hints at being an alignment artifact) - ReferenceSequence refSeq; - // TODO: don't hardcode. Make this the max read length in the pile - long refStart = context.getLocation().getStart() - 150; - //tumorReadPile.offsets.get(0); - long refStop = context.getLocation().getStart() + 150; - try { - IndexedFastaSequenceFile seqFile = new IndexedFastaSequenceFile(getToolkit().getArguments().referenceFile); - refSeq = seqFile.getSubsequenceAt(context.getContig(),refStart, refStop); - - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } - -// System.out.println("TESTING " + context.getContig() + ":" + context.getPosition()); - LocusReadPile t2 = filterHighMismatchScoreReads(tumorReadPile, StringUtil.bytesToString(refSeq.getBases()), refStart); - - // TEST the LOD score again! - // TODO: extract this since we'll do it multiple times... - - // (i) either an adjusted quality score sum in the tumor for the mutant base must be - // at least 100 or the LOD score for mutant:ref + mutant:mutant vs ref:ref must - // be at least 6.3; - double tumorLod = t2.getAltVsRef(altAllele); - if (t2.qualitySums.getQualitySum(altAllele) < MIN_MUTANT_SUM && tumorLod < TUMOR_LOD_THRESHOLD) { - if (OUTPUT_FAILURES) { - - String msg = "FAILED due to MAX MM QSCORE TEST." + - " LOD was " + tumorReadPile.getAltVsRef(altAllele) + - " LOD is now " + t2.getAltVsRef(altAllele) + - " QSUM was " + tumorReadPile.qualitySums.getQualitySum(altAllele) + - " QSUM is now " + t2.qualitySums.getQualitySum(altAllele); - - out.println( - context.getContig() + "\t" + - context.getPosition() + "\t" + - context.getPosition() + "\t" - + msg.replace(' ','_') - ); - } - continue; - } - -// // TODO: using the original pile here since often these artifacts will be supported -// // by those reads that get thrown out! Maybe that means we don't need the noise filter... -// boolean shouldDisalign = -// disaligner(context.getPosition(), tumorReadPile, StringUtil.bytesToString(refSeq.getBases()), refStart); - Pair failureReason = - readPileSkew(t2, upRef, altAllele, StringUtil.bytesToString(refSeq.getBases()), refStart); - - // time for the big guns... the realignment! -// try { -// File f = File.createTempFile("/tmp","RANDOM"); -// BufferedWriter writer = new BufferedWriter(new FileWriter(f)); -// for(SAMRecord read : t2.reads) { -// writer.write(convertToFastq(read)); -// } -// out.println("Finished writing realignment to " + f.getCanonicalPath()); -// writer.flush(); -// writer.close(); -// } catch (IOException ioe) { -// throw new PicardException("Error performing realignment", ioe); -// } - - if (failureReason.first != null) { - if (OUTPUT_FAILURES) { - String msg = "FAILED due to " + failureReason.first.name() + - " mutAllele " + altAllele + "_" + - " maxSkewLod " + failureReason.second - ; - - out.println( - context.getContig() + "\t" + - context.getPosition() + "\t" + - context.getPosition() + "\t" + - msg.replace(' ','_') - ); - - } - continue; - } - - - - - - - - // if we're still here... we've got a somatic mutation! Output the results - // and stop looking for mutants! - writeMafLine( - "36\t" + //build - context.getContig() + "\t" + - context.getPosition() + "\t" + - context.getPosition() + "\t" + - upRef + "\t" + - upRef + "\t" + - altAllele + "\t" + - tumorSampleName + "\t" + - normalSampleName); - - String msg = - (failedMidpointCheck?"__FAILED-MPCHECK":"") + - "TScore:" + tumorLod + - "__TRefSum: " + tumorReadPile.qualitySums.getQualitySum(upRef) + - "__TAltSum: " + tumorReadPile.qualitySums.getQualitySum(altAllele) + - "__NScore:" + normalLod + - "__NRefSum: " + normalReadPile.qualitySums.getQualitySum(upRef) + - "__NAltSum: " + normalReadPile.qualitySums.getQualitySum(altAllele) + - "__maxSkewLod_" + failureReason.second + "_" + - "__MIDP: " + midp.get(altAllele); - - out.println( - context.getContig() + "\t" + - context.getPosition() + "\t" + - context.getPosition() + "\t" - + msg.replace(' ','_') - ); - - - - - return 1; - - - } - - return -1; - } - - private void writeMafLine(String s) { - if (mafWriter != null) { - try { - mafWriter.write(s); - mafWriter.write("\n"); - mafWriter.flush(); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } - } - } - - private String convertToFastq(SAMRecord read) { - // output FASTQ @\n\n+[]\n\n - StringBuilder sb = new StringBuilder(); - - sb.append("@").append(read.getReadName()).append("\n"); - sb.append(read.getReadString()).append("\n"); - sb.append("+").append("\n"); - sb.append(read.getBaseQualityString()).append("\n"); - return sb.toString(); - } - - - // Given result of map function - public Integer reduceInit() { - return 0; - } - public Integer reduce(Integer value, Integer sum) { - return value + sum; - } - - @Override - public void onTraversalDone(Integer result) { - if (mafWriter != null) { - try { - mafWriter.flush(); - mafWriter.close(); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } - } - } - - - - private Pair readPileSkew(LocusReadPile pile, char refAllele, char mutantAllele, String reference, long leftmostIndex) { - // first split into two piles, those supporting the mutant and those not - LocusReadPile mutantPile = new LocusReadPile(mutantAllele); - LocusReadPile refPile = new LocusReadPile(mutantAllele); - - - for (int i=0; i(MutantFailureReason.StrandImbalance, 0d); - } - - - //chr17:4979257 - // are the two alleles distributed differently - //fixme: this seems to degrade as you lose reads? - //fixme: what should the threshold be here? - SortedMap skewLodOffsets = new TreeMap(); - double maxSkewLod = 0; - - //fixme: calculate this range properly - int MAX_OFFSET_DISTANCE = 60; - for(int offset=-1 * MAX_OFFSET_DISTANCE; offset= -1 && offset <= 1 ) { continue; } - - int SKEW_QSCORE_THRESHOLD = 10; - double[] mutantNormProbs = mutantPile.getNormalizedProbs(offset, SKEW_QSCORE_THRESHOLD); - double[] otherNormProbs = refPile.getNormalizedProbs(offset, SKEW_QSCORE_THRESHOLD); - - double J = 0; - for(int i=0; i<10; i++) { - J += mutantNormProbs[i] * otherNormProbs[i]; - } - double skewLod = Math.log10( (1-J) / J); - - - // fixme: is it legit to require that we see it in at least 2 reads each? - int mutantReadCounts = mutantPile.getLocusBases(offset).length(); - int otherReadCounts = refPile.getLocusBases(offset).length(); - if (mutantReadCounts >= 2 && otherReadCounts >= 2) { - - if (skewLod > maxSkewLod) { maxSkewLod = skewLod; } - if (skewLod > SKEW_LOD_THRESHOLD) { - -// System.out.println( "Offset: " + offset + -// " mutant_reads: " + mutantReadCounts + -// " mutant bases: " + mutantPile.getLocusBases(offset) + -// " other_reads: " + otherReadCounts + -// " other bases: " + refPile.getLocusBases(offset) + -// " skewLod:" + skewLod ); - - skewLodOffsets.put(offset, skewLod); - } - } - } - - if (skewLodOffsets.size() > 0) { - return new Pair(MutantFailureReason.Misalignment, maxSkewLod); - } else { - return new Pair(null, maxSkewLod); -// System.out.println("MAX SKEW: " + maxSkewLod); - } - - } - - int MAX_READ_MISMATCH_QUALITY_SCORE_SUM = 100; - private LocusReadPile filterHighMismatchScoreReads(LocusReadPile pile, String reference, long leftmostIndex) { - LocusReadPile newPile = new LocusReadPile(pile.refBase); - - for ( int i=0; i< pile.reads.size(); i++) { - SAMRecord read = pile.reads.get(i); - int offset = pile.offsets.get(i); - - AlignedRead aRead = new AlignedRead(read); - int mismatchScore = mismatchQualitySum(aRead, reference, read.getAlignmentStart()-(int)leftmostIndex); - - if (mismatchScore <= MAX_READ_MISMATCH_QUALITY_SCORE_SUM) { - newPile.add(read, offset); - } else { - char base = read.getReadString().charAt(offset); - - // TODO: use a logger to output this information for debugging/Picard folks - //out.println("MMQSUM: Filtering out " + read.getReadName() + " with locus base " + base + " and ref of " + pile.refBase); - } - } - return newPile; - } - - - // also pass in the variant position - private boolean disaligner(long mutationLocus, LocusReadPile pile, String reference, long leftmostIndex) { - int VARIANT_SITE_MIN_QSCORE = 40; // ie 2x20 - // build a 2d array of read # vs position -// Map>> matrix = new TreeMap>>(); - - int mutationLocusOffset = (int)(mutationLocus - leftmostIndex); - int[][] mmm = new int[pile.reads.size()][reference.length()]; - - // for every read which has an offset - - - for ( int i=0; i< pile.reads.size(); i++) { - SAMRecord read = pile.reads.get(i); - int offset = pile.offsets.get(i); - - AlignedRead aRead = new AlignedRead(read); - List mismatches = getMismatches(aRead, reference, read.getAlignmentStart()-(int)leftmostIndex); - - // now fill in with -1 where we had reference - // TODO: deal with other CIGAR strings? - for(long j=read.getAlignmentStart(); j <= read.getAlignmentEnd(); j++) { - mmm[i][(int)( j - leftmostIndex )] = -1; - } - - - for(Mismatch mm : mismatches) { - if (mm.mismatchBase == 'N') { continue; } - - mmm[i][(int)mm.position] = mm.qualityScore; - } - } - - // traverse in column-order - int[] altSums = new int[mmm[0].length]; - for(int j=0; j 0) altSums[j] += mmm[i][j]; - } - } - - // now check that we have at least 3 alignment artifacts, and that - // they are at least 5bp from eachother - // FIXME: what should these numbers be? - int varSites = 0; - int lastVarSite = -1; - for(int j=0; j= VARIANT_SITE_MIN_QSCORE) { - if (lastVarSite > -1) { - if (j - lastVarSite > 5) { - varSites++; - lastVarSite = j; - } - } - } - } - if (varSites < 3) { return false; } - - // now go through each read which has the alternate allele at the - // current site (the potential somatic mutation) and see what fraction of - // the "covered" variant bases were called as variant - Map fractionVariantSites = new HashMap(); - for(int i=0; i 0) { - float calledSites = 0; - float variantSites = 0; - for(int j=0; j VARIANT_SITE_MIN_QSCORE && j != mutationLocusOffset) { - if (mmm[i][j] != 0) { calledSites++; } - if (mmm[i][j] > 0) { variantSites++; } - } - } - fractionVariantSites.put(i, variantSites / calledSites); - } - } - - // FIXME: what's the right statistic to use here? - if (getMedian(fractionVariantSites.values()) >= 0.75) { - return true; - } - - return false; - - } - - protected float getMedian(Collection in) { - float[] data = new float[in.size()]; - int i=0; - for(float f : in) { data[i++] = f; } - - Arrays.sort(data); - - int middle = data.length/2; // subscript of middle element - if (data.length%2 == 1) { - return data[middle]; - } else { - // Even number -- return average of middle two - // Must cast the numbers to double before dividing. - return (data[middle-1] + data[middle]) / 2.0f; - } - } - - - private void clean(LocusReadPile pile, String reference, long leftmostIndex) { -// LocusReadPile newPile = new LocusReadPile(pile.refBase); - - ArrayList refReads = new ArrayList(); - ArrayList altReads = new ArrayList(); - ArrayList altAlignmentsToTest = new ArrayList(); - int totalMismatchSum = 0; - - int MIN_MISMATCH_QSCORE = 10; - // decide which reads potentially need to be cleaned - for ( int i=0; i< pile.reads.size(); i++) { - SAMRecord read = pile.reads.get(i); - int offset = pile.offsets.get(i); - - AlignedRead aRead = new AlignedRead(read); - int mismatchScore = mismatchQualitySum(aRead, reference, read.getAlignmentStart()-(int)leftmostIndex, MIN_MISMATCH_QSCORE); - - // if this doesn't match perfectly to the reference, let's try to clean it - if ( mismatchScore > 0 ) { - - if ( mismatchScore > 100) { - out.println(read.getReadName() + " has a sum of mismatch quality scores of " + mismatchScore); - - } else { - altReads.add(aRead); - altAlignmentsToTest.add(true); - totalMismatchSum += mismatchScore; - aRead.setMismatchScoreToReference(mismatchScore); - } - } - // otherwise, we can emit it as is - else { - refReads.add(read); - } - } - - // build a map of position -> - Consensus bestConsensus = null; - - // for each alternative consensus to test, align it to the reference and create an alternative consensus - for ( int index = 0; index < altAlignmentsToTest.size(); index++ ) { - if ( altAlignmentsToTest.get(index) ) { - - // do a pairwise alignment against the reference - AlignedRead aRead = altReads.get(index); - SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getReadString()); - int refIdx = swConsensus.getAlignmentStart2wrt1(); - if ( refIdx < 0 ) - continue; - - // create the new consensus - StringBuffer sb = new StringBuffer(); - sb.append(reference.substring(0, refIdx)); - Cigar c = swConsensus.getCigar(); - logger.debug("CIGAR = " + cigarToString(c)); - - int indelCount = 0; - int altIdx = 0; - boolean ok_flag = true; - for ( int i = 0 ; i < c.numCigarElements() ; i++ ) { - CigarElement ce = c.getCigarElement(i); - switch( ce.getOperator() ) { - case D: - indelCount++; - refIdx += ce.getLength(); - break; - case M: - if ( reference.length() < refIdx+ce.getLength() ) - ok_flag = false; - else - sb.append(reference.substring(refIdx, refIdx+ce.getLength())); - refIdx += ce.getLength(); - altIdx += ce.getLength(); - break; - case I: - sb.append(aRead.getReadString().substring(altIdx, altIdx+ce.getLength())); - altIdx += ce.getLength(); - indelCount++; - break; - } - } - // make sure that there is at most only a single indel and it aligns appropriately! - if ( !ok_flag || indelCount > 1 || reference.length() < refIdx ) - continue; - - sb.append(reference.substring(refIdx)); - String altConsensus = sb.toString(); - - // for each imperfect match to the reference, score it against this alternative - Consensus consensus = new Consensus(altConsensus, c, swConsensus.getAlignmentStart2wrt1()); - for ( int j = 0; j < altReads.size(); j++ ) { - AlignedRead toTest = altReads.get(j); - Pair altAlignment = findBestOffset(altConsensus, toTest); - - // the mismatch score is the min of its alignment vs. the reference and vs. the alternate - int myScore = altAlignment.getSecond(); - if ( myScore >= toTest.getMismatchScoreToReference() ) - myScore = toTest.getMismatchScoreToReference(); - // keep track of reads that align better to the alternate consensus - else - consensus.readIndexes.add(new Pair(j, altAlignment.getFirst())); - - logger.debug(aRead.getReadString() + " vs. " + toTest.getReadString() + " => " + myScore + " - " + altAlignment.getFirst()); - consensus.mismatchSum += myScore; - if ( myScore == 0 ) - // we already know that this is its consensus, so don't bother testing it later - altAlignmentsToTest.set(j, false); - } - logger.debug(aRead.getReadString() + " " + consensus.mismatchSum); - if ( bestConsensus == null || bestConsensus.mismatchSum > consensus.mismatchSum) { - bestConsensus = consensus; - logger.debug(aRead.getReadString() + " " + consensus.mismatchSum); - } - } - } - System.out.println("Done with MSA"); -// // if the best alternate consensus has a smaller sum of quality score mismatches (more than the LOD threshold), then clean! -// if ( bestConsensus != null && ((double)(totalMismatchSum - bestConsensus.mismatchSum))/10.0 >= LOD_THRESHOLD ) { -// logger.debug("CLEAN: " + bestConsensus.str ); -// if ( indelOutput != null && bestConsensus.cigar.numCigarElements() > 1 ) { -// StringBuffer str = new StringBuffer(); -// str.append(reads.get(0).getReferenceName()); -// int position = bestConsensus.positionOnReference + bestConsensus.cigar.getCigarElement(0).getLength(); -// str.append(":" + (leftmostIndex + position)); -// CigarElement ce = bestConsensus.cigar.getCigarElement(1); -// str.append("\t" + ce.getLength() + ce.getOperator()); -// str.append("\t" + (((double)(totalMismatchSum - bestConsensus.mismatchSum))/10.0) + "\n"); -// try { -// indelOutput.write(str.toString()); -// indelOutput.flush(); -// } catch (Exception e) {} -// } -// -// // We need to update the mapping quality score of the cleaned reads; -// // however we don't have enough info to use the proper MAQ scoring system. -// // For now, we'll use a heuristic: -// // the mapping quality score is improved by the LOD difference in mismatching -// // bases between the reference and alternate consensus -// int improvement = (totalMismatchSum - bestConsensus.mismatchSum) / 10; -// -// // clean the appropriate reads -// for ( Pair indexPair : bestConsensus.readIndexes ) { -// AlignedRead aRead = altReads.get(indexPair.getFirst()); -// updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.getSecond(), aRead, (int)leftmostIndex); -// aRead.getRead().setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + improvement, 255)); -// aRead.getRead().setAttribute("NM", numMismatches(aRead.getRead(), reference, aRead.getRead().getAlignmentStart()-(int)leftmostIndex)); -// } -// } -// -// // write them out -// for ( SAMRecord rec : refReads ) -// readsToWrite.add(new ComparableSAMRecord(rec)); -// for ( AlignedRead aRec : altReads ) -// readsToWrite.add(new ComparableSAMRecord(aRec.getRead())); - } - - - - // ----------------------------------- PRIVATE IN IntervalCleanerWalker - public static final int MAX_QUAL = 99; - - private static class Mismatch { - AlignedRead aRead; - int offset; - long position; - char mismatchBase; - int qualityScore; - - private Mismatch(AlignedRead aRead, int offset, long position, char mismatchBase, int qualityScore) { - this.aRead = aRead; - this.offset = offset; - this.position = position; - this.mismatchBase = mismatchBase; - this.qualityScore = qualityScore; - } - } - - private static int mismatchQualitySum(AlignedRead aRead, String refSeq, int refIndex) { - return mismatchQualitySum(aRead, refSeq, refIndex, 0); - } - - private static int mismatchQualitySum(AlignedRead aRead, String refSeq, int refIndex, int minMismatchQualityScore) { - List mismatches = getMismatches(aRead, refSeq, refIndex); - int sum = 0; - for(Mismatch mm : mismatches) { - if (mm.qualityScore >= minMismatchQualityScore) { - sum += mm.qualityScore; - } - } - return sum; - - } - - - - - /** - * Returns a list of position - * @param aRead - * @param refSeq - * @param refIndex - * @param minMismatchQualityScore - * @return - */ - private static List getMismatches(AlignedRead aRead, String refSeq, int refIndex) { - List mismatches = new ArrayList(); - - String readSeq = aRead.getReadString(); - String quals = aRead.getBaseQualityString(); - int readIndex = 0; - int sum = 0; - Cigar c = aRead.getCigar(); - for (int i = 0 ; i < c.numCigarElements() ; i++) { - CigarElement ce = c.getCigarElement(i); - switch ( ce.getOperator() ) { - case M: - for (int j = 0 ; j < ce.getLength() ; j++, refIndex++, readIndex++ ) { - // FIXME: what is this case???? - if ( refIndex >= refSeq.length() ) { - sum += MAX_QUAL; - } else { - char readBase = Character.toUpperCase(readSeq.charAt(readIndex)); - char refBase = Character.toUpperCase(refSeq.charAt(refIndex)); - - if ( readBase != refBase ) { - int qual = (int)quals.charAt(readIndex) - 33; - mismatches.add(new Mismatch(aRead, readIndex, refIndex, readBase, qual)); - } - } - } - break; - case I: - readIndex += ce.getLength(); - break; - case D: - refIndex += ce.getLength(); - break; - } - - } - return mismatches; - } - - private Pair findBestOffset(String ref, AlignedRead read) { - int attempts = ref.length() - read.getReadLength() + 1; - int bestScore = mismatchQualitySum(read, ref, 0); - int bestIndex = 0; - for ( int i = 1; i < attempts; i++ ) { - // we can't get better than 0! - if ( bestScore == 0 ) - return new Pair(bestIndex, 0); - int score = mismatchQualitySum(read, ref, i); - if ( score < bestScore ) { - bestScore = score; - bestIndex = i; - } - } - return new Pair(bestIndex, bestScore); - } - - - private class AlignedRead { - SAMRecord read; - int mismatchScoreToReference; - - public AlignedRead(SAMRecord read) { - this.read = read; - mismatchScoreToReference = 0; - } - - public SAMRecord getRead() { - return read; - } - - public String getReadString() { - return read.getReadString(); - } - - public int getReadLength() { - return read.getReadLength(); - } - - public Cigar getCigar() { - return read.getCigar(); - } - - public void setCigar(Cigar cigar) { - read.setCigar(cigar); - } - - public String getBaseQualityString() { - return read.getBaseQualityString(); - } - - public void setMismatchScoreToReference(int score) { - mismatchScoreToReference = score; - } - - public int getMismatchScoreToReference() { - return mismatchScoreToReference; - } - } - - private class Consensus { - public String str; - public int mismatchSum; - public int positionOnReference; - public Cigar cigar; - public ArrayList> readIndexes; - - public Consensus(String str, Cigar cigar, int positionOnReference) { - this.str = str; - this.cigar = cigar; - this.positionOnReference = positionOnReference; - mismatchSum = 0; - readIndexes = new ArrayList>(); - } - } - - public static String cigarToString(Cigar cig) { - StringBuilder b = new StringBuilder(); - - for ( int i = 0 ; i < cig.numCigarElements() ; i++ ) { - char c='?'; - switch ( cig.getCigarElement(i).getOperator() ) { - case M : c = 'M'; break; - case D : c = 'D'; break; - case I : c = 'I'; break; - } - b.append(cig.getCigarElement(i).getLength()); - b.append(c); - } - return b.toString(); - } - -} \ No newline at end of file