moving to new external repository

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1261 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kcibul 2009-07-15 20:46:08 +00:00
parent 4a730adfc1
commit e1055bcc4c
5 changed files with 0 additions and 1506 deletions

View File

@ -1,164 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.cancer;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods;
import java.util.Map;
import java.util.HashMap;
import java.util.List;
import net.sf.samtools.SAMRecord;
/**
* Created by IntelliJ IDEA.
* User: kcibul
* Date: Jun 27, 2009
* Time: 3:21:23 PM
* To change this template use File | Settings | File Templates.
*/
public class LOHWalker extends LocusWalker<Integer, Integer> {
@Argument(fullName = "tumor_sample_name", shortName = "s1", required = true, doc="Name of the tumor sample")
public String tumorSampleName;
@Argument(fullName = "normal_sample_name", shortName = "s2", required = true, doc="Name of the normal sample")
public String normalSampleName;
public Integer reduceInit() {
return null; //To change body of implemented methods use File | Settings | File Templates.
}
public Integer reduce(Integer value, Integer sum) {
return null; //To change body of implemented methods use File | Settings | File Templates.
}
@Override
public void initialize() {
out.println("chrom\tposition\tnormal_btnb\tallele1\tallele2\tnormal_allele1_count\tnormal_allele2_count\tnormal_mar\ttumor_allele1_count\ttumor_allele2_count\ttumor_mar");
}
public static int MAX_INSERT_SIZE = 10000;
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
char upRef = Character.toUpperCase(ref);
// TODO: should we be able to call mutations at bases where ref is N?
if (upRef == 'N') { return -1; }
List<SAMRecord> reads = context.getReads();
LocusReadPile tumorReadPile = new LocusReadPile(upRef);
LocusReadPile normalReadPile = new LocusReadPile(upRef);
for ( int i = 0; i < reads.size(); i++ )
{
SAMRecord read = reads.get(i);
if (read.getNotPrimaryAlignmentFlag() ||
read.getDuplicateReadFlag() ||
read.getReadUnmappedFlag() ||
read.getMappingQuality() <= 0
// || (read.getReadPairedFlag() && (!read.getProperPairFlag() || read.getInferredInsertSize() >= MAX_INSERT_SIZE))
) {
continue;
}
String rg = (String) read.getAttribute("RG");
String sample = read.getHeader().getReadGroup(rg).getSample();
int offset = context.getOffsets().get(i);
char base = read.getReadString().charAt(offset);
if (base == 'N' || base == 'n') { continue; }
// TODO: build a pile of reads and offsets, then pass that into a
// constructor for the normalGL class
// that way, we can build a different pile of reads later on and extract the genotype
if (normalSampleName.equals(sample)) {
normalReadPile.add(read, offset);
} else if (tumorSampleName.equals(sample)) {
tumorReadPile.add(read, offset);
} else {
throw new RuntimeException("Unknown Sample Name: " + sample);
}
}
normalReadPile.likelihoods.ApplyPrior(upRef,' ', -1); // apply std priors
double normalBtnb = normalReadPile.likelihoods.LodVsNextBest();
if (normalBtnb < 5.0) { return null; }
String normalGt = normalReadPile.likelihoods.BestGenotype();
char allele1 = normalGt.charAt(0);
char allele2 = normalGt.charAt(1);
if (allele2 == upRef) {
char oldAllele1 = allele1;
allele1 = allele2;
allele2 = oldAllele1;
}
if (allele1 == upRef && allele1 == allele2) {
return null;
}
StringBuilder sb = new StringBuilder();
sb.append(String.format("%s\t%d\t%1.2f\t%s\t%s\t",
context.getContig(),
context.getPosition(),
normalBtnb,
allele1,
allele2));
int normalAllele1Counts = normalReadPile.qualitySums.getCounts(allele1);
int normalAllele2Counts = normalReadPile.qualitySums.getCounts(allele2);
int tumorAllele1Counts = tumorReadPile.qualitySums.getCounts(allele1);
int tumorAllele2Counts = tumorReadPile.qualitySums.getCounts(allele2);
// allele ratio in the normal
double normalAlleleRatio =
(double) normalAllele2Counts / (double) (normalAllele1Counts+normalAllele2Counts) ;
// allele ratio in the tumor
double tumorAlleleRatio =
(double) tumorAllele2Counts / (double) (tumorAllele1Counts+tumorAllele2Counts) ;
sb.append(
String.format("%d\t%d\t%1.2f\t%d\t%d\t%1.2f",
normalAllele1Counts,
normalAllele2Counts,
normalAlleleRatio,
tumorAllele1Counts,
tumorAllele2Counts,
tumorAlleleRatio));
// // do the normal
// for (char allele : new char[]{'A','C','G','T'}) {
// QualitySums qs = normalReadPile.qualitySums;
// sb.append(" ");
// sb.append(qs.getCounts(allele)).append(" ");
//// sb.append(qs.getQualitySum(allele));
// }
//
// // do the tumor
// for (char allele : new char[]{'A','C','G','T'}) {
// QualitySums qs = tumorReadPile.qualitySums;
// sb.append(" ");
// sb.append(qs.getCounts(allele)).append(" ");
//// sb.append(qs.getQualitySum(allele));
// }
out.println(sb);
return null;
}
}

View File

@ -1,191 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.cancer;
import net.sf.samtools.SAMRecord;
import java.util.List;
import java.util.ArrayList;
import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods;
/**
* Created by IntelliJ IDEA.
* User: kcibul
* Date: Jun 27, 2009
* Time: 3:41:59 PM
* To change this template use File | Settings | File Templates.
*/
public class LocusReadPile {
public List<SAMRecord> reads = new ArrayList<SAMRecord>();
public List<Integer> offsets = new ArrayList<Integer>();
public char refBase;
public int minQualityScore;
public GenotypeLikelihoods likelihoods = new GenotypeLikelihoods();
public QualitySums qualitySums = new QualitySums();
public LocusReadPile(char refBase) {
this(refBase, 0);
}
public LocusReadPile(char refBase, int minQualityScore) {
this.refBase = refBase;
this.minQualityScore = minQualityScore;
}
public void add(SAMRecord read, int offset) {
add(read, offset, false);
}
public void add(SAMRecord read, int offset, boolean allowMapq0ForQualSum) {
char base = read.getReadString().charAt(offset);
byte qual = read.getBaseQualities()[offset];
if (base == 'N' || base == 'n') { return; }
if (read.getMappingQuality() > 0) {
reads.add(read);
offsets.add(offset);
likelihoods.add(refBase, base, qual);
}
if (read.getMappingQuality() == 0 && !allowMapq0ForQualSum) { return; }
if (qual > this.minQualityScore) qualitySums.incrementSum(base, qual);
}
public String getLocusBases() {
return getLocusBases(0);
}
public String getLocusBases(int locusOffset) {
StringBuilder sb = new StringBuilder();
for(int i=0; i<reads.size(); i++) {
SAMRecord read = reads.get(i);
int readOffset = offsets.get(i);
int offset = readOffset + locusOffset;
if (offset >= 0 && offset < read.getReadString().length()) {
char base = read.getReadString().charAt(offset);
sb.append(base);
}
}
return sb.toString();
}
public double getAltVsRef(char altAllele) {
double[] tumorRefAlt = extractRefAlt(this.likelihoods, this.refBase, altAllele);
double tumorLod = Math.log10(tumorRefAlt[1] + tumorRefAlt[2]) - tumorRefAlt[0];
return tumorLod;
}
public double getRefVsAlt(char altAllele) {
double[] refAlt = extractRefAlt(this.likelihoods, this.refBase, altAllele);
double normalLod = refAlt[0] - Math.log10(refAlt[1] + refAlt[2]);
return normalLod;
}
/**
* Extract the LOD comparing ref:ref to ref:alt and alt:alt
*/
private double[] extractRefAlt(GenotypeLikelihoods gl, char ref, char altAllele) {
double refRef = 0;
double altRef = 0;
double altAlt = 0;
for(int j=0; j<10; j++) {
String gt = gl.genotypes[j];
double likelihood = gl.likelihoods[j];
// the ref:mutant theory
if ( (gt.charAt(0) == ref && gt.charAt(1) == altAllele) ||
(gt.charAt(0) == altAllele && gt.charAt(1) == ref) ) {
altRef += Math.pow(10, likelihood);
}
if ( gt.charAt(0) == altAllele && gt.charAt(1) == altAllele) {
altAlt += Math.pow(10, likelihood);
}
if ( gt.charAt(0) == ref && gt.charAt(1) == ref) {
refRef = likelihood;
}
}
return new double[]{refRef, altRef, altAlt};
}
private GenotypeLikelihoods getLikelihood(int locusOffset) {
GenotypeLikelihoods likelihoods = new GenotypeLikelihoods();
for(int i=0; i<reads.size(); i++) {
SAMRecord read = reads.get(i);
int readOffset = offsets.get(i);
int offset = readOffset + locusOffset;
if (offset >= 0 && offset < read.getReadString().length()) {
char base = read.getReadString().charAt(offset);
byte qual = read.getBaseQualities()[offset];
likelihoods.add(refBase, base, qual);
}
}
return likelihoods;
}
public double[] getNormalizedProbs(int locusOffset, int qThreshold) {
GenotypeLikelihoods likelihoods = new GenotypeLikelihoods();
// FIXME: gaddy suggested this "correction" which evidently has a name...
//likelihoods.add(refBase, refBase, (byte) 20);
for(int i=0; i<reads.size(); i++) {
SAMRecord read = reads.get(i);
int readOffset = offsets.get(i);
int offset = readOffset + locusOffset;
if (offset >= 0 && offset < read.getReadString().length()) {
char base = read.getReadString().charAt(offset);
byte qual = read.getBaseQualities()[offset];
if (qual >= qThreshold) {
likelihoods.add(refBase, base, qual);
// if (locusOffset == -43) System.out.println("\tUSING " + base + " " + qual);
} else {
// if (locusOffset == -43) System.out.println("\tDropping " + base + " " + qual);
}
}
}
double[] logLikelihood = likelihoods.likelihoods;
double[] nonLogLikelihood = new double[10];
double sum = 0;
for(int i=0; i<10; i++) {
nonLogLikelihood[i] = Math.pow(10, logLikelihood[i]);
sum += nonLogLikelihood[i];
}
double[] normalizedProbs = new double[10];
for(int i=0; i<10; i++) {
normalizedProbs[i] = nonLogLikelihood[i] / sum;
}
//quick sanity check
// sum=0;
// for(int i=0; i<10; i++) {
// sum += normalizedProbs[i];
// }
// System.out.println("normalized probs = " + sum);
return normalizedProbs;
}
}

View File

@ -1,58 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.cancer;
/**
* Created by IntelliJ IDEA.
* User: kcibul
* Date: Jun 27, 2009
* Time: 3:43:29 PM
* To change this template use File | Settings | File Templates.
*/
public class QualitySums {
private int a = 0;
private int c = 0;
private int g = 0;
private int t = 0;
private int aCounts = 0;
private int cCounts = 0;
private int gCounts = 0;
private int tCounts = 0;
public int getQualitySum(final char base) {
if (base == 'a' || base == 'A') { return a; }
if (base == 'c' || base == 'C') { return c; }
if (base == 'g' || base == 'G') { return g; }
if (base == 't' || base == 'T') { return t; }
throw new RuntimeException("Unknown base: " + base);
}
public int getCounts(final char base) {
if (base == 'a' || base == 'A') { return aCounts; }
if (base == 'c' || base == 'C') { return cCounts; }
if (base == 'g' || base == 'G') { return gCounts; }
if (base == 't' || base == 'T') { return tCounts; }
throw new RuntimeException("Unknown base: " + base);
}
public void incrementSum(final char base, final byte qual) {
if (base == 'a' || base == 'A') { a += qual; aCounts++;}
else if (base == 'c' || base == 'C') { c += qual; cCounts++;}
else if (base == 'g' || base == 'G') { g += qual; gCounts++; }
else if (base == 't' || base == 'T') { t += qual; tCounts++; }
else throw new RuntimeException("Unknown base: " + base);
}
public int getOtherQualities(final char base) {
int total = a + c + g + t;
if (base == 'a' || base == 'A') { return total-a; }
else if (base == 'c' || base == 'C') { return total-c; }
else if (base == 'g' || base == 'G') { return total-g; }
else if (base == 't' || base == 'T') { return total-t; }
else throw new RuntimeException("Unknown base: " + base);
}
public void reset() {
a = 0; c = 0; g = 0; t = 0;
aCounts = 0; cCounts = 0; gCounts = 0; tCounts = 0;
}
}

View File

@ -1,167 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.cancer;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.HapMapAlleleFrequenciesROD;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import java.util.List;
import java.util.Formatter;
import static java.lang.Math.log10;
import edu.mit.broad.picard.genotype.DiploidGenotype;
import net.sf.picard.PicardException;
public class SomaticCoverageWalker extends LocusWalker<Integer, Integer> {
@Argument(fullName = "tumor_sample_name", shortName = "s1", doc="tumor sample name", required = true)
public String tumorSampleName;
@Argument(fullName = "normal_sample_name", shortName = "s2", doc="normal sample name", required = true)
public String normalSampleName;
@Argument(fullName = "extended", shortName="ext", doc="extended output", required=false)
public boolean extendedOutput = false;
// --normal_sample_name TCGA-06-0188-10B-01W --tumor_sample_name TCGA-06-0188-01A-01W
public void initialize() {
out.println("track type=wiggle_0 name=SomaticCoverage viewLimits=0:1");
}
public String walkerType() { return "ByLocus"; }
// Do we actually want to operate on the context?
public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context) {
return true; // We are keeping all the reads
}
// Map over the org.broadinstitute.sting.gatk.LocusContext
int MAPPING_QUALITY_THRESHOLD = 1;
int totalSites;
int tumorCovered;
int normalCovered;
int somaticCovered;
long start = 0;
int lastContigIndex = -1;
long lastPosition = -1;
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
if (start ==0) { start = System.currentTimeMillis(); }
List<SAMRecord> reads = context.getReads();
int tumorDepth = 0;
int normalDepth = 0;
StringBuilder readNames = new StringBuilder();
for ( int i = 0; i < reads.size(); i++ )
{
SAMRecord read = reads.get(i);
// TODO: could this be done better elsewhere?
// only process primary, non duplicate alignments
// that come from fully mapped pairs with a mappign quality threshold >= x
if (read.getNotPrimaryAlignmentFlag() ||
read.getDuplicateReadFlag() ||
read.getReadUnmappedFlag() ||
read.getMappingQuality() < MAPPING_QUALITY_THRESHOLD
// ||
// read.getMateUnmappedFlag() ||
) {
continue;
}
String rg = (String) read.getAttribute("RG");
SAMFileHeader header = read.getHeader();
SAMReadGroupRecord readGroup = header.getReadGroup(rg);
if (readGroup == null) {
err.println("WARNING: read " + read.getReadName() + " belongs to read group " + rg + " which isn't in the header!");
continue;
}
String sample = readGroup.getSample();
if (normalSampleName.equals(sample)) {
normalDepth++;
} else if (tumorSampleName.equals(sample)) {
tumorDepth++;
} else {
throw new RuntimeException("Unknown Sample Name: " + sample);
}
readNames.append(read.getReadName()).append("+").append(read.getAlignmentStart()).append("+").append(read.getCigarString());
}
boolean isTumorCovered = tumorDepth >= 14;
boolean isNormalCovered = normalDepth >= 8;
if (isTumorCovered) { tumorCovered++; }
if (isNormalCovered) { normalCovered++; }
if (isTumorCovered && isNormalCovered) {somaticCovered++; }
totalSites++;
// if (totalSites % 100000 == 0) {
// long now = System.currentTimeMillis();
// out.println(String.format("%s:%d %d %d %d %d %dms", context.getContig(), context.getPosition(), totalSites, tumorCovered, normalCovered, somaticCovered, (now-start)));
// }
// if the contig index has changed OR if it's the same contig index but we jumped positions
// output a wiggle header
StringBuilder sb = new StringBuilder();
if (lastContigIndex != context.getLocation().getContigIndex() ||
lastPosition + 1 != context.getPosition()) {
lastContigIndex = context.getLocation().getContigIndex();
sb.append("fixedStep").append(" ")
.append("chrom=").append(context.getContig()).append(" ")
.append("start=").append(context.getPosition()).append(" ")
.append("step=1")
.append("\n");
}
lastPosition = context.getPosition();
if (extendedOutput) {
sb.append(context.getPosition()).append(" ");
}
sb.append((isTumorCovered && isNormalCovered)?"1":"0");
if (extendedOutput) {
sb.append(" ").append(tumorDepth).append(" ").append(normalDepth).append(" ").append(readNames);
}
// sb.append(context.getContig()).append(" ");
// sb.append(context.getPosition()).append(" ");
// sb.append(tumorDepth).append(" ");
// sb.append(normalDepth).append(" ");
// sb.append((isTumorCovered && isNormalCovered)?"Y":"N");
out.println(sb.toString());
return 1;
}
// Given result of map function
public Integer reduceInit() {
return 0;
}
public Integer reduce(Integer value, Integer sum) {
return value + sum;
}
@Override
public void onTraversalDone(Integer result) {
// out.println(String.format("FINAL - %d %d %d %d", totalSites, tumorCovered, normalCovered, somaticCovered));
}
}

View File

@ -1,926 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.cancer;
import net.sf.picard.reference.ReferenceSequence;
import net.sf.picard.PicardException;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.StringUtil;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.walkers.By;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.playground.indels.SWPairwiseAlignment;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import java.io.IOException;
import java.io.File;
import java.io.FileWriter;
import java.io.BufferedWriter;
import java.util.*;
@By(DataSource.REFERENCE)
public class SomaticMutationWalker extends LocusWalker<Integer, Integer> {
protected enum MutantFailureReason {
StrandImbalance,
Misalignment
}
@Argument(fullName = "tumor_sample_name", shortName = "s1", required = true, doc="Name of the tumor sample")
public String tumorSampleName;
@Argument(fullName = "normal_sample_name", shortName = "s2", required = true, doc="Name of the normal sample")
public String normalSampleName;
@Argument(fullName = "tumor_lod", required = false, doc = "LOD threshold for calling tumor variant")
public float TUMOR_LOD_THRESHOLD = 6.3f;
@Argument(fullName = "normal_lod", required = false, doc = "LOD threshold for calling normal non-variant")
public float NORMAL_LOD_THRESHOLD = 2.3f;
@Argument(fullName = "min_mutant_sum", required = false, doc = "threshold for sum of mutant allele quality scores")
public int MIN_MUTANT_SUM = 100;
public float SKEW_LOD_THRESHOLD = 1.5f;
@Argument(fullName = "output_failures", required = false, doc="produce output for failed sites")
public boolean OUTPUT_FAILURES = false;
@Argument(fullName="maf_file", shortName="maf", doc="write out calls in MAF format to this file", required=false)
public String MAF_FILE = null;
public boolean USE_MAPQ0_IN_NORMAL_QSCORE = true;
private FileWriter mafWriter = null;
public void initialize() {
if ( MAF_FILE != null ) {
try {
mafWriter = new FileWriter(new File(MAF_FILE));
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
}
}
public String walkerType() { return "ByLocus"; }
// Do we actually want to operate on the context?
public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context) {
return true; // We are keeping all the reads
}
public static int MAX_INSERT_SIZE = 10000;
public static int MIN_MUTANT_SUM_PRETEST = 60;
public static int MIN_QSCORE = 13;
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
Map<Character, Integer> midp = new HashMap<Character, Integer>();
midp.put('A', Integer.MAX_VALUE);
midp.put('C', Integer.MAX_VALUE);
midp.put('G', Integer.MAX_VALUE);
midp.put('T', Integer.MAX_VALUE);
// First, exclude DBSNP Sites
for ( ReferenceOrderedDatum datum : tracker.getAllRods() )
{
if ( datum != null )
{
if ( datum instanceof rodDbSNP) {
// we're at a dbsnp site... move along... nothing to see here...
return -1;
}
}
}
char upRef = Character.toUpperCase(ref);
// TODO: should we be able to call mutations at bases where ref is N?
if (upRef == 'N') { return -1; }
List<SAMRecord> reads = context.getReads();
LocusReadPile tumorReadPile = new LocusReadPile(upRef,MIN_QSCORE);
LocusReadPile normalReadPile = new LocusReadPile(upRef,MIN_QSCORE);
for ( int i = 0; i < reads.size(); i++ )
{
SAMRecord read = reads.get(i);
if (read.getNotPrimaryAlignmentFlag() ||
read.getDuplicateReadFlag() ||
read.getReadUnmappedFlag()
|| (read.getReadPairedFlag() && (!read.getProperPairFlag() || read.getInferredInsertSize() >= MAX_INSERT_SIZE))
) {
continue;
}
String rg = (String) read.getAttribute("RG");
String sample = read.getHeader().getReadGroup(rg).getSample();
int offset = context.getOffsets().get(i);
char base = read.getReadString().charAt(offset);
if (base == 'N' || base == 'n') { continue; }
// TODO: build a pile of reads and offsets, then pass that into a
// constructor for the normalGL class
// that way, we can build a different pile of reads later on and extract the genotype
if (normalSampleName.equals(sample)) {
normalReadPile.add(read, offset, USE_MAPQ0_IN_NORMAL_QSCORE);
} else if (tumorSampleName.equals(sample)) {
if (read.getMappingQuality() > 0) {
tumorReadPile.add(read, offset);
}
int midDist = Math.abs((int)(read.getReadLength() / 2) - offset);
if (midDist < midp.get(base)) { midp.put(base, midDist); }
} else {
throw new RuntimeException("Unknown Sample Name: " + sample);
}
}
// pretest: if the sum of the quality scores for all non-ref alleles < 60, just quit looking now
if (tumorReadPile.qualitySums.getOtherQualities(upRef) < MIN_MUTANT_SUM_PRETEST) {
return -1;
}
// Test each of the poosible alternate alleles
for (char altAllele : new char[]{'A','C','G','T'}) {
if (altAllele == upRef) { continue; }
// (i) either an adjusted quality score sum in the tumor for the mutant base must be
// at least 100 or the LOD score for mutant:ref + mutant:mutant vs ref:ref must
// be at least 6.3;
int mutantSum = tumorReadPile.qualitySums.getQualitySum(altAllele);
int refSum = tumorReadPile.qualitySums.getQualitySum(upRef);
if (tumorReadPile.getAltVsRef(altAllele) >= TUMOR_LOD_THRESHOLD
// ||
// (mutantSum >= MIN_MUTANT_SUM && (float)mutantSum / (float) refSum >= 0.05f)
) {
// yep -- just fall through... easier to think about this way!
} else {
continue;
}
// (ii) the quality score sum for the mutant base in the normal must be < 50 and the
// LOD score for ref:ref vs mutant:ref + mutant:mutant must be at least 2.3.
double normalLod = normalReadPile.getRefVsAlt(altAllele);
QualitySums normQs = normalReadPile.qualitySums;
if ( (normQs.getCounts(altAllele) >= 2 && normQs.getQualitySum(altAllele) > 20)
|| normalLod < NORMAL_LOD_THRESHOLD) {
continue;
}
// make sure we've seen at least 1 obs of the alternate allele within 20bp of the read-middle
boolean failedMidpointCheck = midp.get(altAllele) > 20;
// if (failedMidpointCheck) {
// out.println("Rejecting due to midpoint check!");
// return 0;
// }
// do a MSA to figure out if the alternate reads comes from a cluster of reads with more
// than one alternate allele (hints at being an alignment artifact)
ReferenceSequence refSeq;
// TODO: don't hardcode. Make this the max read length in the pile
long refStart = context.getLocation().getStart() - 150;
//tumorReadPile.offsets.get(0);
long refStop = context.getLocation().getStart() + 150;
try {
IndexedFastaSequenceFile seqFile = new IndexedFastaSequenceFile(getToolkit().getArguments().referenceFile);
refSeq = seqFile.getSubsequenceAt(context.getContig(),refStart, refStop);
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
// System.out.println("TESTING " + context.getContig() + ":" + context.getPosition());
LocusReadPile t2 = filterHighMismatchScoreReads(tumorReadPile, StringUtil.bytesToString(refSeq.getBases()), refStart);
// TEST the LOD score again!
// TODO: extract this since we'll do it multiple times...
// (i) either an adjusted quality score sum in the tumor for the mutant base must be
// at least 100 or the LOD score for mutant:ref + mutant:mutant vs ref:ref must
// be at least 6.3;
double tumorLod = t2.getAltVsRef(altAllele);
if (t2.qualitySums.getQualitySum(altAllele) < MIN_MUTANT_SUM && tumorLod < TUMOR_LOD_THRESHOLD) {
if (OUTPUT_FAILURES) {
String msg = "FAILED due to MAX MM QSCORE TEST." +
" LOD was " + tumorReadPile.getAltVsRef(altAllele) +
" LOD is now " + t2.getAltVsRef(altAllele) +
" QSUM was " + tumorReadPile.qualitySums.getQualitySum(altAllele) +
" QSUM is now " + t2.qualitySums.getQualitySum(altAllele);
out.println(
context.getContig() + "\t" +
context.getPosition() + "\t" +
context.getPosition() + "\t"
+ msg.replace(' ','_')
);
}
continue;
}
// // TODO: using the original pile here since often these artifacts will be supported
// // by those reads that get thrown out! Maybe that means we don't need the noise filter...
// boolean shouldDisalign =
// disaligner(context.getPosition(), tumorReadPile, StringUtil.bytesToString(refSeq.getBases()), refStart);
Pair<MutantFailureReason,Double> failureReason =
readPileSkew(t2, upRef, altAllele, StringUtil.bytesToString(refSeq.getBases()), refStart);
// time for the big guns... the realignment!
// try {
// File f = File.createTempFile("/tmp","RANDOM");
// BufferedWriter writer = new BufferedWriter(new FileWriter(f));
// for(SAMRecord read : t2.reads) {
// writer.write(convertToFastq(read));
// }
// out.println("Finished writing realignment to " + f.getCanonicalPath());
// writer.flush();
// writer.close();
// } catch (IOException ioe) {
// throw new PicardException("Error performing realignment", ioe);
// }
if (failureReason.first != null) {
if (OUTPUT_FAILURES) {
String msg = "FAILED due to " + failureReason.first.name() +
" mutAllele " + altAllele + "_" +
" maxSkewLod " + failureReason.second
;
out.println(
context.getContig() + "\t" +
context.getPosition() + "\t" +
context.getPosition() + "\t" +
msg.replace(' ','_')
);
}
continue;
}
// if we're still here... we've got a somatic mutation! Output the results
// and stop looking for mutants!
writeMafLine(
"36\t" + //build
context.getContig() + "\t" +
context.getPosition() + "\t" +
context.getPosition() + "\t" +
upRef + "\t" +
upRef + "\t" +
altAllele + "\t" +
tumorSampleName + "\t" +
normalSampleName);
String msg =
(failedMidpointCheck?"__FAILED-MPCHECK":"") +
"TScore:" + tumorLod +
"__TRefSum: " + tumorReadPile.qualitySums.getQualitySum(upRef) +
"__TAltSum: " + tumorReadPile.qualitySums.getQualitySum(altAllele) +
"__NScore:" + normalLod +
"__NRefSum: " + normalReadPile.qualitySums.getQualitySum(upRef) +
"__NAltSum: " + normalReadPile.qualitySums.getQualitySum(altAllele) +
"__maxSkewLod_" + failureReason.second + "_" +
"__MIDP: " + midp.get(altAllele);
out.println(
context.getContig() + "\t" +
context.getPosition() + "\t" +
context.getPosition() + "\t"
+ msg.replace(' ','_')
);
return 1;
}
return -1;
}
private void writeMafLine(String s) {
if (mafWriter != null) {
try {
mafWriter.write(s);
mafWriter.write("\n");
mafWriter.flush();
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
}
}
private String convertToFastq(SAMRecord read) {
// output FASTQ @<seqname>\n<seq>\n+[<seqname>]\n<qual>\n
StringBuilder sb = new StringBuilder();
sb.append("@").append(read.getReadName()).append("\n");
sb.append(read.getReadString()).append("\n");
sb.append("+").append("\n");
sb.append(read.getBaseQualityString()).append("\n");
return sb.toString();
}
// Given result of map function
public Integer reduceInit() {
return 0;
}
public Integer reduce(Integer value, Integer sum) {
return value + sum;
}
@Override
public void onTraversalDone(Integer result) {
if (mafWriter != null) {
try {
mafWriter.flush();
mafWriter.close();
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
}
}
private Pair<MutantFailureReason, Double> readPileSkew(LocusReadPile pile, char refAllele, char mutantAllele, String reference, long leftmostIndex) {
// first split into two piles, those supporting the mutant and those not
LocusReadPile mutantPile = new LocusReadPile(mutantAllele);
LocusReadPile refPile = new LocusReadPile(mutantAllele);
for (int i=0; i<pile.reads.size(); i++) {
SAMRecord read = pile.reads.get(i);
int offset = pile.offsets.get(i);
if (read.getReadString().charAt(offset) == mutantAllele) {
mutantPile.add(read, offset);
} else if (read.getReadString().charAt(offset) == refAllele) {
refPile.add(read, offset);
} else {
// just drop the read...
}
}
// now we can ask questons abot the two piles.
// e.g. is the mutant allele seen in both strands?
boolean seenOnPositive = false;
boolean seenOnNegative = false;
for(SAMRecord read : mutantPile.reads) {
if (read.getReadNegativeStrandFlag()) { seenOnNegative = true; } else { seenOnPositive = true; }
}
if (!seenOnPositive || !seenOnNegative) {
return new Pair<MutantFailureReason, Double>(MutantFailureReason.StrandImbalance, 0d);
}
//chr17:4979257
// are the two alleles distributed differently
//fixme: this seems to degrade as you lose reads?
//fixme: what should the threshold be here?
SortedMap<Integer, Double> skewLodOffsets = new TreeMap<Integer, Double>();
double maxSkewLod = 0;
//fixme: calculate this range properly
int MAX_OFFSET_DISTANCE = 60;
for(int offset=-1 * MAX_OFFSET_DISTANCE; offset<MAX_OFFSET_DISTANCE; offset++) {
// allow for doubletons
if (offset >= -1 && offset <= 1 ) { continue; }
int SKEW_QSCORE_THRESHOLD = 10;
double[] mutantNormProbs = mutantPile.getNormalizedProbs(offset, SKEW_QSCORE_THRESHOLD);
double[] otherNormProbs = refPile.getNormalizedProbs(offset, SKEW_QSCORE_THRESHOLD);
double J = 0;
for(int i=0; i<10; i++) {
J += mutantNormProbs[i] * otherNormProbs[i];
}
double skewLod = Math.log10( (1-J) / J);
// fixme: is it legit to require that we see it in at least 2 reads each?
int mutantReadCounts = mutantPile.getLocusBases(offset).length();
int otherReadCounts = refPile.getLocusBases(offset).length();
if (mutantReadCounts >= 2 && otherReadCounts >= 2) {
if (skewLod > maxSkewLod) { maxSkewLod = skewLod; }
if (skewLod > SKEW_LOD_THRESHOLD) {
// System.out.println( "Offset: " + offset +
// " mutant_reads: " + mutantReadCounts +
// " mutant bases: " + mutantPile.getLocusBases(offset) +
// " other_reads: " + otherReadCounts +
// " other bases: " + refPile.getLocusBases(offset) +
// " skewLod:" + skewLod );
skewLodOffsets.put(offset, skewLod);
}
}
}
if (skewLodOffsets.size() > 0) {
return new Pair<MutantFailureReason, Double>(MutantFailureReason.Misalignment, maxSkewLod);
} else {
return new Pair<MutantFailureReason, Double>(null, maxSkewLod);
// System.out.println("MAX SKEW: " + maxSkewLod);
}
}
int MAX_READ_MISMATCH_QUALITY_SCORE_SUM = 100;
private LocusReadPile filterHighMismatchScoreReads(LocusReadPile pile, String reference, long leftmostIndex) {
LocusReadPile newPile = new LocusReadPile(pile.refBase);
for ( int i=0; i< pile.reads.size(); i++) {
SAMRecord read = pile.reads.get(i);
int offset = pile.offsets.get(i);
AlignedRead aRead = new AlignedRead(read);
int mismatchScore = mismatchQualitySum(aRead, reference, read.getAlignmentStart()-(int)leftmostIndex);
if (mismatchScore <= MAX_READ_MISMATCH_QUALITY_SCORE_SUM) {
newPile.add(read, offset);
} else {
char base = read.getReadString().charAt(offset);
// TODO: use a logger to output this information for debugging/Picard folks
//out.println("MMQSUM: Filtering out " + read.getReadName() + " with locus base " + base + " and ref of " + pile.refBase);
}
}
return newPile;
}
// also pass in the variant position
private boolean disaligner(long mutationLocus, LocusReadPile pile, String reference, long leftmostIndex) {
int VARIANT_SITE_MIN_QSCORE = 40; // ie 2x20
// build a 2d array of read # vs position
// Map<Long, Map<Character, List<Mismatch>>> matrix = new TreeMap<Long, Map<Character, List<Mismatch>>>();
int mutationLocusOffset = (int)(mutationLocus - leftmostIndex);
int[][] mmm = new int[pile.reads.size()][reference.length()];
// for every read which has an offset
for ( int i=0; i< pile.reads.size(); i++) {
SAMRecord read = pile.reads.get(i);
int offset = pile.offsets.get(i);
AlignedRead aRead = new AlignedRead(read);
List<Mismatch> mismatches = getMismatches(aRead, reference, read.getAlignmentStart()-(int)leftmostIndex);
// now fill in with -1 where we had reference
// TODO: deal with other CIGAR strings?
for(long j=read.getAlignmentStart(); j <= read.getAlignmentEnd(); j++) {
mmm[i][(int)( j - leftmostIndex )] = -1;
}
for(Mismatch mm : mismatches) {
if (mm.mismatchBase == 'N') { continue; }
mmm[i][(int)mm.position] = mm.qualityScore;
}
}
// traverse in column-order
int[] altSums = new int[mmm[0].length];
for(int j=0; j<altSums.length; j++) {
for(int i=0; i<mmm.length; i++) {
if (mmm[i][j] > 0) altSums[j] += mmm[i][j];
}
}
// now check that we have at least 3 alignment artifacts, and that
// they are at least 5bp from eachother
// FIXME: what should these numbers be?
int varSites = 0;
int lastVarSite = -1;
for(int j=0; j<altSums.length; j++) {
if (altSums[j] >= VARIANT_SITE_MIN_QSCORE) {
if (lastVarSite > -1) {
if (j - lastVarSite > 5) {
varSites++;
lastVarSite = j;
}
}
}
}
if (varSites < 3) { return false; }
// now go through each read which has the alternate allele at the
// current site (the potential somatic mutation) and see what fraction of
// the "covered" variant bases were called as variant
Map<Integer, Float> fractionVariantSites = new HashMap<Integer, Float>();
for(int i=0; i<mmm.length; i++) {
// does this read have the variant allele?
if (mmm[i][mutationLocusOffset] > 0) {
float calledSites = 0;
float variantSites = 0;
for(int j=0; j<altSums.length; j++) {
if (altSums[j] > VARIANT_SITE_MIN_QSCORE && j != mutationLocusOffset) {
if (mmm[i][j] != 0) { calledSites++; }
if (mmm[i][j] > 0) { variantSites++; }
}
}
fractionVariantSites.put(i, variantSites / calledSites);
}
}
// FIXME: what's the right statistic to use here?
if (getMedian(fractionVariantSites.values()) >= 0.75) {
return true;
}
return false;
}
protected float getMedian(Collection<Float> in) {
float[] data = new float[in.size()];
int i=0;
for(float f : in) { data[i++] = f; }
Arrays.sort(data);
int middle = data.length/2; // subscript of middle element
if (data.length%2 == 1) {
return data[middle];
} else {
// Even number -- return average of middle two
// Must cast the numbers to double before dividing.
return (data[middle-1] + data[middle]) / 2.0f;
}
}
private void clean(LocusReadPile pile, String reference, long leftmostIndex) {
// LocusReadPile newPile = new LocusReadPile(pile.refBase);
ArrayList<SAMRecord> refReads = new ArrayList<SAMRecord>();
ArrayList<AlignedRead> altReads = new ArrayList<AlignedRead>();
ArrayList<Boolean> altAlignmentsToTest = new ArrayList<Boolean>();
int totalMismatchSum = 0;
int MIN_MISMATCH_QSCORE = 10;
// decide which reads potentially need to be cleaned
for ( int i=0; i< pile.reads.size(); i++) {
SAMRecord read = pile.reads.get(i);
int offset = pile.offsets.get(i);
AlignedRead aRead = new AlignedRead(read);
int mismatchScore = mismatchQualitySum(aRead, reference, read.getAlignmentStart()-(int)leftmostIndex, MIN_MISMATCH_QSCORE);
// if this doesn't match perfectly to the reference, let's try to clean it
if ( mismatchScore > 0 ) {
if ( mismatchScore > 100) {
out.println(read.getReadName() + " has a sum of mismatch quality scores of " + mismatchScore);
} else {
altReads.add(aRead);
altAlignmentsToTest.add(true);
totalMismatchSum += mismatchScore;
aRead.setMismatchScoreToReference(mismatchScore);
}
}
// otherwise, we can emit it as is
else {
refReads.add(read);
}
}
// build a map of position ->
Consensus bestConsensus = null;
// for each alternative consensus to test, align it to the reference and create an alternative consensus
for ( int index = 0; index < altAlignmentsToTest.size(); index++ ) {
if ( altAlignmentsToTest.get(index) ) {
// do a pairwise alignment against the reference
AlignedRead aRead = altReads.get(index);
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getReadString());
int refIdx = swConsensus.getAlignmentStart2wrt1();
if ( refIdx < 0 )
continue;
// create the new consensus
StringBuffer sb = new StringBuffer();
sb.append(reference.substring(0, refIdx));
Cigar c = swConsensus.getCigar();
logger.debug("CIGAR = " + cigarToString(c));
int indelCount = 0;
int altIdx = 0;
boolean ok_flag = true;
for ( int i = 0 ; i < c.numCigarElements() ; i++ ) {
CigarElement ce = c.getCigarElement(i);
switch( ce.getOperator() ) {
case D:
indelCount++;
refIdx += ce.getLength();
break;
case M:
if ( reference.length() < refIdx+ce.getLength() )
ok_flag = false;
else
sb.append(reference.substring(refIdx, refIdx+ce.getLength()));
refIdx += ce.getLength();
altIdx += ce.getLength();
break;
case I:
sb.append(aRead.getReadString().substring(altIdx, altIdx+ce.getLength()));
altIdx += ce.getLength();
indelCount++;
break;
}
}
// make sure that there is at most only a single indel and it aligns appropriately!
if ( !ok_flag || indelCount > 1 || reference.length() < refIdx )
continue;
sb.append(reference.substring(refIdx));
String altConsensus = sb.toString();
// for each imperfect match to the reference, score it against this alternative
Consensus consensus = new Consensus(altConsensus, c, swConsensus.getAlignmentStart2wrt1());
for ( int j = 0; j < altReads.size(); j++ ) {
AlignedRead toTest = altReads.get(j);
Pair<Integer, Integer> altAlignment = findBestOffset(altConsensus, toTest);
// the mismatch score is the min of its alignment vs. the reference and vs. the alternate
int myScore = altAlignment.getSecond();
if ( myScore >= toTest.getMismatchScoreToReference() )
myScore = toTest.getMismatchScoreToReference();
// keep track of reads that align better to the alternate consensus
else
consensus.readIndexes.add(new Pair<Integer, Integer>(j, altAlignment.getFirst()));
logger.debug(aRead.getReadString() + " vs. " + toTest.getReadString() + " => " + myScore + " - " + altAlignment.getFirst());
consensus.mismatchSum += myScore;
if ( myScore == 0 )
// we already know that this is its consensus, so don't bother testing it later
altAlignmentsToTest.set(j, false);
}
logger.debug(aRead.getReadString() + " " + consensus.mismatchSum);
if ( bestConsensus == null || bestConsensus.mismatchSum > consensus.mismatchSum) {
bestConsensus = consensus;
logger.debug(aRead.getReadString() + " " + consensus.mismatchSum);
}
}
}
System.out.println("Done with MSA");
// // if the best alternate consensus has a smaller sum of quality score mismatches (more than the LOD threshold), then clean!
// if ( bestConsensus != null && ((double)(totalMismatchSum - bestConsensus.mismatchSum))/10.0 >= LOD_THRESHOLD ) {
// logger.debug("CLEAN: " + bestConsensus.str );
// if ( indelOutput != null && bestConsensus.cigar.numCigarElements() > 1 ) {
// StringBuffer str = new StringBuffer();
// str.append(reads.get(0).getReferenceName());
// int position = bestConsensus.positionOnReference + bestConsensus.cigar.getCigarElement(0).getLength();
// str.append(":" + (leftmostIndex + position));
// CigarElement ce = bestConsensus.cigar.getCigarElement(1);
// str.append("\t" + ce.getLength() + ce.getOperator());
// str.append("\t" + (((double)(totalMismatchSum - bestConsensus.mismatchSum))/10.0) + "\n");
// try {
// indelOutput.write(str.toString());
// indelOutput.flush();
// } catch (Exception e) {}
// }
//
// // We need to update the mapping quality score of the cleaned reads;
// // however we don't have enough info to use the proper MAQ scoring system.
// // For now, we'll use a heuristic:
// // the mapping quality score is improved by the LOD difference in mismatching
// // bases between the reference and alternate consensus
// int improvement = (totalMismatchSum - bestConsensus.mismatchSum) / 10;
//
// // clean the appropriate reads
// for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes ) {
// AlignedRead aRead = altReads.get(indexPair.getFirst());
// updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.getSecond(), aRead, (int)leftmostIndex);
// aRead.getRead().setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + improvement, 255));
// aRead.getRead().setAttribute("NM", numMismatches(aRead.getRead(), reference, aRead.getRead().getAlignmentStart()-(int)leftmostIndex));
// }
// }
//
// // write them out
// for ( SAMRecord rec : refReads )
// readsToWrite.add(new ComparableSAMRecord(rec));
// for ( AlignedRead aRec : altReads )
// readsToWrite.add(new ComparableSAMRecord(aRec.getRead()));
}
// ----------------------------------- PRIVATE IN IntervalCleanerWalker
public static final int MAX_QUAL = 99;
private static class Mismatch {
AlignedRead aRead;
int offset;
long position;
char mismatchBase;
int qualityScore;
private Mismatch(AlignedRead aRead, int offset, long position, char mismatchBase, int qualityScore) {
this.aRead = aRead;
this.offset = offset;
this.position = position;
this.mismatchBase = mismatchBase;
this.qualityScore = qualityScore;
}
}
private static int mismatchQualitySum(AlignedRead aRead, String refSeq, int refIndex) {
return mismatchQualitySum(aRead, refSeq, refIndex, 0);
}
private static int mismatchQualitySum(AlignedRead aRead, String refSeq, int refIndex, int minMismatchQualityScore) {
List<Mismatch> mismatches = getMismatches(aRead, refSeq, refIndex);
int sum = 0;
for(Mismatch mm : mismatches) {
if (mm.qualityScore >= minMismatchQualityScore) {
sum += mm.qualityScore;
}
}
return sum;
}
/**
* Returns a list of position
* @param aRead
* @param refSeq
* @param refIndex
* @param minMismatchQualityScore
* @return
*/
private static List<Mismatch> getMismatches(AlignedRead aRead, String refSeq, int refIndex) {
List<Mismatch> mismatches = new ArrayList<Mismatch>();
String readSeq = aRead.getReadString();
String quals = aRead.getBaseQualityString();
int readIndex = 0;
int sum = 0;
Cigar c = aRead.getCigar();
for (int i = 0 ; i < c.numCigarElements() ; i++) {
CigarElement ce = c.getCigarElement(i);
switch ( ce.getOperator() ) {
case M:
for (int j = 0 ; j < ce.getLength() ; j++, refIndex++, readIndex++ ) {
// FIXME: what is this case????
if ( refIndex >= refSeq.length() ) {
sum += MAX_QUAL;
} else {
char readBase = Character.toUpperCase(readSeq.charAt(readIndex));
char refBase = Character.toUpperCase(refSeq.charAt(refIndex));
if ( readBase != refBase ) {
int qual = (int)quals.charAt(readIndex) - 33;
mismatches.add(new Mismatch(aRead, readIndex, refIndex, readBase, qual));
}
}
}
break;
case I:
readIndex += ce.getLength();
break;
case D:
refIndex += ce.getLength();
break;
}
}
return mismatches;
}
private Pair<Integer, Integer> findBestOffset(String ref, AlignedRead read) {
int attempts = ref.length() - read.getReadLength() + 1;
int bestScore = mismatchQualitySum(read, ref, 0);
int bestIndex = 0;
for ( int i = 1; i < attempts; i++ ) {
// we can't get better than 0!
if ( bestScore == 0 )
return new Pair<Integer, Integer>(bestIndex, 0);
int score = mismatchQualitySum(read, ref, i);
if ( score < bestScore ) {
bestScore = score;
bestIndex = i;
}
}
return new Pair<Integer, Integer>(bestIndex, bestScore);
}
private class AlignedRead {
SAMRecord read;
int mismatchScoreToReference;
public AlignedRead(SAMRecord read) {
this.read = read;
mismatchScoreToReference = 0;
}
public SAMRecord getRead() {
return read;
}
public String getReadString() {
return read.getReadString();
}
public int getReadLength() {
return read.getReadLength();
}
public Cigar getCigar() {
return read.getCigar();
}
public void setCigar(Cigar cigar) {
read.setCigar(cigar);
}
public String getBaseQualityString() {
return read.getBaseQualityString();
}
public void setMismatchScoreToReference(int score) {
mismatchScoreToReference = score;
}
public int getMismatchScoreToReference() {
return mismatchScoreToReference;
}
}
private class Consensus {
public String str;
public int mismatchSum;
public int positionOnReference;
public Cigar cigar;
public ArrayList<Pair<Integer, Integer>> readIndexes;
public Consensus(String str, Cigar cigar, int positionOnReference) {
this.str = str;
this.cigar = cigar;
this.positionOnReference = positionOnReference;
mismatchSum = 0;
readIndexes = new ArrayList<Pair<Integer, Integer>>();
}
}
public static String cigarToString(Cigar cig) {
StringBuilder b = new StringBuilder();
for ( int i = 0 ; i < cig.numCigarElements() ; i++ ) {
char c='?';
switch ( cig.getCigarElement(i).getOperator() ) {
case M : c = 'M'; break;
case D : c = 'D'; break;
case I : c = 'I'; break;
}
b.append(cig.getCigarElement(i).getLength());
b.append(c);
}
return b.toString();
}
}