basic step0 implementation
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@679 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
57ecb7fbf1
commit
8079acb1d3
|
|
@ -0,0 +1,257 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.HapMapAlleleFrequenciesROD;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
public class SomaticMutationWalker extends LocusWalker<Integer, Integer> {
|
||||||
|
protected static class QualitySums {
|
||||||
|
private int a = 0;
|
||||||
|
private int c = 0;
|
||||||
|
private int g = 0;
|
||||||
|
private int t = 0;
|
||||||
|
|
||||||
|
public int get(final char base) {
|
||||||
|
if (base == 'a' || base == 'A') { return a; }
|
||||||
|
if (base == 'c' || base == 'C') { return c; }
|
||||||
|
if (base == 'g' || base == 'G') { return g; }
|
||||||
|
if (base == 't' || base == 'T') { return t; }
|
||||||
|
throw new RuntimeException("Unknown base: " + base);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrementSum(final char base, final byte qual) {
|
||||||
|
if (base == 'a' || base == 'A') { a += qual; }
|
||||||
|
else if (base == 'c' || base == 'C') { c += qual; }
|
||||||
|
else if (base == 'g' || base == 'G') { g += qual; }
|
||||||
|
else if (base == 't' || base == 'T') { t += qual; }
|
||||||
|
else throw new RuntimeException("Unknown base: " + base);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getOtherQualities(final char base) {
|
||||||
|
int total = a + c + g + t;
|
||||||
|
if (base == 'a' || base == 'A') { return total-a; }
|
||||||
|
else if (base == 'c' || base == 'C') { return total-c; }
|
||||||
|
else if (base == 'g' || base == 'G') { return total-g; }
|
||||||
|
else if (base == 't' || base == 'T') { return total-t; }
|
||||||
|
else throw new RuntimeException("Unknown base: " + base);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void reset() {
|
||||||
|
a = 0; c = 0; g = 0; t = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Argument(fullName = "tumor_sample_name", shortName = "s1", required = true)
|
||||||
|
public String tumorSampleName;
|
||||||
|
|
||||||
|
@Argument(fullName = "normal_sample_name", shortName = "s2", required = true)
|
||||||
|
public String normalSampleName;
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public String walkerType() { return "ByLocus"; }
|
||||||
|
|
||||||
|
// Do we actually want to operate on the context?
|
||||||
|
public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||||
|
return true; // We are keeping all the reads
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int MAX_INSERT_SIZE = 10000;
|
||||||
|
public static int MIN_MUTANT_SUM_PRETEST = 60;
|
||||||
|
public static int MIN_MUTANT_SUM = 100;
|
||||||
|
|
||||||
|
public static double TUMOR_LOD = 6.3d;
|
||||||
|
// public static double TUMOR_LOD = 1.0d;
|
||||||
|
public static double NORMAL_LOD = 2.3d;
|
||||||
|
|
||||||
|
|
||||||
|
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||||
|
Map<Character, Integer> midp = new HashMap<Character, Integer>();
|
||||||
|
midp.put('A', Integer.MAX_VALUE);
|
||||||
|
midp.put('C', Integer.MAX_VALUE);
|
||||||
|
midp.put('G', Integer.MAX_VALUE);
|
||||||
|
midp.put('T', Integer.MAX_VALUE);
|
||||||
|
|
||||||
|
// First, exclude DBSNP Sites
|
||||||
|
for ( ReferenceOrderedDatum datum : tracker.getAllRods() )
|
||||||
|
{
|
||||||
|
if ( datum != null )
|
||||||
|
{
|
||||||
|
if ( datum instanceof rodDbSNP) {
|
||||||
|
// we're at a dbsnp site... move along... nothing to see here...
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
char upRef = Character.toUpperCase(ref);
|
||||||
|
|
||||||
|
// TODO: should we be able to call mutations at bases where ref is N?
|
||||||
|
if (upRef == 'N') { return -1; }
|
||||||
|
|
||||||
|
List<SAMRecord> reads = context.getReads();
|
||||||
|
|
||||||
|
GenotypeLikelihoods tumorGL = new GenotypeLikelihoods();
|
||||||
|
GenotypeLikelihoods normalGL = new GenotypeLikelihoods();
|
||||||
|
|
||||||
|
QualitySums tumorQualitySums = new QualitySums();
|
||||||
|
QualitySums normalQualitySums = new QualitySums();
|
||||||
|
|
||||||
|
StringBuilder tumorBases = new StringBuilder();
|
||||||
|
StringBuilder normalBases = new StringBuilder();
|
||||||
|
for ( int i = 0; i < reads.size(); i++ )
|
||||||
|
{
|
||||||
|
SAMRecord read = reads.get(i);
|
||||||
|
|
||||||
|
if (read.getNotPrimaryAlignmentFlag() ||
|
||||||
|
read.getDuplicateReadFlag() ||
|
||||||
|
read.getReadUnmappedFlag() ||
|
||||||
|
read.getMappingQuality() <= 0
|
||||||
|
|
||||||
|
// || (read.getReadPairedFlag() && (!read.getProperPairFlag() || read.getInferredInsertSize() >= MAX_INSERT_SIZE))
|
||||||
|
) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
String rg = (String) read.getAttribute("RG");
|
||||||
|
String sample = read.getHeader().getReadGroup(rg).getSample();
|
||||||
|
|
||||||
|
int offset = context.getOffsets().get(i);
|
||||||
|
char base = read.getReadString().charAt(offset);
|
||||||
|
byte qual = read.getBaseQualities()[offset];
|
||||||
|
|
||||||
|
|
||||||
|
if (base == 'N' || base == 'n') { continue; }
|
||||||
|
|
||||||
|
if (normalSampleName.equals(sample)) {
|
||||||
|
normalGL.add(ref, base, qual);
|
||||||
|
normalQualitySums.incrementSum(base, qual);
|
||||||
|
normalBases.append(base);
|
||||||
|
} else if (tumorSampleName.equals(sample)) {
|
||||||
|
tumorGL.add(ref, base, qual);
|
||||||
|
tumorQualitySums.incrementSum(base, qual);
|
||||||
|
tumorBases.append(base);
|
||||||
|
|
||||||
|
int midDist = Math.abs((int)(read.getReadLength() / 2) - offset);
|
||||||
|
if (midDist < midp.get(base)) { midp.put(base, midDist); }
|
||||||
|
|
||||||
|
} else {
|
||||||
|
throw new RuntimeException("Unknown Sample Name: " + sample);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// pretest: if the sum of the quality scores for all non-ref alleles < 60, just quit looking now
|
||||||
|
if (tumorQualitySums.getOtherQualities(ref) < MIN_MUTANT_SUM_PRETEST) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Test each of the poosible alternate alleles
|
||||||
|
|
||||||
|
for (char altAllele : new char[]{'A','C','G','T'}) {
|
||||||
|
if (altAllele == upRef) { continue; }
|
||||||
|
|
||||||
|
|
||||||
|
double[] tumorRefAlt = extractRefAlt(tumorGL, ref, altAllele);
|
||||||
|
double tumorLod = Math.log10(tumorRefAlt[1] + tumorRefAlt[2]) - tumorRefAlt[0];
|
||||||
|
|
||||||
|
// (i) either an adjusted quality score sum in the tumor for the mutant base must be
|
||||||
|
// at least 100 or the LOD score for mutant:ref + mutant:mutant vs ref:ref must
|
||||||
|
// be at least 6.3;
|
||||||
|
if (tumorQualitySums.get(altAllele) < MIN_MUTANT_SUM && tumorLod < TUMOR_LOD) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// make sure we've seen at least 1 obs of the alternate allele within 20bp of the read-middle
|
||||||
|
if (midp.get(altAllele) > 20) {
|
||||||
|
out.println("Rejecting due to midpoint check!");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
double[] refAlt = extractRefAlt(normalGL, ref, altAllele);
|
||||||
|
double normalLod = refAlt[0] - Math.log10(refAlt[1] + refAlt[2]);
|
||||||
|
|
||||||
|
// (ii) the quality score sum for the mutant base in the normal must be < 50 and the
|
||||||
|
// LOD score for ref:ref vs mutant:ref + mutant:mutant must be at least 2.3.
|
||||||
|
if ( normalQualitySums.get(altAllele) > 50 || normalLod < NORMAL_LOD ) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if we're still here... we've got a somatic mutation! Output the results
|
||||||
|
// and stop looking for mutants!
|
||||||
|
out.println(context.getLocation() + " " + upRef + " " + altAllele +
|
||||||
|
" TScore:" + tumorLod +
|
||||||
|
" TRefSum: " + tumorQualitySums.get(ref) +
|
||||||
|
" TAltSum: " + tumorQualitySums.get(altAllele) +
|
||||||
|
" NScore:" + normalLod +
|
||||||
|
" NRefSum: " + normalQualitySums.get(ref) +
|
||||||
|
" NAltSum: " + normalQualitySums.get(altAllele) + " " +
|
||||||
|
tumorBases.toString() + " " +
|
||||||
|
normalBases.toString()
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract the LOD comparing ref:ref to ref:alt and alt:alt
|
||||||
|
*/
|
||||||
|
private double[] extractRefAlt(GenotypeLikelihoods gl, char ref, char altAllele) {
|
||||||
|
double refRef = 0;
|
||||||
|
double altRef = 0;
|
||||||
|
double altAlt = 0;
|
||||||
|
for(int j=0; j<10; j++) {
|
||||||
|
String gt = gl.genotypes[j];
|
||||||
|
double likelihood = gl.likelihoods[j];
|
||||||
|
|
||||||
|
// the ref:mutant theory
|
||||||
|
if ( (gt.charAt(0) == ref && gt.charAt(1) == altAllele) ||
|
||||||
|
(gt.charAt(0) == altAllele && gt.charAt(1) == ref) ) {
|
||||||
|
altRef += Math.pow(10, likelihood);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( gt.charAt(0) == altAllele && gt.charAt(1) == altAllele) {
|
||||||
|
altAlt += Math.pow(10, likelihood);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( gt.charAt(0) == ref && gt.charAt(1) == ref) {
|
||||||
|
refRef = likelihood;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return new double[]{refRef, altRef, altAlt};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Given result of map function
|
||||||
|
public Integer reduceInit() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
|
return value + sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onTraversalDone(Integer result) {
|
||||||
|
// out.println(String.format("FINAL - %d %d %d %d", totalSites, tumorCovered, normalCovered, somaticCovered));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue