Got Kris's permission to delete these walkers.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2200 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3180fffd43
commit
e581cceab6
|
|
@ -1,123 +0,0 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class Pilot3CoverageWalker extends LocusWalker<Integer, Integer> {
|
||||
|
||||
@Argument(fullName = "extended", shortName="ext", doc="extended output", required=false)
|
||||
public boolean extendedOutput = false;
|
||||
|
||||
@Argument(fullName="min_mapq", shortName="mmq", required=false, doc="Minimum mapping quality of reads to consider") public Integer MIN_MAPQ = 1;
|
||||
|
||||
|
||||
public void initialize() {
|
||||
out.println("track type=wiggle_0 name=Pilot3Coverage viewLimits=0:1");
|
||||
}
|
||||
|
||||
public String walkerType() { return "ByLocus"; }
|
||||
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
return true; // We are keeping all the reads
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext
|
||||
int MAPPING_QUALITY_THRESHOLD = 1;
|
||||
int totalSites;
|
||||
int tumorCovered;
|
||||
int normalCovered;
|
||||
int somaticCovered;
|
||||
long start = 0;
|
||||
|
||||
int lastContigIndex = -1;
|
||||
long lastPosition = -1;
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (start ==0) { start = System.currentTimeMillis(); }
|
||||
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
int totalDepth = 0;
|
||||
int positiveStrandDepth = 0;
|
||||
int negativeStrandDepth = 0;
|
||||
for ( int i = 0; i < reads.size(); i++ )
|
||||
{
|
||||
SAMRecord read = reads.get(i);
|
||||
|
||||
// TODO: could this be done better elsewhere?
|
||||
// only process primary, non duplicate alignments
|
||||
// that come from fully mapped pairs with a mappign quality threshold >= x
|
||||
if (read.getNotPrimaryAlignmentFlag() ||
|
||||
read.getDuplicateReadFlag() ||
|
||||
read.getReadUnmappedFlag() ||
|
||||
read.getMappingQuality() < MIN_MAPQ
|
||||
// ||
|
||||
// read.getMateUnmappedFlag() ||
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
|
||||
totalDepth++;
|
||||
if (read.getReadNegativeStrandFlag()) {
|
||||
negativeStrandDepth++;
|
||||
} else {
|
||||
positiveStrandDepth++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
// if the contig index has changed OR if it's the same contig index but we jumped positions
|
||||
// output a wiggle header
|
||||
StringBuilder sb = new StringBuilder();
|
||||
if (lastContigIndex != context.getLocation().getContigIndex() ||
|
||||
lastPosition + 1 != context.getPosition()) {
|
||||
lastContigIndex = context.getLocation().getContigIndex();
|
||||
sb.append("fixedStep").append(" ")
|
||||
.append("chrom=").append(context.getContig()).append(" ")
|
||||
.append("start=").append(context.getPosition()).append(" ")
|
||||
.append("step=1")
|
||||
.append("\n");
|
||||
}
|
||||
lastPosition = context.getPosition();
|
||||
|
||||
if (extendedOutput) {
|
||||
sb.append(context.getPosition()).append(" ");
|
||||
sb.append(totalDepth).append(" ");
|
||||
sb.append(positiveStrandDepth).append(" ");
|
||||
sb.append(negativeStrandDepth).append(" ");
|
||||
}
|
||||
|
||||
boolean siteCovered = (totalDepth >= 10 && positiveStrandDepth >= 2 && negativeStrandDepth >= 2);
|
||||
sb.append((siteCovered)?"1":"0");
|
||||
|
||||
out.println(sb.toString());
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Given result of map function
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return value + sum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(Integer result) {
|
||||
// out.println(String.format("FINAL - %d %d %d %d", totalSites, tumorCovered, normalCovered, somaticCovered));
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -1,423 +0,0 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||
|
||||
import edu.mit.broad.picard.genotype.DiploidGenotype;
|
||||
import net.sf.picard.PicardException;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.HapMapAlleleFrequenciesROD;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
||||
import static java.lang.Math.log10;
|
||||
import java.util.Arrays;
|
||||
import java.util.Formatter;
|
||||
import java.util.List;
|
||||
|
||||
public class PopPriorWalker extends LocusWalker<Integer, Integer> {
|
||||
|
||||
@Argument(fullName = "quality_score_cutoff", doc="quality score cutoff", required = false) public Byte qualityScoreCutoff;
|
||||
|
||||
public PopPriorWalker() {
|
||||
}
|
||||
|
||||
public void initialize() {
|
||||
}
|
||||
|
||||
public String walkerType() { return "ByLocus"; }
|
||||
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
return true; // We are keeping all the reads
|
||||
}
|
||||
|
||||
protected class OldAndBustedGenotypeLikelihoods
|
||||
{
|
||||
|
||||
public double[] likelihoods;
|
||||
public String[] genotypes;
|
||||
|
||||
OldAndBustedGenotypeLikelihoods()
|
||||
{
|
||||
likelihoods = new double[10];
|
||||
genotypes = new String[10];
|
||||
|
||||
genotypes[0] = "AA";
|
||||
genotypes[1] = "AC";
|
||||
genotypes[2] = "AG";
|
||||
genotypes[3] = "AT";
|
||||
genotypes[4] = "CC";
|
||||
genotypes[5] = "CG";
|
||||
genotypes[6] = "CT";
|
||||
genotypes[7] = "GG";
|
||||
genotypes[8] = "GT";
|
||||
genotypes[9] = "TT";
|
||||
}
|
||||
|
||||
void add(char ref, char read, byte qual)
|
||||
{
|
||||
double p_error = Math.pow(10.0, (double)qual / -10);
|
||||
for (int i = 0; i < genotypes.length; i++)
|
||||
{
|
||||
likelihoods[i] += AlleleLikelihood(ref, read, genotypes[i], p_error);
|
||||
}
|
||||
}
|
||||
|
||||
double AlleleLikelihood(char ref, char read, String genotype, double p_error)
|
||||
{
|
||||
char h1 = genotype.charAt(0);
|
||||
char h2 = genotype.charAt(1);
|
||||
|
||||
double p_base;
|
||||
|
||||
if ((h1 == h2) && (h1 == read)) { p_base = Math.log10(1-p_error); }
|
||||
else if ((h1 != h2) && (h1 == read) || (h2 == read)) { p_base = Math.log10(0.5 - (p_error/2.0)); }
|
||||
else { p_base = Math.log10(p_error); }
|
||||
|
||||
return p_base;
|
||||
}
|
||||
|
||||
// TODO: horrible horrible idea... but it's ok for right now. should really
|
||||
// implement this more cleanly so you can get sorted genotypes without losing the
|
||||
// ability to call add
|
||||
public void doSort() {
|
||||
Integer[] permutation = Utils.SortPermutation(likelihoods);
|
||||
String[] sorted_genotypes = Utils.PermuteArray(genotypes, permutation);
|
||||
double[] sorted_likelihoods = Utils.PermuteArray(likelihoods, permutation);
|
||||
|
||||
this.genotypes = sorted_genotypes;
|
||||
this.likelihoods = sorted_likelihoods;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
char upRef = Character.toUpperCase(ref.getBase());
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
List<Integer> offsets = context.getOffsets();
|
||||
|
||||
// Look up hapmap and dbsnp priors
|
||||
String rodString = "";
|
||||
|
||||
rodDbSNP dbsnpInfo = null;
|
||||
HapMapAlleleFrequenciesROD hapmap = null;
|
||||
|
||||
for ( ReferenceOrderedDatum datum : tracker.getAllRods() )
|
||||
{
|
||||
if ( datum != null )
|
||||
{
|
||||
if ( datum instanceof rodDbSNP) {
|
||||
dbsnpInfo = (rodDbSNP)datum;
|
||||
|
||||
} else if (datum instanceof HapMapAlleleFrequenciesROD) {
|
||||
hapmap = (HapMapAlleleFrequenciesROD) datum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Accumulate genotype likelihoods
|
||||
int aCount = 0;
|
||||
int cCount = 0;
|
||||
int gCount = 0;
|
||||
int tCount = 0;
|
||||
OldAndBustedGenotypeLikelihoods glAll = new OldAndBustedGenotypeLikelihoods();
|
||||
OldAndBustedGenotypeLikelihoods glForward = new OldAndBustedGenotypeLikelihoods();
|
||||
OldAndBustedGenotypeLikelihoods glReverse = new OldAndBustedGenotypeLikelihoods();
|
||||
for ( int i = 0; i < reads.size(); i++ )
|
||||
{
|
||||
SAMRecord read = reads.get(i);
|
||||
|
||||
// TODO: could this be done better elsewhere?
|
||||
if (read.getNotPrimaryAlignmentFlag() || read.getDuplicateReadFlag() || read.getReadUnmappedFlag()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int offset = offsets.get(i);
|
||||
char base = (char)read.getReadBases()[offset];
|
||||
byte qual = read.getBaseQualities()[offset];
|
||||
|
||||
if (qual == 0) { continue; }
|
||||
|
||||
if (qualityScoreCutoff != null && qual < qualityScoreCutoff) { continue; }
|
||||
|
||||
if (base == 'A') { aCount++; }
|
||||
if (base == 'C') { cCount++; }
|
||||
if (base == 'G') { gCount++; }
|
||||
if (base == 'T') { tCount++; }
|
||||
|
||||
// System.out.println(read.getReadName() + " " + base + " " + qual );
|
||||
|
||||
glAll.add(ref.getBase(), base, qual);
|
||||
|
||||
if (read.getReadNegativeStrandFlag()) {
|
||||
glReverse.add(ref.getBase(), base, qual);
|
||||
} else {
|
||||
glForward.add(ref.getBase(), base, qual);
|
||||
}
|
||||
}
|
||||
|
||||
// apply priors
|
||||
String priorType = "NONE";
|
||||
|
||||
double[] priors;
|
||||
|
||||
// hapmap overrides dbsnp which overrides "novel"
|
||||
if (hapmap != null) {
|
||||
List<String> knownAlleles = hapmap.getAllelesFWD();
|
||||
priorType = "HAPMAP";
|
||||
rodString = "[HAPMAP" + hapmap.toSimpleString() + "]";
|
||||
|
||||
priors = getKnownSiteKnownFreqPriors(((byte)(upRef & 0xff)), knownAlleles, hapmap.getVarAlleleFreq());
|
||||
} else if (dbsnpInfo != null && dbsnpInfo.isSNP()) {
|
||||
List<String> knownAlleles = Arrays.asList(Utils.join("",dbsnpInfo.getAlleleList()));
|
||||
priorType = "DBSNP";
|
||||
rodString = "[DBSNP: " + dbsnpInfo.toMediumString() + "]";
|
||||
|
||||
priors = getKnownSitePriors(((byte)(upRef & 0xff)), knownAlleles);
|
||||
} else {
|
||||
priors = getNovelSitePriors(((byte)(upRef & 0xff)));
|
||||
}
|
||||
|
||||
|
||||
|
||||
StringBuilder priorString = new StringBuilder();
|
||||
priorString.append(priorType);
|
||||
for(int i=0; i<priors.length; i++) {
|
||||
|
||||
// apply the prior!!!
|
||||
double log10prior = log10(priors[i]);
|
||||
glAll.likelihoods[i] += log10prior;
|
||||
glForward.likelihoods[i] += log10prior;
|
||||
glReverse.likelihoods[i] += log10prior;
|
||||
|
||||
priorString.append(" ");
|
||||
priorString
|
||||
.append(glAll.genotypes[i])
|
||||
.append(":")
|
||||
.append(new Formatter().format("%4.2f",log10prior));
|
||||
|
||||
}
|
||||
|
||||
|
||||
// FIXME: horrible way to do this!!!!
|
||||
glAll.doSort();
|
||||
glForward.doSort();
|
||||
glReverse.doSort();
|
||||
|
||||
|
||||
String gt = glAll.genotypes[glAll.genotypes.length-1];
|
||||
double btnb = glAll.likelihoods[glAll.genotypes.length-1] - glAll.likelihoods[glAll.genotypes.length-2];
|
||||
double btr = 1.0d; // FIXME: do we need this?
|
||||
|
||||
// just to mimic old behavior
|
||||
String type = "homozygous";
|
||||
if (gt.charAt(0) != gt.charAt(1)) { type = "heterozygous-SNP"; }
|
||||
else if (gt.charAt(0) != upRef) { type = "homozygous-SNP"; }
|
||||
|
||||
|
||||
out.print(String.format("%s %d %s %s %4.6f %4.6f %s A:%d C:%d G:%d T:%d %d ",
|
||||
context.getContig(),
|
||||
context.getPosition(),
|
||||
upRef,
|
||||
gt,
|
||||
btnb,
|
||||
btr,
|
||||
type,
|
||||
aCount,
|
||||
cCount,
|
||||
gCount,
|
||||
tCount,
|
||||
aCount + cCount + gCount + tCount));
|
||||
|
||||
out.print(dumpTheories(glAll));
|
||||
|
||||
out.print(String.format("%s %s ",
|
||||
priorString,
|
||||
rodString));
|
||||
|
||||
out.print(dumpTheories(glForward));
|
||||
out.print(dumpTheories(glReverse));
|
||||
out.print("\n");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Given result of map function
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return value + sum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(Integer result) {
|
||||
}
|
||||
|
||||
protected static final double NOVEL_HET_PRIOR = 2*1E-4;
|
||||
protected static final double NOVEL_OTHER_PRIOR = 1E-5;
|
||||
protected static final double NOVEL_REF_PRIOR = 1d - 4*NOVEL_HET_PRIOR - 6*NOVEL_OTHER_PRIOR;
|
||||
|
||||
protected static final double KNOWN_HET_PRIOR = .05;
|
||||
protected static final double KNOWN_HNREF_PRIOR = .05*.05;
|
||||
protected static final double KNOWN_OTHER_PRIOR = 1E-5;
|
||||
protected static final double KNOWN_REF_PRIOR = 1d - KNOWN_HET_PRIOR - KNOWN_HNREF_PRIOR - 8* KNOWN_OTHER_PRIOR;
|
||||
|
||||
protected static final double FREQ_OTHER_PRIOR = 1E-5;
|
||||
|
||||
public double[] getNovelSitePriors(final byte referenceBase) {
|
||||
//TODO: convert everything to use DiploidGenotype
|
||||
DiploidGenotype[] theories = DiploidGenotype.values();
|
||||
double[] priors = new double[theories.length];
|
||||
|
||||
DiploidGenotype refTheory = DiploidGenotype.fromBases(referenceBase, referenceBase);
|
||||
|
||||
// use novel variant prior
|
||||
// ref (e.g. AA) -> 1 - 4*(2*10^-4) - 6*(10^-5)
|
||||
// het (e.g. AC, AG, AT) -> 2*10^-4 (AC, AG, AT,
|
||||
// other (e.g. CC, CG, CT, GG, GT, TT) -> 10^-5
|
||||
int i=0;
|
||||
for (DiploidGenotype theory : theories) {
|
||||
double prior = 0;
|
||||
if (theory == refTheory) {
|
||||
prior = NOVEL_REF_PRIOR;
|
||||
} else if (theory.getAllele1() == referenceBase || theory.getAllele2() == referenceBase) {
|
||||
prior = NOVEL_HET_PRIOR;
|
||||
} else { // not reference, and not het with reference base
|
||||
prior = NOVEL_OTHER_PRIOR;
|
||||
}
|
||||
priors[i++] = prior;
|
||||
}
|
||||
|
||||
return priors;
|
||||
}
|
||||
|
||||
public double[] getKnownSitePriors(final byte referenceBase,
|
||||
final List<String> alleles) {
|
||||
|
||||
//TODO: convert everything to use DiploidGenotype
|
||||
DiploidGenotype[] theories = DiploidGenotype.values();
|
||||
double[] priors = new double[theories.length];
|
||||
|
||||
DiploidGenotype refTheory = DiploidGenotype.fromBases(referenceBase, referenceBase);
|
||||
|
||||
if (alleles != null && alleles.size() > 2 ) {
|
||||
// FIXME: handle multi-allele case!
|
||||
return getNovelSitePriors(referenceBase);
|
||||
}
|
||||
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
if (alleles != null ) {
|
||||
for(String allele : alleles) { sb.append(allele); }
|
||||
}
|
||||
String allelesString = sb.toString();
|
||||
|
||||
|
||||
DiploidGenotype knownAlleles = DiploidGenotype.fromBases(allelesString.getBytes());
|
||||
byte altAllele = getAlternateAllele(knownAlleles, referenceBase);
|
||||
DiploidGenotype hnref = DiploidGenotype.fromBases(altAllele, altAllele);
|
||||
|
||||
|
||||
// do we not have a freq (in dbsnp, known alleles)
|
||||
// AA -
|
||||
// AG - .05
|
||||
// GG - .05^2
|
||||
// other - 10^-5
|
||||
int i=0;
|
||||
for (DiploidGenotype theory : theories) {
|
||||
double prior = 0;
|
||||
if (theory == refTheory) {
|
||||
prior = KNOWN_REF_PRIOR;
|
||||
} else if (theory == knownAlleles) {
|
||||
prior = KNOWN_HET_PRIOR;
|
||||
} else if (theory == hnref) {
|
||||
prior = KNOWN_HNREF_PRIOR;
|
||||
} else {
|
||||
prior = NOVEL_OTHER_PRIOR;
|
||||
}
|
||||
priors[i++] = prior;
|
||||
}
|
||||
|
||||
return priors;
|
||||
|
||||
}
|
||||
|
||||
public double[] getKnownSiteKnownFreqPriors(final byte referenceBase,
|
||||
final List<String> alleles,
|
||||
final Double altAllelefrequency) {
|
||||
|
||||
//TODO: convert everything to use DiploidGenotype
|
||||
DiploidGenotype[] theories = DiploidGenotype.values();
|
||||
double[] priors = new double[theories.length];
|
||||
|
||||
DiploidGenotype refTheory = DiploidGenotype.fromBases(referenceBase, referenceBase);
|
||||
|
||||
// if we're dealing with more than one allele here... make sure they all have no frequency data
|
||||
if (alleles != null && alleles.size() > 2 ) {
|
||||
throw new PicardException("Can't handle multiple known alleles with a single known frequency!");
|
||||
}
|
||||
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
if (alleles != null ) {
|
||||
for(String allele : alleles) { sb.append(allele); }
|
||||
}
|
||||
String allelesString = sb.toString();
|
||||
|
||||
|
||||
// System.out.println("FREQ");
|
||||
// assert that we have just one set of frequencies...
|
||||
//FIXME: if (aaf.size() > 1) { throw new PicardException("Attempting to call with multiple allele frequencies!"); }
|
||||
DiploidGenotype knownAlleles = DiploidGenotype.fromBases(allelesString.getBytes());
|
||||
byte altAllele = getAlternateAllele(knownAlleles, referenceBase);
|
||||
DiploidGenotype hnref = DiploidGenotype.fromBases(altAllele, altAllele);
|
||||
|
||||
double q = altAllelefrequency;
|
||||
double p = 1d - q;
|
||||
|
||||
|
||||
// do we have a freq (in hapmap, known alleles)
|
||||
// AA -
|
||||
// AG -
|
||||
// GG -
|
||||
// other - 10^-5
|
||||
int i=0;
|
||||
for (DiploidGenotype theory : theories) {
|
||||
double prior = 0;
|
||||
if (theory == refTheory) {
|
||||
prior = p*p; //TODO: should this really sum to 1?
|
||||
} else if (theory == knownAlleles) {
|
||||
prior = 2*p*q;
|
||||
} else if (theory == hnref) {
|
||||
prior = q*q;
|
||||
} else {
|
||||
prior = FREQ_OTHER_PRIOR;
|
||||
}
|
||||
priors[i++] = prior;
|
||||
}
|
||||
|
||||
|
||||
return priors;
|
||||
}
|
||||
|
||||
protected byte getAlternateAllele(final DiploidGenotype gt, final byte refAllele) {
|
||||
return (gt.getAllele1() == refAllele)?gt.getAllele2():gt.getAllele1();
|
||||
}
|
||||
|
||||
protected String dumpTheories(OldAndBustedGenotypeLikelihoods gl) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = gl.genotypes.length-1; i >= 0; i--)
|
||||
{
|
||||
sb.append(String.format("%s:%4.2f ", gl.genotypes[i], gl.likelihoods[i]));
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue