diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Pilot3CoverageWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Pilot3CoverageWalker.java deleted file mode 100644 index dc4ab17c9..000000000 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Pilot3CoverageWalker.java +++ /dev/null @@ -1,123 +0,0 @@ -package org.broadinstitute.sting.playground.gatk.walkers; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import net.sf.samtools.SAMRecord; - -import java.util.List; - -public class Pilot3CoverageWalker extends LocusWalker { - - @Argument(fullName = "extended", shortName="ext", doc="extended output", required=false) - public boolean extendedOutput = false; - - @Argument(fullName="min_mapq", shortName="mmq", required=false, doc="Minimum mapping quality of reads to consider") public Integer MIN_MAPQ = 1; - - - public void initialize() { - out.println("track type=wiggle_0 name=Pilot3Coverage viewLimits=0:1"); - } - - public String walkerType() { return "ByLocus"; } - - // Do we actually want to operate on the context? - public boolean filter(RefMetaDataTracker tracker, char ref, AlignmentContext context) { - return true; // We are keeping all the reads - } - - - - // Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext - int MAPPING_QUALITY_THRESHOLD = 1; - int totalSites; - int tumorCovered; - int normalCovered; - int somaticCovered; - long start = 0; - - int lastContigIndex = -1; - long lastPosition = -1; - - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if (start ==0) { start = System.currentTimeMillis(); } - - List reads = context.getReads(); - int totalDepth = 0; - int positiveStrandDepth = 0; - int negativeStrandDepth = 0; - for ( int i = 0; i < reads.size(); i++ ) - { - SAMRecord read = reads.get(i); - - // TODO: could this be done better elsewhere? - // only process primary, non duplicate alignments - // that come from fully mapped pairs with a mappign quality threshold >= x - if (read.getNotPrimaryAlignmentFlag() || - read.getDuplicateReadFlag() || - read.getReadUnmappedFlag() || - read.getMappingQuality() < MIN_MAPQ -// || -// read.getMateUnmappedFlag() || - ) { - continue; - } - - - - totalDepth++; - if (read.getReadNegativeStrandFlag()) { - negativeStrandDepth++; - } else { - positiveStrandDepth++; - } - - } - - - // if the contig index has changed OR if it's the same contig index but we jumped positions - // output a wiggle header - StringBuilder sb = new StringBuilder(); - if (lastContigIndex != context.getLocation().getContigIndex() || - lastPosition + 1 != context.getPosition()) { - lastContigIndex = context.getLocation().getContigIndex(); - sb.append("fixedStep").append(" ") - .append("chrom=").append(context.getContig()).append(" ") - .append("start=").append(context.getPosition()).append(" ") - .append("step=1") - .append("\n"); - } - lastPosition = context.getPosition(); - - if (extendedOutput) { - sb.append(context.getPosition()).append(" "); - sb.append(totalDepth).append(" "); - sb.append(positiveStrandDepth).append(" "); - sb.append(negativeStrandDepth).append(" "); - } - - boolean siteCovered = (totalDepth >= 10 && positiveStrandDepth >= 2 && negativeStrandDepth >= 2); - sb.append((siteCovered)?"1":"0"); - - out.println(sb.toString()); - return 1; - } - - // Given result of map function - public Integer reduceInit() { - return 0; - } - public Integer reduce(Integer value, Integer sum) { - return value + sum; - } - - @Override - public void onTraversalDone(Integer result) { -// out.println(String.format("FINAL - %d %d %d %d", totalSites, tumorCovered, normalCovered, somaticCovered)); - } - - - -} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/PopPriorWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/PopPriorWalker.java deleted file mode 100644 index 1c25a952a..000000000 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/PopPriorWalker.java +++ /dev/null @@ -1,423 +0,0 @@ -package org.broadinstitute.sting.playground.gatk.walkers; - -import edu.mit.broad.picard.genotype.DiploidGenotype; -import net.sf.picard.PicardException; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.HapMapAlleleFrequenciesROD; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.refdata.rodDbSNP; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.cmdLine.Argument; - -import static java.lang.Math.log10; -import java.util.Arrays; -import java.util.Formatter; -import java.util.List; - -public class PopPriorWalker extends LocusWalker { - - @Argument(fullName = "quality_score_cutoff", doc="quality score cutoff", required = false) public Byte qualityScoreCutoff; - - public PopPriorWalker() { - } - - public void initialize() { - } - - public String walkerType() { return "ByLocus"; } - - // Do we actually want to operate on the context? - public boolean filter(RefMetaDataTracker tracker, char ref, AlignmentContext context) { - return true; // We are keeping all the reads - } - - protected class OldAndBustedGenotypeLikelihoods - { - - public double[] likelihoods; - public String[] genotypes; - - OldAndBustedGenotypeLikelihoods() - { - likelihoods = new double[10]; - genotypes = new String[10]; - - genotypes[0] = "AA"; - genotypes[1] = "AC"; - genotypes[2] = "AG"; - genotypes[3] = "AT"; - genotypes[4] = "CC"; - genotypes[5] = "CG"; - genotypes[6] = "CT"; - genotypes[7] = "GG"; - genotypes[8] = "GT"; - genotypes[9] = "TT"; - } - - void add(char ref, char read, byte qual) - { - double p_error = Math.pow(10.0, (double)qual / -10); - for (int i = 0; i < genotypes.length; i++) - { - likelihoods[i] += AlleleLikelihood(ref, read, genotypes[i], p_error); - } - } - - double AlleleLikelihood(char ref, char read, String genotype, double p_error) - { - char h1 = genotype.charAt(0); - char h2 = genotype.charAt(1); - - double p_base; - - if ((h1 == h2) && (h1 == read)) { p_base = Math.log10(1-p_error); } - else if ((h1 != h2) && (h1 == read) || (h2 == read)) { p_base = Math.log10(0.5 - (p_error/2.0)); } - else { p_base = Math.log10(p_error); } - - return p_base; - } - - // TODO: horrible horrible idea... but it's ok for right now. should really - // implement this more cleanly so you can get sorted genotypes without losing the - // ability to call add - public void doSort() { - Integer[] permutation = Utils.SortPermutation(likelihoods); - String[] sorted_genotypes = Utils.PermuteArray(genotypes, permutation); - double[] sorted_likelihoods = Utils.PermuteArray(likelihoods, permutation); - - this.genotypes = sorted_genotypes; - this.likelihoods = sorted_likelihoods; - } - - } - - // Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - char upRef = Character.toUpperCase(ref.getBase()); - List reads = context.getReads(); - List offsets = context.getOffsets(); - - // Look up hapmap and dbsnp priors - String rodString = ""; - - rodDbSNP dbsnpInfo = null; - HapMapAlleleFrequenciesROD hapmap = null; - - for ( ReferenceOrderedDatum datum : tracker.getAllRods() ) - { - if ( datum != null ) - { - if ( datum instanceof rodDbSNP) { - dbsnpInfo = (rodDbSNP)datum; - - } else if (datum instanceof HapMapAlleleFrequenciesROD) { - hapmap = (HapMapAlleleFrequenciesROD) datum; - } - } - } - - // Accumulate genotype likelihoods - int aCount = 0; - int cCount = 0; - int gCount = 0; - int tCount = 0; - OldAndBustedGenotypeLikelihoods glAll = new OldAndBustedGenotypeLikelihoods(); - OldAndBustedGenotypeLikelihoods glForward = new OldAndBustedGenotypeLikelihoods(); - OldAndBustedGenotypeLikelihoods glReverse = new OldAndBustedGenotypeLikelihoods(); - for ( int i = 0; i < reads.size(); i++ ) - { - SAMRecord read = reads.get(i); - - // TODO: could this be done better elsewhere? - if (read.getNotPrimaryAlignmentFlag() || read.getDuplicateReadFlag() || read.getReadUnmappedFlag()) { - continue; - } - - int offset = offsets.get(i); - char base = (char)read.getReadBases()[offset]; - byte qual = read.getBaseQualities()[offset]; - - if (qual == 0) { continue; } - - if (qualityScoreCutoff != null && qual < qualityScoreCutoff) { continue; } - - if (base == 'A') { aCount++; } - if (base == 'C') { cCount++; } - if (base == 'G') { gCount++; } - if (base == 'T') { tCount++; } - - // System.out.println(read.getReadName() + " " + base + " " + qual ); - - glAll.add(ref.getBase(), base, qual); - - if (read.getReadNegativeStrandFlag()) { - glReverse.add(ref.getBase(), base, qual); - } else { - glForward.add(ref.getBase(), base, qual); - } - } - - // apply priors - String priorType = "NONE"; - - double[] priors; - - // hapmap overrides dbsnp which overrides "novel" - if (hapmap != null) { - List knownAlleles = hapmap.getAllelesFWD(); - priorType = "HAPMAP"; - rodString = "[HAPMAP" + hapmap.toSimpleString() + "]"; - - priors = getKnownSiteKnownFreqPriors(((byte)(upRef & 0xff)), knownAlleles, hapmap.getVarAlleleFreq()); - } else if (dbsnpInfo != null && dbsnpInfo.isSNP()) { - List knownAlleles = Arrays.asList(Utils.join("",dbsnpInfo.getAlleleList())); - priorType = "DBSNP"; - rodString = "[DBSNP: " + dbsnpInfo.toMediumString() + "]"; - - priors = getKnownSitePriors(((byte)(upRef & 0xff)), knownAlleles); - } else { - priors = getNovelSitePriors(((byte)(upRef & 0xff))); - } - - - - StringBuilder priorString = new StringBuilder(); - priorString.append(priorType); - for(int i=0; i 1 - 4*(2*10^-4) - 6*(10^-5) - // het (e.g. AC, AG, AT) -> 2*10^-4 (AC, AG, AT, - // other (e.g. CC, CG, CT, GG, GT, TT) -> 10^-5 - int i=0; - for (DiploidGenotype theory : theories) { - double prior = 0; - if (theory == refTheory) { - prior = NOVEL_REF_PRIOR; - } else if (theory.getAllele1() == referenceBase || theory.getAllele2() == referenceBase) { - prior = NOVEL_HET_PRIOR; - } else { // not reference, and not het with reference base - prior = NOVEL_OTHER_PRIOR; - } - priors[i++] = prior; - } - - return priors; - } - - public double[] getKnownSitePriors(final byte referenceBase, - final List alleles) { - - //TODO: convert everything to use DiploidGenotype - DiploidGenotype[] theories = DiploidGenotype.values(); - double[] priors = new double[theories.length]; - - DiploidGenotype refTheory = DiploidGenotype.fromBases(referenceBase, referenceBase); - - if (alleles != null && alleles.size() > 2 ) { - // FIXME: handle multi-allele case! - return getNovelSitePriors(referenceBase); - } - - - StringBuilder sb = new StringBuilder(); - if (alleles != null ) { - for(String allele : alleles) { sb.append(allele); } - } - String allelesString = sb.toString(); - - - DiploidGenotype knownAlleles = DiploidGenotype.fromBases(allelesString.getBytes()); - byte altAllele = getAlternateAllele(knownAlleles, referenceBase); - DiploidGenotype hnref = DiploidGenotype.fromBases(altAllele, altAllele); - - - // do we not have a freq (in dbsnp, known alleles) - // AA - - // AG - .05 - // GG - .05^2 - // other - 10^-5 - int i=0; - for (DiploidGenotype theory : theories) { - double prior = 0; - if (theory == refTheory) { - prior = KNOWN_REF_PRIOR; - } else if (theory == knownAlleles) { - prior = KNOWN_HET_PRIOR; - } else if (theory == hnref) { - prior = KNOWN_HNREF_PRIOR; - } else { - prior = NOVEL_OTHER_PRIOR; - } - priors[i++] = prior; - } - - return priors; - - } - - public double[] getKnownSiteKnownFreqPriors(final byte referenceBase, - final List alleles, - final Double altAllelefrequency) { - - //TODO: convert everything to use DiploidGenotype - DiploidGenotype[] theories = DiploidGenotype.values(); - double[] priors = new double[theories.length]; - - DiploidGenotype refTheory = DiploidGenotype.fromBases(referenceBase, referenceBase); - - // if we're dealing with more than one allele here... make sure they all have no frequency data - if (alleles != null && alleles.size() > 2 ) { - throw new PicardException("Can't handle multiple known alleles with a single known frequency!"); - } - - - StringBuilder sb = new StringBuilder(); - if (alleles != null ) { - for(String allele : alleles) { sb.append(allele); } - } - String allelesString = sb.toString(); - - -// System.out.println("FREQ"); - // assert that we have just one set of frequencies... -//FIXME: if (aaf.size() > 1) { throw new PicardException("Attempting to call with multiple allele frequencies!"); } - DiploidGenotype knownAlleles = DiploidGenotype.fromBases(allelesString.getBytes()); - byte altAllele = getAlternateAllele(knownAlleles, referenceBase); - DiploidGenotype hnref = DiploidGenotype.fromBases(altAllele, altAllele); - - double q = altAllelefrequency; - double p = 1d - q; - - - // do we have a freq (in hapmap, known alleles) - // AA - - // AG - - // GG - - // other - 10^-5 - int i=0; - for (DiploidGenotype theory : theories) { - double prior = 0; - if (theory == refTheory) { - prior = p*p; //TODO: should this really sum to 1? - } else if (theory == knownAlleles) { - prior = 2*p*q; - } else if (theory == hnref) { - prior = q*q; - } else { - prior = FREQ_OTHER_PRIOR; - } - priors[i++] = prior; - } - - - return priors; - } - - protected byte getAlternateAllele(final DiploidGenotype gt, final byte refAllele) { - return (gt.getAllele1() == refAllele)?gt.getAllele2():gt.getAllele1(); - } - - protected String dumpTheories(OldAndBustedGenotypeLikelihoods gl) { - StringBuilder sb = new StringBuilder(); - for (int i = gl.genotypes.length-1; i >= 0; i--) - { - sb.append(String.format("%s:%4.2f ", gl.genotypes[i], gl.likelihoods[i])); - } - return sb.toString(); - } - - -}