Directory to house variant calling and filtration tools.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1040 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
5992d88409
commit
9a7cec7d2e
|
|
@ -0,0 +1,43 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class IVFBinomialStrand implements IndependentVariantFeature {
|
||||
public String getFeatureName() { return "binomial"; }
|
||||
|
||||
public double[] compute(char ref, LocusContext context) {
|
||||
double[] likelihoods = new double[10];
|
||||
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
String bases = pileup.getBases();
|
||||
|
||||
for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) {
|
||||
Genotype genotype = Genotype.values()[genotypeIndex];
|
||||
|
||||
char[] alleles = { genotype.toString().charAt(0), genotype.toString().charAt(1) };
|
||||
int[] strandCounts = { 0, 0 };
|
||||
|
||||
for (int pileupIndex = 0; pileupIndex < bases.length(); pileupIndex++) {
|
||||
for (int alleleIndex = 0; alleleIndex < alleles.length; alleleIndex++) {
|
||||
if (bases.charAt(pileupIndex) == alleles[alleleIndex]) {
|
||||
if (reads.get(pileupIndex).getReadNegativeStrandFlag()) {
|
||||
strandCounts[1]++;
|
||||
} else {
|
||||
strandCounts[0]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
likelihoods[genotypeIndex] = Math.log10(MathUtils.binomialProbability(strandCounts[0], strandCounts[0] + strandCounts[1], 0.5));
|
||||
}
|
||||
|
||||
return likelihoods;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
|
||||
public class IVFNull implements IndependentVariantFeature {
|
||||
public String getFeatureName() { return "null"; }
|
||||
|
||||
public double[] compute(char ref, LocusContext context) {
|
||||
return new double[0];
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
|
||||
public interface IndependentVariantFeature {
|
||||
public enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
||||
|
||||
public String getFeatureName();
|
||||
|
||||
public double[] compute(char ref, LocusContext context);
|
||||
}
|
||||
|
|
@ -0,0 +1,66 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=rodVariants.class))
|
||||
public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||
@Argument(fullName="features", shortName="F", doc="Comma-separated list of feature tests to apply to genotype posteriors.") public String FEATURES;
|
||||
@Argument(fullName="variants_out", shortName="VO", doc="File to which modified variants should be written") public File VARIANTS_OUT;
|
||||
|
||||
private String[] features;
|
||||
private PrintWriter vwriter;
|
||||
|
||||
public void initialize() {
|
||||
features = FEATURES.split(",");
|
||||
|
||||
try {
|
||||
vwriter = new PrintWriter(VARIANTS_OUT);
|
||||
|
||||
vwriter.println(AlleleFrequencyEstimate.geliHeaderString());
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException(String.format("Could not open file '%s' for writing", VARIANTS_OUT.getAbsolutePath()));
|
||||
}
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||
rodVariants variant = (rodVariants) tracker.lookup("variant", null);
|
||||
|
||||
for (String feature : features) {
|
||||
IndependentVariantFeature ivf;
|
||||
|
||||
if (feature.equalsIgnoreCase("binomial")) { ivf = new IVFBinomialStrand(); }
|
||||
else { throw new StingException(String.format("Cannot understand feature '%s'\n", feature)); }
|
||||
|
||||
variant.multiplyLikelihoods(ivf.compute(ref, context));
|
||||
vwriter.println(variant);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return sum + 1;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
out.printf("Processed %d loci.\n", result);
|
||||
|
||||
vwriter.close();
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue