small cleanups for the GATK paper genotyper; switched to the managed output system.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2156 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e1e5b35b19
commit
cfbd9332b0
|
|
@ -10,7 +10,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.PrintWriter;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -21,13 +21,14 @@ import java.io.File;
|
||||||
* A simple Bayesian genotyper that outputs a text-based call format. Intended to be used only as an
|
* A simple Bayesian genotyper that outputs a text-based call format. Intended to be used only as an
|
||||||
* example in the GATK publication.
|
* example in the GATK publication.
|
||||||
*/
|
*/
|
||||||
public class GATKPaperGenotyper extends LocusWalker<SimpleCall, SimpleCallList> implements TreeReducible<SimpleCallList> {
|
public class GATKPaperGenotyper extends LocusWalker<SimpleCall, Integer> implements TreeReducible<Integer> {
|
||||||
|
|
||||||
// the possible diploid genotype strings
|
// the possible diploid genotype strings
|
||||||
private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
||||||
|
|
||||||
|
// where to write the genotyping data to
|
||||||
@Argument(fullName = "call_location", shortName = "cl", doc = "File to which calls should be written", required = true)
|
@Argument(fullName = "call_location", shortName = "cl", doc = "File to which calls should be written", required = true)
|
||||||
private File LOCATION = new File("genotyping.output");
|
public PrintWriter outputStream;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* our map function, which takes the reads spanning this locus, any associated reference ordered data,
|
* our map function, which takes the reads spanning this locus, any associated reference ordered data,
|
||||||
|
|
@ -45,12 +46,13 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, SimpleCallList>
|
||||||
double likelihoods[] = DiploidGenotypePriors.getReferencePolarizedPriors(ref.getBase(),
|
double likelihoods[] = DiploidGenotypePriors.getReferencePolarizedPriors(ref.getBase(),
|
||||||
DiploidGenotypePriors.HUMAN_HETEROZYGOSITY,
|
DiploidGenotypePriors.HUMAN_HETEROZYGOSITY,
|
||||||
DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
|
DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
|
||||||
|
byte bases[] = pileup.getBases();
|
||||||
|
byte quals[] = pileup.getQuals();
|
||||||
for (GENOTYPE genotype : GENOTYPE.values())
|
for (GENOTYPE genotype : GENOTYPE.values())
|
||||||
for (int index = 0; index < pileup.getBases().length; index++) {
|
for (int index = 0; index < bases.length; index++) {
|
||||||
if (pileup.getQuals()[index] > 0) {
|
if (quals[index] > 0) {
|
||||||
double epsilon = Math.pow(10, pileup.getQuals()[index] / -10.0);
|
double epsilon = Math.pow(10, quals[index] / -10.0);
|
||||||
byte pileupBase = pileup.getBases()[index];
|
byte pileupBase = bases[index];
|
||||||
for (char genotypeBase : genotype.toString().toCharArray())
|
for (char genotypeBase : genotype.toString().toCharArray())
|
||||||
if (genotypeBase == pileupBase)
|
if (genotypeBase == pileupBase)
|
||||||
likelihoods[genotype.ordinal()] += Math.log10(0.5 * ((1 - epsilon) + epsilon / 3));
|
likelihoods[genotype.ordinal()] += Math.log10(0.5 * ((1 - epsilon) + epsilon / 3));
|
||||||
|
|
@ -76,8 +78,8 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, SimpleCallList>
|
||||||
*
|
*
|
||||||
* @return Initial value of reduce.
|
* @return Initial value of reduce.
|
||||||
*/
|
*/
|
||||||
public SimpleCallList reduceInit() {
|
public Integer reduceInit() {
|
||||||
return new SimpleCallList(LOCATION);
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -87,9 +89,9 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, SimpleCallList>
|
||||||
* @param sum accumulator for the reduce.
|
* @param sum accumulator for the reduce.
|
||||||
* @return accumulator with result of the map taken into account.
|
* @return accumulator with result of the map taken into account.
|
||||||
*/
|
*/
|
||||||
public SimpleCallList reduce(SimpleCall value, SimpleCallList sum) {
|
public Integer reduce(SimpleCall value, Integer sum) {
|
||||||
if (value != null) sum.add(value);
|
outputStream.println(value.toString());
|
||||||
return sum;
|
return sum + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -99,17 +101,16 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, SimpleCallList>
|
||||||
* @param rhs 'right-most' portion of data in the composite reduce.
|
* @param rhs 'right-most' portion of data in the composite reduce.
|
||||||
* @return The composite reduce type.
|
* @return The composite reduce type.
|
||||||
*/
|
*/
|
||||||
public SimpleCallList treeReduce(SimpleCallList lhs, SimpleCallList rhs) {
|
public Integer treeReduce(Integer lhs, Integer rhs) {
|
||||||
lhs.addAll(rhs);
|
return lhs + rhs;
|
||||||
return lhs;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* when we finish traversing, close the result list
|
* when we finish traversing, close the result list
|
||||||
* @param result the final reduce result
|
* @param result the final reduce result
|
||||||
*/
|
*/
|
||||||
public void onTraversalDone(SimpleCallList result) {
|
public void onTraversalDone(Integer result) {
|
||||||
result.close();
|
out.println("Simple Genotyper genotyped " + result + "Loci.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
* Date: Nov 19, 2009
|
* Date: Nov 19, 2009
|
||||||
* Time: 2:07:25 AM
|
* Time: 2:07:25 AM
|
||||||
*
|
*
|
||||||
* This simple call class stores the data for the per-locus calls of the GATKPaperGenotyper.
|
* This is a simple call class that stores the data for the per-locus calls of the GATKPaperGenotyper.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
class SimpleCall {
|
class SimpleCall {
|
||||||
|
|
@ -23,6 +23,6 @@ class SimpleCall {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.format("Location %s : %s with LOD %.2f", loc, genotype, LOD);
|
return String.format("%s : %s with LOD %.4f", loc, genotype, LOD);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,68 +0,0 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.papergenotyper;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
|
||||||
|
|
||||||
import java.io.*;
|
|
||||||
import java.util.AbstractList;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collection;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: aaronmckenna
|
|
||||||
* Date: Nov 19, 2009
|
|
||||||
* Time: 12:50:20 AM
|
|
||||||
*
|
|
||||||
* A simple class that dumps the records to disk when we've hit a threshold.
|
|
||||||
* This class makes the GATKPaperGenotyper much simpler to take in for the reader.
|
|
||||||
*/
|
|
||||||
class SimpleCallList extends AbstractList<SimpleCall> {
|
|
||||||
private File outputLocation;
|
|
||||||
private ArrayList<SimpleCall> list = new ArrayList<SimpleCall>();
|
|
||||||
private int WRITE_LIMIT = 100000;
|
|
||||||
public SimpleCallList(File writeTo) {
|
|
||||||
outputLocation = writeTo;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean add(SimpleCall call) {
|
|
||||||
boolean added = list.add(call);
|
|
||||||
writeIfNeeded();
|
|
||||||
return added;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean addAll(Collection<? extends SimpleCall> otherCalls) {
|
|
||||||
boolean added = list.addAll(otherCalls);
|
|
||||||
writeIfNeeded();
|
|
||||||
return added;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void writeIfNeeded() {
|
|
||||||
synchronized(list) {
|
|
||||||
if (list.size() > WRITE_LIMIT) {
|
|
||||||
try {
|
|
||||||
PrintWriter writer = new PrintWriter(new FileWriter(outputLocation, true));
|
|
||||||
for (SimpleCall call : list) writer.println(call.toString());
|
|
||||||
writer.close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new StingException("Unable to write to file " + outputLocation);
|
|
||||||
}
|
|
||||||
list.clear();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public int size() {
|
|
||||||
return list.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public SimpleCall get(int index) {
|
|
||||||
return list.get(index);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void close() {
|
|
||||||
WRITE_LIMIT = 0;
|
|
||||||
writeIfNeeded();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Loading…
Reference in New Issue