Fix merge issues when incorporating new AF calculations changes
commit 343a061b1c

LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2011 The Broad Institute
+Copyright (c) 2012 The Broad Institute
 
 Permission is hereby granted, free of charge, to any person
 obtaining a copy of this software and associated documentation
@@ -26,14 +26,12 @@ package org.broadinstitute.sting.gatk.report;
 
 import org.apache.commons.lang.math.NumberUtils;
 
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.TreeMap;
+import java.util.*;
 
 /**
  * Holds values for a column in a GATK report table
  */
-public class GATKReportColumn extends TreeMap<Object, Object> {
+public class GATKReportColumn extends LinkedHashMap<Object, Object> {
     final private String columnName;
     final private Object defaultValue;
     final private String format;
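The base-class swap above changes how GATKReportColumn iterates its entries: TreeMap walks keys in compareTo order, while LinkedHashMap preserves insertion order and locates keys through hashCode/equals instead of compareTo. A minimal standalone sketch of that behavioral difference (class and key names here are illustrative, not from the GATK sources):

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.TreeMap;

public class OrderingDemo {
    public static void main(String[] args) {
        Map<String, Integer> sorted = new TreeMap<String, Integer>();
        Map<String, Integer> insertion = new LinkedHashMap<String, Integer>();
        for (String key : new String[]{"row3", "row1", "row2"}) {
            sorted.put(key, 0);      // TreeMap orders by compareTo
            insertion.put(key, 0);   // LinkedHashMap keeps insertion order
        }
        System.out.println(sorted.keySet());    // [row1, row2, row3]
        System.out.println(insertion.keySet()); // [row3, row1, row2]
    }
}

For a report table this means rows now come back in the order they were added, which is usually what a table writer wants.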
@@ -756,10 +756,6 @@ public class GATKReportTable {
      */
 
         // Get the column widths for everything
-        HashMap<String, GATKReportColumnFormat> columnFormats = new HashMap<String, GATKReportColumnFormat>();
-        for (String columnName : columns.keySet()) {
-            columnFormats.put(columnName, columns.get(columnName).getColumnFormat());
-        }
         String primaryKeyFormat = "%-" + getPrimaryKeyColumnWidth() + "s";
 
         // Emit the table definition
@@ -787,7 +783,7 @@
             if (needsPadding) {
                 out.printf("  ");
             }
-            out.printf(columnFormats.get(columnName).getNameFormat(), columnName);
+            out.printf(columns.get(columnName).getColumnFormat().getNameFormat(), columnName);
 
             needsPadding = true;
         }
@@ -796,29 +792,31 @@
         out.printf("%n");
 
         // Emit the table body
-        for (Object primaryKey : primaryKeyColumn) {
+        for (final Object primaryKey : primaryKeyColumn) {
             needsPadding = false;
             if (primaryKeyDisplay) {
                 out.printf(primaryKeyFormat, primaryKey);
                 needsPadding = true;
             }
 
-            for (String columnName : columns.keySet()) {
-                if (columns.get(columnName).isDisplayable()) {
+            for (final Map.Entry<String, GATKReportColumn> entry : columns.entrySet()) {
+                final GATKReportColumn column = entry.getValue();
+                if (column.isDisplayable()) {
                     if (needsPadding) {
-                        out.printf("  ");
+                        out.print("  ");
                     }
-                    String value = columns.get(columnName).getStringValue(primaryKey);
-                    out.printf(columnFormats.get(columnName).getValueFormat(), value);
+
+                    final String value = column.getStringValue(primaryKey);
+                    out.printf(column.getColumnFormat().getValueFormat(), value);
 
                     needsPadding = true;
                 }
             }
 
-            out.printf("%n");
+            out.println();
         }
 
-        out.printf("%n");
+        out.println();
     }
 
     public int getNumRows() {
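The body-emission rewrite above replaces a keySet() loop plus repeated columns.get(columnName) lookups with a single entrySet() traversal, so each column is fetched once per iteration. A small sketch of the two idioms side by side (names illustrative):

import java.util.LinkedHashMap;
import java.util.Map;

public class EntrySetDemo {
    public static void main(String[] args) {
        Map<String, String> columns = new LinkedHashMap<String, String>();
        columns.put("chr", "%s");
        columns.put("pos", "%d");

        // keySet style: an extra hash lookup per iteration just to reach the value
        for (String name : columns.keySet()) {
            String format = columns.get(name);
            System.out.println(name + " -> " + format);
        }

        // entrySet style: key and value come from the same traversal step
        for (Map.Entry<String, String> entry : columns.entrySet()) {
            System.out.println(entry.getKey() + " -> " + entry.getValue());
        }
    }
}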
@@ -35,6 +35,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.collections.Pair;
+import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
 
@@ -68,6 +69,9 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
     @Argument(fullName="showIndelPileups",shortName="show_indels",doc="In addition to base pileups, generate pileups of extended indel events")
     public boolean SHOW_INDEL_PILEUPS = false;
 
+    @Argument(fullName="showVerbose",shortName="verbose",doc="Add an extra verbose section to the pileup output")
+    public boolean SHOW_VERBOSE = false;
+
     @Input(fullName="metadata",shortName="metadata",doc="Add these ROD bindings to the output Pileup", required=false)
     public List<RodBinding<Feature>> rods = Collections.emptyList();
 
@@ -82,7 +86,10 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
 
         if ( context.hasBasePileup() ) {
             ReadBackedPileup basePileup = context.getBasePileup();
-            out.printf("%s %s%n", basePileup.getPileupString(ref.getBaseAsChar()), rods);
+            out.printf("%s %s", basePileup.getPileupString((char)ref.getBase()), rods);
+            if ( SHOW_VERBOSE )
+                out.printf(" %s", createVerboseOutput(basePileup));
+            out.println();
         }
 
         if ( context.hasExtendedEventPileup() ) {
@@ -125,6 +132,24 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
 
         return rodString;
     }
 
+    private static String createVerboseOutput(final ReadBackedPileup pileup) {
+        final StringBuilder sb = new StringBuilder();
+        boolean isFirst = true;
+
+        for ( PileupElement p : pileup ) {
+            if ( isFirst )
+                isFirst = false;
+            else
+                sb.append(",");
+            sb.append(p.getRead().getReadName());
+            sb.append(":");
+            sb.append(p.getOffset());
+            sb.append(":");
+            sb.append(p.getRead().getReadLength());
+        }
+        return sb.toString();
+    }
+
     @Override
     public void onTraversalDone(Integer result) {
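The new verbose section appends one comma-separated readName:offset:readLength triple per pileup element. A self-contained sketch of the same joining idiom over plain values, so the output shape is visible without the GATK pileup types (read names and numbers below are made up):

public class VerboseJoinDemo {
    public static void main(String[] args) {
        String[][] elements = {
            {"read007", "12", "101"},
            {"read019", "54", "101"},
        };
        StringBuilder sb = new StringBuilder();
        boolean isFirst = true;
        for (String[] e : elements) {
            if (isFirst) isFirst = false;
            else sb.append(",");
            sb.append(e[0]).append(":").append(e[1]).append(":").append(e[2]);
        }
        System.out.println(sb); // read007:12:101,read019:54:101
    }
}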
@@ -91,6 +91,25 @@ public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
             this.cycle = cycle;
         }
 
+        // Must override hashCode and equals to properly work with GATKReportColumn
+        @Override
+        public int hashCode() {
+            return readGroup.hashCode() + 33 * cycle;
+        }
+
+        @Override
+        public boolean equals(final Object o) {
+            if (this == o) return true;
+            if (o == null || getClass() != o.getClass()) return false;
+
+            final TableKey oKey = (TableKey) o;
+
+            if ( cycle != oKey.cycle ) return false;
+            if ( !readGroup.equals(oKey.readGroup) ) return false;
+
+            return true;
+        }
+
         @Override
         public int compareTo(final TableKey tableKey) {
             final int scmp = readGroup.compareTo(tableKey.readGroup);
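This addition follows directly from GATKReportColumn now extending LinkedHashMap: hash-based maps locate keys by hashCode and confirm with equals, so a key type that only implements compareTo (enough for a TreeMap) silently stops matching. A minimal sketch of the contract using a simplified TableKey (the field names come from the diff; the standalone class around them is an illustrative reduction):

import java.util.LinkedHashMap;
import java.util.Map;

public class HashKeyDemo {
    static final class TableKey {
        final String readGroup;
        final int cycle;

        TableKey(String readGroup, int cycle) {
            this.readGroup = readGroup;
            this.cycle = cycle;
        }

        @Override
        public int hashCode() {
            return readGroup.hashCode() + 33 * cycle;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            TableKey k = (TableKey) o;
            return cycle == k.cycle && readGroup.equals(k.readGroup);
        }
    }

    public static void main(String[] args) {
        Map<TableKey, Integer> errors = new LinkedHashMap<TableKey, Integer>();
        errors.put(new TableKey("lane1", 7), 3);
        // Without the overrides this lookup would miss: two equal-looking
        // TableKey instances would land in different hash buckets.
        System.out.println(errors.get(new TableKey("lane1", 7))); // 3
    }
}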
@@ -70,6 +70,6 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
      * @return the alleles used for genotyping
      */
     protected abstract List<Allele> getLog10PNonRef(final VariantContext vc,
-                                                    final double[][] log10AlleleFrequencyPriors,
+                                                    final double[] log10AlleleFrequencyPriors,
                                                     final AlleleFrequencyCalculationResult result);
 }
@@ -25,6 +25,11 @@
 
 package org.broadinstitute.sting.gatk.walkers.genotyper;
 
+import org.broadinstitute.sting.utils.MathUtils;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
 /**
  * Created by IntelliJ IDEA.
  * User: ebanks
@@ -34,23 +39,54 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
  */
 public class AlleleFrequencyCalculationResult {
 
+    // IMPORTANT NOTE:
+    // These 2 arrays are intended to contain the likelihoods/posterior probabilities for each alternate allele over each possible frequency (from 0 to 2N).
+    // For any given alternate allele and frequency, the likelihoods are marginalized over values for all other alternate alleles. What this means is that
+    // the likelihoods at cell index zero (AF=0) in the array is actually that of the site's being polymorphic (because although this alternate allele may
+    // be at AF=0, it is marginalized over all other alternate alleles which are not necessarily at AF=0).
+    // In the bi-allelic case (where there are no other alternate alleles over which to marginalize),
+    // the value at cell index zero will be equal to AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED.
     final double[][] log10AlleleFrequencyLikelihoods;
     final double[][] log10AlleleFrequencyPosteriors;
+    // These variables are intended to contain the MLE and MAP (and their corresponding allele counts) of the site over all alternate alleles
+    private double log10MLE;
+    private double log10MAP;
+    final private int[] alleleCountsOfMLE;
+    final private int[] alleleCountsOfMAP;
 
-    // These 2 variables are intended to contain the likelihood/posterior probability for the site's being monomorphic (i.e. AF=0 for all alternate alleles)
-    double log10LikelihoodOfAFzero = 0.0;
-    double log10PosteriorOfAFzero = 0.0;
+    // The posteriors seen, not including that of AF=0
+    // TODO -- better implementation needed here (see below)
+    private ArrayList<Double> log10PosteriorMatrixValues = new ArrayList<Double>(100000);
+    private Double log10PosteriorMatrixSum = null;
 
-    public AlleleFrequencyCalculationResult(int maxAltAlleles, int numChr) {
-        log10AlleleFrequencyLikelihoods = new double[maxAltAlleles][numChr+1];
-        log10AlleleFrequencyPosteriors = new double[maxAltAlleles][numChr+1];
+    // These variables are intended to contain the likelihood/posterior probability for the site's being monomorphic (i.e. AF=0 for all alternate alleles)
+    private double log10LikelihoodOfAFzero;
+    private double log10PosteriorOfAFzero;
+
+    public AlleleFrequencyCalculationResult(final int maxAltAlleles) {
+        alleleCountsOfMLE = new int[maxAltAlleles];
+        alleleCountsOfMAP = new int[maxAltAlleles];
+        reset();
     }
 
+    public double getLog10MLE() {
+        return log10MLE;
+    }
+
+    public double getLog10MAP() {
+        return log10MAP;
+    }
+
+    public double getLog10PosteriorMatrixSum() {
+        if ( log10PosteriorMatrixSum == null ) {
+            // TODO -- we absolutely need a better implementation here as we don't want to store all values from the matrix in memory;
+            // TODO -- will discuss with the team what the best option is
+            final double[] tmp = new double[log10PosteriorMatrixValues.size()];
+            for ( int i = 0; i < tmp.length; i++ )
+                tmp[i] = log10PosteriorMatrixValues.get(i);
+            log10PosteriorMatrixSum = MathUtils.log10sumLog10(tmp);
+        }
+        return log10PosteriorMatrixSum;
+    }
+
+    public int[] getAlleleCountsOfMLE() {
+        return alleleCountsOfMLE;
+    }
+
+    public int[] getAlleleCountsOfMAP() {
+        return alleleCountsOfMAP;
+    }
 
     public double getLog10LikelihoodOfAFzero() {
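The IMPORTANT NOTE above is the heart of the refactor: each row of the two arrays is a marginal, so for alternate allele i the entry at count k sums the posteriors of every joint conformation in which allele i has count k, whatever the other alleles do. A toy linear-space illustration with two alternate alleles (all numbers invented for the example):

public class MarginalDemo {
    public static void main(String[] args) {
        // Toy joint posterior over allele counts of two alternate alleles,
        // joint[j][k] = P(AC(alt1) = j, AC(alt2) = k); entries sum to 1.
        double[][] joint = {
            {0.10, 0.05, 0.05},
            {0.20, 0.25, 0.05},
            {0.10, 0.15, 0.05},
        };
        // The fully monomorphic cell (0,0) is tracked separately, like
        // log10LikelihoodOfAFzero / log10PosteriorOfAFzero in the class above.
        System.out.printf("P(AF=0 overall) = %.2f%n", joint[0][0]);

        // Marginal for alt1 at count j: sum over all counts of alt2, excluding
        // the (0,0) cell. Note the j = 0 entry is the probability that the site
        // is polymorphic while alt1 itself is absent, which is exactly the
        // caveat spelled out in the IMPORTANT NOTE.
        for (int j = 0; j < 3; j++) {
            double m = 0.0;
            for (int k = 0; k < 3; k++) {
                if (j == 0 && k == 0) continue;
                m += joint[j][k];
            }
            System.out.printf("marginal P(AC(alt1) = %d) = %.2f%n", j, m);
        }
    }
}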
@@ -60,4 +96,47 @@ public class AlleleFrequencyCalculationResult {
     public double getLog10PosteriorOfAFzero() {
         return log10PosteriorOfAFzero;
     }
 
+    public void reset() {
+        log10MLE = log10MAP = log10LikelihoodOfAFzero = log10PosteriorOfAFzero = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
+        for ( int i = 0; i < alleleCountsOfMLE.length; i++ ) {
+            alleleCountsOfMLE[i] = 0;
+            alleleCountsOfMAP[i] = 0;
+        }
+        log10PosteriorMatrixValues.clear();
+        log10PosteriorMatrixSum = null;
+    }
+
+    public void updateMLEifNeeded(final double log10LofK, final int[] alleleCountsForK) {
+        if ( log10LofK > log10MLE ) {
+            log10MLE = log10LofK;
+            for ( int i = 0; i < alleleCountsForK.length; i++ )
+                alleleCountsOfMLE[i] = alleleCountsForK[i];
+        }
+    }
+
+    public void updateMAPifNeeded(final double log10LofK, final int[] alleleCountsForK) {
+        log10PosteriorMatrixValues.add(log10LofK);
+        if ( log10LofK > log10MAP ) {
+            log10MAP = log10LofK;
+            for ( int i = 0; i < alleleCountsForK.length; i++ )
+                alleleCountsOfMAP[i] = alleleCountsForK[i];
+        }
+    }
+
+    public void setLog10LikelihoodOfAFzero(final double log10LikelihoodOfAFzero) {
+        this.log10LikelihoodOfAFzero = log10LikelihoodOfAFzero;
+        if ( log10LikelihoodOfAFzero > log10MLE ) {
+            log10MLE = log10LikelihoodOfAFzero;
+            Arrays.fill(alleleCountsOfMLE, 0);
+        }
+    }
+
+    public void setLog10PosteriorOfAFzero(final double log10PosteriorOfAFzero) {
+        this.log10PosteriorOfAFzero = log10PosteriorOfAFzero;
+        if ( log10PosteriorOfAFzero > log10MAP ) {
+            log10MAP = log10PosteriorOfAFzero;
+            Arrays.fill(alleleCountsOfMAP, 0);
+        }
+    }
 }
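getLog10PosteriorMatrixSum above folds the saved per-conformation posteriors with MathUtils.log10sumLog10, a log-sum-exp in base 10 that keeps the accumulation stable. A standalone sketch of that computation, reimplemented here purely for illustration (not the GATK source):

public class Log10SumDemo {
    // log10(sum_i 10^x[i]), computed stably by factoring out the max term
    static double log10sumLog10(double[] log10Values) {
        double max = Double.NEGATIVE_INFINITY;
        for (double v : log10Values) max = Math.max(max, v);
        if (Double.isInfinite(max)) return Double.NEGATIVE_INFINITY;
        double sum = 0.0;
        for (double v : log10Values) sum += Math.pow(10.0, v - max);
        return max + Math.log10(sum);
    }

    public static void main(String[] args) {
        // 10^-400 underflows to 0.0 in linear space, so summing naively and
        // taking log10 again would return -Infinity; the stable form does not.
        double[] log10Posteriors = {-400.0, -401.0};
        System.out.println(log10sumLog10(log10Posteriors)); // ~ -399.9586
    }
}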
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.genotyper;
+
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.clipping.ReadClipper;
+import org.broadinstitute.sting.utils.collections.Pair;
+import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
+import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.broadinstitute.sting.utils.sam.ReadUtils;
+import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
+import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
+
+import java.util.*;
+
+/**
+ * Code for determining which indels are segregating among the samples.
+ *
+ * This code is just a refactor of the original code from Guillermo in the UG.
+ *
+ * @author Mark DePristo
+ * @since 3/26/12
+ */
+public class ConsensusAlleleCounter {
+    final protected static Logger logger = Logger.getLogger(ConsensusAlleleCounter.class);
+    private final int minIndelCountForGenotyping;
+    private final boolean doMultiAllelicCalls;
+    private final double minFractionInOneSample;
+    private final GenomeLocParser locParser;
+
+    public ConsensusAlleleCounter(final GenomeLocParser locParser,
+                                  final boolean doMultiAllelicCalls,
+                                  final int minIndelCountForGenotyping,
+                                  final double minFractionInOneSample) {
+        this.minIndelCountForGenotyping = minIndelCountForGenotyping;
+        this.doMultiAllelicCalls = doMultiAllelicCalls;
+        this.minFractionInOneSample = minFractionInOneSample;
+        this.locParser = locParser;
+    }
+
+    /**
+     * Returns a list of Alleles at this locus that may be segregating
+     *
+     * @param ref
+     * @param contexts
+     * @param contextType
+     * @return
+     */
+    public List<Allele> computeConsensusAlleles(ReferenceContext ref,
+                                                Map<String, AlignmentContext> contexts,
+                                                AlignmentContextUtils.ReadOrientation contextType) {
+        final Map<String, Integer> consensusIndelStrings = countConsensusAlleles(ref, contexts, contextType);
+//        logger.info("Alleles at " + ref.getLocus());
+//        for ( Map.Entry<String, Integer> elt : consensusIndelStrings.entrySet() ) {
+//            logger.info("  " + elt.getValue() + " => " + elt.getKey());
+//        }
+        return consensusCountsToAlleles(ref, consensusIndelStrings);
+    }
+
+    //
+    // TODO -- WARNING DOESN'T WORK WITH REDUCED READS
+    //
+    private Map<String, Integer> countConsensusAlleles(ReferenceContext ref,
+                                                       Map<String, AlignmentContext> contexts,
+                                                       AlignmentContextUtils.ReadOrientation contextType) {
+        final GenomeLoc loc = ref.getLocus();
+        HashMap<String, Integer> consensusIndelStrings = new HashMap<String, Integer>();
+
+        int insCount = 0, delCount = 0;
+        // quick check of total number of indels in pileup
+        for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
+            AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
+
+            final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
+            insCount += indelPileup.getNumberOfInsertions();
+            delCount += indelPileup.getNumberOfDeletions();
+        }
+
+        if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping)
+            return Collections.emptyMap();
+
+        for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
+            // todo -- warning, can be duplicating expensive partition here
+            AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
+
+            final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
+
+            final int nIndelReads = indelPileup.getNumberOfInsertions() + indelPileup.getNumberOfDeletions();
+            final int nReadsOverall = indelPileup.getNumberOfElements();
+            if ( nIndelReads == 0 || (nIndelReads / (1.0 * nReadsOverall)) < minFractionInOneSample) {
+//                if ( nIndelReads > 0 )
+//                    logger.info("Skipping sample " + sample.getKey() + " with nIndelReads " + nIndelReads + " nReads " + nReadsOverall);
+                continue;
+//            } else {
+//                logger.info("### Keeping sample " + sample.getKey() + " with nIndelReads " + nIndelReads + " nReads " + nReadsOverall);
+            }
+
+            for (ExtendedEventPileupElement p : indelPileup.toExtendedIterable()) {
+                final GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
+                if (read == null)
+                    continue;
+                if (ReadUtils.is454Read(read)) {
+                    continue;
+                }
+
+                /* if (DEBUG && p.isIndel()) {
+                    System.out.format("Read: %s, cigar: %s, aln start: %d, aln end: %d, p.len:%d, Type:%s, EventBases:%s\n",
+                            read.getReadName(),read.getCigar().toString(),read.getAlignmentStart(),read.getAlignmentEnd(),
+                            p.getEventLength(),p.getType().toString(), p.getEventBases());
+                }
+                */
+                String indelString = p.getEventBases();
+                if (p.isInsertion()) {
+                    boolean foundKey = false;
+                    // copy of hashmap into temp arrayList
+                    ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
+                    for (String s : consensusIndelStrings.keySet()) {
+                        cList.add(new Pair<String, Integer>(s,consensusIndelStrings.get(s)));
+                    }
+
+                    if (read.getAlignmentEnd() == loc.getStart()) {
+                        // first corner condition: a read has an insertion at the end, and we're right at the insertion.
+                        // In this case, the read could have any of the inserted bases and we need to build a consensus
+
+                        for (int k=0; k < cList.size(); k++) {
+                            String s = cList.get(k).getFirst();
+                            int cnt = cList.get(k).getSecond();
+                            // case 1: current insertion is prefix of indel in hash map
+                            if (s.startsWith(indelString)) {
+                                cList.set(k,new Pair<String, Integer>(s,cnt+1));
+                                foundKey = true;
+                            }
+                            else if (indelString.startsWith(s)) {
+                                // case 2: indel stored in hash table is prefix of current insertion
+                                // In this case, new bases are new key.
+                                foundKey = true;
+                                cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
+                            }
+                        }
+                        if (!foundKey)
+                            // none of the above: event bases not supported by previous table, so add new key
+                            cList.add(new Pair<String, Integer>(indelString,1));
+
+                    }
+                    else if (read.getAlignmentStart() == loc.getStart()+1) {
+                        // opposite corner condition: read will start at current locus with an insertion
+                        for (int k=0; k < cList.size(); k++) {
+                            String s = cList.get(k).getFirst();
+                            int cnt = cList.get(k).getSecond();
+                            if (s.endsWith(indelString)) {
+                                // case 1: current insertion (indelString) is suffix of indel in hash map (s)
+                                cList.set(k,new Pair<String, Integer>(s,cnt+1));
+                                foundKey = true;
+                            }
+                            else if (indelString.endsWith(s)) {
+                                // case 2: indel stored in hash table is prefix of current insertion
+                                // In this case, new bases are new key.
+                                foundKey = true;
+                                cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
+                            }
+                        }
+                        if (!foundKey)
+                            // none of the above: event bases not supported by previous table, so add new key
+                            cList.add(new Pair<String, Integer>(indelString,1));
+
+                    }
+                    else {
+                        // normal case: insertion somewhere in the middle of a read: add count to arrayList
+                        int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
+                        cList.add(new Pair<String, Integer>(indelString,cnt+1));
+                    }
+
+                    // copy back arrayList into hashMap
+                    consensusIndelStrings.clear();
+                    for (Pair<String,Integer> pair : cList) {
+                        consensusIndelStrings.put(pair.getFirst(),pair.getSecond());
+                    }
+
+                }
+                else if (p.isDeletion()) {
+                    indelString = String.format("D%d",p.getEventLength());
+                    int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
+                    consensusIndelStrings.put(indelString,cnt+1);
+
+                }
+            }
+        }
+
+        return consensusIndelStrings;
+    }
+
+    private List<Allele> consensusCountsToAlleles(final ReferenceContext ref,
+                                                  final Map<String, Integer> consensusIndelStrings) {
+        final GenomeLoc loc = ref.getLocus();
+        final Collection<VariantContext> vcs = new ArrayList<VariantContext>();
+        int maxAlleleCnt = 0;
+        Allele refAllele, altAllele;
+
+        for (final Map.Entry<String, Integer> elt : consensusIndelStrings.entrySet()) {
+            final String s = elt.getKey();
+            final int curCnt = elt.getValue();
+            int stop = 0;
+
+            // if the observed count is above the minimum threshold, we will genotype this allele
+            if (curCnt < minIndelCountForGenotyping)
+                continue;
+
+            if (s.startsWith("D")) {
+                // get deletion length
+                final int dLen = Integer.valueOf(s.substring(1));
+                // get ref bases of accurate deletion
+                final int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
+                stop = loc.getStart() + dLen;
+                final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);
+
+                if (Allele.acceptableAlleleBases(refBases)) {
+                    refAllele = Allele.create(refBases, true);
+                    altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
+                }
+                else continue; // don't go on with this allele if refBases are non-standard
+            } else {
+                // insertion case
+                if (Allele.acceptableAlleleBases(s)) {
+                    refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
+                    altAllele = Allele.create(s, false);
+                    stop = loc.getStart();
+                }
+                else continue; // go on to next allele if consensus insertion has any non-standard base.
+            }
+
+            final VariantContextBuilder builder = new VariantContextBuilder().source("");
+            builder.loc(loc.getContig(), loc.getStart(), stop);
+            builder.alleles(Arrays.asList(refAllele, altAllele));
+            builder.referenceBaseForIndel(ref.getBase());
+            builder.noGenotypes();
+            if (doMultiAllelicCalls)
+                vcs.add(builder.make());
+            else {
+                if (curCnt > maxAlleleCnt) {
+                    maxAlleleCnt = curCnt;
+                    vcs.clear();
+                    vcs.add(builder.make());
+                }
+            }
+        }
+
+        if (vcs.isEmpty())
+            return Collections.emptyList(); // nothing else to do, no alleles passed minimum count criterion
+
+        final VariantContext mergedVC = VariantContextUtils.simpleMerge(locParser, vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false);
+        return mergedVC.getAlleles();
+    }
+}
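ConsensusAlleleCounter keys deletions as "D<length>" and insertions by their inserted bases, merging event strings that are prefix- or suffix-compatible, which happens when a read only partially spans an insertion at its edge. A reduced sketch of the insertion-merge rule (standalone shape and names are illustrative; the real class also handles the suffix case, deletion keys, 454-read filtering, and the per-sample fraction gate):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ConsensusCountDemo {
    // Count one observed insertion string against the running table, merging
    // keys when one event string is a prefix of the other (read-edge case).
    static void countInsertion(Map<String, Integer> counts, String ins) {
        boolean foundKey = false;
        List<String> keys = new ArrayList<String>(counts.keySet());
        for (String s : keys) {
            int cnt = counts.get(s);
            if (s.startsWith(ins)) {            // observed bases are a prefix of a known event
                counts.put(s, cnt + 1);
                foundKey = true;
            } else if (ins.startsWith(s)) {     // known event is a prefix: the longer string becomes the key
                counts.remove(s);
                counts.put(ins, cnt + 1);
                foundKey = true;
            }
        }
        if (!foundKey)
            counts.put(ins, 1);
    }

    public static void main(String[] args) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        countInsertion(counts, "AC");
        countInsertion(counts, "ACGT");          // merges with "AC": key promoted, count carried over
        String del = String.format("D%d", 3);    // a 3 bp deletion is keyed as "D3"
        Integer prev = counts.get(del);
        counts.put(del, prev == null ? 1 : prev + 1);
        System.out.println(counts);              // e.g. {ACGT=2, D3=1}
    }
}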
@@ -52,11 +52,9 @@ import java.util.*;
 public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
     private final int HAPLOTYPE_SIZE;
 
-    private final int minIndelCountForGenotyping;
     private final boolean getAlleleListFromVCF;
 
     private boolean DEBUG = false;
-    private final boolean doMultiAllelicCalls = true;
     private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
     private PairHMMIndelErrorModel pairModel;
 
@@ -72,217 +70,30 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
     // gdebug removeme
     // todo -cleanup
     private GenomeLoc lastSiteVisited;
-    private ArrayList<Allele> alleleList;
+    private List<Allele> alleleList = new ArrayList<Allele>();
 
     static {
         indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
     }
 
-    public IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
+    protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
         super(UAC, logger);
         pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
                 UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
-        alleleList = new ArrayList<Allele>();
         getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
-        minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
         HAPLOTYPE_SIZE = UAC.INDEL_HAPLOTYPE_SIZE;
         DEBUG = UAC.OUTPUT_DEBUG_INDEL_INFO;
 
         haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
         ignoreSNPAllelesWhenGenotypingIndels = UAC.IGNORE_SNP_ALLELES;
     }
 
-    public static ArrayList<Allele> computeConsensusAlleles(ReferenceContext ref,
-                                                            Map<String, AlignmentContext> contexts,
-                                                            AlignmentContextUtils.ReadOrientation contextType, GenomeLocParser locParser,
-                                                            int minIndelCountForGenotyping, boolean doMultiAllelicCalls) {
-        Allele refAllele = null, altAllele = null;
-        GenomeLoc loc = ref.getLocus();
-        ArrayList<Allele> aList = new ArrayList<Allele>();
-
-        HashMap<String, Integer> consensusIndelStrings = new HashMap<String, Integer>();
-
-        int insCount = 0, delCount = 0;
-        // quick check of total number of indels in pileup
-        for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
-            AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
-
-            final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
-            insCount += indelPileup.getNumberOfInsertions();
-            delCount += indelPileup.getNumberOfDeletions();
-        }
-
-        if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping)
-            return aList;
-
-        for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
-            // todo -- warning, can be duplicating expensive partition here
-            AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
-
-            final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
-
-            for (ExtendedEventPileupElement p : indelPileup.toExtendedIterable()) {
-                //SAMRecord read = p.getRead();
-                GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
-                if (read == null)
-                    continue;
-                if (ReadUtils.is454Read(read)) {
-                    continue;
-                }
-
-                /* if (DEBUG && p.isIndel()) {
-                    System.out.format("Read: %s, cigar: %s, aln start: %d, aln end: %d, p.len:%d, Type:%s, EventBases:%s\n",
-                            read.getReadName(),read.getCigar().toString(),read.getAlignmentStart(),read.getAlignmentEnd(),
-                            p.getEventLength(),p.getType().toString(), p.getEventBases());
-                }
-                */
-
-                String indelString = p.getEventBases();
-                if (p.isInsertion()) {
-                    boolean foundKey = false;
-                    // copy of hashmap into temp arrayList
-                    ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
-                    for (String s : consensusIndelStrings.keySet()) {
-                        cList.add(new Pair<String, Integer>(s,consensusIndelStrings.get(s)));
-                    }
-
-                    if (read.getAlignmentEnd() == loc.getStart()) {
-                        // first corner condition: a read has an insertion at the end, and we're right at the insertion.
-                        // In this case, the read could have any of the inserted bases and we need to build a consensus
-
-                        for (int k=0; k < cList.size(); k++) {
-                            String s = cList.get(k).getFirst();
-                            int cnt = cList.get(k).getSecond();
-                            // case 1: current insertion is prefix of indel in hash map
-                            if (s.startsWith(indelString)) {
-                                cList.set(k,new Pair<String, Integer>(s,cnt+1));
-                                foundKey = true;
-                            }
-                            else if (indelString.startsWith(s)) {
-                                // case 2: indel stored in hash table is prefix of current insertion
-                                // In this case, new bases are new key.
-                                foundKey = true;
-                                cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
-                            }
-                        }
-                        if (!foundKey)
-                            // none of the above: event bases not supported by previous table, so add new key
-                            cList.add(new Pair<String, Integer>(indelString,1));
-
-                    }
-                    else if (read.getAlignmentStart() == loc.getStart()+1) {
-                        // opposite corner condition: read will start at current locus with an insertion
-                        for (int k=0; k < cList.size(); k++) {
-                            String s = cList.get(k).getFirst();
-                            int cnt = cList.get(k).getSecond();
-                            if (s.endsWith(indelString)) {
-                                // case 1: current insertion (indelString) is suffix of indel in hash map (s)
-                                cList.set(k,new Pair<String, Integer>(s,cnt+1));
-                                foundKey = true;
-                            }
-                            else if (indelString.endsWith(s)) {
-                                // case 2: indel stored in hash table is prefix of current insertion
-                                // In this case, new bases are new key.
-                                foundKey = true;
-                                cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
-                            }
-                        }
-                        if (!foundKey)
-                            // none of the above: event bases not supported by previous table, so add new key
-                            cList.add(new Pair<String, Integer>(indelString,1));
-
-                    }
-                    else {
-                        // normal case: insertion somewhere in the middle of a read: add count to arrayList
-                        int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
-                        cList.add(new Pair<String, Integer>(indelString,cnt+1));
-                    }
-
-                    // copy back arrayList into hashMap
-                    consensusIndelStrings.clear();
-                    for (Pair<String,Integer> pair : cList) {
-                        consensusIndelStrings.put(pair.getFirst(),pair.getSecond());
-                    }
-
-                }
-                else if (p.isDeletion()) {
-                    indelString = String.format("D%d",p.getEventLength());
-                    int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
-                    consensusIndelStrings.put(indelString,cnt+1);
-
-                }
-            }
-        }
-
-        Collection<VariantContext> vcs = new ArrayList<VariantContext>();
-        int maxAlleleCnt = 0;
-        String bestAltAllele = "";
-
-        for (String s : consensusIndelStrings.keySet()) {
-            int curCnt = consensusIndelStrings.get(s), stop = 0;
-            // if observed count if above minimum threshold, we will genotype this allele
-            if (curCnt < minIndelCountForGenotyping)
-                continue;
-
-            if (s.startsWith("D")) {
-                // get deletion length
-                int dLen = Integer.valueOf(s.substring(1));
-                // get ref bases of accurate deletion
-                int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
-                stop = loc.getStart() + dLen;
-                byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);
-
-                if (Allele.acceptableAlleleBases(refBases)) {
-                    refAllele = Allele.create(refBases, true);
-                    altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
-                }
-                else continue; // don't go on with this allele if refBases are non-standard
-            } else {
-                // insertion case
-                if (Allele.acceptableAlleleBases(s)) {
-                    refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
-                    altAllele = Allele.create(s, false);
-                    stop = loc.getStart();
-                }
-                else continue; // go on to next allele if consensus insertion has any non-standard base.
-            }
-
-            ArrayList vcAlleles = new ArrayList<Allele>();
-            vcAlleles.add(refAllele);
-            vcAlleles.add(altAllele);
-
-            final VariantContextBuilder builder = new VariantContextBuilder().source("");
-            builder.loc(loc.getContig(), loc.getStart(), stop);
-            builder.alleles(vcAlleles);
-            builder.referenceBaseForIndel(ref.getBase());
-            builder.noGenotypes();
-            if (doMultiAllelicCalls)
-                vcs.add(builder.make());
-            else {
-                if (curCnt > maxAlleleCnt) {
-                    maxAlleleCnt = curCnt;
-                    vcs.clear();
-                    vcs.add(builder.make());
-                }
-            }
-        }
-
-        if (vcs.isEmpty())
-            return aList; // nothing else to do, no alleles passed minimum count criterion
-
-        VariantContext mergedVC = VariantContextUtils.simpleMerge(locParser, vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false);
-
-        aList = new ArrayList<Allele>(mergedVC.getAlleles());
-
-        return aList;
-    }
+    private List<Allele> computeConsensusAlleles(ReferenceContext ref,
+                                                 Map<String, AlignmentContext> contexts,
+                                                 AlignmentContextUtils.ReadOrientation contextType,
+                                                 GenomeLocParser locParser) {
+        ConsensusAlleleCounter counter = new ConsensusAlleleCounter(locParser, true, UAC.MIN_INDEL_COUNT_FOR_GENOTYPING, UAC.MIN_INDEL_FRACTION_PER_SAMPLE);
+        return counter.computeConsensusAlleles(ref, contexts, contextType);
+    }
 
     private final static EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED);
@@ -338,7 +149,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
             }
 
         } else {
-            alleleList = computeConsensusAlleles(ref, contexts, contextType, locParser, minIndelCountForGenotyping,doMultiAllelicCalls);
+            alleleList = computeConsensusAlleles(ref, contexts, contextType, locParser);
             if (alleleList.isEmpty())
                 return null;
         }
@@ -1,209 +0,0 @@
-package org.broadinstitute.sting.gatk.walkers.genotyper;
-
-import org.apache.commons.lang.NotImplementedException;
-import org.broadinstitute.sting.commandline.Input;
-import org.broadinstitute.sting.commandline.Output;
-import org.broadinstitute.sting.commandline.RodBinding;
-import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.walkers.RodWalker;
-import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.SampleUtils;
-import org.broadinstitute.sting.utils.Utils;
-import org.broadinstitute.sting.utils.collections.Pair;
-import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
-import org.broadinstitute.sting.utils.variantcontext.*;
-
-import java.security.cert.CertificateNotYetValidException;
-import java.util.*;
-
-import org.broadinstitute.sting.utils.codecs.vcf.*;
-
-/**
- * Created by IntelliJ IDEA.
- * User: chartl
- * Date: 8/30/11
- * Time: 10:08 AM
- * To change this template use File | Settings | File Templates.
- */
-public class UGBoundAF extends RodWalker<VariantContext,Integer> {
-
-    @Output(shortName="vcf",fullName="VCF",doc="file to write to",required=true)
-    VCFWriter writer;
-
-    @Input(shortName="V",fullName="Variants",doc="variant tracks to use in calculation",required=true)
-    List<RodBinding<VariantContext>> variants;
-
-    private static double EPS_LOWER_LIMIT = Math.pow(10,-6.0);
-
-    private HashMap<Integer,Pair<Double,Double>> epsilonPosteriorCache = new HashMap<Integer,Pair<Double,Double>>(8192);
-    private HashMap<Integer,Double> logAC0Cache = new HashMap<Integer, Double>(8192);
-    private int QUANTIZATION_FACTOR = 1000;
-
-    public void initialize() {
-        Set<VCFHeaderLine> allHeaderLines = new HashSet<VCFHeaderLine>(1024);
-        for ( RodBinding<VariantContext> v : variants ) {
-            String trackName = v.getName();
-            Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
-            Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>(vcfHeaders.get(trackName).getMetaData());
-        }
-        allHeaderLines.add(new VCFInfoHeaderLine("AFB",2,VCFHeaderLineType.Float,"The 95% bounds on the allele "+
-                "frequency. First value=95% probability AF>x. Second value=95% probability AF<x."));
-        writer.writeHeader(new VCFHeader(allHeaderLines));
-    }
-
-    public Integer reduceInit() {
-        return 0;
-    }
-
-    @Override
-    public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext unused ) {
-        List<VariantContext> allVariants = tracker.getValues(variants);
-        if ( allVariants.size() == 0 ) {
-            return null;
-        }
-
-        List<Allele> alternateAlleles = getAllAlternateAlleles(allVariants);
-        VariantContextBuilder builder = new VariantContextBuilder(allVariants.get(0).subContextFromSamples(new TreeSet<String>()));
-        if ( alternateAlleles.size() > 1 ) {
-            logger.warn("Multiple Segregating Variants at position "+ref.getLocus().toString());
-            alternateAlleles.add(allVariants.get(0).getReference());
-            builder.alleles(alternateAlleles);
-            builder.filters(String.format("MULTIPLE_SEGREGATING[%s]", Utils.join(",",alternateAlleles)));
-        } else {
-            // get all the genotype likelihoods
-            GenotypesContext context = GenotypesContext.create();
-            int numNoCall = 0;
-            for ( VariantContext v : allVariants ) {
-                numNoCall += v.getNoCallCount();
-                context.addAll(v.getGenotypes());
-            }
-            builder.attribute("AFB",boundAlleleFrequency(getACPosteriors(context)));
-        }
-
-        return builder.make();
-    }
-
-    private List<Allele> getAllAlternateAlleles(List<VariantContext> variants) {
-        List<Allele> alleles = new ArrayList<Allele>(3); // some overhead
-        for ( VariantContext v : variants ) {
-            alleles.addAll(v.getAlternateAlleles());
-        }
-        return alleles;
-    }
-
-    @Override
-    public Integer reduce(VariantContext value, Integer sum) {
-        if ( value == null )
-            return sum;
-        writer.add(value);
-        return ++sum;
-    }
-
-    private int N_ITERATIONS = 1;
-
-    private double[] getACPosteriors(GenotypesContext gc) {
-        // note this uses uniform priors (!)
-        double[][] zeroPriors = new double[1][1+2*gc.size()];
-        AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(2,2*gc.size());
-        // todo -- allow multiple alleles here
-        for ( int i = 0; i < N_ITERATIONS; i ++ ) {
-            ExactAFCalculationModel.linearExactMultiAllelic(gc, 2, zeroPriors, result, false);
-        }
-
-        return result.log10AlleleFrequencyPosteriors[0];
-    }
-
-    private String boundAlleleFrequency(double[] ACLikelihoods) {
-        // note that no-calls are unnecessary: the ML likelihoods take nocalls into account as 0,0,0 GLs
-        // thus, for sites with K 100,40,0 likelihoods and M no-calls, the likelihoods will be
-        // agnostic between 2*K alleles through 2*(K+M) alleles - exactly what we want to marginalize over
-
-        // want to pick a lower limit x and upper limit y such that
-        // int_{f = x to y} sum_{c = 0 to 2*AN} P(AF=f | c, AN) df = 0.95
-        // int_{f=x to y} calculateAFPosterior(f) df = 0.95
-        // and that (y-x) is minimized
-
-        // this is done by quantizing [0,1] into small bins and, since the distribution is
-        // unimodal, greedily adding them until the probability is >= 0.95
-
-        throw new ReviewedStingException("This walker is unsupported, and is not fully implemented", new NotImplementedException("bound allele frequency not implemented"));
-    }
-
-    private double calculateAFPosterior(double[] likelihoods, double af) {
-        double[] probLiks = new double[likelihoods.length];
-        for ( int c = 0; c < likelihoods.length; c++) {
-            probLiks[c] = calculateAFPosterior(c,likelihoods.length,af);
-        }
-
-        return MathUtils.log10sumLog10(probLiks);
-    }
-
-    private double calculateAFPosterior(int ac, int n, double af) {
-        // evaluate the allele frequency posterior distribution at AF given AC observations of N chromosomes
-        switch ( ac ) {
-            case 0:
-                return logAC0Coef(n) + n*Math.log10(1 - af) - Math.log10(af);
-            case 1:
-                return Math.log10(n) + (n-1)*Math.log10(1-af) - n*Math.log10(1-EPS_LOWER_LIMIT);
-            case 2:
-                return Math.log10(n) + Math.log10(n-1) + Math.log10(af) + (n-2)*Math.log10(1-af) - Math.log10(1-(n-1)*EPS_LOWER_LIMIT) - (n-1)*Math.log10(EPS_LOWER_LIMIT);
-            default:
-                return (ac-1)*Math.log10(af)+ac*Math.log10( (double) n-ac)-(n-ac)*af*Math.log10(Math.E) - MathUtils.log10Gamma(ac);
-        }
-    }
-
-    private double logAC0Coef(int an) {
-        if ( ! logAC0Cache.containsKey(an) ) {
-            double coef = -Math.log10(EPS_LOWER_LIMIT);
-            for ( int k = 1; k <= an; k++ ) {
-                // note this should typically just be
-                // term = ( 1 - Math.pow(EPS_LOWER_LIMIT,k) ) * MathUtils.binomialCoefficient(an,k) / k
-                // but the 1-E term will just be 1, so we do the following to mitigate this problem
-                double binom = MathUtils.binomialCoefficient(an,k);
-                double eps_correction = EPS_LOWER_LIMIT*Math.pow(binom,1/k);
-                double term = binom/k - Math.pow(eps_correction,k);
-                if ( k % 2 == 0 ) {
-                    coef += term;
-                } else {
-                    coef -= term;
-                }
-            }
-
-            logAC0Cache.put(an,coef);
-        }
-
-        return logAC0Cache.get(an);
-    }
-
-    private double adaptiveSimpson(double[] likelihoods, double start, double stop, double err, int cap) {
-        double mid = (start + stop)/2;
-        double size = stop-start;
-        double fa = calculateAFPosterior(likelihoods,start);
-        double fb = calculateAFPosterior(likelihoods,mid);
-        double fc = calculateAFPosterior(likelihoods,stop);
-        double s = (size/6)*(fa + 4*fc + fb);
-        double h = simpAux(likelihoods,start,stop,err,s,fa,fb,fc,cap);
-        return h;
-    }
-
-    private double simpAux(double[] likelihoods, double a,double b,double eps,double s,double fa,double fb,double fc,double cap){
-        if ( s == 0 )
-            return -300.0;
-        double c = ( a + b )/2;
-        double h = b-a;
-        double d = (a + c)/2;
-        double e = (c + b)/2;
-        double fd = calculateAFPosterior(likelihoods, d);
-        double fe = calculateAFPosterior(likelihoods, e);
-        double s_l = (h/12)*(fa + 4*fd + fc);
-        double s_r = (h/12)*(fc + 4*fe + fb);
-        double s_2 = s_l + s_r;
-        if ( cap <= 0 || Math.abs(s_2 - s) <= 15*eps ){
-            return Math.log10(s_2 + (s_2 - s)/15.0);
-        }
-
-        return MathUtils.approximateLog10SumLog10(simpAux(likelihoods,a,c,eps/2,s_l,fa,fc,fd,cap-1),simpAux(likelihoods, c, b, eps / 2, s_r, fc, fb, fe, cap - 1));
-    }
-}
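The deleted UGBoundAF walker integrated the allele-frequency posterior with adaptive Simpson quadrature (adaptiveSimpson/simpAux above, which work in log10 space and combine half-intervals with MathUtils.approximateLog10SumLog10). A self-contained sketch of the same recursion in plain linear space, with illustrative names and none of the GATK types:

public class AdaptiveSimpsonDemo {
    interface F { double at(double x); }

    // Simpson's rule estimate on [a, b] with midpoint m:
    // (b - a) / 6 * (f(a) + 4 f(m) + f(b))
    static double simpson(double a, double fa, double fm, double fb, double b) {
        return (b - a) / 6.0 * (fa + 4.0 * fm + fb);
    }

    // Split the interval until the two half estimates agree with the whole
    // estimate to within 15 * eps; (left + right - whole) / 15 is the usual
    // Richardson correction, mirroring simpAux's termination test above.
    static double integrate(F f, double a, double fa, double m, double fm,
                            double b, double fb, double whole, double eps, int cap) {
        double lm = (a + m) / 2.0, rm = (m + b) / 2.0;
        double flm = f.at(lm), frm = f.at(rm);
        double left = simpson(a, fa, flm, fm, m);
        double right = simpson(m, fm, frm, fb, b);
        if (cap <= 0 || Math.abs(left + right - whole) <= 15.0 * eps)
            return left + right + (left + right - whole) / 15.0;
        return integrate(f, a, fa, lm, flm, m, fm, left, eps / 2.0, cap - 1)
             + integrate(f, m, fm, rm, frm, b, fb, right, eps / 2.0, cap - 1);
    }

    public static void main(String[] args) {
        F f = new F() { public double at(double x) { return x * x; } };
        double a = 0.0, b = 1.0, m = 0.5;
        double fa = f.at(a), fm = f.at(m), fb = f.at(b);
        double whole = simpson(a, fa, fm, fb, b);
        // Integral of x^2 over [0,1] is exactly 1/3
        System.out.println(integrate(f, a, fa, m, fm, b, fb, whole, 1e-9, 20));
    }
}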
@@ -118,6 +118,17 @@ public class UnifiedArgumentCollection {
     @Argument(fullName = "min_indel_count_for_genotyping", shortName = "minIndelCnt", doc = "Minimum number of consensus indels required to trigger genotyping run", required = false)
     public int MIN_INDEL_COUNT_FOR_GENOTYPING = 5;
 
+    /**
+     * Complementary argument to minIndelCnt. Only samples with at least this fraction of indel-containing reads will contribute
+     * to counting and overcoming the threshold minIndelCnt. This parameter ensures that in deep data you don't end
+     * up summing lots of super rare errors up to overcome the 5-read default threshold. It should work equally well for
+     * low-coverage and high-coverage samples, as low-coverage samples with any indel-containing reads should easily
+     * overcome this threshold.
+     */
+    @Argument(fullName = "min_indel_fraction_per_sample", shortName = "minIndelFrac", doc = "Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles", required = false)
+    public double MIN_INDEL_FRACTION_PER_SAMPLE = 0.25;
+
+
     /**
      * This argument informs the prior probability of having an indel at a site.
      */
@@ -165,6 +176,7 @@ public class UnifiedArgumentCollection {
         uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE;
         uac.MAX_DELETION_FRACTION = MAX_DELETION_FRACTION;
         uac.MIN_INDEL_COUNT_FOR_GENOTYPING = MIN_INDEL_COUNT_FOR_GENOTYPING;
+        uac.MIN_INDEL_FRACTION_PER_SAMPLE = MIN_INDEL_FRACTION_PER_SAMPLE;
         uac.INDEL_HETEROZYGOSITY = INDEL_HETEROZYGOSITY;
         uac.INDEL_GAP_OPEN_PENALTY = INDEL_GAP_OPEN_PENALTY;
         uac.INDEL_GAP_CONTINUATION_PENALTY = INDEL_GAP_CONTINUATION_PENALTY;
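The new minIndelFrac argument complements minIndelCnt: a sample contributes its indel reads to the site-level count only when those reads make up at least the given fraction of its coverage, so a deep sample cannot reach the 5-read threshold from scattered errors. A worked check of the arithmetic, using the defaults from the diff (coverage numbers invented for the example):

public class IndelFractionDemo {
    static boolean contributes(int nIndelReads, int nReadsOverall, double minFrac) {
        // same test shape as ConsensusAlleleCounter.countConsensusAlleles
        return nIndelReads > 0 && (nIndelReads / (1.0 * nReadsOverall)) >= minFrac;
    }

    public static void main(String[] args) {
        final double minIndelFrac = 0.25; // MIN_INDEL_FRACTION_PER_SAMPLE default
        final int minIndelCnt = 5;        // MIN_INDEL_COUNT_FOR_GENOTYPING default

        // Deep sample with scattered errors: 6/500 = 0.012 < 0.25 -> contributes 0.
        // Low-coverage sample with a real event: 5/12 ~ 0.42 >= 0.25 -> contributes 5.
        int total = 0;
        total += contributes(6, 500, minIndelFrac) ? 6 : 0;
        total += contributes(5, 12, minIndelFrac) ? 5 : 0;
        System.out.println(total >= minIndelCnt); // true: genotyping is triggered
    }
}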
@@ -137,6 +137,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
     @Output(doc="File to which variants should be written",required=true)
     protected VCFWriter writer = null;
 
+    @Hidden
     @Argument(fullName = "debug_file", shortName = "debug_file", doc = "File to print all of the annotated and detailed debugging output", required = false)
     protected PrintStream verboseWriter = null;
 
@@ -218,7 +219,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
 
         // initialize the verbose writer
         if ( verboseWriter != null )
-            verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tAFposterior\tNormalizedPosterior");
+            verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
 
         annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
         UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, 2*samples.size());
@@ -84,8 +84,8 @@ public class UnifiedGenotyperEngine {
     private ThreadLocal<double[]> posteriorsArray = new ThreadLocal<double[]>();
 
     // because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything
-    private final double[][] log10AlleleFrequencyPriorsSNPs;
-    private final double[][] log10AlleleFrequencyPriorsIndels;
+    private final double[] log10AlleleFrequencyPriorsSNPs;
+    private final double[] log10AlleleFrequencyPriorsIndels;
 
     // the priors object
     private final GenotypePriors genotypePriorsSNPs;
@@ -131,9 +131,8 @@ public class UnifiedGenotyperEngine {
         this.annotationEngine = engine;
 
         this.N = N;
-
-        log10AlleleFrequencyPriorsSNPs = new double[UAC.MAX_ALTERNATE_ALLELES][N+1];
-        log10AlleleFrequencyPriorsIndels = new double[UAC.MAX_ALTERNATE_ALLELES][N+1];
+        log10AlleleFrequencyPriorsSNPs = new double[N+1];
+        log10AlleleFrequencyPriorsIndels = new double[N+1];
         computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity);
         computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, UAC.INDEL_HETEROZYGOSITY);
         genotypePriorsSNPs = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.SNP);
@@ -269,8 +268,8 @@ public class UnifiedGenotyperEngine {
         // initialize the data for this thread if that hasn't been done yet
         if ( afcm.get() == null ) {
             afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
-            alleleFrequencyCalculationResult.set(new AlleleFrequencyCalculationResult(UAC.MAX_ALTERNATE_ALLELES, N));
-            posteriorsArray.set(new double[N + 2]);
+            alleleFrequencyCalculationResult.set(new AlleleFrequencyCalculationResult(UAC.MAX_ALTERNATE_ALLELES));
+            posteriorsArray.set(new double[2]);
         }
         AlleleFrequencyCalculationResult AFresult = alleleFrequencyCalculationResult.get();
 
@@ -283,9 +282,7 @@ public class UnifiedGenotyperEngine {
                     generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext));
         }
 
-        // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
-        clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
-        clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
+        AFresult.reset();
         List<Allele> allelesUsedInGenotyping = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model), AFresult);
 
         // is the most likely frequency conformation AC=0 for all alternate alleles?
@@ -300,12 +297,11 @@ public class UnifiedGenotyperEngine {
             // the genotyping model may have stripped it out
             if ( indexOfAllele == -1 )
                 continue;
 
-            int indexOfBestAC = MathUtils.maxElementIndex(AFresult.log10AlleleFrequencyPosteriors[indexOfAllele-1]);
-
-            // if the most likely AC is not 0, then this is a good alternate allele to use;
-            // make sure to test against log10PosteriorOfAFzero since that no longer is an entry in the array
-            if ( indexOfBestAC != 0 && AFresult.log10AlleleFrequencyPosteriors[indexOfAllele-1][indexOfBestAC] > AFresult.log10PosteriorOfAFzero ) {
+            int indexOfBestAC = AFresult.getAlleleCountsOfMAP()[indexOfAllele-1];
+
+            // if the most likely AC is not 0, then this is a good alternate allele to use
+            if ( indexOfBestAC != 0 ) {
                 myAlleles.add(alternateAllele);
                 bestGuessIsRef = false;
             }
@@ -316,7 +312,6 @@ public class UnifiedGenotyperEngine {
         }
 
         // calculate p(f>0):
-        // because the likelihoods are marginalized for each alternate allele, we only need to compare log10PosteriorOfAFzero against any one of them
         final double[] normalizedPosteriors = generateNormalizedPosteriors(AFresult, posteriorsArray.get());
         final double PofF = 1.0 - normalizedPosteriors[0];
 
@@ -324,18 +319,11 @@ public class UnifiedGenotyperEngine {
         if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
             phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
             if ( Double.isInfinite(phredScaledConfidence) )
-                phredScaledConfidence = -10.0 * AFresult.log10PosteriorOfAFzero;
+                phredScaledConfidence = -10.0 * AFresult.getLog10PosteriorOfAFzero();
         } else {
             phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
             if ( Double.isInfinite(phredScaledConfidence) ) {
-                double sum = AFresult.log10AlleleFrequencyPosteriors[0][0];
-                if ( sum == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
-                    sum = 0.0;
-                for (int i = 1; i <= N; i++) {
-                    if ( AFresult.log10AlleleFrequencyPosteriors[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
-                        break;
-                    sum += AFresult.log10AlleleFrequencyPosteriors[0][i];
-                }
+                final double sum = AFresult.getLog10PosteriorMatrixSum();
                 phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
             }
         }
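The fallback branches above exist because phred scaling blows up when a normalized probability underflows to zero: the code then reuses the still-finite log10 posterior directly. The identity being exploited, as a standalone sketch (numbers illustrative):

public class PhredDemo {
    public static void main(String[] args) {
        // QUAL = -10 * log10(P(error)), here with P(error) = P(AF = 0 | data)
        double pAFzero = 1e-5;
        System.out.println(-10.0 * Math.log10(pAFzero)); // 50.0

        // If pAFzero underflows to 0.0, the linear-space formula diverges...
        System.out.println(-10.0 * Math.log10(0.0)); // Infinity
        // ...but the log10 posterior itself is still finite, so
        // -10 * log10PosteriorOfAFzero remains usable:
        double log10PosteriorOfAFzero = -350.0;
        System.out.println(-10.0 * log10PosteriorOfAFzero); // 3500.0
    }
}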
@@ -364,7 +352,7 @@ public class UnifiedGenotyperEngine {
 
         // print out stats if we have a writer
         if ( verboseWriter != null && !limitedContext )
-            printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, normalizedPosteriors, model);
+            printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, model);
 
         // *** note that calculating strand bias involves overwriting data structures, so we do that last
         final HashMap<String, Object> attributes = new HashMap<String, Object>();
@@ -378,29 +366,27 @@ public class UnifiedGenotyperEngine {

// the overall lod
//double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
double overallLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
double overallLog10PofF = AFresult.getLog10PosteriorMatrixSum();
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);

List<Allele> alternateAllelesToUse = builder.make().getAlternateAlleles();

// the forward lod
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, alternateAllelesToUse, false, model);
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
AFresult.reset();
afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model), AFresult);
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
double forwardLog10PofNull = AFresult.log10PosteriorOfAFzero;
double forwardLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
double forwardLog10PofNull = AFresult.getLog10PosteriorOfAFzero();
double forwardLog10PofF = AFresult.getLog10PosteriorMatrixSum();
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);

// the reverse lod
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, alternateAllelesToUse, false, model);
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
AFresult.reset();
afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model), AFresult);
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
double reverseLog10PofNull = AFresult.log10PosteriorOfAFzero;
double reverseLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
double reverseLog10PofNull = AFresult.getLog10PosteriorOfAFzero();
double reverseLog10PofF = AFresult.getLog10PosteriorMatrixSum();
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);

double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
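To make the strand-bias arithmetic above concrete, a worked example with made-up numbers; the symmetric reverseLod and the final SLOD combination fall outside this hunk, so the closing comment is an inference.

static double forwardLodExample() {
    final double overallLog10PofF    = -2.0;   // log10 P(variant | all reads)
    final double forwardLog10PofF    = -1.5;   // log10 P(variant | forward reads only)
    final double reverseLog10PofNull = -0.5;   // log10 P(no variant | reverse reads only)
    // How much better does "variant on the forward strand, reference on the
    // reverse strand" explain the data than the overall variant model?
    return forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;   // = 0.0 here
    // A symmetric reverseLod would presumably swap the strand roles.
}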
@@ -437,9 +423,9 @@ public class UnifiedGenotyperEngine {
    return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
}

private double[] generateNormalizedPosteriors(AlleleFrequencyCalculationResult AFresult, double[] normalizedPosteriors) {
    normalizedPosteriors[0] = AFresult.log10PosteriorOfAFzero;
    System.arraycopy(AFresult.log10AlleleFrequencyPosteriors[0], 0, normalizedPosteriors, 1, normalizedPosteriors.length-1);
public static double[] generateNormalizedPosteriors(final AlleleFrequencyCalculationResult AFresult, final double[] normalizedPosteriors) {
    normalizedPosteriors[0] = AFresult.getLog10PosteriorOfAFzero();
    normalizedPosteriors[1] = AFresult.getLog10PosteriorMatrixSum();
    return MathUtils.normalizeFromLog10(normalizedPosteriors);
}
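A plausible sketch of what a log10-space normalization such as MathUtils.normalizeFromLog10 has to do (an independent re-derivation, not the Sting source): subtract the maximum to avoid underflow, exponentiate, and rescale so the entries sum to 1.

static double[] normalizeFromLog10Sketch(final double[] log10p) {
    double max = Double.NEGATIVE_INFINITY;
    for (final double v : log10p)
        max = Math.max(max, v);                    // guard against 10^-300-style underflow
    final double[] real = new double[log10p.length];
    double sum = 0.0;
    for (int i = 0; i < log10p.length; i++)
        sum += real[i] = Math.pow(10.0, log10p[i] - max);
    for (int i = 0; i < real.length; i++)
        real[i] /= sum;                            // real-space probabilities summing to 1
    return real;
}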
@@ -499,14 +485,6 @@ public class UnifiedGenotyperEngine {
    return stratifiedContexts;
}

protected static void clearAFarray(double[][] AFs) {
    for ( int i = 0; i < AFs.length; i++ ) {
        for ( int j = 0; j < AFs[i].length; j++ ) {
            AFs[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
        }
    }
}

private final static double[] binomialProbabilityDepthCache = new double[10000];
static {
    for ( int i = 1; i < binomialProbabilityDepthCache.length; i++ ) {
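The static block above precomputes a per-depth lookup table; its loop body lies outside this hunk. The general compute-once pattern it follows, with a stand-in computation (the real cache entries are something else):

private static final double[] depthCacheSketch = new double[10000];
static {
    for (int depth = 1; depth < depthCacheSketch.length; depth++)
        depthCacheSketch[depth] = Math.log10(depth);   // hypothetical stand-in, computed once per JVM
}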
@@ -551,7 +529,7 @@ public class UnifiedGenotyperEngine {
    return new VariantCallContext(vc, QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false);
}

protected void printVerboseData(String pos, VariantContext vc, double PofF, double phredScaledConfidence, double[] normalizedPosteriors, final GenotypeLikelihoodsCalculationModel.Model model) {
protected void printVerboseData(String pos, VariantContext vc, double PofF, double phredScaledConfidence, final GenotypeLikelihoodsCalculationModel.Model model) {
    Allele refAllele = null, altAllele = null;
    for ( Allele allele : vc.getAlleles() ) {
        if ( allele.isReference() )
@@ -574,11 +552,8 @@ public class UnifiedGenotyperEngine {
    AFline.append(i + "/" + N + "\t");
    AFline.append(String.format("%.2f\t", ((float)i)/N));
    AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i]));
    if ( alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED)
        AFline.append("0.00000000\t");
    else
        AFline.append(String.format("%.8f\t", alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[i]));
    AFline.append(String.format("%.8f\t", normalizedPosteriors[i]));
    AFline.append(String.format("%.8f\t", alleleFrequencyCalculationResult.get().getLog10MLE()));
    AFline.append(String.format("%.8f\t", alleleFrequencyCalculationResult.get().getLog10MAP()));
    verboseWriter.println(AFline.toString());
}
@@ -648,31 +623,29 @@ public class UnifiedGenotyperEngine {
    return null;
}

protected static void computeAlleleFrequencyPriors(final int N, final double[][] priors, final double theta) {
protected static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double theta) {

    // the dimension here is the number of alternate alleles; with e.g. 2 alternate alleles the prior will be theta^2 / i
    for (int alleles = 1; alleles <= priors.length; alleles++) {
        double sum = 0.0;
    double sum = 0.0;

        // for each i
        for (int i = 1; i <= N; i++) {
            double value = Math.pow(theta, alleles) / (double)i;
            priors[alleles-1][i] = Math.log10(value);
            sum += value;
        }

        // null frequency for AF=0 is (1 - sum(all other frequencies))
        priors[alleles-1][0] = Math.log10(1.0 - sum);
    // for each i
    for (int i = 1; i <= N; i++) {
        final double value = theta / (double)i;
        priors[i] = Math.log10(value);
        sum += value;
    }

    // null frequency for AF=0 is (1 - sum(all other frequencies))
    priors[0] = Math.log10(1.0 - sum);
}
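A quick numeric check of the new one-dimensional prior: with theta = 0.001 and N = 4, P(AF=i) = theta/i gives 0.001, 0.0005, 0.000333..., 0.00025, and P(AF=0) = 1 - 0.002083... = 0.997916..., so the prior is a proper distribution. A standalone sanity check (test scaffolding, not engine code):

static void checkPriorSumsToOne(final int N, final double theta) {
    final double[] priors = new double[N + 1];
    double sum = 0.0;
    for (int i = 1; i <= N; i++) {
        final double value = theta / i;
        priors[i] = Math.log10(value);
        sum += value;
    }
    priors[0] = Math.log10(1.0 - sum);
    double total = 0.0;
    for (final double log10p : priors)
        total += Math.pow(10.0, log10p);       // back to real space
    assert Math.abs(total - 1.0) < 1e-12;      // the prior sums to exactly 1
}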
protected double[][] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
protected double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
    if (model.name().toUpperCase().contains("SNP"))
        return log10AlleleFrequencyPriorsSNPs;
    else if (model.name().toUpperCase().contains("INDEL"))
        return log10AlleleFrequencyPriorsIndels;
    else
        throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
}

private static GenotypePriors createGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
@@ -25,19 +25,13 @@ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;

import org.broadinstitute.sting.gatk.walkers.genotyper.AlleleFrequencyCalculationResult;
import org.broadinstitute.sting.gatk.walkers.genotyper.ExactAFCalculationModel;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;


public class GLBasedSampleSelector extends SampleSelector {
    Map<Integer,double[][]> numAllelePriorMatrix = new HashMap<Integer,double[][]>();
    double[] flatPriors = null;
    double referenceLikelihood;
    public GLBasedSampleSelector(TreeSet<String> sm, double refLik) {
        super(sm);
@@ -53,9 +47,11 @@ public class GLBasedSampleSelector extends SampleSelector {

// now check to see (using EXACT model) whether this should be variant
// do we want to apply a prior? maybe user-spec?
double[][] flatPrior = createFlatPrior(vc.getAlleles());
AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(vc.getAlternateAlleles().size(),2*samples.size());
ExactAFCalculationModel.linearExactMultiAllelic(subContext.getGenotypes(),vc.getAlternateAlleles().size(),flatPrior,result,true);
if ( flatPriors == null ) {
    flatPriors = new double[1+2*samples.size()];
}
AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(vc.getAlternateAlleles().size());
ExactAFCalculationModel.linearExactMultiAllelic(subContext.getGenotypes(),vc.getAlternateAlleles().size(),flatPriors,result);
// do we want to let this qual go up or down?
if ( result.getLog10PosteriorOfAFzero() < referenceLikelihood ) {
    return true;
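Why a bare new double[] works as a flat prior here: Java zero-fills the array and log10(1) == 0, so every allele-frequency configuration starts with (unnormalized) prior weight 1. Illustrative, with nSamples standing in for samples.size():

final double[] flatPriors = new double[1 + 2 * nSamples];   // nSamples is illustrative
// flatPriors[i] == 0.0  ==>  prior(AF = i) proportional to 10^0 == 1 for every i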
@@ -63,12 +59,4 @@ public class GLBasedSampleSelector extends SampleSelector {

    return false;
}

private double[][] createFlatPrior(List<Allele> alleles) {
    if ( ! numAllelePriorMatrix.containsKey(alleles.size()) ) {
        numAllelePriorMatrix.put(alleles.size(), new double[alleles.size()][1+2*samples.size()]);
    }

    return numAllelePriorMatrix.get(alleles.size());
}
}
@@ -371,18 +371,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
// find the comp
final VariantContext comp = findMatchingComp(eval, compSet);

HashMap<VariantStratifier, List<String>> stateMap = new HashMap<VariantStratifier, List<String>>();
for ( VariantStratifier vs : stratificationObjects ) {
    List<String> states = vs.getRelevantStates(ref, tracker, comp, compRod.getName(), eval, evalRod.getName(), sampleName);
    stateMap.put(vs, states);
}

ArrayList<StateKey> stateKeys = new ArrayList<StateKey>();
variantEvalUtils.initializeStateKeys(stateMap, null, null, stateKeys);

HashSet<StateKey> stateKeysHash = new HashSet<StateKey>(stateKeys);

for ( StateKey stateKey : stateKeysHash ) {
for ( StateKey stateKey : getApplicableStates(tracker, ref, eval, evalRod.getName(), comp, compRod.getName(), sampleName) ) {
    NewEvaluationContext nec = evaluationContexts.get(stateKey);

    // eval against the comp
@@ -410,6 +399,73 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
    return null;
}

//    private Iterable<StateKey> getApplicableStates(final RefMetaDataTracker tracker,
//                                                   final ReferenceContext ref,
//                                                   final VariantContext eval,
//                                                   final String evalName,
//                                                   final VariantContext comp,
//                                                   final String compName,
//                                                   final String sampleName ) {
//        Set<StateKey> oldKeys = new HashSet<StateKey>(Utils.makeCollection(getApplicableStatesOld(tracker, ref, eval, evalName, comp, compName, sampleName)));
//
//        int n = 0;
//        for ( final StateKey newKey : getApplicableStatesNew(tracker, ref, eval, evalName, comp, compName, sampleName) ) {
//            n++;
//            if ( ! oldKeys.contains(newKey) )
//                throw new ReviewedStingException("New key " + newKey + " missing from previous algorithm");
//        }
//
//        if ( n != oldKeys.size() )
//            throw new ReviewedStingException("New keyset has " + n + " elements but previous algorithm had " + oldKeys.size());
//
//        return oldKeys;
//    }

//    private Iterable<StateKey> getApplicableStatesNew(final RefMetaDataTracker tracker,
//                                                      final ReferenceContext ref,
//                                                      final VariantContext eval,
//                                                      final String evalName,
//                                                      final VariantContext comp,
//                                                      final String compName,
//                                                      final String sampleName ) {
//        // todo -- implement optimized version
//    }
/**
 * Given specific eval and comp VCs and the sample name, return an iterable
 * over all of the applicable state keys.
 *
 * See header of StateKey for performance problems...
 *
 * @param tracker the meta-data tracker at the current locus
 * @param ref the reference context at the current locus
 * @param eval the eval variant context
 * @param evalName the name of the eval rod
 * @param comp the matching comp variant context
 * @param compName the name of the comp rod
 * @param sampleName the name of the sample being evaluated
 * @return an iterable over all applicable state keys
 */
private Iterable<StateKey> getApplicableStates(final RefMetaDataTracker tracker,
                                               final ReferenceContext ref,
                                               final VariantContext eval,
                                               final String evalName,
                                               final VariantContext comp,
                                               final String compName,
                                               final String sampleName ) {
    final HashMap<VariantStratifier, List<String>> stateMap = new HashMap<VariantStratifier, List<String>>(stratificationObjects.size());
    for ( final VariantStratifier vs : stratificationObjects ) {
        List<String> states = vs.getRelevantStates(ref, tracker, comp, compName, eval, evalName, sampleName);
        stateMap.put(vs, states);
    }

    ArrayList<StateKey> stateKeys = new ArrayList<StateKey>();
    variantEvalUtils.initializeStateKeys(stateMap, null, null, stateKeys);

    return new HashSet<StateKey>(stateKeys);
}

@Requires({"comp != null", "evals != null"})
private boolean compHasMatchingEval(final VariantContext comp, final Collection<VariantContext> evals) {
    // find all of the matching comps
@@ -482,11 +538,13 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
public void onTraversalDone(Integer result) {
    logger.info("Finalizing variant report");

    for ( StateKey stateKey : evaluationContexts.keySet() ) {
        NewEvaluationContext nec = evaluationContexts.get(stateKey);
    for ( Map.Entry<StateKey, NewEvaluationContext> ecElt : evaluationContexts.entrySet() ) {
        final StateKey stateKey = ecElt.getKey();
        final NewEvaluationContext nec = ecElt.getValue();

        for ( VariantEvaluator ve : nec.getEvaluationClassList().values() ) {
            ve.finalizeEvaluation();
            final String veName = ve.getSimpleName(); // ve.getClass().getSimpleName();

            AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve);
            Map<Field, DataPoint> datamap = scanner.getData();
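The switch from keySet() to entrySet() above removes one map lookup per iteration. A minimal illustration with generic names (not from the walker):

static long sumValues(final Map<String, Integer> someMap) {
    long total = 0;
    for (final Map.Entry<String, Integer> e : someMap.entrySet())
        total += e.getValue();                 // one traversal, no per-key get()
    // the keySet() form pays someMap.get(k) -- an extra hash and probe -- every iteration
    return total;
}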
@@ -498,7 +556,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
if (field.get(ve) instanceof TableType) {
    TableType t = (TableType) field.get(ve);

    final String subTableName = ve.getClass().getSimpleName() + "." + field.getName();
    final String subTableName = veName + "." + field.getName();
    final DataPoint dataPointAnn = datamap.get(field);

    GATKReportTable table;
@@ -539,11 +597,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
        }
    }
} else {
    GATKReportTable table = report.getTable(ve.getClass().getSimpleName());
    GATKReportTable table = report.getTable(veName);

    for ( VariantStratifier vs : stratificationObjects ) {
        String columnName = vs.getName();
        final String columnName = vs.getName();
        table.set(stateKey.toString(), columnName, stateKey.get(vs.getName()));
    }
@@ -8,6 +8,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;

public abstract class VariantEvaluator {
    private VariantEvalWalker walker;
    private final String simpleName;

    protected VariantEvaluator() {
        this.simpleName = getClass().getSimpleName();
    }

    public void initialize(VariantEvalWalker walker) {
        this.walker = walker;

@@ -90,4 +95,8 @@ public abstract class VariantEvaluator {
    protected static String formattedRatio(final int num, final int denom) {
        return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom));
    }

    public String getSimpleName() {
        return simpleName;
    }
}
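getSimpleName() is cached because Class.getSimpleName() re-derives the name from the class descriptor on every call, and the report-writing loops call it per evaluator per state. The compute-once pattern in miniature (illustrative class):

abstract class CachedNameSketch {
    private final String simpleName = getClass().getSimpleName();   // reflective cost paid once
    public final String getSimpleName() { return simpleName; }      // plain field read thereafter
}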
@@ -27,6 +27,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

import java.lang.annotation.Annotation;
import java.lang.reflect.Field;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;

@@ -40,6 +41,7 @@ import java.util.Map;
 * the object, a Marshalling object can serialize or deserialize an analysis module.
 */
public class AnalysisModuleScanner {
    final private static Map<String, Annotation[]> annotationCache = new HashMap<String, Annotation[]>();

    // what we extracted from the class
    private Map<Field, DataPoint> datums = new LinkedHashMap<Field, DataPoint>(); // the data we've discovered
@@ -84,12 +86,22 @@ public class AnalysisModuleScanner {
    // get the fields from the class, and extract
    for ( Class superCls = cls; superCls != null; superCls=superCls.getSuperclass() ) {
        for (Field f : superCls.getDeclaredFields())
            for (Annotation annotation : f.getAnnotations()) {
            for (Annotation annotation : getAnnotations(f)) {
                if (annotation.annotationType().equals(DataPoint.class))
                    datums.put(f,(DataPoint) annotation);
            }
    }
}

private Annotation[] getAnnotations(final Field field) {
    final String fieldName = field.toString();
    Annotation[] annotations = annotationCache.get(fieldName);
    if ( annotations == null ) {
        annotations = field.getAnnotations();
        annotationCache.put(fieldName, annotations);
    }
    return annotations;
}
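The cache above memoizes reflective Field.getAnnotations() calls, keyed by the field's toString(). The static HashMap is unsynchronized; if the scanner were ever driven from multiple threads, a drop-in variant could look like this (my assumption, not part of this commit):

private static final java.util.concurrent.ConcurrentHashMap<String, Annotation[]> safeCache =
        new java.util.concurrent.ConcurrentHashMap<String, Annotation[]>();

private Annotation[] getAnnotationsThreadSafe(final Field field) {   // hypothetical alternative
    final String fieldName = field.toString();
    Annotation[] annotations = safeCache.get(fieldName);
    if ( annotations == null ) {
        annotations = field.getAnnotations();
        safeCache.putIfAbsent(fieldName, annotations);   // racing writers store the same value
    }
    return annotations;
}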
/**
 *
@@ -36,6 +36,16 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
    return new TreeMap<String, VariantEvaluator>(evaluationInstances);
}

public StateKey makeStateKey() {
    Map<String, String> map = new HashMap<String, String>(size());

    for (Map.Entry<VariantStratifier, String> elt : this.entrySet() ) {
        map.put(elt.getKey().getName(), elt.getValue());
    }

    return new StateKey(map);
}

public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) {
    for ( final VariantEvaluator evaluation : evaluationInstances.values() ) {
        // the other updateN methods don't see a null context
@@ -3,25 +3,102 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util;
import java.util.Map;
import java.util.TreeMap;

public class StateKey extends TreeMap<String, String> {
//    public int hashCode() {
//        int hashCode = 1;
//
//        for (final Map.Entry<String,String> pair : this.entrySet()) {
//            hashCode *= pair.getKey().hashCode() + pair.getValue().hashCode();
//        }
//
//        return hashCode;
//    }
/**
 * A final constant class representing the specific state configuration
 * for a VariantEvaluator instance.
 *
 * The way this is currently implemented is by a map from the name of a VariantStratification to a
 * specific state string. For example, the stratification Novelty has states all, known, novel. A
 * specific variant and comp would be tagged as "known" by the stratification, and this could be
 * represented here by the map (Novelty -> known).
 *
 * TODO -- PERFORMANCE PROBLEM -- MAD 03/27/12
 * TODO -- PERFORMANCE PROBLEM -- MAD 03/27/12
 * TODO -- PERFORMANCE PROBLEM -- MAD 03/27/12
 * TODO -- PERFORMANCE PROBLEM -- MAD 03/27/12
 * TODO -- PERFORMANCE PROBLEM -- MAD 03/27/12
 *
 * I've been staring at this state key code for a while. It's just not right, and expensive to boot.
 * Here are my thoughts for future work. The state key is a key with specific state values for
 * every stratification. For example, (known, sample1, ac=1). This capability is used in some places,
 * such as below, to return a set of all states that should be updated given the eval and comp
 * VCs. In principle there are a finite set of such combinations (the product of all states for all active
 * stratifications at initialization). We could represent such keys as integers into the set of all combinations.
 *
 * Note that all of the code that manipulates these things is just terrible. It's all string manipulation and
 * HashMaps. Since we are effectively always squaring off our VE analyses (i.e., we have a table with
 * all variable values for all stratification combinations) it doesn't make sense to allow so much dynamism. Instead
 * we should just upfront create a giant table indexed by integer keys, and manage data via a simple map from
 * specific strat state to this key.
 *
 * The reason this is so important is that >80% of the runtime of VE with VCFs with >1000 samples is spent in
 * the initializeStateKey function. Instead, we should have code that looks like:
 *
 * init:
 *   allStates <- initializeCombinationalStateSpace
 *
 * map:
 *   for each eval / comp pair:
 *     for each relevantState based on eval / comp:
 *       allStates[relevantState].update(eval, comp)
 *
 */
public final class StateKey {
    /** High-performance cache of the toString operation for a constant class */
    private final String string;
    private final TreeMap<String, String> states;

    public String toString() {
        String value = "";
    public StateKey(final Map<String, String> states) {
        this.states = new TreeMap<String, String>(states);
        this.string = formatString();
    }

        for ( final String key : this.keySet() ) {
            //value += "\tstate " + key + ":" + this.get(key) + "\n";
            value += String.format("%s:%s;", key, this.get(key));
    public StateKey(final StateKey toOverride, final String keyOverride, final String valueOverride) {
        if ( toOverride == null ) {
            this.states = new TreeMap<String, String>();
        } else {
            this.states = new TreeMap<String, String>(toOverride.states);
        }

        return value;
        this.states.put(keyOverride, valueOverride);
        this.string = formatString();
    }

    @Override
    public boolean equals(final Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        final StateKey stateKey = (StateKey) o;

        if (states != null ? !states.equals(stateKey.states) : stateKey.states != null) return false;

        return true;
    }

    @Override
    public int hashCode() {
        return states.hashCode();
    }

    @Override
    public String toString() {
        return string;
    }

    private final String formatString() {
        StringBuilder b = new StringBuilder();

        for ( Map.Entry<String, String> entry : states.entrySet() ) {
            b.append(String.format("%s:%s;", entry.getKey(), entry.getValue()));
        }

        return b.toString();
    }

    // TODO -- might be slow because of tree map
    public String get(final String key) {
        return states.get(key);
    }
}
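A minimal sketch of the integer-key idea proposed in the javadoc above: if stratification s has k_s states, a complete state assignment maps to a unique index in [0, prod(k_s)) via mixed-radix encoding, turning the string-built keys into direct slots in one big table. All names are illustrative.

final class StateIndexerSketch {
    private final int[] radices;                       // number of states per stratification

    StateIndexerSketch(final int[] statesPerStrat) {
        this.radices = statesPerStrat.clone();
    }

    /** stateOrdinals[s] is the chosen state's ordinal within stratification s. */
    int encode(final int[] stateOrdinals) {
        int index = 0;
        for (int s = 0; s < radices.length; s++)
            index = index * radices[s] + stateOrdinals[s];
        return index;                                  // direct slot into one big table
    }

    int size() {
        int n = 1;
        for (final int r : radices)
            n *= r;
        return n;                                      // table size = product of state counts
    }
}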
@@ -214,20 +214,9 @@ public class VariantEvalUtils {
        ecs.putAll(initializeEvaluationContexts(stratificationObjects, evaluationObjects, newStratStack, nec));
    }
} else {
    HashMap<StateKey, NewEvaluationContext> necs = new HashMap<StateKey, NewEvaluationContext>();

    StateKey stateKey = new StateKey();
    for (VariantStratifier vs : ec.keySet()) {
        String state = ec.get(vs);

        stateKey.put(vs.getName(), state);
    }

    final StateKey stateKey = ec.makeStateKey();
    ec.addEvaluationClassList(variantEvalWalker, stateKey, evaluationObjects);

    necs.put(stateKey, ec);

    return necs;
    return new HashMap<StateKey, NewEvaluationContext>(Collections.singletonMap(stateKey, ec));
}

return ecs;
@@ -428,14 +417,8 @@ public class VariantEvalUtils {
HashMap<VariantStratifier, List<String>> oneSetOfStates = newStateStack.pop();
VariantStratifier vs = oneSetOfStates.keySet().iterator().next();

for (String state : oneSetOfStates.get(vs)) {
    StateKey newStateKey = new StateKey();
    if (stateKey != null) {
        newStateKey.putAll(stateKey);
    }

    newStateKey.put(vs.getName(), state);

for (final String state : oneSetOfStates.get(vs)) {
    final StateKey newStateKey = new StateKey(stateKey, vs.getName(), state);
    initializeStateKeys(stateMap, newStateStack, newStateKey, stateKeys);
}
} else {
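The recursion above enumerates the cartesian product of per-stratification state lists via the stack of remaining stratifications. The same idea as a compact standalone method (illustrative names; assumes java.util imports):

static void cartesianProduct(final List<List<String>> stateLists, final int depth,
                             final Deque<String> partial, final List<List<String>> out) {
    if (depth == stateLists.size()) {
        out.add(new ArrayList<String>(partial));       // one complete state combination
        return;
    }
    for (final String state : stateLists.get(depth)) {
        partial.addLast(state);
        cartesianProduct(stateLists, depth + 1, partial, out);
        partial.removeLast();                          // backtrack
    }
}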
@@ -182,7 +182,6 @@ public class Haplotype {
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<Allele> alleleList, int startPos, ReferenceContext ref,
                                                                           final int haplotypeSize, final int numPrefBases) {

    LinkedHashMap<Allele,Haplotype> haplotypeMap = new LinkedHashMap<Allele,Haplotype>();

    Allele refAllele = null;

@@ -215,13 +214,13 @@ public class Haplotype {

    // Create location for all haplotypes
    int startLoc = ref.getWindow().getStart() + startIdxInReference;
    int stopLoc = startLoc + haplotypeSize-1;
    final int startLoc = ref.getWindow().getStart() + startIdxInReference;
    final int stopLoc = startLoc + haplotypeSize-1;

    GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc);
    final GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc);

    for (Allele a : alleleList) {
    for (final Allele a : alleleList) {

        byte[] alleleBases = a.getBases();
        // use string concatenation

@@ -315,5 +314,4 @@ public class Haplotype {

    return (fallsInsideDeletion ? -1 : readBases);
}
}
@@ -234,6 +234,9 @@ public class MathUtils {
double sum = 0.0;

double maxValue = Utils.findMaxEntry(log10p);
if(maxValue == Double.NEGATIVE_INFINITY)
    return sum;

for (int i = start; i < finish; i++) {
    sum += Math.pow(10.0, log10p[i] - maxValue);
}
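A worked example of the max-subtraction in the loop above, plus why the new guard matters:

// Summing log10 values {-300.0, -300.3} naively underflows to 0 in real space.
// With the trick:  max = -300.0
//   sum  = 10^0 + 10^(-0.3)  ~=  1.0 + 0.501  =  1.501
//   log10 result = max + log10(sum)  ~=  -300.0 + 0.176  =  -299.82
// The NEGATIVE_INFINITY guard covers the all-zero-probability input, where
// log10p[i] - maxValue would be (-inf) - (-inf) = NaN without it.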
@@ -1508,6 +1511,24 @@ public class MathUtils {
    return result;
}

/** Same routine, unboxed types for efficiency
 *
 * @param x first input vector
 * @param y second input vector
 * @return Vector of same length as x and y so that z[k] = x[k]+y[k]
 */
public static double[] vectorSum(double[] x, double[] y) {
    if (x.length != y.length)
        throw new ReviewedStingException("BUG: Lengths of x and y must be the same");

    double[] result = new double[x.length];
    for (int k = 0; k < x.length; k++)
        result[k] = x[k] + y[k];

    return result;
}

public static <E extends Number> Double[] scalarTimesVector(E a, E[] v1) {

    Double result[] = new Double[v1.length];
@@ -703,4 +703,11 @@ public class Utils {
    return programRecord;
}

public static <E> Collection<E> makeCollection(Iterable<E> iter) {
    Collection<E> list = new ArrayList<E>();
    for (E item : iter) {
        list.add(item);
    }
    return list;
}
}
@@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;

import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;

@@ -18,7 +17,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
static double[] AA1, AB1, BB1;
static double[] AA2, AB2, AC2, BB2, BC2, CC2;
static final int numSamples = 3;
static double[][] priors = new double[2][2*numSamples+1]; // flat priors
static double[] priors = new double[2*numSamples+1]; // flat priors

@BeforeSuite
public void before() {
@@ -83,26 +82,16 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
@Test(dataProvider = "getGLs")
public void testGLs(GetGLsTest cfg) {

    final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(2, 2*numSamples);
    for ( int i = 0; i < 2; i++ ) {
        for ( int j = 0; j < 2*numSamples+1; j++ ) {
            result.log10AlleleFrequencyLikelihoods[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
            result.log10AlleleFrequencyPosteriors[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
        }
    }
    final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(2);

    ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, result, false);
    ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, result);

    int nameIndex = 1;
    for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
        int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
        int calculatedAlleleCount = MathUtils.maxElementIndex(result.log10AlleleFrequencyPosteriors[allele]);
        int calculatedAlleleCount = result.getAlleleCountsOfMAP()[allele];

        if ( result.log10AlleleFrequencyPosteriors[0][0] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) {
            Assert.assertTrue(calculatedAlleleCount == expectedAlleleCount || result.log10AlleleFrequencyPosteriors[0][calculatedAlleleCount] < result.log10PosteriorOfAFzero);
        } else {
            Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
        }
        Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
    }
}
}
@@ -17,8 +17,8 @@ import java.util.Map;

public class UnifiedGenotyperIntegrationTest extends WalkerTest {

    private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b36dbSNP129;
    private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b36dbSNP129;
    private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
    private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
    private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132;

    // --------------------------------------------------------------------------------------------------------------
@@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
    WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
            baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
            Arrays.asList("8f81a14fffc1a59b4b066f8595dc1232"));
            Arrays.asList("ac3737b4212f634a03c640c83f670955"));
    executeTest("test MultiSample Pilot1", spec);
}
@@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultipleSNPAlleles() {
    WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
            "-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1,
            Arrays.asList("0de4aeed6a52f08ed86a7642c812478b"));
            Arrays.asList("6f70dfbaf3bb70c702f9e9dbacd67c17"));
    executeTest("test Multiple SNP alleles", spec);
}
@@ -138,7 +138,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSLOD() {
    WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
            "-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
            Arrays.asList("6172d2f3d370132f4c57a26aa94c256e"));
            Arrays.asList("e9d23a08472e4e27b4f25e844f5bad57"));
    executeTest("test SLOD", spec);
}
@@ -146,8 +146,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testOutputParameter() {
    HashMap<String, String> e = new HashMap<String, String>();
    e.put( "-sites_only", "44f3b5b40e6ad44486cddfdb7e0bfcd8" );
    e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "553f6b4cbf380885bec9dd634cf68742" );
    e.put( "--output_mode EMIT_ALL_SITES", "6d8624e45ad9dae5803ac705b39e4ffa" );
    e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "ecf92054c1e4bd9d6529b8002d385165" );
    e.put( "--output_mode EMIT_ALL_SITES", "119c9fcefbc69e0fc10b1dc52f6438e3" );

    for ( Map.Entry<String, String> entry : e.entrySet() ) {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@@ -300,13 +300,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSampleIndels() {
    WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
            baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
            Arrays.asList("52340d578a708fa709b69ce48987bc9d"));
            Arrays.asList("fbc48d7d9e622c9af7922f91bc858151"));
    List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();

    WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
            baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
                    "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
            Arrays.asList("9566c7abef5ee5829a516d90445b347f"));
            Arrays.asList("94c52ef70e44709ccd947d32e9c27da9"));
    executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
}
@@ -335,4 +335,37 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
            UserException.class);
    executeTest("testSnpEffAnnotationRequestedWithoutRodBinding", spec);
}

// --------------------------------------------------------------------------------------------------------------
//
// testing MinIndelFraction
//
// --------------------------------------------------------------------------------------------------------------

final static String assessMinIndelFraction = baseCommandIndelsb37 + " -I " + validationDataLocation
        + "978604.bam -L 1:978,586-978,626 -o %s --sites_only -rf Sample -goodSM 7377 -goodSM 22-0022 -goodSM 134 -goodSM 344029-53 -goodSM 14030";

@Test
public void testMinIndelFraction0() {
    WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
            assessMinIndelFraction + " -minIndelFrac 0.0", 1,
            Arrays.asList("f08ff07ad49d388198c1887baad05977"));
    executeTest("test minIndelFraction 0.0", spec);
}

@Test
public void testMinIndelFraction25() {
    WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
            assessMinIndelFraction + " -minIndelFrac 0.25", 1,
            Arrays.asList("a0945fd21369aaf68c7f1d96dbb930d1"));
    executeTest("test minIndelFraction 0.25", spec);
}

@Test
public void testMinIndelFraction100() {
    WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
            assessMinIndelFraction + " -minIndelFrac 1", 1,
            Arrays.asList("50fe9a4c5633f6395b45d9ec1e00d56a"));
    executeTest("test minIndelFraction 1.0", spec);
}
}
@@ -0,0 +1,90 @@
package org.broadinstitute.sting.gatk.walkers.validation;

import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;

import java.util.Arrays;

/**
 * Created by IntelliJ IDEA.
 * User: delangel
 * Date: 3/26/12
 * Time: 3:29 PM
 * To change this template use File | Settings | File Templates.
 */
public class ValidationSiteSelectorIntegrationTest extends WalkerTest {
    public static String baseTestString(String args) {
        return "-T ValidationSiteSelector -R " + b36KGReference + " -L 1 -o %s -NO_HEADER -numSites 100 " + args;
    }

    private static String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
    private static String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
    private static String samplePrefix = " -sf " + samplesFile;
    private static String freqUnif = " --frequencySelectionMode UNIFORM ";
    private static String freqAF = " --frequencySelectionMode KEEP_AF_SPECTRUM ";
    private static String sampleNone = " -sampleMode NONE ";
    private static String sampleGT = samplePrefix + " -sampleMode POLY_BASED_ON_GT ";
    private static String sampleGL = samplePrefix + " -sampleMode POLY_BASED_ON_GL -samplePNonref 0.95";


    @Test(enabled=true)
    public void testNoSampleSelectionFreqUniform() {

        WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString(sampleNone + freqUnif + "--variant " + testfile),
                1,
                Arrays.asList("d49baeb8000a426c172ce1d81eb37963")
        );

        executeTest("testNoSampleSelectionFreqUniform--" + testfile, spec);
    }

    @Test(enabled=true)
    public void testNoSampleSelectionFreqAF() {

        WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString(sampleNone + freqAF + "--variant " + testfile),
                1,
                Arrays.asList("0fb0d015d462c34514fc7e96beea5f56")
        );

        executeTest("testNoSampleSelectionFreqAF--" + testfile, spec);
    }

    @Test(enabled=true)
    public void testPolyGTFreqUniform() {

        WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString(sampleGT + freqUnif + "--variant " + testfile),
                1,
                Arrays.asList("0672854299d42ea8af906976a3849ae6")
        );

        executeTest("testPolyGTFreqUniform--" + testfile, spec);
    }

    @Test(enabled=true)
    public void testPolyGTFreqAF() {

        WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString(sampleGT + freqAF + "--variant " + testfile),
                1,
                Arrays.asList("5bdffda1a063d0bddd6b236854ec627d")
        );

        executeTest("testPolyGTFreqAF--" + testfile, spec);
    }

    @Test(enabled=true)
    public void testPolyGLFreqAF() {

        WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString(sampleGL + freqAF + "--variant " + testfile),
                1,
                Arrays.asList("35ef16aa41303606a4b94f7b88bd9aa8")
        );

        executeTest("testPolyGLFreqAF--" + testfile, spec);
    }

}